]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-sra.cc
Update Copyright year in ChangeLog files
[thirdparty/gcc.git] / gcc / ipa-sra.cc
CommitLineData
/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20
/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about functions
   and each call are generated.  Function summaries (often called call graph
   node summaries) contain mainly information about which parameters are
   potential transformation candidates and which bits of candidates are
   accessed.  We differentiate between accesses done as a part of a call
   statement (which might be not necessary if the callee is also transformed)
   and others (which are mandatory).  Call summaries (often called call graph
   edge summaries) contain information about which function formal parameters
   feed into which actual call arguments so that if two parameters are only
   used in a sum which is then passed to another function which then however
   does not use this parameter, all three parameters of the two functions can
   be eliminated.  Edge summaries also have flags whether the return value is
   used or if it is only returned in the caller too.  In LTO mode these
   summaries are then streamed to the object file in the compilation phase and
   streamed back in in the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological order
   in two sweeps, one in each direction.  First, from callees to callers for
   parameter removal and splitting.  Each strongly-connected component is
   processed iteratively until the situation in it stabilizes.  The pass from
   callers to callees is then carried out to remove unused return values in a
   very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this patch but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */

59#include "config.h"
60#include "system.h"
61#include "coretypes.h"
62#include "backend.h"
63#include "tree.h"
64#include "gimple.h"
65#include "predict.h"
66#include "tree-pass.h"
67#include "ssa.h"
68#include "cgraph.h"
69#include "gimple-pretty-print.h"
70#include "alias.h"
71#include "tree-eh.h"
72#include "gimple-iterator.h"
73#include "gimple-walk.h"
74#include "tree-dfa.h"
75#include "tree-sra.h"
a895e6d7 76#include "alloc-pool.h"
ff6686d2 77#include "symbol-summary.h"
ff6686d2
MJ
78#include "dbgcnt.h"
79#include "tree-inline.h"
80#include "ipa-utils.h"
81#include "builtins.h"
82#include "cfganal.h"
83#include "tree-streamer.h"
9707b593 84#include "internal-fn.h"
ae7a23a3 85#include "symtab-clones.h"
ecdf414b 86#include "attribs.h"
4834e936 87#include "ipa-prop.h"
ff6686d2 88
40e67ab8
JH
89static void ipa_sra_summarize_function (cgraph_node *);
90
ff6686d2
MJ
91/* Bits used to track size of an aggregate in bytes interprocedurally. */
92#define ISRA_ARG_SIZE_LIMIT_BITS 16
93#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94/* How many parameters can feed into a call actual argument and still be
c76b3bc5 95 tracked. */
ff6686d2
MJ
96#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
97
98/* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
102
103struct GTY(()) param_access
104{
105 /* Type that a potential replacement should have. This field only has
106 meaning in the summary building and transformation phases, when it is
dfea3d6f 107 reconstructed from the body. Must not be touched in IPA analysis
ff6686d2
MJ
108 stage. */
109 tree type;
110
111 /* Alias reference type to be used in MEM_REFs when adjusting caller
112 arguments. */
113 tree alias_ptr_type;
114
115 /* Values returned by get_ref_base_and_extent but converted to bytes and
116 stored as unsigned ints. */
117 unsigned unit_offset;
118 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
119
120 /* Set once we are sure that the access will really end up in a potentially
121 transformed function - initially not set for portions of formal parameters
122 that are only used as actual function arguments passed to callees. */
123 unsigned certain : 1;
c76b3bc5 124 /* Set if the access has reverse scalar storage order. */
ff6686d2
MJ
125 unsigned reverse : 1;
126};
127
dfea3d6f 128/* This structure has the same purpose as the one above and additionally it
ff6686d2
MJ
129 contains some fields that are only necessary in the summary generation
130 phase. */
131
132struct gensum_param_access
133{
134 /* Values returned by get_ref_base_and_extent. */
135 HOST_WIDE_INT offset;
136 HOST_WIDE_INT size;
137
138 /* if this access has any children (in terms of the definition above), this
139 points to the first one. */
140 struct gensum_param_access *first_child;
141 /* In intraprocedural SRA, pointer to the next sibling in the access tree as
142 described above. */
143 struct gensum_param_access *next_sibling;
144
145 /* Type that a potential replacement should have. This field only has
146 meaning in the summary building and transformation phases, when it is
dfea3d6f 147 reconstructed from the body. Must not be touched in IPA analysis
ff6686d2
MJ
148 stage. */
149 tree type;
dfea3d6f 150 /* Alias reference type to be used in MEM_REFs when adjusting caller
ff6686d2
MJ
151 arguments. */
152 tree alias_ptr_type;
153
10478270
MJ
154 /* Cumulative count of all loads. */
155 profile_count load_count;
ff6686d2
MJ
156 /* Have there been writes to or reads from this exact location except for as
157 arguments to a function call that can be tracked. */
158 bool nonarg;
159
c76b3bc5 160 /* Set if the access has reverse scalar storage order. */
ff6686d2
MJ
161 bool reverse;
162};
163
164/* Summary describing a parameter in the IPA stages. */
165
166struct GTY(()) isra_param_desc
167{
168 /* List of access representatives to the parameters, sorted according to
169 their offset. */
170 vec <param_access *, va_gc> *accesses;
171
172 /* Unit size limit of total size of all replacements. */
173 unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
174 /* Sum of unit sizes of all certain replacements. */
175 unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
f2cf4c61
MJ
176 /* Minimum offset that is known to be safe to dereference because of callers
177 pass pointers to DECLs of at least this size or because of dereferences in
178 callers. */
179 unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;
ff6686d2
MJ
180
181 /* A parameter that is used only in call arguments and can be removed if all
182 concerned actual arguments are removed. */
183 unsigned locally_unused : 1;
184 /* An aggregate that is a candidate for breaking up or complete removal. */
185 unsigned split_candidate : 1;
186 /* Is this a parameter passing stuff by reference? */
187 unsigned by_ref : 1;
e3a5cc32
MJ
188 /* Parameter hint set during IPA analysis when there is a caller which does
189 not construct the argument just to pass it to calls. Only meaningful for
190 by_ref parameters. */
191 unsigned not_specially_constructed : 1;
f2cf4c61
MJ
192 /* Only meaningful for by_ref parameters. If set, this parameter can only be
193 a split candidate if all callers pass pointers that are known to point to
194 a chunk of memory large enough to contain all accesses. */
195 unsigned conditionally_dereferenceable : 1;
196 /* Set when safe_size has been updated from at least one caller. */
197 unsigned safe_size_set : 1;
ff6686d2
MJ
198};
199
200/* Structure used when generating summaries that describes a parameter. */
201
202struct gensum_param_desc
203{
204 /* Roots of param_accesses. */
205 gensum_param_access *accesses;
206 /* Number of accesses in the access tree rooted in field accesses. */
207 unsigned access_count;
208
dfea3d6f 209 /* If the below is non-zero, this is the number of uses as actual arguments. */
ff6686d2
MJ
210 int call_uses;
211 /* Number of times this parameter has been directly passed to. */
212 unsigned ptr_pt_count;
213
214 /* Size limit of total size of all replacements. */
215 unsigned param_size_limit;
216 /* Sum of sizes of nonarg accesses. */
217 unsigned nonarg_acc_size;
218
219 /* A parameter that is used only in call arguments and can be removed if all
220 concerned actual arguments are removed. */
221 bool locally_unused;
222 /* An aggregate that is a candidate for breaking up or a pointer passing data
223 by reference that is a candidate for being converted to a set of
dfea3d6f 224 parameters passing those data by value. */
ff6686d2 225 bool split_candidate;
10478270
MJ
226 /* Is this a parameter passing stuff by reference (either a pointer or a
227 source language reference type)? */
ff6686d2 228 bool by_ref;
10478270
MJ
229 /* If this parameter passes stuff by reference, can it be safely dereferenced
230 without performing further checks (for example because it is a
231 REFERENCE_TYPE)? */
232 bool safe_ref;
f2cf4c61
MJ
233 /* Only meaningful for by_ref parameters. If set, this parameter can only be
234 a split candidate if all callers pass pointers that are known to point to
235 a chunk of memory large enough to contain all accesses. */
236 bool conditionally_dereferenceable;
ff6686d2
MJ
237
238 /* The number of this parameter as they are ordered in function decl. */
239 int param_number;
240 /* For parameters passing data by reference, this is parameter index to
241 compute indices to bb_dereferences. */
242 int deref_index;
243};
244
dfea3d6f 245/* Properly deallocate accesses of DESC. TODO: Since this data structure is
c76b3bc5
EB
246 allocated in GC memory, this is not necessary and we can consider removing
247 the function. */
ff6686d2
MJ
248
249static void
250free_param_decl_accesses (isra_param_desc *desc)
251{
252 unsigned len = vec_safe_length (desc->accesses);
253 for (unsigned i = 0; i < len; ++i)
254 ggc_free ((*desc->accesses)[i]);
255 vec_free (desc->accesses);
256}
257
258/* Class used to convey information about functions from the
dfea3d6f 259 intra-procedural analysis stage to inter-procedural one. */
ff6686d2
MJ
260
261class GTY((for_user)) isra_func_summary
262{
263public:
264 /* initialize the object. */
265
266 isra_func_summary ()
267 : m_parameters (NULL), m_candidate (false), m_returns_value (false),
268 m_return_ignored (false), m_queued (false)
269 {}
270
271 /* Destroy m_parameters. */
272
273 ~isra_func_summary ();
274
dfea3d6f 275 /* Mark the function as not a candidate for any IPA-SRA transformation.
ff6686d2
MJ
276 Return true if it was a candidate until now. */
277
278 bool zap ();
279
280 /* Vector of parameter descriptors corresponding to the function being
281 analyzed. */
282 vec<isra_param_desc, va_gc> *m_parameters;
283
284 /* Whether the node is even a candidate for any IPA-SRA transformation at
285 all. */
286 unsigned m_candidate : 1;
287
288 /* Whether the original function returns any value. */
289 unsigned m_returns_value : 1;
290
291 /* Set to true if all call statements do not actually use the returned
292 value. */
293
294 unsigned m_return_ignored : 1;
295
296 /* Whether the node is already queued in IPA SRA stack during processing of
297 call graphs SCCs. */
298
299 unsigned m_queued : 1;
300};
301
c76b3bc5
EB
302/* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
303 data structure is allocated in GC memory, this is not necessary and we can
304 consider removing the destructor. */
ff6686d2
MJ
305
306isra_func_summary::~isra_func_summary ()
307{
308 unsigned len = vec_safe_length (m_parameters);
309 for (unsigned i = 0; i < len; ++i)
310 free_param_decl_accesses (&(*m_parameters)[i]);
311 vec_free (m_parameters);
312}
313
dfea3d6f 314/* Mark the function as not a candidate for any IPA-SRA transformation. Return
ff6686d2
MJ
315 true if it was a candidate until now. */
316
317bool
318isra_func_summary::zap ()
319{
320 bool ret = m_candidate;
321 m_candidate = false;
322
c76b3bc5 323 /* TODO: see the destructor above. */
ff6686d2
MJ
324 unsigned len = vec_safe_length (m_parameters);
325 for (unsigned i = 0; i < len; ++i)
326 free_param_decl_accesses (&(*m_parameters)[i]);
327 vec_free (m_parameters);
328
329 return ret;
330}
331
332/* Structure to describe which formal parameters feed into a particular actual
c76b3bc5 333 argument. */
ff6686d2
MJ
334
335struct isra_param_flow
336{
337 /* Number of elements in array inputs that contain valid data. */
338 char length;
339 /* Indices of formal parameters that feed into the described actual argument.
340 If aggregate_pass_through or pointer_pass_through below are true, it must
341 contain exactly one element which is passed through from a formal
342 parameter if the given number. Otherwise, the array contains indices of
dfea3d6f 343 callee's formal parameters which are used to calculate value of this
ff6686d2
MJ
344 actual argument. */
345 unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];
346
347 /* Offset within the formal parameter. */
348 unsigned unit_offset;
f2cf4c61
MJ
349 /* When aggregate_pass_through is set, this is the size of the portion of an
350 aggregate formal parameter that is being passed. Otherwise, this is size
351 of pointed to memory that is known to be valid be dereferenced. */
ff6686d2
MJ
352 unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;
353
f2cf4c61 354 /* True when the value of this actual argument is a portion of a formal
ff6686d2
MJ
355 parameter. */
356 unsigned aggregate_pass_through : 1;
357 /* True when the value of this actual copy is a verbatim pass through of an
358 obtained pointer. */
359 unsigned pointer_pass_through : 1;
360 /* True when it is safe to copy access candidates here from the callee, which
361 would mean introducing dereferences into callers of the caller. */
362 unsigned safe_to_import_accesses : 1;
e3a5cc32
MJ
363 /* True when the passed value is an address of a structure that has been
364 constructed in the caller just to be passed by reference to functions
365 (i.e. is never read). */
366 unsigned constructed_for_calls : 1;
ff6686d2
MJ
367};
368
dfea3d6f 369/* Structure used to convey information about calls from the intra-procedural
ff6686d2
MJ
370 analysis stage to inter-procedural one. */
371
372class isra_call_summary
373{
374public:
375 isra_call_summary ()
376 : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
763121cc 377 m_bit_aligned_arg (false), m_before_any_store (false)
ff6686d2
MJ
378 {}
379
380 void init_inputs (unsigned arg_count);
381 void dump (FILE *f);
382
383 /* Information about what formal parameters of the caller are used to compute
dfea3d6f 384 individual actual arguments of this call. */
ff6686d2
MJ
385 auto_vec <isra_param_flow> m_arg_flow;
386
387 /* Set to true if the call statement does not have a LHS. */
388 unsigned m_return_ignored : 1;
389
390 /* Set to true if the LHS of call statement is only used to construct the
391 return value of the caller. */
392 unsigned m_return_returned : 1;
393
394 /* Set when any of the call arguments are not byte-aligned. */
395 unsigned m_bit_aligned_arg : 1;
763121cc
MJ
396
397 /* Set to true if the call happend before any (other) store to memory in the
398 caller. */
399 unsigned m_before_any_store : 1;
ff6686d2
MJ
400};
401
402/* Class to manage function summaries. */
403
404class GTY((user)) ipa_sra_function_summaries
405 : public function_summary <isra_func_summary *>
406{
407public:
408 ipa_sra_function_summaries (symbol_table *table, bool ggc):
409 function_summary<isra_func_summary *> (table, ggc) { }
410
f31ba116
DM
411 void duplicate (cgraph_node *, cgraph_node *,
412 isra_func_summary *old_sum,
413 isra_func_summary *new_sum) final override;
414 void insert (cgraph_node *, isra_func_summary *) final override;
ff6686d2
MJ
415};
416
417/* Hook that is called by summary when a node is duplicated. */
418
419void
420ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
421 isra_func_summary *old_sum,
422 isra_func_summary *new_sum)
423{
424 /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
425 useless. */
426 new_sum->m_candidate = old_sum->m_candidate;
427 new_sum->m_returns_value = old_sum->m_returns_value;
428 new_sum->m_return_ignored = old_sum->m_return_ignored;
429 gcc_assert (!old_sum->m_queued);
430 new_sum->m_queued = false;
431
432 unsigned param_count = vec_safe_length (old_sum->m_parameters);
433 if (!param_count)
434 return;
435 vec_safe_reserve_exact (new_sum->m_parameters, param_count);
436 new_sum->m_parameters->quick_grow_cleared (param_count);
437 for (unsigned i = 0; i < param_count; i++)
438 {
439 isra_param_desc *s = &(*old_sum->m_parameters)[i];
440 isra_param_desc *d = &(*new_sum->m_parameters)[i];
441
442 d->param_size_limit = s->param_size_limit;
443 d->size_reached = s->size_reached;
f2cf4c61 444 d->safe_size = s->safe_size;
ff6686d2
MJ
445 d->locally_unused = s->locally_unused;
446 d->split_candidate = s->split_candidate;
447 d->by_ref = s->by_ref;
e3a5cc32 448 d->not_specially_constructed = s->not_specially_constructed;
f2cf4c61
MJ
449 d->conditionally_dereferenceable = s->conditionally_dereferenceable;
450 d->safe_size_set = s->safe_size_set;
ff6686d2
MJ
451
452 unsigned acc_count = vec_safe_length (s->accesses);
453 vec_safe_reserve_exact (d->accesses, acc_count);
454 for (unsigned j = 0; j < acc_count; j++)
455 {
456 param_access *from = (*s->accesses)[j];
457 param_access *to = ggc_cleared_alloc<param_access> ();
458 to->type = from->type;
459 to->alias_ptr_type = from->alias_ptr_type;
460 to->unit_offset = from->unit_offset;
461 to->unit_size = from->unit_size;
462 to->certain = from->certain;
c76b3bc5 463 to->reverse = from->reverse;
ff6686d2
MJ
464 d->accesses->quick_push (to);
465 }
466 }
467}
468
469/* Pointer to the pass function summary holder. */
470
471static GTY(()) ipa_sra_function_summaries *func_sums;
472
40e67ab8
JH
473/* Hook that is called by summary when new node appears. */
474
475void
476ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
477{
478 if (opt_for_fn (node->decl, flag_ipa_sra))
479 {
480 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
481 ipa_sra_summarize_function (node);
482 pop_cfun ();
483 }
484 else
485 func_sums->remove (node);
486}
487
ff6686d2
MJ
488/* Class to manage call summaries. */
489
490class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
491{
492public:
493 ipa_sra_call_summaries (symbol_table *table):
494 call_summary<isra_call_summary *> (table) { }
495
496 /* Duplicate info when an edge is cloned. */
f31ba116
DM
497 void duplicate (cgraph_edge *, cgraph_edge *,
498 isra_call_summary *old_sum,
499 isra_call_summary *new_sum) final override;
ff6686d2
MJ
500};
501
502static ipa_sra_call_summaries *call_sums;
503
504
505/* Initialize m_arg_flow of a particular instance of isra_call_summary.
506 ARG_COUNT is the number of actual arguments passed. */
507
508void
509isra_call_summary::init_inputs (unsigned arg_count)
510{
511 if (arg_count == 0)
512 {
513 gcc_checking_assert (m_arg_flow.length () == 0);
514 return;
515 }
516 if (m_arg_flow.length () == 0)
517 {
518 m_arg_flow.reserve_exact (arg_count);
519 m_arg_flow.quick_grow_cleared (arg_count);
520 }
521 else
522 gcc_checking_assert (arg_count == m_arg_flow.length ());
523}
524
525/* Dump all information in call summary to F. */
526
527void
528isra_call_summary::dump (FILE *f)
529{
530 if (m_return_ignored)
531 fprintf (f, " return value ignored\n");
532 if (m_return_returned)
533 fprintf (f, " return value used only to compute caller return value\n");
763121cc
MJ
534 if (m_before_any_store)
535 fprintf (f, " happens before any store to memory\n");
ff6686d2
MJ
536 for (unsigned i = 0; i < m_arg_flow.length (); i++)
537 {
538 fprintf (f, " Parameter %u:\n", i);
539 isra_param_flow *ipf = &m_arg_flow[i];
540
541 if (ipf->length)
542 {
543 bool first = true;
544 fprintf (f, " Scalar param sources: ");
545 for (int j = 0; j < ipf->length; j++)
546 {
547 if (!first)
548 fprintf (f, ", ");
549 else
550 first = false;
551 fprintf (f, "%i", (int) ipf->inputs[j]);
552 }
553 fprintf (f, "\n");
554 }
555 if (ipf->aggregate_pass_through)
556 fprintf (f, " Aggregate pass through from the param given above, "
557 "unit offset: %u , unit size: %u\n",
558 ipf->unit_offset, ipf->unit_size);
f2cf4c61
MJ
559 else if (ipf->unit_size > 0)
560 fprintf (f, " Known dereferenceable size: %u\n", ipf->unit_size);
ff6686d2
MJ
561 if (ipf->pointer_pass_through)
562 fprintf (f, " Pointer pass through from the param given above, "
563 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
e3a5cc32
MJ
564 if (ipf->constructed_for_calls)
565 fprintf (f, " Variable constructed just to be passed to "
566 "calls.\n");
ff6686d2
MJ
567 }
568}
569
dfea3d6f 570/* Duplicate edge summary when an edge is cloned. */
ff6686d2
MJ
571
572void
573ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
574 isra_call_summary *old_sum,
575 isra_call_summary *new_sum)
576{
577 unsigned arg_count = old_sum->m_arg_flow.length ();
578 new_sum->init_inputs (arg_count);
579 for (unsigned i = 0; i < arg_count; i++)
580 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
581
582 new_sum->m_return_ignored = old_sum->m_return_ignored;
583 new_sum->m_return_returned = old_sum->m_return_returned;
584 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
763121cc 585 new_sum->m_before_any_store = old_sum->m_before_any_store;
ff6686d2
MJ
586}
587
588
589/* With all GTY stuff done, we can move to anonymous namespace. */
590namespace {
591/* Quick mapping from a decl to its param descriptor. */
592
593hash_map<tree, gensum_param_desc *> *decl2desc;
594
/* All local DECLs that are ever loaded from, and all those whose address is
   assigned to a variable.  */
e3a5cc32
MJ
597
598hash_set <tree> *loaded_decls;
599
c76b3bc5 600/* Countdown of allowed Alias Analysis steps during summary building. */
ff6686d2
MJ
601
602int aa_walking_limit;
603
604/* This is a table in which for each basic block and parameter there is a
605 distance (offset + size) in that parameter which is dereferenced and
606 accessed in that BB. */
607HOST_WIDE_INT *bb_dereferences = NULL;
608/* How many by-reference parameters there are in the current function. */
10478270 609int unsafe_by_ref_count;
ff6686d2
MJ
610
611/* Bitmap of BBs that can cause the function to "stop" progressing by
612 returning, throwing externally, looping infinitely or calling a function
613 which might abort etc.. */
614bitmap final_bbs;
615
616/* Obstack to allocate various small structures required only when generating
617 summary for a function. */
618struct obstack gensum_obstack;
619
620/* Return false the function is apparently unsuitable for IPA-SRA based on it's
621 attributes, return true otherwise. NODE is the cgraph node of the current
622 function. */
623
624static bool
625ipa_sra_preliminary_function_checks (cgraph_node *node)
626{
87f94429 627 if (!node->can_change_signature)
ff6686d2
MJ
628 {
629 if (dump_file)
630 fprintf (dump_file, "Function cannot change signature.\n");
631 return false;
632 }
633
634 if (!tree_versionable_function_p (node->decl))
635 {
636 if (dump_file)
637 fprintf (dump_file, "Function is not versionable.\n");
638 return false;
639 }
640
641 if (!opt_for_fn (node->decl, optimize)
642 || !opt_for_fn (node->decl, flag_ipa_sra))
643 {
644 if (dump_file)
645 fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
646 "function.\n");
647 return false;
648 }
649
650 if (DECL_VIRTUAL_P (node->decl))
651 {
652 if (dump_file)
653 fprintf (dump_file, "Function is a virtual method.\n");
654 return false;
655 }
656
657 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
658 if (fun->stdarg)
659 {
660 if (dump_file)
661 fprintf (dump_file, "Function uses stdarg. \n");
662 return false;
663 }
664
ff6686d2
MJ
665 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
666 {
667 if (dump_file)
668 fprintf (dump_file, "Always inline function will be inlined "
669 "anyway. \n");
670 return false;
671 }
672
673 return true;
674}
675
676/* Print access tree starting at ACCESS to F. */
677
678static void
679dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
680{
681 fprintf (f, " ");
682 for (unsigned i = 0; i < indent; i++)
683 fprintf (f, " ");
684 fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
685 access->offset);
686 fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
687 fprintf (f, ", type: ");
688 print_generic_expr (f, access->type);
689 fprintf (f, ", alias_ptr_type: ");
690 print_generic_expr (f, access->alias_ptr_type);
10478270
MJ
691 fprintf (f, ", load_count: ");
692 access->load_count.dump (f);
ff6686d2
MJ
693 fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
694 for (gensum_param_access *ch = access->first_child;
695 ch;
696 ch = ch->next_sibling)
697 dump_gensum_access (f, ch, indent + 2);
698}
699
700
701/* Print access tree starting at ACCESS to F. */
702
703static void
704dump_isra_access (FILE *f, param_access *access)
705{
706 fprintf (f, " * Access to unit offset: %u", access->unit_offset);
707 fprintf (f, ", unit size: %u", access->unit_size);
708 fprintf (f, ", type: ");
709 print_generic_expr (f, access->type);
710 fprintf (f, ", alias_ptr_type: ");
711 print_generic_expr (f, access->alias_ptr_type);
712 if (access->certain)
713 fprintf (f, ", certain");
714 else
c76b3bc5 715 fprintf (f, ", not certain");
ff6686d2
MJ
716 if (access->reverse)
717 fprintf (f, ", reverse");
718 fprintf (f, "\n");
719}
720
721/* Dump access tree starting at ACCESS to stderr. */
722
723DEBUG_FUNCTION void
724debug_isra_access (param_access *access)
725{
726 dump_isra_access (stderr, access);
727}
728
729/* Dump DESC to F. */
730
731static void
732dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
733{
734 if (desc->locally_unused)
735 fprintf (f, " unused with %i call_uses\n", desc->call_uses);
736 if (!desc->split_candidate)
737 {
738 fprintf (f, " not a candidate\n");
739 return;
740 }
741 if (desc->by_ref)
f2cf4c61
MJ
742 fprintf (f, " %s%s by_ref with %u pass throughs\n",
743 desc->safe_ref ? "safe" : "unsafe",
744 desc->conditionally_dereferenceable
745 ? " conditionally_dereferenceable" : " ok",
746 desc->ptr_pt_count);
ff6686d2
MJ
747
748 for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
749 dump_gensum_access (f, acc, 2);
750}
751
dfea3d6f 752/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
ff6686d2
MJ
753 F. */
754
755static void
756dump_gensum_param_descriptors (FILE *f, tree fndecl,
757 vec<gensum_param_desc> *param_descriptions)
758{
759 tree parm = DECL_ARGUMENTS (fndecl);
760 for (unsigned i = 0;
761 i < param_descriptions->length ();
762 ++i, parm = DECL_CHAIN (parm))
763 {
764 fprintf (f, " Descriptor for parameter %i ", i);
765 print_generic_expr (f, parm, TDF_UID);
766 fprintf (f, "\n");
767 dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
768 }
769}
770
771
e3a5cc32
MJ
772/* Dump DESC to F. If HINTS is true, also dump IPA-analysis computed
773 hints. */
ff6686d2
MJ
774
775static void
e3a5cc32 776dump_isra_param_descriptor (FILE *f, isra_param_desc *desc, bool hints)
ff6686d2
MJ
777{
778 if (desc->locally_unused)
779 {
780 fprintf (f, " (locally) unused\n");
781 }
782 if (!desc->split_candidate)
783 {
f2cf4c61
MJ
784 fprintf (f, " not a candidate for splitting");
785 if (hints && desc->by_ref && desc->safe_size_set)
786 fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
787 fprintf (f, "\n");
ff6686d2
MJ
788 return;
789 }
e3a5cc32 790 fprintf (f, " param_size_limit: %u, size_reached: %u%s",
ff6686d2
MJ
791 desc->param_size_limit, desc->size_reached,
792 desc->by_ref ? ", by_ref" : "");
f2cf4c61
MJ
793 if (desc->by_ref && desc->conditionally_dereferenceable)
794 fprintf (f, ", conditionally_dereferenceable");
e3a5cc32
MJ
795 if (hints)
796 {
797 if (desc->by_ref && !desc->not_specially_constructed)
798 fprintf (f, ", args_specially_constructed");
f2cf4c61
MJ
799 if (desc->by_ref && desc->safe_size_set)
800 fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
e3a5cc32
MJ
801 }
802 fprintf (f, "\n");
ff6686d2
MJ
803
804 for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
805 {
806 param_access *access = (*desc->accesses)[i];
807 dump_isra_access (f, access);
808 }
809}
810
e3a5cc32
MJ
811/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
812 If HINTS is true, also dump IPA-analysis computed hints. */
ff6686d2
MJ
813
814static void
e3a5cc32
MJ
815dump_isra_param_descriptors (FILE *f, tree fndecl, isra_func_summary *ifs,
816 bool hints)
ff6686d2
MJ
817{
818 tree parm = DECL_ARGUMENTS (fndecl);
819 if (!ifs->m_parameters)
820 {
821 fprintf (f, " parameter descriptors not available\n");
822 return;
823 }
824
825 for (unsigned i = 0;
826 i < ifs->m_parameters->length ();
827 ++i, parm = DECL_CHAIN (parm))
828 {
829 fprintf (f, " Descriptor for parameter %i ", i);
830 print_generic_expr (f, parm, TDF_UID);
831 fprintf (f, "\n");
e3a5cc32 832 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
ff6686d2
MJ
833 }
834}
835
836/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
837 function fails return false, otherwise return true. SRC must fit into an
838 unsigned char. Used for purposes of transitive unused parameter
839 removal. */
840
841static bool
842add_src_to_param_flow (isra_param_flow *param_flow, int src)
843{
844 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
845 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
846 return false;
847
848 param_flow->inputs[(int) param_flow->length] = src;
849 param_flow->length++;
850 return true;
851}
852
853/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
854 it is the only input. Used for purposes of transitive parameter
855 splitting. */
856
857static void
858set_single_param_flow_source (isra_param_flow *param_flow, int src)
859{
860 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
861 if (param_flow->length == 0)
862 {
863 param_flow->inputs[0] = src;
864 param_flow->length = 1;
865 }
866 else if (param_flow->length == 1)
867 gcc_assert (param_flow->inputs[0] == src);
868 else
869 gcc_unreachable ();
870}
871
872/* Assert that there is only a single value in PARAM_FLOW's inputs and return
873 it. */
874
875static unsigned
876get_single_param_flow_source (const isra_param_flow *param_flow)
877{
878 gcc_assert (param_flow->length == 1);
879 return param_flow->inputs[0];
880}
881
882/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
1980ffec
MJ
883 in FUN represented with NODE and return a negative number if any of them is
884 used for something else than either an actual call argument, simple
885 arithmetic operation or debug statement. If there are no such uses, return
886 the number of actual arguments that this parameter eventually feeds to (or
887 zero if there is none). For any such parameter, mark PARM_NUM as one of its
888 sources. ANALYZED is a bitmap that tracks which SSA names we have already
889 started investigating. */
ff6686d2
MJ
890
891static int
1980ffec
MJ
892isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
893 int parm_num, bitmap analyzed)
ff6686d2
MJ
894{
895 int res = 0;
896 imm_use_iterator imm_iter;
897 gimple *stmt;
898
899 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
900 {
901 if (is_gimple_debug (stmt))
902 continue;
903
904 /* TODO: We could handle at least const builtin functions like arithmetic
905 operations below. */
906 if (is_gimple_call (stmt))
907 {
908 int all_uses = 0;
909 use_operand_p use_p;
910 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
911 all_uses++;
912
913 gcall *call = as_a <gcall *> (stmt);
914 unsigned arg_count;
915 if (gimple_call_internal_p (call)
916 || (arg_count = gimple_call_num_args (call)) == 0)
917 {
918 res = -1;
640296c3 919 break;
ff6686d2
MJ
920 }
921
922 cgraph_edge *cs = node->get_edge (stmt);
923 gcc_checking_assert (cs);
924 isra_call_summary *csum = call_sums->get_create (cs);
925 csum->init_inputs (arg_count);
926
927 int simple_uses = 0;
928 for (unsigned i = 0; i < arg_count; i++)
929 if (gimple_call_arg (call, i) == name)
930 {
931 if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
932 {
933 simple_uses = -1;
934 break;
935 }
936 simple_uses++;
937 }
938
939 if (simple_uses < 0
940 || all_uses != simple_uses)
941 {
942 res = -1;
640296c3 943 break;
ff6686d2
MJ
944 }
945 res += all_uses;
946 }
1980ffec
MJ
947 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
948 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
949 || gimple_code (stmt) == GIMPLE_PHI))
ff6686d2
MJ
950 {
951 tree lhs;
952 if (gimple_code (stmt) == GIMPLE_PHI)
953 lhs = gimple_phi_result (stmt);
954 else
955 lhs = gimple_assign_lhs (stmt);
956
957 if (TREE_CODE (lhs) != SSA_NAME)
958 {
959 res = -1;
640296c3 960 break;
ff6686d2
MJ
961 }
962 gcc_assert (!gimple_vdef (stmt));
963 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
964 {
1980ffec 965 int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
ff6686d2
MJ
966 analyzed);
967 if (tmp < 0)
968 {
969 res = tmp;
640296c3 970 break;
ff6686d2
MJ
971 }
972 res += tmp;
973 }
974 }
975 else
976 {
977 res = -1;
640296c3 978 break;
ff6686d2
MJ
979 }
980 }
981 return res;
982}
983
984/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
985 also described by NODE) and simple arithmetic calculations involving PARM
986 and return false if any of them is used for something else than either an
987 actual call argument, simple arithmetic operation or debug statement. If
988 there are no such uses, return true and store the number of actual arguments
989 that this parameter eventually feeds to (or zero if there is none) to
990 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
991 sources.
992
993 This function is similar to ptr_parm_has_nonarg_uses but its results are
994 meant for unused parameter removal, as opposed to splitting of parameters
c76b3bc5 995 passed by reference or converting them to passed by value. */
ff6686d2
MJ
996
997static bool
998isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
999 int parm_num, int *call_uses_p)
1000{
1001 gcc_checking_assert (is_gimple_reg (parm));
1002
1003 tree name = ssa_default_def (fun, parm);
1004 if (!name || has_zero_uses (name))
1005 {
1006 *call_uses_p = 0;
1007 return false;
1008 }
1009
1010 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1011 if (parm_num > UCHAR_MAX)
1012 return true;
1013
1014 bitmap analyzed = BITMAP_ALLOC (NULL);
1980ffec
MJ
1015 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
1016 analyzed);
ff6686d2
MJ
1017 BITMAP_FREE (analyzed);
1018 if (call_uses < 0)
1019 return true;
1020 *call_uses_p = call_uses;
1021 return false;
1022}
1023
1024/* Scan immediate uses of a default definition SSA name of a parameter PARM and
1025 examine whether there are any nonarg uses that are not actual arguments or
1026 otherwise infeasible uses. If so, return true, otherwise return false.
1027 Create pass-through IPA flow records for any direct uses as argument calls
1028 and if returning false, store their number into *PT_COUNT_P. NODE and FUN
1029 must represent the function that is currently analyzed, PARM_NUM must be the
1030 index of the analyzed parameter.
1031
1032 This function is similar to isra_track_scalar_param_local_uses but its
1033 results are meant for splitting of parameters passed by reference or turning
1034 them into bits passed by value, as opposed to generic unused parameter
c76b3bc5 1035 removal. */
ff6686d2
MJ
1036
1037static bool
1038ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
1039 int parm_num, unsigned *pt_count_p)
1040{
1041 imm_use_iterator ui;
1042 gimple *stmt;
1043 tree name = ssa_default_def (fun, parm);
1044 bool ret = false;
1045 unsigned pt_count = 0;
1046
1047 if (!name || has_zero_uses (name))
1048 return false;
1049
1050 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1051 if (parm_num > UCHAR_MAX)
1052 return true;
1053
1054 FOR_EACH_IMM_USE_STMT (stmt, ui, name)
1055 {
1056 unsigned uses_ok = 0;
1057 use_operand_p use_p;
1058
1059 if (is_gimple_debug (stmt))
1060 continue;
1061
1062 if (gimple_assign_single_p (stmt))
1063 {
1064 tree rhs = gimple_assign_rhs1 (stmt);
23cd18c6
RB
1065 if (!TREE_THIS_VOLATILE (rhs))
1066 {
1067 while (handled_component_p (rhs))
1068 rhs = TREE_OPERAND (rhs, 0);
1069 if (TREE_CODE (rhs) == MEM_REF
1070 && TREE_OPERAND (rhs, 0) == name
1071 && integer_zerop (TREE_OPERAND (rhs, 1))
1072 && types_compatible_p (TREE_TYPE (rhs),
1073 TREE_TYPE (TREE_TYPE (name))))
1074 uses_ok++;
1075 }
ff6686d2
MJ
1076 }
1077 else if (is_gimple_call (stmt))
1078 {
1079 gcall *call = as_a <gcall *> (stmt);
1080 unsigned arg_count;
1081 if (gimple_call_internal_p (call)
1082 || (arg_count = gimple_call_num_args (call)) == 0)
1083 {
1084 ret = true;
640296c3 1085 break;
ff6686d2
MJ
1086 }
1087
1088 cgraph_edge *cs = node->get_edge (stmt);
1089 gcc_checking_assert (cs);
1090 isra_call_summary *csum = call_sums->get_create (cs);
1091 csum->init_inputs (arg_count);
1092
1093 for (unsigned i = 0; i < arg_count; ++i)
1094 {
1095 tree arg = gimple_call_arg (stmt, i);
1096
1097 if (arg == name)
1098 {
1099 /* TODO: Allow &MEM_REF[name + offset] here,
1100 ipa_param_body_adjustments::modify_call_stmt has to be
1101 adjusted too. */
1102 csum->m_arg_flow[i].pointer_pass_through = true;
1103 set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
1104 pt_count++;
1105 uses_ok++;
1106 continue;
1107 }
1108
23cd18c6
RB
1109 if (!TREE_THIS_VOLATILE (arg))
1110 {
1111 while (handled_component_p (arg))
1112 arg = TREE_OPERAND (arg, 0);
1113 if (TREE_CODE (arg) == MEM_REF
1114 && TREE_OPERAND (arg, 0) == name
1115 && integer_zerop (TREE_OPERAND (arg, 1))
1116 && types_compatible_p (TREE_TYPE (arg),
1117 TREE_TYPE (TREE_TYPE (name))))
1118 uses_ok++;
1119 }
ff6686d2
MJ
1120 }
1121 }
1122
1123 /* If the number of valid uses does not match the number of
1124 uses in this stmt there is an unhandled use. */
1125 unsigned all_uses = 0;
1126 FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
1127 all_uses++;
1128
1129 gcc_checking_assert (uses_ok <= all_uses);
1130 if (uses_ok != all_uses)
1131 {
1132 ret = true;
640296c3 1133 break;
ff6686d2
MJ
1134 }
1135 }
1136
1137 *pt_count_p = pt_count;
1138 return ret;
1139}
1140
1141/* Initialize vector of parameter descriptors of NODE. Return true if there
1142 are any candidates for splitting or unused aggregate parameter removal (the
1143 function may return false if there are candidates for removal of register
e3a5cc32 1144 parameters). */
ff6686d2
MJ
1145
1146static bool
1147create_parameter_descriptors (cgraph_node *node,
1148 vec<gensum_param_desc> *param_descriptions)
1149{
1150 function *fun = DECL_STRUCT_FUNCTION (node->decl);
1151 bool ret = false;
1152
1153 int num = 0;
1154 for (tree parm = DECL_ARGUMENTS (node->decl);
1155 parm;
1156 parm = DECL_CHAIN (parm), num++)
1157 {
1158 const char *msg;
1159 gensum_param_desc *desc = &(*param_descriptions)[num];
1160 /* param_descriptions vector is grown cleared in the caller. */
1161 desc->param_number = num;
1162 decl2desc->put (parm, desc);
1163
1164 if (dump_file && (dump_flags & TDF_DETAILS))
1165 print_generic_expr (dump_file, parm, TDF_UID);
1166
1167 int scalar_call_uses;
1168 tree type = TREE_TYPE (parm);
1169 if (TREE_THIS_VOLATILE (parm))
1170 {
1171 if (dump_file && (dump_flags & TDF_DETAILS))
1172 fprintf (dump_file, " not a candidate, is volatile\n");
1173 continue;
1174 }
1175 if (!is_gimple_reg_type (type) && is_va_list_type (type))
1176 {
1177 if (dump_file && (dump_flags & TDF_DETAILS))
1178 fprintf (dump_file, " not a candidate, is a va_list type\n");
1179 continue;
1180 }
1181 if (TREE_ADDRESSABLE (parm))
1182 {
1183 if (dump_file && (dump_flags & TDF_DETAILS))
1184 fprintf (dump_file, " not a candidate, is addressable\n");
1185 continue;
1186 }
1187 if (TREE_ADDRESSABLE (type))
1188 {
1189 if (dump_file && (dump_flags & TDF_DETAILS))
1190 fprintf (dump_file, " not a candidate, type cannot be split\n");
1191 continue;
1192 }
1193
1194 if (is_gimple_reg (parm)
1195 && !isra_track_scalar_param_local_uses (fun, node, parm, num,
1196 &scalar_call_uses))
1197 {
1198 if (dump_file && (dump_flags & TDF_DETAILS))
1199 fprintf (dump_file, " is a scalar with only %i call uses\n",
1200 scalar_call_uses);
1201
1202 desc->locally_unused = true;
1203 desc->call_uses = scalar_call_uses;
1204 }
1205
1206 if (POINTER_TYPE_P (type))
1207 {
10478270 1208 desc->by_ref = true;
0e949530
MJ
1209 if (TREE_CODE (type) == REFERENCE_TYPE
1210 || (num == 0
1211 && TREE_CODE (TREE_TYPE (node->decl)) == METHOD_TYPE))
1212 desc->safe_ref = true;
1213 else
1214 desc->safe_ref = false;
ff6686d2
MJ
1215 type = TREE_TYPE (type);
1216
1217 if (TREE_CODE (type) == FUNCTION_TYPE
1218 || TREE_CODE (type) == METHOD_TYPE)
1219 {
1220 if (dump_file && (dump_flags & TDF_DETAILS))
1221 fprintf (dump_file, " not a candidate, reference to "
1222 "a function\n");
1223 continue;
1224 }
1225 if (TYPE_VOLATILE (type))
1226 {
1227 if (dump_file && (dump_flags & TDF_DETAILS))
1228 fprintf (dump_file, " not a candidate, reference to "
1229 "a volatile type\n");
1230 continue;
1231 }
1232 if (TREE_CODE (type) == ARRAY_TYPE
1233 && TYPE_NONALIASED_COMPONENT (type))
1234 {
1235 if (dump_file && (dump_flags & TDF_DETAILS))
f8cb8bcd
JJ
1236 fprintf (dump_file, " not a candidate, reference to "
1237 "a nonaliased component array\n");
ff6686d2
MJ
1238 continue;
1239 }
1240 if (!is_gimple_reg (parm))
1241 {
1242 if (dump_file && (dump_flags & TDF_DETAILS))
1243 fprintf (dump_file, " not a candidate, a reference which is "
1244 "not a gimple register (probably addressable)\n");
1245 continue;
1246 }
1247 if (is_va_list_type (type))
1248 {
1249 if (dump_file && (dump_flags & TDF_DETAILS))
1250 fprintf (dump_file, " not a candidate, reference to "
1251 "a va list\n");
1252 continue;
1253 }
1254 if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
1255 &desc->ptr_pt_count))
1256 {
1257 if (dump_file && (dump_flags & TDF_DETAILS))
1258 fprintf (dump_file, " not a candidate, reference has "
1259 "nonarg uses\n");
1260 continue;
1261 }
ff6686d2
MJ
1262 }
1263 else if (!AGGREGATE_TYPE_P (type))
1264 {
1265 /* This is in an else branch because scalars passed by reference are
1266 still candidates to be passed by value. */
1267 if (dump_file && (dump_flags & TDF_DETAILS))
1268 fprintf (dump_file, " not a candidate, not an aggregate\n");
1269 continue;
1270 }
1271
1272 if (!COMPLETE_TYPE_P (type))
1273 {
1274 if (dump_file && (dump_flags & TDF_DETAILS))
1275 fprintf (dump_file, " not a candidate, not a complete type\n");
1276 continue;
1277 }
1278 if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
1279 {
1280 if (dump_file && (dump_flags & TDF_DETAILS))
1281 fprintf (dump_file, " not a candidate, size not representable\n");
1282 continue;
1283 }
1284 unsigned HOST_WIDE_INT type_size
1285 = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
1286 if (type_size == 0
1287 || type_size >= ISRA_ARG_SIZE_LIMIT)
1288 {
1289 if (dump_file && (dump_flags & TDF_DETAILS))
1290 fprintf (dump_file, " not a candidate, has zero or huge size\n");
1291 continue;
1292 }
1293 if (type_internals_preclude_sra_p (type, &msg))
1294 {
1295 if (dump_file && (dump_flags & TDF_DETAILS))
1296 fprintf (dump_file, " not a candidate, %s\n", msg);
1297 continue;
1298 }
1299
1300 if (dump_file && (dump_flags & TDF_DETAILS))
1301 fprintf (dump_file, " is a candidate\n");
1302
1303 ret = true;
1304 desc->split_candidate = true;
10478270
MJ
1305 if (desc->by_ref && !desc->safe_ref)
1306 desc->deref_index = unsafe_by_ref_count++;
ff6686d2
MJ
1307 }
1308 return ret;
1309}
1310
1311/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1312 found, which happens if DECL is for a static chain. */
1313
1314static gensum_param_desc *
1315get_gensum_param_desc (tree decl)
1316{
e3a5cc32
MJ
1317 if (!decl2desc)
1318 return NULL;
ff6686d2
MJ
1319 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1320 gensum_param_desc **slot = decl2desc->get (decl);
1321 if (!slot)
1322 /* This can happen for static chains which we cannot handle so far. */
1323 return NULL;
1324 gcc_checking_assert (*slot);
1325 return *slot;
1326}
1327
1328
1329/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1330 write REASON to the dump file if there is one. */
1331
1332static void
1333disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1334{
1335 if (!desc->split_candidate)
1336 return;
1337
1338 if (dump_file && (dump_flags & TDF_DETAILS))
1339 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1340 desc->param_number, reason);
1341
1342 desc->split_candidate = false;
1343}
1344
1345/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1346 there is one. */
1347
1348static void
1349disqualify_split_candidate (tree decl, const char *reason)
1350{
1351 gensum_param_desc *desc = get_gensum_param_desc (decl);
1352 if (desc)
1353 disqualify_split_candidate (desc, reason);
1354}
1355
1356/* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1357 first, check that there are not too many of them already. If so, do not
1358 allocate anything and return NULL. */
1359
1360static gensum_param_access *
1361allocate_access (gensum_param_desc *desc,
1362 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1363{
1364 if (desc->access_count
028d4092 1365 == (unsigned) param_ipa_sra_max_replacements)
ff6686d2
MJ
1366 {
1367 disqualify_split_candidate (desc, "Too many replacement candidates");
1368 return NULL;
1369 }
1370
1371 gensum_param_access *access
1372 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1373 sizeof (gensum_param_access));
1374 memset (access, 0, sizeof (*access));
1375 access->offset = offset;
1376 access->size = size;
10478270 1377 access->load_count = profile_count::zero ();
ff6686d2
MJ
1378 return access;
1379}
1380
1381/* In what context scan_expr_access has been called, whether it deals with a
9707b593
MJ
1382 load, a function argument, or a store. Please note that in rare
1383 circumstances when it is not clear if the access is a load or store,
1384 ISRA_CTX_STORE is used too. */
ff6686d2
MJ
1385
/* ISRA_CTX_LOAD - value is read; ISRA_CTX_ARG - expression is passed as an
   actual call argument; ISRA_CTX_STORE - value is written (or unclear).  */
1386enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1387
1388/* Return an access describing memory access to the variable described by DESC
1389 at OFFSET with SIZE in context CTX, starting at pointer to the linked list
dfea3d6f 1390 at a certain tree level FIRST. Attempt to create it and put into the
ff6686d2
MJ
1391 appropriate place in the access tree if does not exist, but fail and return
1392 NULL if there are already too many accesses, if it would create a partially
dfea3d6f 1393 overlapping access or if an access would end up within a pre-existing
ff6686d2
MJ
1394 non-call access. */
1395
1396static gensum_param_access *
1397get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
1398 HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
1399{
1400 gensum_param_access *access = *first, **ptr = first;
1401
1402 if (!access)
1403 {
1404 /* No pre-existing access at this level, just create it. */
1405 gensum_param_access *a = allocate_access (desc, offset, size);
1406 if (!a)
1407 return NULL;
1408 *first = a;
1409 return *first;
1410 }
1411
1412 if (access->offset >= offset + size)
1413 {
1414 /* We want to squeeze it in front of the very first access, just do
1415 it. */
1416 gensum_param_access *r = allocate_access (desc, offset, size);
1417 if (!r)
1418 return NULL;
1419 r->next_sibling = access;
1420 *first = r;
1421 return r;
1422 }
1423
1424 /* Skip all accesses that have to come before us until the next sibling is
1425 already too far. */
1426 while (offset >= access->offset + access->size
1427 && access->next_sibling
1428 && access->next_sibling->offset < offset + size)
1429 {
1430 ptr = &access->next_sibling;
1431 access = access->next_sibling;
1432 }
1433
1434 /* At this point we know we do not belong before access. */
1435 gcc_assert (access->offset < offset + size);
1436
1437 if (access->offset == offset && access->size == size)
1438 /* We found what we were looking for. */
1439 return access;
1440
1441 if (access->offset <= offset
1442 && access->offset + access->size >= offset + size)
1443 {
1444 /* We fit into access which is larger than us. We need to find/create
1445 something below access. But we only allow nesting in call
1446 arguments. */
1447 if (access->nonarg)
1448 return NULL;
1449
1450 return get_access_1 (desc, &access->first_child, offset, size, ctx);
1451 }
1452
1453 if (offset <= access->offset
1454 && offset + size >= access->offset + access->size)
1455 /* We are actually bigger than access, which fully fits into us, take its
1456 place and make all accesses fitting into it its children. */
1457 {
1458 /* But first, we only allow nesting in call arguments so check if that is
1459 what we are trying to represent. */
1460 if (ctx != ISRA_CTX_ARG)
1461 return NULL;
1462
1463 gensum_param_access *r = allocate_access (desc, offset, size);
1464 if (!r)
1465 return NULL;
1466 r->first_child = access;
1467
1468 while (access->next_sibling
1469 && access->next_sibling->offset < offset + size)
1470 access = access->next_sibling;
1471 if (access->offset + access->size > offset + size)
1472 {
1473 /* This must be a different access, which are sorted, so the
1474 following must be true and this signals a partial overlap. */
1475 gcc_assert (access->offset > offset);
1476 return NULL;
1477 }
1478
1479 r->next_sibling = access->next_sibling;
1480 access->next_sibling = NULL;
1481 *ptr = r;
1482 return r;
1483 }
1484
1485 if (offset >= access->offset + access->size)
1486 {
1487 /* We belong after access. */
1488 gensum_param_access *r = allocate_access (desc, offset, size);
1489 if (!r)
1490 return NULL;
1491 r->next_sibling = access->next_sibling;
1492 access->next_sibling = r;
1493 return r;
1494 }
1495
1496 if (offset < access->offset)
1497 {
1498 /* We know the following, otherwise we would have created a
1499 super-access. */
1500 gcc_checking_assert (offset + size < access->offset + access->size);
1501 return NULL;
1502 }
1503
1504 if (offset + size > access->offset + access->size)
1505 {
1506 /* Likewise. */
1507 gcc_checking_assert (offset > access->offset);
1508 return NULL;
1509 }
1510
1511 gcc_unreachable ();
1512}
1513
1514/* Return an access describing memory access to the variable described by DESC
1515 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1516 to create if it does not exist, but fail and return NULL if there are
1517 already too many accesses, if it would create a partially overlapping access
dfea3d6f 1518 or if an access would end up in a non-call access. */
ff6686d2
MJ
1519
1520static gensum_param_access *
1521get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1522 isra_scan_context ctx)
1523{
1524 gcc_checking_assert (desc->split_candidate);
1525
1526 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1527 size, ctx);
1528 if (!access)
1529 {
1530 disqualify_split_candidate (desc,
1531 "Bad access overlap or too many accesses");
1532 return NULL;
1533 }
1534
1535 switch (ctx)
1536 {
1537 case ISRA_CTX_STORE:
1538 gcc_assert (!desc->by_ref);
1539 /* Fall-through */
1540 case ISRA_CTX_LOAD:
1541 access->nonarg = true;
1542 break;
1543 case ISRA_CTX_ARG:
1544 break;
1545 }
1546
1547 return access;
1548}
1549
1550/* Verify that parameter access tree starting with ACCESS is in good shape.
dfea3d6f 1551 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
ff6686d2
MJ
1552 ACCESS or zero if there is none. */
1553
1554static bool
1555verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1556 HOST_WIDE_INT parent_size)
1557{
1558 while (access)
1559 {
241a2c49 1560 gcc_assert (access->offset >= 0 && access->size >= 0);
ff6686d2
MJ
1561
1562 if (parent_size != 0)
1563 {
1564 if (access->offset < parent_offset)
1565 {
1566 error ("Access offset before parent offset");
1567 return true;
1568 }
1569 if (access->size >= parent_size)
1570 {
1571 error ("Access size greater or equal to its parent size");
1572 return true;
1573 }
1574 if (access->offset + access->size > parent_offset + parent_size)
1575 {
1576 error ("Access terminates outside of its parent");
1577 return true;
1578 }
1579 }
1580
1581 if (verify_access_tree_1 (access->first_child, access->offset,
1582 access->size))
1583 return true;
1584
1585 if (access->next_sibling
1586 && (access->next_sibling->offset < access->offset + access->size))
1587 {
1588 error ("Access overlaps with its sibling");
1589 return true;
1590 }
1591
1592 access = access->next_sibling;
1593 }
1594 return false;
1595}
1596
1597/* Verify that parameter access tree starting with ACCESS is in good shape,
1598 halt compilation and dump the tree to stderr if not. */
1599
1600DEBUG_FUNCTION void
1601isra_verify_access_tree (gensum_param_access *access)
1602{
1603 if (verify_access_tree_1 (access, 0, 0))
1604 {
1605 for (; access; access = access->next_sibling)
1606 dump_gensum_access (stderr, access, 2);
1607 internal_error ("IPA-SRA access verification failed");
1608 }
1609}
1610
1611
1612/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1613 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1614
1615static bool
1616asm_visit_addr (gimple *, tree op, tree, void *)
1617{
1618 op = get_base_address (op);
1619 if (op
1620 && TREE_CODE (op) == PARM_DECL)
1621 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1622
1623 return false;
1624}
1625
1626/* Mark a dereference of parameter identified by DESC of distance DIST in a
1627 basic block BB, unless the BB has already been marked as a potentially
1628 final. */
1629
1630static void
1631mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
10478270 1632 basic_block bb)
ff6686d2
MJ
1633{
1634 gcc_assert (desc->by_ref);
1635 gcc_checking_assert (desc->split_candidate);
1636
10478270
MJ
1637 if (desc->safe_ref
1638 || bitmap_bit_p (final_bbs, bb->index))
ff6686d2
MJ
1639 return;
1640
10478270 1641 int idx = bb->index * unsafe_by_ref_count + desc->deref_index;
ff6686d2
MJ
1642 if (bb_dereferences[idx] < dist)
1643 bb_dereferences[idx] = dist;
1644}
1645
1646/* Return true, if any potential replacements should use NEW_TYPE as opposed to
1647 previously recorded OLD_TYPE. */
1648
1649static bool
1650type_prevails_p (tree old_type, tree new_type)
1651{
1652 if (old_type == new_type)
1653 return false;
1654
1655 /* Non-aggregates are always better. */
1656 if (!is_gimple_reg_type (old_type)
1657 && is_gimple_reg_type (new_type))
1658 return true;
1659 if (is_gimple_reg_type (old_type)
1660 && !is_gimple_reg_type (new_type))
1661 return false;
1662
1663 /* Prefer any complex or vector type over any other scalar type. */
1664 if (TREE_CODE (old_type) != COMPLEX_TYPE
1665 && TREE_CODE (old_type) != VECTOR_TYPE
1666 && (TREE_CODE (new_type) == COMPLEX_TYPE
1667 || TREE_CODE (new_type) == VECTOR_TYPE))
1668 return true;
1669 if ((TREE_CODE (old_type) == COMPLEX_TYPE
1670 || TREE_CODE (old_type) == VECTOR_TYPE)
1671 && TREE_CODE (new_type) != COMPLEX_TYPE
1672 && TREE_CODE (new_type) != VECTOR_TYPE)
1673 return false;
1674
1675 /* Use the integral type with the bigger precision. */
1676 if (INTEGRAL_TYPE_P (old_type)
1677 && INTEGRAL_TYPE_P (new_type))
1678 return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));
1679
1680 /* Attempt to disregard any integral type with non-full precision. */
1681 if (INTEGRAL_TYPE_P (old_type)
1682 && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
1683 != TYPE_PRECISION (old_type)))
1684 return true;
1685 if (INTEGRAL_TYPE_P (new_type)
1686 && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
1687 != TYPE_PRECISION (new_type)))
1688 return false;
1689 /* Stabilize the selection. */
1690 return TYPE_UID (old_type) < TYPE_UID (new_type);
1691}
1692
1693/* When scanning an expression which is a call argument, this structure
dfea3d6f 1694 specifies the call and the position of the argument. */
ff6686d2
MJ
1695
1696struct scan_call_info
1697{
1698 /* Call graph edge representing the call. */
1699 cgraph_edge *cs;
1700 /* Total number of arguments in the call. */
1701 unsigned argument_count;
/* Used to index the edge summary's m_arg_flow vector, so zero-based.  */
1702 /* Number of the actual argument being scanned. */
1703 unsigned arg_idx;
1704};
1705
1706/* Record use of ACCESS which belongs to a parameter described by DESC in a
1707 call argument described by CALL_INFO. */
1708
1709static void
1710record_nonregister_call_use (gensum_param_desc *desc,
1711 scan_call_info *call_info,
1712 unsigned unit_offset, unsigned unit_size)
1713{
1714 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1715 csum->init_inputs (call_info->argument_count);
1716
1717 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1718 param_flow->aggregate_pass_through = true;
1719 set_single_param_flow_source (param_flow, desc->param_number);
1720 param_flow->unit_offset = unit_offset;
1721 param_flow->unit_size = unit_size;
1722 desc->call_uses++;
1723}
1724
1725/* Callback of walk_aliased_vdefs, just mark that there was a possible
c76b3bc5 1726 modification. */
ff6686d2
MJ
1727
1728static bool
1729mark_maybe_modified (ao_ref *, tree, void *data)
1730{
1731 bool *maybe_modified = (bool *) data;
1732 *maybe_modified = true;
1733 return true;
1734}
1735
1736/* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX
1737 specifies whether EXPR is used in a load, store or as an argument call. BB
1738 must be the basic block in which expr resides. If CTX specifies call
dfea3d6f 1739 argument context, CALL_INFO must describe that call and argument position,
ff6686d2
MJ
1740 otherwise it is ignored. */
1741
1742static void
1743scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
1744 basic_block bb, scan_call_info *call_info = NULL)
1745{
1746 poly_int64 poffset, psize, pmax_size;
1747 HOST_WIDE_INT offset, size, max_size;
1748 tree base;
1749 bool deref = false;
1750 bool reverse;
1751
6539bbc1
MJ
1752 if (TREE_CODE (expr) == ADDR_EXPR)
1753 {
1754 if (ctx == ISRA_CTX_ARG)
1755 return;
1756 tree t = get_base_address (TREE_OPERAND (expr, 0));
1757 if (TREE_CODE (t) == VAR_DECL && !TREE_STATIC (t))
1758 loaded_decls->add (t);
1759 return;
1760 }
1761 if (TREE_CODE (expr) == SSA_NAME
1762 || CONSTANT_CLASS_P (expr))
1763 return;
1764
ff6686d2
MJ
1765 if (TREE_CODE (expr) == BIT_FIELD_REF
1766 || TREE_CODE (expr) == IMAGPART_EXPR
1767 || TREE_CODE (expr) == REALPART_EXPR)
1768 expr = TREE_OPERAND (expr, 0);
1769
1770 base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);
1771
1772 if (TREE_CODE (base) == MEM_REF)
1773 {
1774 tree op = TREE_OPERAND (base, 0);
1775 if (TREE_CODE (op) != SSA_NAME
1776 || !SSA_NAME_IS_DEFAULT_DEF (op))
1777 return;
1778 base = SSA_NAME_VAR (op);
1779 if (!base)
1780 return;
1781 deref = true;
1782 }
e3a5cc32
MJ
1783 else if (TREE_CODE (base) == VAR_DECL
1784 && !TREE_STATIC (base)
1785 && (ctx == ISRA_CTX_ARG
1786 || ctx == ISRA_CTX_LOAD))
1787 loaded_decls->add (base);
1788
ff6686d2
MJ
1789 if (TREE_CODE (base) != PARM_DECL)
1790 return;
1791
1792 gensum_param_desc *desc = get_gensum_param_desc (base);
1793 if (!desc || !desc->split_candidate)
1794 return;
1795
1796 if (!poffset.is_constant (&offset)
1797 || !psize.is_constant (&size)
1798 || !pmax_size.is_constant (&max_size))
1799 {
1800 disqualify_split_candidate (desc, "Encountered a polynomial-sized "
1801 "access.");
1802 return;
1803 }
1804 if (size < 0 || size != max_size)
1805 {
1806 disqualify_split_candidate (desc, "Encountered a variable sized access.");
1807 return;
1808 }
1809 if (TREE_CODE (expr) == COMPONENT_REF
1810 && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
1811 {
1812 disqualify_split_candidate (desc, "Encountered a bit-field access.");
1813 return;
1814 }
5a4d0da4
MJ
1815 if (offset < 0)
1816 {
1817 disqualify_split_candidate (desc, "Encountered an access at a "
1818 "negative offset.");
1819 return;
1820 }
ff6686d2
MJ
1821 gcc_assert ((offset % BITS_PER_UNIT) == 0);
1822 gcc_assert ((size % BITS_PER_UNIT) == 0);
1823 if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
1824 || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
1825 {
1826 disqualify_split_candidate (desc, "Encountered an access with too big "
1827 "offset or size");
1828 return;
1829 }
1830
1831 tree type = TREE_TYPE (expr);
1832 unsigned int exp_align = get_object_alignment (expr);
1833
1834 if (exp_align < TYPE_ALIGN (type))
1835 {
1836 disqualify_split_candidate (desc, "Underaligned access.");
1837 return;
1838 }
1839
1840 if (deref)
1841 {
1842 if (!desc->by_ref)
1843 {
1844 disqualify_split_candidate (desc, "Dereferencing a non-reference.");
1845 return;
1846 }
1847 else if (ctx == ISRA_CTX_STORE)
1848 {
1849 disqualify_split_candidate (desc, "Storing to data passed by "
1850 "reference.");
1851 return;
1852 }
1853
1854 if (!aa_walking_limit)
1855 {
1856 disqualify_split_candidate (desc, "Out of alias analysis step "
1857 "limit.");
1858 return;
1859 }
1860
1861 gcc_checking_assert (gimple_vuse (stmt));
1862 bool maybe_modified = false;
1863 ao_ref ar;
1864
1865 ao_ref_init (&ar, expr);
1866 bitmap visited = BITMAP_ALLOC (NULL);
1867 int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
1868 mark_maybe_modified, &maybe_modified,
1869 &visited, NULL, aa_walking_limit);
1870 BITMAP_FREE (visited);
1871 if (walked > 0)
1872 {
1873 gcc_assert (aa_walking_limit > walked);
1874 aa_walking_limit = aa_walking_limit - walked;
1875 }
1876 if (walked < 0)
1877 aa_walking_limit = 0;
1878 if (maybe_modified || walked < 0)
1879 {
1880 disqualify_split_candidate (desc, "Data passed by reference possibly "
1881 "modified through an alias.");
1882 return;
1883 }
1884 else
1885 mark_param_dereference (desc, offset + size, bb);
1886 }
1887 else
1888 /* Pointer parameters with direct uses should have been ruled out by
dfea3d6f 1889 analyzing SSA default def when looking at the parameters. */
ff6686d2
MJ
1890 gcc_assert (!desc->by_ref);
1891
1892 gensum_param_access *access = get_access (desc, offset, size, ctx);
1893 if (!access)
1894 return;
1895
1896 if (ctx == ISRA_CTX_ARG)
1897 {
1898 gcc_checking_assert (call_info);
1899
1900 if (!deref)
1901 record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
1902 size / BITS_PER_UNIT);
1903 else
1904 /* This is not a pass-through of a pointer, this is a use like any
1905 other. */
1906 access->nonarg = true;
1907 }
10478270
MJ
1908 else if (ctx == ISRA_CTX_LOAD && bb->count.initialized_p ())
1909 access->load_count += bb->count;
ff6686d2
MJ
1910
1911 if (!access->type)
1912 {
1913 access->type = type;
1914 access->alias_ptr_type = reference_alias_ptr_type (expr);
1915 access->reverse = reverse;
1916 }
1917 else
1918 {
1919 if (exp_align < TYPE_ALIGN (access->type))
1920 {
1921 disqualify_split_candidate (desc, "Reference has lower alignment "
1922 "than a previous one.");
1923 return;
1924 }
1925 if (access->alias_ptr_type != reference_alias_ptr_type (expr))
1926 {
1927 disqualify_split_candidate (desc, "Multiple alias pointer types.");
1928 return;
1929 }
1930 if (access->reverse != reverse)
1931 {
1932 disqualify_split_candidate (desc, "Both normal and reverse "
1933 "scalar storage order.");
1934 return;
1935 }
1936 if (!deref
1937 && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
1938 && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
1939 {
1940 /* We need the same aggregate type on all accesses to be able to
1941 distinguish transformation spots from pass-through arguments in
dfea3d6f
JJ
1942 the transformation phase. */
1943 disqualify_split_candidate (desc, "We do not support aggregate "
ff6686d2
MJ
1944 "type punning.");
1945 return;
1946 }
1947
1948 if (type_prevails_p (access->type, type))
1949 access->type = type;
1950 }
1951}
1952
/* Scan body function described by NODE and FUN and create access trees for
   parameters.  Also record, in the global final_bbs bitmap, the basic blocks
   containing statements through which control may leave the function
   (external throws, returns, asm statements, and calls that are not
   const/pure), since dereference-distance propagation must not look past
   such blocks.  */

static void
scan_function (cgraph_node *node, struct function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* A statement that can throw externally may leave the function,
	     so its BB is "final" for dereference propagation purposes.  */
	  if (final_bbs && stmt_can_throw_external (fun, stmt))
	    bitmap_set_bit (final_bbs, bb->index);
	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      {
		tree t = gimple_return_retval (as_a <greturn *> (stmt));
		if (t != NULL_TREE)
		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		if (final_bbs)
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASSIGN:
	      /* Clobbers are ignored; only single-rhs assignments move data
		 in a way relevant for access tracking.  */
	      if (gimple_assign_single_p (stmt)
		  && !gimple_clobber_p (stmt))
		{
		  tree rhs = gimple_assign_rhs1 (stmt);
		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
		  tree lhs = gimple_assign_lhs (stmt);
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		}
	      break;

	    case GIMPLE_CALL:
	      {
		unsigned argument_count = gimple_call_num_args (stmt);
		isra_scan_context ctx = ISRA_CTX_ARG;
		scan_call_info call_info, *call_info_p = &call_info;
		/* Internal function calls have no call-graph edge; treat
		   their arguments as plain loads (or stores for internal
		   store functions) rather than as call arguments.  */
		if (gimple_call_internal_p (stmt))
		  {
		    call_info_p = NULL;
		    ctx = ISRA_CTX_LOAD;
		    internal_fn ifn = gimple_call_internal_fn (stmt);
		    if (internal_store_fn_p (ifn))
		      ctx = ISRA_CTX_STORE;
		  }
		else
		  {
		    call_info.cs = node->get_edge (stmt);
		    call_info.argument_count = argument_count;
		  }

		for (unsigned i = 0; i < argument_count; i++)
		  {
		    call_info.arg_idx = i;
		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
				      ctx, bb, call_info_p);
		  }

		tree lhs = gimple_call_lhs (stmt);
		if (lhs)
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		int flags = gimple_call_flags (stmt);
		/* A call that is not const/pure (or is looping const/pure)
		   might not return; record the BB as final.  */
		if (final_bbs
		    && (((flags & (ECF_CONST | ECF_PURE)) == 0)
			|| (flags & ECF_LOOPING_CONST_OR_PURE)))
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASM:
	      {
		gasm *asm_stmt = as_a <gasm *> (stmt);
		/* Addresses taken in asm operands disqualify candidates via
		   asm_visit_addr.  */
		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
					       asm_visit_addr);
		if (final_bbs)
		  bitmap_set_bit (final_bbs, bb->index);

		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		  }
		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
		  }
	      }
	      break;

	    default:
	      break;
	    }
	}
    }
}
2057
/* Return true if SSA_NAME NAME of function described by FUN is only used in
   return statements, or if results of any operations it is involved in are
   only used in return statements.  ANALYZED is a bitmap that tracks which SSA
   names we have already started investigating.  */

static bool
ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
{
  bool res = true;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      /* Debug statements do not constitute real uses.  */
      if (is_gimple_debug (stmt))
	continue;

      if (gimple_code (stmt) == GIMPLE_RETURN)
	{
	  tree t = gimple_return_retval (as_a <greturn *> (stmt));
	  /* NAME may appear in the return statement in a position other
	     than the returned value itself.  */
	  if (t != name)
	    {
	      res = false;
	      break;
	    }
	}
      /* Only follow through non-volatile assignments and PHIs that cannot
	 be kept alive merely because of non-call EH side effects.  */
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  /* TODO: And perhaps for const function calls too?  */
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = false;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  /* bitmap_set_bit returns true only for a previously unset bit,
	     so each SSA name is recursed into at most once, preventing
	     infinite recursion over PHI cycles.  */
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
	      && !ssa_name_only_returned_p (fun, lhs, analyzed))
	    {
	      res = false;
	      break;
	    }
	}
      else
	{
	  res = false;
	  break;
	}
    }
  return res;
}
2116
/* Inspect the uses of the return value of the call associated with CS, and if
   it is not used or if it is only used to construct the return value of the
   caller, mark it as such in call or caller summary.  Also check for
   misaligned arguments.  */

static void
isra_analyze_call (cgraph_edge *cs)
{
  gcall *call_stmt = cs->call_stmt;
  unsigned count = gimple_call_num_args (call_stmt);
  isra_call_summary *csum = call_sums->get_create (cs);

  for (unsigned i = 0; i < count; i++)
    {
      tree arg = gimple_call_arg (call_stmt, i);
      if (TREE_CODE (arg) == ADDR_EXPR)
	{
	  poly_int64 poffset, psize, pmax_size;
	  bool reverse;
	  tree base = get_ref_base_and_extent (TREE_OPERAND (arg, 0), &poffset,
					       &psize, &pmax_size, &reverse);
	  /* If the address is of (a part of) a declaration of known size,
	     remember how many bytes are available from the passed offset to
	     the end of the object; ds is assigned inside the condition.  */
	  HOST_WIDE_INT offset;
	  unsigned HOST_WIDE_INT ds;
	  if (DECL_P (base)
	      && (poffset.is_constant (&offset))
	      && tree_fits_uhwi_p (DECL_SIZE (base))
	      && ((ds = tree_to_uhwi (DECL_SIZE (base)) - offset)
		  < ISRA_ARG_SIZE_LIMIT * BITS_PER_UNIT))
	    {
	      csum->init_inputs (count);
	      gcc_assert (!csum->m_arg_flow[i].aggregate_pass_through);
	      csum->m_arg_flow[i].unit_size = ds / BITS_PER_UNIT;
	    }

	  /* A non-static local variable whose value is never loaded in this
	     function exists only to be passed to calls.  */
	  if (TREE_CODE (base) == VAR_DECL
	      && !TREE_STATIC (base)
	      && !loaded_decls->contains (base))
	    {
	      csum->init_inputs (count);
	      csum->m_arg_flow[i].constructed_for_calls = true;
	    }
	}

      if (is_gimple_reg (arg))
	continue;

      /* Non-register aggregate arguments: detect references that are not
	 byte-aligned, which the transformation cannot handle.  */
      tree offset;
      poly_int64 bitsize, bitpos;
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
			   &unsignedp, &reversep, &volatilep);
      if (!multiple_p (bitpos, BITS_PER_UNIT))
	{
	  csum->m_bit_aligned_arg = true;
	  break;
	}
    }

  tree lhs = gimple_call_lhs (call_stmt);
  if (lhs)
    {
      /* TODO: Also detect aggregates on a LHS of a call that are only returned
	 from this function (without being read anywhere).  */
      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  bitmap analyzed = BITMAP_ALLOC (NULL);
	  if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
					lhs, analyzed))
	    csum->m_return_returned = true;
	  BITMAP_FREE (analyzed);
	}
    }
  else
    csum->m_return_ignored = true;
}
2193
2194/* Look at all calls going out of NODE, described also by IFS and perform all
2195 analyses necessary for IPA-SRA that are not done at body scan time or done
2196 even when body is not scanned because the function is not a candidate. */
2197
2198static void
2199isra_analyze_all_outgoing_calls (cgraph_node *node)
2200{
2201 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2202 isra_analyze_call (cs);
2203 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2204 isra_analyze_call (cs);
2205}
2206
2207/* Dump a dereferences table with heading STR to file F. */
2208
2209static void
2210dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2211{
2212 basic_block bb;
2213
2214 fprintf (dump_file, "%s", str);
2215 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2216 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2217 {
2218 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2219 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2220 {
2221 int i;
10478270 2222 for (i = 0; i < unsafe_by_ref_count; i++)
ff6686d2 2223 {
10478270 2224 int idx = bb->index * unsafe_by_ref_count + i;
ff6686d2
MJ
2225 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2226 }
2227 }
2228 fprintf (f, "\n");
2229 }
2230 fprintf (dump_file, "\n");
2231}
2232
/* Propagate distances in bb_dereferences in the opposite direction than the
   control flow edges, in each step storing the maximum of the current value
   and the minimum of all successors.  These steps are repeated until the table
   stabilizes.  Note that BBs which might terminate the function (according to
   the final_bbs bitmap) are never updated in this way.  */

static void
propagate_dereference_distances (struct function *fun)
{
  basic_block bb;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table before propagation:\n");

  /* Worklist of blocks to process; bb->aux doubles as the "is queued"
     marker so a block is never in the queue twice.  */
  auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
  queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
  FOR_EACH_BB_FN (bb, fun)
    {
      queue.quick_push (bb);
      bb->aux = bb;
    }

  while (!queue.is_empty ())
    {
      edge_iterator ei;
      edge e;
      bool change = false;
      int i;

      bb = queue.pop ();
      bb->aux = NULL;

      /* Blocks which may terminate the function must not promise any
	 dereference depth to their predecessors.  */
      if (bitmap_bit_p (final_bbs, bb->index))
	continue;

      for (i = 0; i < unsafe_by_ref_count; i++)
	{
	  int idx = bb->index * unsafe_by_ref_count + i;
	  bool first = true;
	  HOST_WIDE_INT inh = 0;

	  /* Compute the minimum distance guaranteed by all successors
	     (ignoring the artificial exit block).  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int succ_idx = e->dest->index * unsafe_by_ref_count + i;

	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
		continue;

	      if (first)
		{
		  first = false;
		  inh = bb_dereferences [succ_idx];
		}
	      else if (bb_dereferences [succ_idx] < inh)
		inh = bb_dereferences [succ_idx];
	    }

	  /* Keep the maximum of our own value and the inherited minimum.  */
	  if (!first && bb_dereferences[idx] < inh)
	    {
	      bb_dereferences[idx] = inh;
	      change = true;
	    }
	}

      /* Re-queue predecessors whose inherited minima may have grown.  */
      if (change)
	FOR_EACH_EDGE (e, ei, bb->preds)
	  {
	    if (e->src->aux)
	      continue;

	    e->src->aux = e->src;
	    queue.quick_push (e->src);
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table after propagation:\n");
}
2313
10478270
MJ
2314/* Return true if the ACCESS loads happen frequently enough in FUN to risk
2315 moving them to the caller and only pass the result. */
ff6686d2
MJ
2316
2317static bool
10478270
MJ
2318dereference_probable_p (struct function *fun, gensum_param_access *access)
2319{
2320 int threshold = opt_for_fn (fun->decl, param_ipa_sra_deref_prob_threshold);
2321 return access->load_count
2322 >= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (threshold, 100);
2323}
2324
/* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
   its children, return true if the parameter cannot be split, otherwise return
   false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX must be
   the index of the entry BB in the function of PARM.  */

static bool
check_gensum_access (struct function *fun, tree parm, gensum_param_desc *desc,
		     gensum_param_access *access,
		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
		     int entry_bb_index)
{
  if (access->nonarg)
    {
      /* A use outside of call arguments means the parameter is not only
	 passed along; account its size towards the replacement limit.  */
      *only_calls = false;
      *nonarg_acc_size += access->size;

      if (access->first_child)
	{
	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
	  return true;
	}
    }
  /* Do not decompose a non-BLKmode param in a way that would create
     BLKmode params.  Especially for by-reference passing (thus,
     pointer-type param) this is hardly worthwhile.  */
  if (DECL_MODE (parm) != BLKmode
      && TYPE_MODE (access->type) == BLKmode)
    {
      disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
      return true;
    }

  if (desc->by_ref)
    {
      if (desc->safe_ref)
	{
	  /* Dereferencing is known to be safe; only profitability needs
	     checking.  */
	  if (!dereference_probable_p (fun, access))
	    {
	      disqualify_split_candidate (desc, "Dereferences in callers "
					  "would happen much more frequently.");
	      return true;
	    }
	}
      else
	{
	  /* Use the propagated per-BB dereference distances to decide
	     whether this access is certainly performed on every path from
	     the entry block.  */
	  int idx = (entry_bb_index * unsafe_by_ref_count + desc->deref_index);
	  if ((access->offset + access->size) > bb_dereferences[idx])
	    {
	      if (!dereference_probable_p (fun, access))
		{
		  disqualify_split_candidate (desc, "Would create a possibly "
					      "illegal dereference in a "
					      "caller.");
		  return true;
		}
	      /* Frequent enough to be worth it, but callers will need a
		 guard because the dereference is not guaranteed here.  */
	      desc->conditionally_dereferenceable = true;
	    }
	}
    }

  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    if (check_gensum_access (fun, parm, desc, ch, nonarg_acc_size, only_calls,
			     entry_bb_index))
      return true;

  return false;
}
2394
2395/* Copy data from FROM and all of its children to a vector of accesses in IPA
2396 descriptor DESC. */
2397
2398static void
2399copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2400{
2401 param_access *to = ggc_cleared_alloc<param_access> ();
2402 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2403 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2404 to->unit_offset = from->offset / BITS_PER_UNIT;
2405 to->unit_size = from->size / BITS_PER_UNIT;
2406 to->type = from->type;
2407 to->alias_ptr_type = from->alias_ptr_type;
2408 to->certain = from->nonarg;
2409 to->reverse = from->reverse;
2410 vec_safe_push (desc->accesses, to);
2411
2412 for (gensum_param_access *ch = from->first_child;
2413 ch;
2414 ch = ch->next_sibling)
2415 copy_accesses_to_ipa_desc (ch, desc);
2416}
2417
/* Analyze function body scan results stored in param_accesses and
   param_accesses, detect possible transformations and store information of
   those in function summary.  NODE, FUN and IFS are all various structures
   describing the currently analyzed function.  */

static void
process_scan_results (cgraph_node *node, struct function *fun,
		      isra_func_summary *ifs,
		      vec<gensum_param_desc> *param_descriptions)
{
  bool check_pass_throughs = false;
  bool dereferences_propagated = false;
  tree parm = DECL_ARGUMENTS (node->decl);
  unsigned param_count = param_descriptions->length();

  /* Phase 1: validate each splitting candidate and compute its size
     limits, disqualifying candidates which fail the checks.  */
  for (unsigned desc_index = 0;
       desc_index < param_count;
       desc_index++, parm = DECL_CHAIN (parm))
    {
      gensum_param_desc *desc = &(*param_descriptions)[desc_index];
      if (!desc->split_candidate)
	continue;

      if (flag_checking)
	isra_verify_access_tree (desc->accesses);

      /* The dereference-distance fixpoint is only needed for unsafe
	 by-reference candidates and is computed lazily, at most once.  */
      if (!dereferences_propagated
	  && desc->by_ref
	  && !desc->safe_ref
	  && desc->accesses)
	{
	  propagate_dereference_distances (fun);
	  dereferences_propagated = true;
	}

      HOST_WIDE_INT nonarg_acc_size = 0;
      bool only_calls = true;
      bool check_failed = false;

      int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
      for (gensum_param_access *acc = desc->accesses;
	   acc;
	   acc = acc->next_sibling)
	if (check_gensum_access (fun, parm, desc, acc, &nonarg_acc_size,
				 &only_calls, entry_bb_index))
	  {
	    check_failed = true;
	    break;
	  }
      if (check_failed)
	continue;

      /* All accesses were call arguments; the parameter is unused in this
	 body apart from being passed along.  */
      if (only_calls)
	desc->locally_unused = true;

      HOST_WIDE_INT cur_param_size
	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
      HOST_WIDE_INT param_size_limit, optimistic_limit;
      if (!desc->by_ref || optimize_function_for_size_p (fun))
	{
	  param_size_limit = cur_param_size;
	  optimistic_limit = cur_param_size;
	}
      else
	{
	  /* By-reference parameters may grow by the configured factors when
	     the split replaces the pointer with loaded values.  */
	  param_size_limit
	    = opt_for_fn (node->decl,
			  param_ipa_sra_ptr_growth_factor) * cur_param_size;
	  optimistic_limit
	    = (opt_for_fn (node->decl, param_ipa_sra_ptrwrap_growth_factor)
	       * param_size_limit);
	}

      if (nonarg_acc_size > optimistic_limit
	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
	{
	  disqualify_split_candidate (desc, "Would result into a too big set "
				      "of replacements even in best "
				      "scenarios.");
	}
      else
	{
	  /* create_parameter_descriptors makes sure unit sizes of all
	     candidate parameters fit unsigned integers restricted to
	     ISRA_ARG_SIZE_LIMIT.  */
	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
	  if (desc->split_candidate && desc->ptr_pt_count)
	    {
	      gcc_assert (desc->by_ref);
	      check_pass_throughs = true;
	    }
	}
    }

  /* Phase 2: When a pointer parameter is passed-through to a callee, in
     which it is only used to read only one or a few items, we can attempt to
     transform it to obtaining and passing through the items instead of the
     pointer.  But we must take extra care that 1) we do not introduce any
     segfault by moving dereferences above control flow and that 2) the data
     is not modified through an alias in this function.  The IPA analysis
     must not introduce any accesses candidates unless it can prove both.

     The current solution is very crude as it consists of ensuring that the
     call postdominates entry BB and that the definition of VUSE of the call
     is default definition.  TODO: For non-recursive callees in the same
     compilation unit we could do better by doing analysis in topological
     order an looking into access candidates of callees, using their
     alias_ptr_types to attempt real AA.  We could also use the maximum known
     dereferenced offset in this function at IPA level.

     TODO: Measure the overhead and the effect of just being pessimistic.
     Maybe this is only -O3 material?  */

  bool pdoms_calculated = false;
  if (check_pass_throughs)
    for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
      {
	gcall *call_stmt = cs->call_stmt;
	tree vuse = gimple_vuse (call_stmt);

	/* If the callee is a const function, we don't get a VUSE.  In such
	   case there will be no memory accesses in the called function (or the
	   const attribute is wrong) and then we just don't care.  */
	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);

	unsigned count = gimple_call_num_args (call_stmt);
	isra_call_summary *csum = call_sums->get_create (cs);
	csum->init_inputs (count);
	csum->m_before_any_store = uses_memory_as_obtained;
	for (unsigned argidx = 0; argidx < count; argidx++)
	  {
	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
	      continue;
	    unsigned pidx
	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
	    if (!desc->split_candidate)
	      {
		csum->m_arg_flow[argidx].pointer_pass_through = false;
		continue;
	      }
	    if (!uses_memory_as_obtained)
	      continue;

	    /* Known-safe references need neither the memory state nor the
	       post-dominance check.  */
	    if (desc->safe_ref)
	      {
		csum->m_arg_flow[argidx].safe_to_import_accesses = true;
		continue;
	      }

	    /* Post-dominator check placed last, hoping that it usually won't
	       be needed.  */
	    if (!pdoms_calculated)
	      {
		gcc_checking_assert (cfun);
		connect_infinite_loops_to_exit ();
		calculate_dominance_info (CDI_POST_DOMINATORS);
		pdoms_calculated = true;
	      }
	    if (dominated_by_p (CDI_POST_DOMINATORS,
				gimple_bb (call_stmt),
				single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun))))
	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
	  }

      }
  if (pdoms_calculated)
    {
      free_dominance_info (CDI_POST_DOMINATORS);
      remove_fake_exit_edges ();
    }

  /* TODO: Add early exit if we disqualified everything.  This also requires
     that we either relax the restriction that
     ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
     or store the number of parameters to IPA-SRA function summary and use that
     when just removing params.  */

  /* Phase 3: copy the surviving intraprocedural descriptors into the
     IPA function summary.  */
  vec_safe_reserve_exact (ifs->m_parameters, param_count);
  ifs->m_parameters->quick_grow_cleared (param_count);
  for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
    {
      gensum_param_desc *s = &(*param_descriptions)[desc_index];
      isra_param_desc *d = &(*ifs->m_parameters)[desc_index];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->nonarg_acc_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;

      for (gensum_param_access *acc = s->accesses;
	   acc;
	   acc = acc->next_sibling)
	copy_accesses_to_ipa_desc (acc, d);
    }

  if (dump_file)
    dump_isra_param_descriptors (dump_file, node->decl, ifs, false);
}
2620
2621/* Return true if there are any overlaps among certain accesses of DESC. If
dfea3d6f 2622 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
ff6686d2
MJ
2623 too. DESC is assumed to be a split candidate that is not locally
2624 unused. */
2625
2626static bool
2627overlapping_certain_accesses_p (isra_param_desc *desc,
2628 bool *certain_access_present_p)
2629{
2630 unsigned pclen = vec_safe_length (desc->accesses);
2631 for (unsigned i = 0; i < pclen; i++)
2632 {
2633 param_access *a1 = (*desc->accesses)[i];
2634
2635 if (!a1->certain)
2636 continue;
2637 if (certain_access_present_p)
2638 *certain_access_present_p = true;
2639 for (unsigned j = i + 1; j < pclen; j++)
2640 {
2641 param_access *a2 = (*desc->accesses)[j];
2642 if (a2->certain
2643 && a1->unit_offset < a2->unit_offset + a2->unit_size
2644 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2645 return true;
2646 }
2647 }
2648 return false;
2649}
2650
2651/* Check for any overlaps of certain param accesses among splitting candidates
2652 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2653 check that used splitting candidates have at least one certain access. */
2654
2655static void
2656verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2657{
2658 isra_func_summary *ifs = func_sums->get (node);
2659 if (!ifs || !ifs->m_candidate)
2660 return;
2661 unsigned param_count = vec_safe_length (ifs->m_parameters);
2662 for (unsigned pidx = 0; pidx < param_count; pidx++)
2663 {
2664 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2665 if (!desc->split_candidate || desc->locally_unused)
2666 continue;
2667
2668 bool certain_access_present = !certain_must_exist;
2669 if (overlapping_certain_accesses_p (desc, &certain_access_present))
206222e0 2670 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
ff6686d2
MJ
2671 "which overlap", node->dump_name (), pidx);
2672 if (!certain_access_present)
206222e0 2673 internal_error ("function %qs, parameter %u, is used but does not "
ff6686d2
MJ
2674 "have any certain IPA-SRA access",
2675 node->dump_name (), pidx);
2676 }
2677}
2678
ff6686d2
MJ
2679/* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2680 this compilation unit and create summary structures describing IPA-SRA
2681 opportunities and constraints in them. */
2682
2683static void
2684ipa_sra_generate_summary (void)
2685{
2686 struct cgraph_node *node;
2687
2688 gcc_checking_assert (!func_sums);
2689 gcc_checking_assert (!call_sums);
2690 func_sums
78cd68c0 2691 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2692 ipa_sra_function_summaries (symtab, true));
2693 call_sums = new ipa_sra_call_summaries (symtab);
2694
2695 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2696 ipa_sra_summarize_function (node);
2697 return;
2698}
2699
dfea3d6f 2700/* Write intraprocedural analysis information about E and all of its outgoing
ff6686d2
MJ
2701 edges into a stream for LTO WPA. */
2702
2703static void
2704isra_write_edge_summary (output_block *ob, cgraph_edge *e)
2705{
2706 isra_call_summary *csum = call_sums->get (e);
2707 unsigned input_count = csum->m_arg_flow.length ();
2708 streamer_write_uhwi (ob, input_count);
2709 for (unsigned i = 0; i < input_count; i++)
2710 {
2711 isra_param_flow *ipf = &csum->m_arg_flow[i];
2712 streamer_write_hwi (ob, ipf->length);
2713 bitpack_d bp = bitpack_create (ob->main_stream);
2714 for (int j = 0; j < ipf->length; j++)
2715 bp_pack_value (&bp, ipf->inputs[j], 8);
2716 bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
2717 bp_pack_value (&bp, ipf->pointer_pass_through, 1);
2718 bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
e3a5cc32 2719 bp_pack_value (&bp, ipf->constructed_for_calls, 1);
ff6686d2
MJ
2720 streamer_write_bitpack (&bp);
2721 streamer_write_uhwi (ob, ipf->unit_offset);
2722 streamer_write_uhwi (ob, ipf->unit_size);
2723 }
2724 bitpack_d bp = bitpack_create (ob->main_stream);
2725 bp_pack_value (&bp, csum->m_return_ignored, 1);
2726 bp_pack_value (&bp, csum->m_return_returned, 1);
2727 bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
763121cc 2728 bp_pack_value (&bp, csum->m_before_any_store, 1);
ff6686d2
MJ
2729 streamer_write_bitpack (&bp);
2730}
2731
/* Write intraprocedural analysis information about NODE and all of its
   outgoing edges into a stream for LTO WPA.  The layout written here must be
   mirrored exactly by isra_read_node_info.  */

static void
isra_write_node_summary (output_block *ob, cgraph_node *node)
{
  isra_func_summary *ifs = func_sums->get (node);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  int node_ref = lto_symtab_encoder_encode (encoder, node);
  streamer_write_uhwi (ob, node_ref);

  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
  streamer_write_uhwi (ob, param_desc_count);
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = vec_safe_length (desc->accesses);
      streamer_write_uhwi (ob, access_count);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = (*desc->accesses)[j];
	  stream_write_tree (ob, acc->type, true);
	  stream_write_tree (ob, acc->alias_ptr_type, true);
	  streamer_write_uhwi (ob, acc->unit_offset);
	  streamer_write_uhwi (ob, acc->unit_size);
	  bitpack_d bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, acc->certain, 1);
	  bp_pack_value (&bp, acc->reverse, 1);
	  streamer_write_bitpack (&bp);
	}
      streamer_write_uhwi (ob, desc->param_size_limit);
      streamer_write_uhwi (ob, desc->size_reached);
      /* Fields asserted below are WPA-only state and must still be at
	 their initial values at summary-writing time; they are therefore
	 not streamed.  */
      gcc_assert (desc->safe_size == 0);
      bitpack_d bp = bitpack_create (ob->main_stream);
      bp_pack_value (&bp, desc->locally_unused, 1);
      bp_pack_value (&bp, desc->split_candidate, 1);
      bp_pack_value (&bp, desc->by_ref, 1);
      gcc_assert (!desc->not_specially_constructed);
      bp_pack_value (&bp, desc->conditionally_dereferenceable, 1);
      gcc_assert (!desc->safe_size_set);
      streamer_write_bitpack (&bp);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, ifs->m_candidate, 1);
  bp_pack_value (&bp, ifs->m_returns_value, 1);
  bp_pack_value (&bp, ifs->m_return_ignored, 1);
  gcc_assert (!ifs->m_queued);
  streamer_write_bitpack (&bp);

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
}
2786
dfea3d6f 2787/* Write intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2788
2789static void
2790ipa_sra_write_summary (void)
2791{
2792 if (!func_sums || !call_sums)
2793 return;
2794
2795 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2796 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2797 ob->symbol = NULL;
2798
2799 unsigned int count = 0;
2800 lto_symtab_encoder_iterator lsei;
2801 for (lsei = lsei_start_function_in_partition (encoder);
2802 !lsei_end_p (lsei);
2803 lsei_next_function_in_partition (&lsei))
2804 {
2805 cgraph_node *node = lsei_cgraph_node (lsei);
2806 if (node->has_gimple_body_p ()
2807 && func_sums->get (node) != NULL)
2808 count++;
2809 }
2810 streamer_write_uhwi (ob, count);
2811
2812 /* Process all of the functions. */
2813 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2814 lsei_next_function_in_partition (&lsei))
2815 {
2816 cgraph_node *node = lsei_cgraph_node (lsei);
2817 if (node->has_gimple_body_p ()
2818 && func_sums->get (node) != NULL)
2819 isra_write_node_summary (ob, node);
2820 }
2821 streamer_write_char_stream (ob->main_stream, 0);
2822 produce_asm (ob, NULL);
2823 destroy_output_block (ob);
2824}
2825
dfea3d6f 2826/* Read intraprocedural analysis information about E and all of its outgoing
ff6686d2
MJ
2827 edges into a stream for LTO WPA. */
2828
2829static void
2830isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2831{
2832 isra_call_summary *csum = call_sums->get_create (cs);
2833 unsigned input_count = streamer_read_uhwi (ib);
2834 csum->init_inputs (input_count);
2835 for (unsigned i = 0; i < input_count; i++)
2836 {
2837 isra_param_flow *ipf = &csum->m_arg_flow[i];
2838 ipf->length = streamer_read_hwi (ib);
2839 bitpack_d bp = streamer_read_bitpack (ib);
2840 for (int j = 0; j < ipf->length; j++)
2841 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2842 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2843 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2844 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
e3a5cc32 2845 ipf->constructed_for_calls = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2846 ipf->unit_offset = streamer_read_uhwi (ib);
2847 ipf->unit_size = streamer_read_uhwi (ib);
2848 }
2849 bitpack_d bp = streamer_read_bitpack (ib);
2850 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2851 csum->m_return_returned = bp_unpack_value (&bp, 1);
2852 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
763121cc 2853 csum->m_before_any_store = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2854}
2855
dfea3d6f 2856/* Read intraprocedural analysis information about NODE and all of its outgoing
ff6686d2
MJ
2857 edges into a stream for LTO WPA. */
2858
2859static void
2860isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2861 struct data_in *data_in)
2862{
2863 isra_func_summary *ifs = func_sums->get_create (node);
2864 unsigned param_desc_count = streamer_read_uhwi (ib);
2865 if (param_desc_count > 0)
2866 {
2867 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2868 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2869 }
2870 for (unsigned i = 0; i < param_desc_count; i++)
2871 {
2872 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2873 unsigned access_count = streamer_read_uhwi (ib);
2874 for (unsigned j = 0; j < access_count; j++)
2875 {
2876 param_access *acc = ggc_cleared_alloc<param_access> ();
2877 acc->type = stream_read_tree (ib, data_in);
2878 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2879 acc->unit_offset = streamer_read_uhwi (ib);
2880 acc->unit_size = streamer_read_uhwi (ib);
2881 bitpack_d bp = streamer_read_bitpack (ib);
2882 acc->certain = bp_unpack_value (&bp, 1);
c76b3bc5 2883 acc->reverse = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2884 vec_safe_push (desc->accesses, acc);
2885 }
2886 desc->param_size_limit = streamer_read_uhwi (ib);
2887 desc->size_reached = streamer_read_uhwi (ib);
f2cf4c61 2888 desc->safe_size = 0;
ff6686d2
MJ
2889 bitpack_d bp = streamer_read_bitpack (ib);
2890 desc->locally_unused = bp_unpack_value (&bp, 1);
2891 desc->split_candidate = bp_unpack_value (&bp, 1);
2892 desc->by_ref = bp_unpack_value (&bp, 1);
e3a5cc32 2893 desc->not_specially_constructed = 0;
f2cf4c61
MJ
2894 desc->conditionally_dereferenceable = bp_unpack_value (&bp, 1);
2895 desc->safe_size_set = 0;
ff6686d2
MJ
2896 }
2897 bitpack_d bp = streamer_read_bitpack (ib);
2898 ifs->m_candidate = bp_unpack_value (&bp, 1);
2899 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2900 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2901 ifs->m_queued = 0;
2902
2903 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2904 isra_read_edge_summary (ib, e);
2905 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2906 isra_read_edge_summary (ib, e);
2907}
2908
2909/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
e53b6e56 2910 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
ff6686d2
MJ
2911 it should be possible to unify them somehow. */
2912
2913static void
2914isra_read_summary_section (struct lto_file_decl_data *file_data,
2915 const char *data, size_t len)
2916{
2917 const struct lto_function_header *header =
2918 (const struct lto_function_header *) data;
2919 const int cfg_offset = sizeof (struct lto_function_header);
2920 const int main_offset = cfg_offset + header->cfg_size;
2921 const int string_offset = main_offset + header->main_size;
2922 struct data_in *data_in;
2923 unsigned int i;
2924 unsigned int count;
2925
2926 lto_input_block ib_main ((const char *) data + main_offset,
2927 header->main_size, file_data->mode_table);
2928
2929 data_in =
2930 lto_data_in_create (file_data, (const char *) data + string_offset,
2931 header->string_size, vNULL);
2932 count = streamer_read_uhwi (&ib_main);
2933
2934 for (i = 0; i < count; i++)
2935 {
2936 unsigned int index;
2937 struct cgraph_node *node;
2938 lto_symtab_encoder_t encoder;
2939
2940 index = streamer_read_uhwi (&ib_main);
2941 encoder = file_data->symtab_node_encoder;
2942 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2943 index));
2944 gcc_assert (node->definition);
2945 isra_read_node_info (&ib_main, node, data_in);
2946 }
2947 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2948 len);
2949 lto_data_in_delete (data_in);
2950}
2951
dfea3d6f 2952/* Read intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2953
2954static void
2955ipa_sra_read_summary (void)
2956{
2957 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2958 struct lto_file_decl_data *file_data;
2959 unsigned int j = 0;
2960
2961 gcc_checking_assert (!func_sums);
2962 gcc_checking_assert (!call_sums);
2963 func_sums
78cd68c0 2964 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2965 ipa_sra_function_summaries (symtab, true));
2966 call_sums = new ipa_sra_call_summaries (symtab);
2967
2968 while ((file_data = file_data_vec[j++]))
2969 {
2970 size_t len;
3c56d8d8
ML
2971 const char *data
2972 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
ff6686d2
MJ
2973 if (data)
2974 isra_read_summary_section (file_data, data, len);
2975 }
2976}
2977
e3a5cc32
MJ
2978/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
2979 HINTS is true, also dump IPA-analysis computed hints. */
ff6686d2
MJ
2980
2981static void
e3a5cc32 2982ipa_sra_dump_all_summaries (FILE *f, bool hints)
ff6686d2
MJ
2983{
2984 cgraph_node *node;
2985 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2986 {
2987 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
2988
2989 isra_func_summary *ifs = func_sums->get (node);
2990 if (!ifs)
717d278a
MJ
2991 fprintf (f, " Function does not have any associated IPA-SRA "
2992 "summary\n");
95489a2a
MJ
2993 else if (!ifs->m_candidate)
2994 fprintf (f, " Not a candidate function\n");
717d278a 2995 else
ff6686d2 2996 {
717d278a
MJ
2997 if (ifs->m_returns_value)
2998 fprintf (f, " Returns value\n");
2999 if (vec_safe_is_empty (ifs->m_parameters))
3000 fprintf (f, " No parameter information. \n");
3001 else
3002 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
3003 {
3004 fprintf (f, " Descriptor for parameter %i:\n", i);
e3a5cc32 3005 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
717d278a
MJ
3006 }
3007 fprintf (f, "\n");
ff6686d2 3008 }
ff6686d2
MJ
3009
3010 struct cgraph_edge *cs;
3011 for (cs = node->callees; cs; cs = cs->next_callee)
3012 {
3013 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
3014 cs->callee->dump_name ());
3015 isra_call_summary *csum = call_sums->get (cs);
3016 if (csum)
3017 csum->dump (f);
3018 else
3019 fprintf (f, " Call summary is MISSING!\n");
3020 }
3021
3022 }
3023 fprintf (f, "\n\n");
3024}
3025
3026/* Perform function-scope viability tests that can be only made at IPA level
3027 and return false if the function is deemed unsuitable for IPA-SRA. */
3028
3029static bool
3030ipa_sra_ipa_function_checks (cgraph_node *node)
3031{
3032 if (!node->can_be_local_p ())
3033 {
3034 if (dump_file)
3035 fprintf (dump_file, "Function %s disqualified because it cannot be "
3036 "made local.\n", node->dump_name ());
3037 return false;
3038 }
87f94429 3039 if (!node->can_change_signature)
ff6686d2
MJ
3040 {
3041 if (dump_file)
3042 fprintf (dump_file, "Function can not change signature.\n");
3043 return false;
3044 }
3045
3046 return true;
3047}
3048
3049/* Issues found out by check_callers_for_issues. */
3050
3051struct caller_issues
3052{
b90061c6
MJ
3053 /* The candidate being considered. */
3054 cgraph_node *candidate;
ff6686d2
MJ
3055 /* There is a thunk among callers. */
3056 bool thunk;
3057 /* Call site with no available information. */
3058 bool unknown_callsite;
027e3041 3059 /* Call from outside the candidate's comdat group. */
b90061c6 3060 bool call_from_outside_comdat;
ff6686d2
MJ
3061 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
3062 bool bit_aligned_aggregate_argument;
3063};
3064
3065/* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3066 that apply. */
3067
3068static bool
3069check_for_caller_issues (struct cgraph_node *node, void *data)
3070{
3071 struct caller_issues *issues = (struct caller_issues *) data;
3072
3073 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3074 {
67f3791f 3075 if (cs->caller->thunk)
ff6686d2
MJ
3076 {
3077 issues->thunk = true;
3078 /* TODO: We should be able to process at least some types of
3079 thunks. */
3080 return true;
3081 }
b90061c6
MJ
3082 if (issues->candidate->calls_comdat_local
3083 && issues->candidate->same_comdat_group
3084 && !issues->candidate->in_same_comdat_group_p (cs->caller))
3085 {
3086 issues->call_from_outside_comdat = true;
3087 return true;
3088 }
ff6686d2
MJ
3089
3090 isra_call_summary *csum = call_sums->get (cs);
3091 if (!csum)
3092 {
3093 issues->unknown_callsite = true;
3094 return true;
3095 }
3096
3097 if (csum->m_bit_aligned_arg)
3098 issues->bit_aligned_aggregate_argument = true;
3099 }
3100 return false;
3101}
3102
3103/* Look at all incoming edges to NODE, including aliases and thunks and look
3104 for problems. Return true if NODE type should not be modified at all. */
3105
3106static bool
3107check_all_callers_for_issues (cgraph_node *node)
3108{
3109 struct caller_issues issues;
3110 memset (&issues, 0, sizeof (issues));
b90061c6 3111 issues.candidate = node;
ff6686d2
MJ
3112
3113 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
3114 if (issues.unknown_callsite)
3115 {
3116 if (dump_file && (dump_flags & TDF_DETAILS))
3117 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
3118 "all modifications.\n", node->dump_name ());
3119 return true;
3120 }
3121 /* TODO: We should be able to process at least some types of thunks. */
3122 if (issues.thunk)
3123 {
3124 if (dump_file && (dump_flags & TDF_DETAILS))
3125 fprintf (dump_file, "A call of %s is through thunk, which are not"
3126 " handled yet. Disabling all modifications.\n",
3127 node->dump_name ());
3128 return true;
3129 }
b90061c6
MJ
3130 if (issues.call_from_outside_comdat)
3131 {
3132 if (dump_file)
3133 fprintf (dump_file, "Function would become private comdat called "
3134 "outside of its comdat group.\n");
3135 return true;
3136 }
ff6686d2
MJ
3137
3138 if (issues.bit_aligned_aggregate_argument)
3139 {
3140 /* Let's only remove parameters/return values from such functions.
3141 TODO: We could only prevent splitting the problematic parameters if
3142 anybody thinks it is worth it. */
3143 if (dump_file && (dump_flags & TDF_DETAILS))
dfea3d6f 3144 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
ff6686d2
MJ
3145 " disabling parameter splitting.\n", node->dump_name ());
3146
3147 isra_func_summary *ifs = func_sums->get (node);
3148 gcc_checking_assert (ifs);
3149 unsigned param_count = vec_safe_length (ifs->m_parameters);
3150 for (unsigned i = 0; i < param_count; i++)
3151 (*ifs->m_parameters)[i].split_candidate = false;
3152 }
3153 return false;
3154}
3155
3156/* Find the access with corresponding OFFSET and SIZE among accesses in
3157 PARAM_DESC and return it or NULL if such an access is not there. */
3158
3159static param_access *
3160find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3161{
3162 unsigned pclen = vec_safe_length (param_desc->accesses);
3163
3164 /* The search is linear but the number of stored accesses is bound by
3165 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3166
3167 for (unsigned i = 0; i < pclen; i++)
3168 if ((*param_desc->accesses)[i]->unit_offset == offset
3169 && (*param_desc->accesses)[i]->unit_size == size)
3170 return (*param_desc->accesses)[i];
3171
3172 return NULL;
3173}
3174
3175/* Return iff the total size of definite replacement SIZE would violate the
3176 limit set for it in PARAM. */
3177
3178static bool
3179size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3180{
3181 unsigned limit = desc->param_size_limit;
3182 if (size > limit
3183 || (!desc->by_ref && size == limit))
3184 return true;
3185 return false;
3186}
3187
3188/* Increase reached size of DESC by SIZE or disqualify it if it would violate
3189 the set limit. IDX is the parameter number which is dumped when
3190 disqualifying. */
3191
3192static void
3193bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3194{
3195 unsigned after = desc->size_reached + size;
3196 if (size_would_violate_limit_p (desc, after))
3197 {
3198 if (dump_file && (dump_flags & TDF_DETAILS))
3199 fprintf (dump_file, " ...size limit reached, disqualifying "
3200 "candidate parameter %u\n", idx);
3201 desc->split_candidate = false;
3202 return;
3203 }
3204 desc->size_reached = after;
3205}
3206
3207/* Take all actions required to deal with an edge CS that represents a call to
3208 an unknown or un-analyzed function, for both parameter removal and
3209 splitting. */
3210
3211static void
3212process_edge_to_unknown_caller (cgraph_edge *cs)
3213{
3214 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3215 gcc_checking_assert (from_ifs);
3216 isra_call_summary *csum = call_sums->get (cs);
3217
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3219 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3220 cs->caller->dump_name ());
3221
3222 unsigned args_count = csum->m_arg_flow.length ();
3223 for (unsigned i = 0; i < args_count; i++)
3224 {
3225 isra_param_flow *ipf = &csum->m_arg_flow[i];
3226
3227 if (ipf->pointer_pass_through)
3228 {
3229 isra_param_desc *param_desc
3230 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3231 param_desc->locally_unused = false;
3232 param_desc->split_candidate = false;
3233 continue;
3234 }
3235 if (ipf->aggregate_pass_through)
3236 {
3237 unsigned idx = get_single_param_flow_source (ipf);
3238 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3239
3240 param_desc->locally_unused = false;
3241 if (!param_desc->split_candidate)
3242 continue;
3243 gcc_assert (!param_desc->by_ref);
3244 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3245 ipf->unit_size);
3246 gcc_checking_assert (pacc);
3247 pacc->certain = true;
3248 if (overlapping_certain_accesses_p (param_desc, NULL))
3249 {
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file, " ...leading to overlap, "
3252 " disqualifying candidate parameter %u\n",
3253 idx);
3254 param_desc->split_candidate = false;
3255 }
3256 else
3257 bump_reached_size (param_desc, pacc->unit_size, idx);
3258 ipf->aggregate_pass_through = false;
3259 continue;
3260 }
3261
3262 for (int j = 0; j < ipf->length; j++)
3263 {
3264 int input_idx = ipf->inputs[j];
3265 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3266 }
3267 }
3268}
3269
3270/* Propagate parameter removal information through cross-SCC edge CS,
3271 i.e. decrease the use count in the caller parameter descriptor for each use
3272 in this call. */
3273
3274static void
3275param_removal_cross_scc_edge (cgraph_edge *cs)
3276{
3277 enum availability availability;
3278 cgraph_node *callee = cs->callee->function_symbol (&availability);
3279 isra_func_summary *to_ifs = func_sums->get (callee);
3280 if (!to_ifs || !to_ifs->m_candidate
3281 || (availability < AVAIL_AVAILABLE)
3282 || vec_safe_is_empty (to_ifs->m_parameters))
3283 {
3284 process_edge_to_unknown_caller (cs);
3285 return;
3286 }
3287 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3288 gcc_checking_assert (from_ifs);
3289
3290 isra_call_summary *csum = call_sums->get (cs);
3291 unsigned args_count = csum->m_arg_flow.length ();
3292 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3293
3294 for (unsigned i = 0; i < args_count; i++)
3295 {
3296 bool unused_in_callee;
3297 if (i < param_count)
3298 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3299 else
3300 unused_in_callee = false;
3301
3302 if (!unused_in_callee)
3303 {
3304 isra_param_flow *ipf = &csum->m_arg_flow[i];
3305 for (int j = 0; j < ipf->length; j++)
3306 {
3307 int input_idx = ipf->inputs[j];
3308 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3309 }
3310 }
3311 }
3312}
3313
3314/* Unless it is already there, push NODE which is also described by IFS to
3315 STACK. */
3316
3317static void
3318isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3319 vec<cgraph_node *> *stack)
3320{
3321 if (!ifs->m_queued)
3322 {
3323 ifs->m_queued = true;
3324 stack->safe_push (node);
3325 }
3326}
3327
3328/* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3329 used and push CALLER on STACK. */
3330
3331static void
3332isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3333 cgraph_node *caller, vec<cgraph_node *> *stack)
3334{
3335 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3336 {
3337 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3338 isra_push_node_to_stack (caller, from_ifs, stack);
3339 }
3340}
3341
f2cf4c61 3342/* Combine safe_size of DESC with SIZE and return true if it has changed. */
e3a5cc32 3343
f2cf4c61
MJ
3344static bool
3345update_safe_size (isra_param_desc *desc, unsigned size)
3346{
3347 if (!desc->safe_size_set)
3348 {
3349 desc->safe_size_set = 1;
3350 desc->safe_size = size;
3351 return true;
3352 }
3353 if (desc->safe_size <= size)
3354 return false;
3355 desc->safe_size = size;
3356 return true;
3357}
3358
3359/* Set all param hints in DESC to the pessimistic values. Return true if any
3360 hints that need to potentially trigger further propagation have changed. */
3361
3362static bool
e3a5cc32
MJ
3363flip_all_hints_pessimistic (isra_param_desc *desc)
3364{
3365 desc->not_specially_constructed = true;
f2cf4c61 3366 return update_safe_size (desc, 0);
e3a5cc32
MJ
3367}
3368
f2cf4c61
MJ
3369/* Because we have not analyzed or otherwise problematic caller, go over all
3370 parameter int flags of IFS describing a call graph node of a calllee and
3371 turn them pessimistic. Return true if any hints that need to potentially
3372 trigger further propagation have changed. */
e3a5cc32 3373
f2cf4c61
MJ
3374static bool
3375flip_all_param_hints_pessimistic (isra_func_summary *ifs)
e3a5cc32 3376{
e3a5cc32 3377 if (!ifs || !ifs->m_candidate)
f2cf4c61 3378 return false;
e3a5cc32 3379
f2cf4c61 3380 bool ret = false;
e3a5cc32
MJ
3381 unsigned param_count = vec_safe_length (ifs->m_parameters);
3382
3383 for (unsigned i = 0; i < param_count; i++)
f2cf4c61 3384 ret |= flip_all_hints_pessimistic (&(*ifs->m_parameters)[i]);
e3a5cc32 3385
f2cf4c61 3386 return ret;
e3a5cc32
MJ
3387}
3388
f2cf4c61
MJ
3389/* Propagate hints accross edge CS which ultimately leads to a node described
3390 by TO_IFS. Return true if any hints of the callee which should potentially
3391 trigger further propagation have changed. */
e3a5cc32 3392
f2cf4c61
MJ
3393static bool
3394propagate_param_hints_accross_call (cgraph_edge *cs, isra_func_summary *to_ifs)
e3a5cc32 3395{
e3a5cc32 3396 if (!to_ifs || !to_ifs->m_candidate)
f2cf4c61 3397 return false;
e3a5cc32 3398
f2cf4c61
MJ
3399 isra_call_summary *csum = call_sums->get (cs);
3400 bool ret = false;
e3a5cc32
MJ
3401 unsigned args_count = csum->m_arg_flow.length ();
3402 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3403
3404 for (unsigned i = 0; i < param_count; i++)
3405 {
3406 isra_param_desc *desc = &(*to_ifs->m_parameters)[i];
3407 if (i >= args_count)
3408 {
f2cf4c61 3409 ret |= flip_all_hints_pessimistic (desc);
e3a5cc32
MJ
3410 continue;
3411 }
3412
f2cf4c61
MJ
3413 if (desc->by_ref)
3414 {
3415 isra_param_flow *ipf = &csum->m_arg_flow[i];
3416
3417 if (!ipf->constructed_for_calls)
3418 desc->not_specially_constructed = true;
3419
3420 if (ipf->pointer_pass_through)
3421 {
3422 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3423 int srcidx = get_single_param_flow_source (ipf);
3424 if (vec_safe_length (from_ifs->m_parameters) > (unsigned) srcidx)
3425 {
3426 isra_param_desc *src_d = &(*from_ifs->m_parameters)[srcidx];
3427 if (src_d->safe_size_set)
3428 ret |= update_safe_size (desc, src_d->safe_size);
3429 }
3430 else
3431 ret |= update_safe_size (desc, 0);
3432 }
3433 else if (!ipf->aggregate_pass_through)
3434 ret |= update_safe_size (desc, ipf->unit_size);
3435 else
3436 /* LTOing type-mismatched programs can end up here. */
3437 ret |= update_safe_size (desc, 0);
3438 }
3439 }
3440 return ret;
3441}
3442
3443/* Propagate hints from NODE described by FROM_IFS to all its (dorect) callees,
3444 push those that may need re-visiting onto STACK. */
3445
3446static void
3447propagate_hints_to_all_callees (cgraph_node *node, isra_func_summary *from_ifs,
3448 vec<cgraph_node *> *stack)
3449{
3450 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3451 {
3452 enum availability availability;
3453 cgraph_node *callee = cs->callee->function_symbol (&availability);
3454 isra_func_summary *to_ifs = func_sums->get (callee);
3455 if (!from_ifs)
3456 {
3457 if (flip_all_param_hints_pessimistic (to_ifs)
3458 && ipa_edge_within_scc (cs))
3459 isra_push_node_to_stack (callee, to_ifs, stack);
3460 }
3461 else if (propagate_param_hints_accross_call (cs, to_ifs)
3462 && ipa_edge_within_scc (cs))
3463 isra_push_node_to_stack (callee, to_ifs, stack);
e3a5cc32 3464 }
e3a5cc32 3465}
ff6686d2
MJ
3466
3467/* Propagate information that any parameter is not used only locally within a
6b6a8065 3468 SCC across CS to the caller, which must be in the same SCC as the
c76b3bc5 3469 callee. Push any callers that need to be re-processed to STACK. */
ff6686d2
MJ
3470
3471static void
3472propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3473{
3474 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3475 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3476 return;
3477
3478 isra_call_summary *csum = call_sums->get (cs);
3479 gcc_checking_assert (csum);
3480 unsigned args_count = csum->m_arg_flow.length ();
3481 enum availability availability;
3482 cgraph_node *callee = cs->callee->function_symbol (&availability);
3483 isra_func_summary *to_ifs = func_sums->get (callee);
3484
3485 unsigned param_count
3486 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3487 ? vec_safe_length (to_ifs->m_parameters) : 0;
3488 for (unsigned i = 0; i < args_count; i++)
3489 {
3490 if (i < param_count
3491 && (*to_ifs->m_parameters)[i].locally_unused)
3492 continue;
3493
3494 /* The argument is needed in the callee it, we must mark the parameter as
3495 used also in the caller and its callers within this SCC. */
3496 isra_param_flow *ipf = &csum->m_arg_flow[i];
3497 for (int j = 0; j < ipf->length; j++)
3498 {
3499 int input_idx = ipf->inputs[j];
3500 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3501 }
3502 }
3503}
3504
3505/* Propagate information that any parameter is not used only locally within a
3506 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
c76b3bc5 3507 same SCC. Push any callers that need to be re-processed to STACK. */
ff6686d2
MJ
3508
3509static bool
3510propagate_used_to_scc_callers (cgraph_node *node, void *data)
3511{
3512 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3513 cgraph_edge *cs;
3514 for (cs = node->callers; cs; cs = cs->next_caller)
3515 if (ipa_edge_within_scc (cs))
3516 propagate_used_across_scc_edge (cs, stack);
3517 return false;
3518}
3519
3520/* Return true iff all certain accesses in ARG_DESC are also present as
3521 certain accesses in PARAM_DESC. */
3522
3523static bool
3524all_callee_accesses_present_p (isra_param_desc *param_desc,
3525 isra_param_desc *arg_desc)
3526{
3527 unsigned aclen = vec_safe_length (arg_desc->accesses);
3528 for (unsigned j = 0; j < aclen; j++)
3529 {
3530 param_access *argacc = (*arg_desc->accesses)[j];
3531 if (!argacc->certain)
3532 continue;
3533 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3534 argacc->unit_size);
b9a15a83
MJ
3535 if (!pacc
3536 || !pacc->certain
3537 || !types_compatible_p (argacc->type, pacc->type))
ff6686d2
MJ
3538 return false;
3539 }
3540 return true;
3541}
3542
/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3547
3548
1a315435
MJ
3549/* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3550 (which belongs to CALLER) if they would not violate some constraint there.
3551 If successful, return NULL, otherwise return the string reason for failure
3552 (which can be written to the dump file). DELTA_OFFSET is the known offset
3553 of the actual argument withing the formal parameter (so of ARG_DESCS within
3554 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3555 known. In case of success, set *CHANGE_P to true if propagation actually
3556 changed anything. */
ff6686d2
MJ
3557
3558static const char *
1a315435 3559pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
ff6686d2
MJ
3560 isra_param_desc *arg_desc,
3561 unsigned delta_offset, unsigned arg_size,
3562 bool *change_p)
3563{
3564 unsigned pclen = vec_safe_length (param_desc->accesses);
3565 unsigned aclen = vec_safe_length (arg_desc->accesses);
3566 unsigned prop_count = 0;
3567 unsigned prop_size = 0;
3568 bool change = false;
3569
3570 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3571 for (unsigned j = 0; j < aclen; j++)
3572 {
3573 param_access *argacc = (*arg_desc->accesses)[j];
3574 prop_kinds.safe_push (ACC_PROP_DONT);
3575
3576 if (arg_size > 0
3577 && argacc->unit_offset + argacc->unit_size > arg_size)
3578 return "callee access outsize size boundary";
3579
3580 if (!argacc->certain)
3581 continue;
3582
3583 unsigned offset = argacc->unit_offset + delta_offset;
3584 /* Given that accesses are initially stored according to increasing
3585 offset and decreasing size in case of equal offsets, the following
3586 searches could be written more efficiently if we kept the ordering
3587 when copying. But the number of accesses is capped at
3588 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3589 messy quickly, so let's improve on that only if necessary. */
3590
3591 bool exact_match = false;
3592 for (unsigned i = 0; i < pclen; i++)
3593 {
3594 /* Check for overlaps. */
3595 param_access *pacc = (*param_desc->accesses)[i];
3596 if (pacc->unit_offset == offset
3597 && pacc->unit_size == argacc->unit_size)
3598 {
3599 if (argacc->alias_ptr_type != pacc->alias_ptr_type
c76b3bc5
EB
3600 || !types_compatible_p (argacc->type, pacc->type)
3601 || argacc->reverse != pacc->reverse)
ff6686d2
MJ
3602 return "propagated access types would not match existing ones";
3603
3604 exact_match = true;
3605 if (!pacc->certain)
3606 {
3607 prop_kinds[j] = ACC_PROP_CERTAIN;
3608 prop_size += argacc->unit_size;
3609 change = true;
3610 }
3611 continue;
3612 }
3613
3614 if (offset < pacc->unit_offset + pacc->unit_size
3615 && offset + argacc->unit_size > pacc->unit_offset)
3616 {
3617 /* None permissible with load accesses, possible to fit into
3618 argument ones. */
3619 if (pacc->certain
3620 || offset < pacc->unit_offset
3621 || (offset + argacc->unit_size
3622 > pacc->unit_offset + pacc->unit_size))
3623 return "a propagated access would conflict in caller";
3624 }
3625 }
3626
3627 if (!exact_match)
3628 {
3629 prop_kinds[j] = ACC_PROP_COPY;
3630 prop_count++;
3631 prop_size += argacc->unit_size;
3632 change = true;
3633 }
3634 }
3635
3636 if (!change)
3637 return NULL;
3638
3639 if ((prop_count + pclen
1a315435 3640 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
ff6686d2
MJ
3641 || size_would_violate_limit_p (param_desc,
3642 param_desc->size_reached + prop_size))
3643 return "propagating accesses would violate the count or size limit";
3644
3645 *change_p = true;
3646 for (unsigned j = 0; j < aclen; j++)
3647 {
3648 if (prop_kinds[j] == ACC_PROP_COPY)
3649 {
3650 param_access *argacc = (*arg_desc->accesses)[j];
3651
3652 param_access *copy = ggc_cleared_alloc<param_access> ();
3653 copy->unit_offset = argacc->unit_offset + delta_offset;
3654 copy->unit_size = argacc->unit_size;
3655 copy->type = argacc->type;
3656 copy->alias_ptr_type = argacc->alias_ptr_type;
3657 copy->certain = true;
c76b3bc5 3658 copy->reverse = argacc->reverse;
ff6686d2
MJ
3659 vec_safe_push (param_desc->accesses, copy);
3660 }
3661 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3662 {
3663 param_access *argacc = (*arg_desc->accesses)[j];
3664 param_access *csp
3665 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3666 argacc->unit_size);
3667 csp->certain = true;
3668 }
3669 }
3670
3671 param_desc->size_reached += prop_size;
3672
3673 return NULL;
3674}
3675
3676/* Propagate parameter splitting information through call graph edge CS.
3677 Return true if any changes that might need to be propagated within SCCs have
3678 been made. The function also clears the aggregate_pass_through and
dfea3d6f 3679 pointer_pass_through in call summaries which do not need to be processed
ff6686d2
MJ
3680 again if this CS is revisited when iterating while changes are propagated
3681 within an SCC. */
3682
3683static bool
3684param_splitting_across_edge (cgraph_edge *cs)
3685{
3686 bool res = false;
3687 bool cross_scc = !ipa_edge_within_scc (cs);
3688 enum availability availability;
3689 cgraph_node *callee = cs->callee->function_symbol (&availability);
3690 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3691 gcc_checking_assert (from_ifs && from_ifs->m_parameters);
3692
3693 isra_call_summary *csum = call_sums->get (cs);
3694 gcc_checking_assert (csum);
3695 unsigned args_count = csum->m_arg_flow.length ();
3696 isra_func_summary *to_ifs = func_sums->get (callee);
3697 unsigned param_count
3698 = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
3699 ? vec_safe_length (to_ifs->m_parameters)
3700 : 0);
3701
3702 if (dump_file && (dump_flags & TDF_DETAILS))
6b6a8065 3703 fprintf (dump_file, "Splitting across %s->%s:\n",
ff6686d2
MJ
3704 cs->caller->dump_name (), callee->dump_name ());
3705
3706 unsigned i;
3707 for (i = 0; (i < args_count) && (i < param_count); i++)
3708 {
3709 isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
3710 isra_param_flow *ipf = &csum->m_arg_flow[i];
3711
3712 if (arg_desc->locally_unused)
3713 {
3714 if (dump_file && (dump_flags & TDF_DETAILS))
3715 fprintf (dump_file, " ->%u: unused in callee\n", i);
3716 ipf->pointer_pass_through = false;
3717 continue;
3718 }
3719
3720 if (ipf->pointer_pass_through)
3721 {
3722 int idx = get_single_param_flow_source (ipf);
3723 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3724 if (!param_desc->split_candidate)
3725 continue;
3726 gcc_assert (param_desc->by_ref);
3727
3728 if (!arg_desc->split_candidate || !arg_desc->by_ref)
3729 {
3730 if (dump_file && (dump_flags & TDF_DETAILS))
3731 fprintf (dump_file, " %u->%u: not candidate or not by "
3732 "reference in callee\n", idx, i);
3733 param_desc->split_candidate = false;
3734 ipf->pointer_pass_through = false;
3735 res = true;
3736 }
3737 else if (!ipf->safe_to_import_accesses)
3738 {
763121cc
MJ
3739 if (!csum->m_before_any_store
3740 || !all_callee_accesses_present_p (param_desc, arg_desc))
ff6686d2
MJ
3741 {
3742 if (dump_file && (dump_flags & TDF_DETAILS))
3743 fprintf (dump_file, " %u->%u: cannot import accesses.\n",
3744 idx, i);
3745 param_desc->split_candidate = false;
3746 ipf->pointer_pass_through = false;
3747 res = true;
3748
3749 }
3750 else
3751 {
3752 if (dump_file && (dump_flags & TDF_DETAILS))
3753 fprintf (dump_file, " %u->%u: verified callee accesses "
3754 "present.\n", idx, i);
3755 if (cross_scc)
3756 ipf->pointer_pass_through = false;
3757 }
3758 }
3759 else
3760 {
3761 const char *pull_failure
1a315435
MJ
3762 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
3763 0, 0, &res);
ff6686d2
MJ
3764 if (pull_failure)
3765 {
3766 if (dump_file && (dump_flags & TDF_DETAILS))
3767 fprintf (dump_file, " %u->%u: by_ref access pull "
3768 "failed: %s.\n", idx, i, pull_failure);
3769 param_desc->split_candidate = false;
3770 ipf->pointer_pass_through = false;
3771 res = true;
3772 }
3773 else
3774 {
3775 if (dump_file && (dump_flags & TDF_DETAILS))
3776 fprintf (dump_file, " %u->%u: by_ref access pull "
3777 "succeeded.\n", idx, i);
3778 if (cross_scc)
3779 ipf->pointer_pass_through = false;
3780 }
3781 }
3782 }
3783 else if (ipf->aggregate_pass_through)
3784 {
3785 int idx = get_single_param_flow_source (ipf);
3786 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3787 if (!param_desc->split_candidate)
3788 continue;
3789 gcc_assert (!param_desc->by_ref);
3790 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3791 ipf->unit_size);
3792 gcc_checking_assert (pacc);
3793
3794 if (pacc->certain)
3795 {
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, " %u->%u: already certain\n", idx, i);
3798 ipf->aggregate_pass_through = false;
3799 }
3800 else if (!arg_desc->split_candidate || arg_desc->by_ref)
3801 {
3802 if (dump_file && (dump_flags & TDF_DETAILS))
3803 fprintf (dump_file, " %u->%u: not candidate or by "
3804 "reference in callee\n", idx, i);
3805
3806 pacc->certain = true;
3807 if (overlapping_certain_accesses_p (param_desc, NULL))
3808 {
3809 if (dump_file && (dump_flags & TDF_DETAILS))
3810 fprintf (dump_file, " ...leading to overlap, "
3811 " disqualifying candidate parameter %u\n",
3812 idx);
3813 param_desc->split_candidate = false;
3814 }
3815 else
3816 bump_reached_size (param_desc, pacc->unit_size, idx);
3817
3818 ipf->aggregate_pass_through = false;
3819 res = true;
3820 }
3821 else
3822 {
3823 const char *pull_failure
1a315435 3824 = pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
ff6686d2
MJ
3825 ipf->unit_offset,
3826 ipf->unit_size, &res);
3827 if (pull_failure)
3828 {
3829 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, " %u->%u: arg access pull "
3831 "failed: %s.\n", idx, i, pull_failure);
3832
3833 ipf->aggregate_pass_through = false;
3834 pacc->certain = true;
3835
3836 if (overlapping_certain_accesses_p (param_desc, NULL))
3837 {
3838 if (dump_file && (dump_flags & TDF_DETAILS))
3839 fprintf (dump_file, " ...leading to overlap, "
3840 " disqualifying candidate parameter %u\n",
3841 idx);
3842 param_desc->split_candidate = false;
3843 }
3844 else
3845 bump_reached_size (param_desc, pacc->unit_size, idx);
3846
3847 res = true;
3848 }
3849 else
3850 {
3851 if (dump_file && (dump_flags & TDF_DETAILS))
3852 fprintf (dump_file, " %u->%u: arg access pull "
3853 "succeeded.\n", idx, i);
3854 if (cross_scc)
3855 ipf->aggregate_pass_through = false;
3856 }
3857 }
3858 }
3859 }
3860
3861 /* Handle argument-parameter count mismatches. */
3862 for (; (i < args_count); i++)
3863 {
3864 isra_param_flow *ipf = &csum->m_arg_flow[i];
3865
3866 if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
3867 {
3868 int idx = get_single_param_flow_source (ipf);
3869 ipf->pointer_pass_through = false;
3870 ipf->aggregate_pass_through = false;
3871 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3872 if (!param_desc->split_candidate)
3873 continue;
3874
3875 if (dump_file && (dump_flags & TDF_DETAILS))
3876 fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
3877 idx, i);
3878 param_desc->split_candidate = false;
3879 res = true;
3880 }
3881 }
3882 return res;
3883}
3884
3885/* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3886 callers ignore the return value, or come from the same SCC and use the
3887 return value only to compute their return value, return false, otherwise
3888 return true. */
3889
3890static bool
3891retval_used_p (cgraph_node *node, void *)
3892{
3893 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3894 {
3895 isra_call_summary *csum = call_sums->get (cs);
3896 gcc_checking_assert (csum);
3897 if (csum->m_return_ignored)
3898 continue;
3899 if (!csum->m_return_returned)
3900 return true;
3901
3902 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3903 if (!from_ifs || !from_ifs->m_candidate)
3904 return true;
3905
3906 if (!ipa_edge_within_scc (cs)
3907 && !from_ifs->m_return_ignored)
3908 return true;
3909 }
3910
3911 return false;
3912}
3913
3914/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
3915 modify parameter which originally had index BASE_INDEX, in the adjustment
3916 vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
4834e936
MJ
3917 PREV_ADJUSTMENT. If IPA-CP has created a transformation summary for the
3918 original node, it needs to be passed in IPCP_TS, otherwise it should be
3919 NULL. If the parent clone is the original function, PREV_ADJUSTMENT is NULL
3920 and PREV_CLONE_INDEX is equal to BASE_INDEX. */
ff6686d2 3921
ff6686d2
MJ
3922static void
3923push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
3924 unsigned prev_clone_index,
3925 ipa_adjusted_param *prev_adjustment,
4834e936 3926 ipcp_transformation *ipcp_ts,
ff6686d2
MJ
3927 vec<ipa_adjusted_param, va_gc> **new_params)
3928{
3929 isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
3930 if (desc->locally_unused)
3931 {
3932 if (dump_file)
3933 fprintf (dump_file, " Will remove parameter %u\n", base_index);
3934 return;
3935 }
3936
3937 if (!desc->split_candidate)
3938 {
3939 ipa_adjusted_param adj;
3940 if (prev_adjustment)
3941 {
3942 adj = *prev_adjustment;
3943 adj.prev_clone_adjustment = true;
3944 adj.prev_clone_index = prev_clone_index;
3945 }
3946 else
3947 {
3948 memset (&adj, 0, sizeof (adj));
3949 adj.op = IPA_PARAM_OP_COPY;
3950 adj.base_index = base_index;
3951 adj.prev_clone_index = prev_clone_index;
3952 }
3953 vec_safe_push ((*new_params), adj);
3954 return;
3955 }
3956
3957 if (dump_file)
3958 fprintf (dump_file, " Will split parameter %u\n", base_index);
3959
3960 gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
3961 unsigned aclen = vec_safe_length (desc->accesses);
3962 for (unsigned j = 0; j < aclen; j++)
3963 {
3964 param_access *pa = (*desc->accesses)[j];
3965 if (!pa->certain)
3966 continue;
4834e936
MJ
3967
3968 if (ipcp_ts)
3969 {
3970 ipa_argagg_value_list avl (ipcp_ts);
3971 tree value = avl.get_value (base_index, pa->unit_offset);
3972 if (value
3973 && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value))) / BITS_PER_UNIT
3974 == pa->unit_size))
3975 {
3976 if (dump_file)
3977 fprintf (dump_file, " - omitting component at byte "
3978 "offset %u which is known to have a constant value\n ",
3979 pa->unit_offset);
3980 continue;
3981 }
3982 }
3983
ff6686d2
MJ
3984 if (dump_file)
3985 fprintf (dump_file, " - component at byte offset %u, "
3986 "size %u\n", pa->unit_offset, pa->unit_size);
3987
3988 ipa_adjusted_param adj;
3989 memset (&adj, 0, sizeof (adj));
3990 adj.op = IPA_PARAM_OP_SPLIT;
3991 adj.base_index = base_index;
3992 adj.prev_clone_index = prev_clone_index;
3993 adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
3994 adj.reverse = pa->reverse;
3995 adj.type = pa->type;
3996 adj.alias_ptr_type = pa->alias_ptr_type;
3997 adj.unit_offset = pa->unit_offset;
3998 vec_safe_push ((*new_params), adj);
3999 }
4000}
4001
b90061c6
MJ
4002/* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
4003 flag of all callers of NODE. */
4004
4005static bool
4006mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
4007{
4008 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4009 cs->caller->calls_comdat_local = true;
4010 return false;
4011}
4012
ff6686d2 4013
dfea3d6f 4014/* Do final processing of results of IPA propagation regarding NODE, clone it
ff6686d2
MJ
4015 if appropriate. */
4016
4017static void
4018process_isra_node_results (cgraph_node *node,
4019 hash_map<const char *, unsigned> *clone_num_suffixes)
4020{
4021 isra_func_summary *ifs = func_sums->get (node);
4022 if (!ifs || !ifs->m_candidate)
4023 return;
4024
4025 auto_vec<bool, 16> surviving_params;
4026 bool check_surviving = false;
ae7a23a3
JH
4027 clone_info *cinfo = clone_info::get (node);
4028 if (cinfo && cinfo->param_adjustments)
ff6686d2
MJ
4029 {
4030 check_surviving = true;
ae7a23a3 4031 cinfo->param_adjustments->get_surviving_params (&surviving_params);
ff6686d2
MJ
4032 }
4033
4034 unsigned param_count = vec_safe_length (ifs->m_parameters);
4035 bool will_change_function = false;
4036 if (ifs->m_returns_value && ifs->m_return_ignored)
4037 will_change_function = true;
4038 else
4039 for (unsigned i = 0; i < param_count; i++)
4040 {
4041 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4042 if ((desc->locally_unused || desc->split_candidate)
4043 /* Make sure we do not clone just to attempt to remove an already
4044 removed unused argument. */
4045 && (!check_surviving
4046 || (i < surviving_params.length ()
4047 && surviving_params[i])))
4048 {
4049 will_change_function = true;
4050 break;
4051 }
4052 }
4053 if (!will_change_function)
4054 return;
4055
4056 if (dump_file)
4057 {
4058 fprintf (dump_file, "\nEvaluating analysis results for %s\n",
4059 node->dump_name ());
4060 if (ifs->m_returns_value && ifs->m_return_ignored)
4061 fprintf (dump_file, " Will remove return value.\n");
4062 }
4063
4834e936 4064 ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
ff6686d2 4065 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
ae7a23a3
JH
4066 if (ipa_param_adjustments *old_adjustments
4067 = cinfo ? cinfo->param_adjustments : NULL)
ff6686d2
MJ
4068 {
4069 unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
4070 for (unsigned i = 0; i < old_adj_len; i++)
4071 {
4072 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
4073 push_param_adjustments_for_index (ifs, old_adj->base_index, i,
4834e936 4074 old_adj, ipcp_ts, &new_params);
ff6686d2
MJ
4075 }
4076 }
4077 else
4078 for (unsigned i = 0; i < param_count; i++)
4834e936 4079 push_param_adjustments_for_index (ifs, i, i, NULL, ipcp_ts, &new_params);
ff6686d2
MJ
4080
4081 ipa_param_adjustments *new_adjustments
4082 = (new (ggc_alloc <ipa_param_adjustments> ())
4083 ipa_param_adjustments (new_params, param_count,
4084 ifs->m_returns_value && ifs->m_return_ignored));
4085
4086 if (dump_file && (dump_flags & TDF_DETAILS))
4087 {
4088 fprintf (dump_file, "\n Created adjustments:\n");
4089 new_adjustments->dump (dump_file);
4090 }
4091
4092 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
4093 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
4094 node->decl)));
265af872 4095 auto_vec<cgraph_edge *> callers = node->collect_callers ();
ff6686d2
MJ
4096 cgraph_node *new_node
4097 = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
4098 suffix_counter);
4099 suffix_counter++;
b90061c6
MJ
4100 if (node->calls_comdat_local && node->same_comdat_group)
4101 {
4102 new_node->add_to_same_comdat_group (node);
4103 new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
4104 NULL, true);
4105 }
ed649cda 4106 new_node->calls_comdat_local = node->calls_comdat_local;
ff6686d2
MJ
4107
4108 if (dump_file)
4109 fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
4110 callers.release ();
4111}
4112
4113/* Check which parameters of NODE described by IFS have survived until IPA-SRA
4114 and disable transformations for those which have not or which should not
4115 transformed because the associated debug counter reached its limit. Return
e3a5cc32
MJ
4116 true if none survived or if there were no candidates to begin with.
4117 Additionally, also adjust parameter descriptions based on debug counters and
4118 hints propagated earlier. */
ff6686d2
MJ
4119
4120static bool
e3a5cc32 4121adjust_parameter_descriptions (cgraph_node *node, isra_func_summary *ifs)
ff6686d2
MJ
4122{
4123 bool ret = true;
4124 unsigned len = vec_safe_length (ifs->m_parameters);
4125 if (!len)
4126 return true;
4127
4128 auto_vec<bool, 16> surviving_params;
4129 bool check_surviving = false;
ae7a23a3
JH
4130 clone_info *cinfo = clone_info::get (node);
4131 if (cinfo && cinfo->param_adjustments)
ff6686d2
MJ
4132 {
4133 check_surviving = true;
ae7a23a3 4134 cinfo->param_adjustments->get_surviving_params (&surviving_params);
ff6686d2 4135 }
f2cf4c61
MJ
4136 auto_vec <unsigned> dump_dead_indices;
4137 auto_vec <unsigned> dump_bad_cond_indices;
ff6686d2
MJ
4138 for (unsigned i = 0; i < len; i++)
4139 {
4140 isra_param_desc *desc = &(*ifs->m_parameters)[i];
4141 if (!dbg_cnt (ipa_sra_params))
4142 {
4143 desc->locally_unused = false;
4144 desc->split_candidate = false;
4145 }
4146 else if (check_surviving
4147 && (i >= surviving_params.length ()
4148 || !surviving_params[i]))
4149 {
4150 /* Even if the parameter was removed by a previous IPA pass, we do
4151 not clear locally_unused because if it really is unused, this
4152 information might be useful in callers. */
4153 desc->split_candidate = false;
4154
4155 if (dump_file && (dump_flags & TDF_DETAILS))
f2cf4c61 4156 dump_dead_indices.safe_push (i);
ff6686d2 4157 }
e3a5cc32
MJ
4158 else
4159 {
f2cf4c61
MJ
4160 if (desc->split_candidate && desc->conditionally_dereferenceable)
4161 {
4162 gcc_assert (desc->safe_size_set);
4163 for (param_access *pa : *desc->accesses)
4164 if ((pa->unit_offset + pa->unit_size) > desc->safe_size)
4165 {
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 dump_bad_cond_indices.safe_push (i);
4168 desc->split_candidate = false;
4169 break;
4170 }
4171 }
4172
e3a5cc32
MJ
4173 if (desc->split_candidate)
4174 {
4175 if (desc->by_ref && !desc->not_specially_constructed)
4176 {
4177 int extra_factor
4178 = opt_for_fn (node->decl,
4179 param_ipa_sra_ptrwrap_growth_factor);
4180 desc->param_size_limit = extra_factor * desc->param_size_limit;
4181 }
4182 if (size_would_violate_limit_p (desc, desc->size_reached))
4183 desc->split_candidate = false;
4184 }
4185 if (desc->locally_unused || desc->split_candidate)
4186 ret = false;
4187 }
ff6686d2
MJ
4188 }
4189
f2cf4c61
MJ
4190 if (!dump_dead_indices.is_empty ())
4191 {
4192 fprintf (dump_file, "The following parameters of %s are dead on arrival:",
4193 node->dump_name ());
4194 for (unsigned i : dump_dead_indices)
4195 fprintf (dump_file, " %u", i);
4196 fprintf (dump_file, "\n");
4197 }
4198 if (!dump_bad_cond_indices.is_empty ())
4199 {
4200 fprintf (dump_file, "The following parameters of %s are not safe to "
4201 "derefernce in all callers:", node->dump_name ());
4202 for (unsigned i : dump_bad_cond_indices)
4203 fprintf (dump_file, " %u", i);
4204 fprintf (dump_file, "\n");
4205 }
ff6686d2
MJ
4206
4207 return ret;
4208}
4209
4210
4211/* Run the interprocedural part of IPA-SRA. */
4212
4213static unsigned int
4214ipa_sra_analysis (void)
4215{
4216 if (dump_file)
4217 {
4218 fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
e3a5cc32 4219 ipa_sra_dump_all_summaries (dump_file, false);
ff6686d2
MJ
4220 }
4221
4222 gcc_checking_assert (func_sums);
4223 gcc_checking_assert (call_sums);
4224 cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
4225 auto_vec <cgraph_node *, 16> stack;
4226 int node_scc_count = ipa_reduced_postorder (order, true, NULL);
4227
803a9133
MJ
4228 /* One sweep from callers to callees for return value removal. */
4229 for (int i = node_scc_count - 1; i >= 0 ; i--)
ff6686d2
MJ
4230 {
4231 cgraph_node *scc_rep = order[i];
4232 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
ff6686d2 4233
803a9133
MJ
4234 /* Preliminary IPA function level checks. */
4235 for (cgraph_node *v : cycle_nodes)
ff6686d2
MJ
4236 {
4237 isra_func_summary *ifs = func_sums->get (v);
4238 if (!ifs || !ifs->m_candidate)
4239 continue;
4240 if (!ipa_sra_ipa_function_checks (v)
4241 || check_all_callers_for_issues (v))
803a9133
MJ
4242 ifs->zap ();
4243 }
4244
4245 for (cgraph_node *v : cycle_nodes)
4246 {
4247 isra_func_summary *ifs = func_sums->get (v);
4248 if (!ifs || !ifs->m_candidate)
4249 continue;
4250 bool return_needed
4251 = (ifs->m_returns_value
4252 && (!dbg_cnt (ipa_sra_retvalues)
4253 || v->call_for_symbol_and_aliases (retval_used_p,
4254 NULL, true)));
4255 ifs->m_return_ignored = !return_needed;
4256 if (return_needed)
4257 isra_push_node_to_stack (v, ifs, &stack);
4258 }
4259
4260 while (!stack.is_empty ())
4261 {
4262 cgraph_node *node = stack.pop ();
4263 isra_func_summary *ifs = func_sums->get (node);
4264 gcc_checking_assert (ifs && ifs->m_queued);
4265 ifs->m_queued = false;
4266
4267 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4268 if (ipa_edge_within_scc (cs)
4269 && call_sums->get (cs)->m_return_returned)
4270 {
4271 enum availability av;
4272 cgraph_node *callee = cs->callee->function_symbol (&av);
4273 isra_func_summary *to_ifs = func_sums->get (callee);
4274 if (to_ifs && to_ifs->m_return_ignored)
4275 {
4276 to_ifs->m_return_ignored = false;
4277 isra_push_node_to_stack (callee, to_ifs, &stack);
4278 }
4279 }
4280 }
e3a5cc32
MJ
4281
4282 /* Parameter hint propagation. */
4283 for (cgraph_node *v : cycle_nodes)
4284 {
4285 isra_func_summary *ifs = func_sums->get (v);
f2cf4c61
MJ
4286 propagate_hints_to_all_callees (v, ifs, &stack);
4287 }
4288
4289 while (!stack.is_empty ())
4290 {
4291 cgraph_node *node = stack.pop ();
4292 isra_func_summary *ifs = func_sums->get (node);
4293 gcc_checking_assert (ifs && ifs->m_queued);
4294 ifs->m_queued = false;
4295 propagate_hints_to_all_callees (node, ifs, &stack);
e3a5cc32
MJ
4296 }
4297
803a9133
MJ
4298 cycle_nodes.release ();
4299 }
4300
4301 /* One sweep from callees to callers for parameter removal and splitting. */
4302 for (int i = 0; i < node_scc_count; i++)
4303 {
4304 cgraph_node *scc_rep = order[i];
4305 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);
4306
4307 /* First step of parameter removal. */
4308 for (cgraph_node *v : cycle_nodes)
4309 {
4310 isra_func_summary *ifs = func_sums->get (v);
4311 if (!ifs || !ifs->m_candidate)
4312 continue;
e3a5cc32 4313 if (adjust_parameter_descriptions (v, ifs))
ff6686d2
MJ
4314 continue;
4315 for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
4316 process_edge_to_unknown_caller (cs);
4317 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4318 if (!ipa_edge_within_scc (cs))
4319 param_removal_cross_scc_edge (cs);
4320 }
4321
6b6a8065
JJ
4322 /* Look at edges within the current SCC and propagate used-ness across
4323 them, pushing onto the stack all notes which might need to be
4324 revisited. */
803a9133 4325 for (cgraph_node *v : cycle_nodes)
ff6686d2
MJ
4326 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4327 &stack, true);
4328
4329 /* Keep revisiting and pushing until nothing changes. */
4330 while (!stack.is_empty ())
4331 {
4332 cgraph_node *v = stack.pop ();
4333 isra_func_summary *ifs = func_sums->get (v);
4334 gcc_checking_assert (ifs && ifs->m_queued);
4335 ifs->m_queued = false;
4336
4337 v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
4338 &stack, true);
4339 }
4340
4341 /* Parameter splitting. */
4342 bool repeat_scc_access_propagation;
4343 do
4344 {
4345 repeat_scc_access_propagation = false;
803a9133 4346 for (cgraph_node *v : cycle_nodes)
ff6686d2
MJ
4347 {
4348 isra_func_summary *ifs = func_sums->get (v);
4349 if (!ifs
4350 || !ifs->m_candidate
4351 || vec_safe_is_empty (ifs->m_parameters))
4352 continue;
4353 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
4354 if (param_splitting_across_edge (cs))
4355 repeat_scc_access_propagation = true;
4356 }
4357 }
4358 while (repeat_scc_access_propagation);
4359
4360 if (flag_checking)
803a9133 4361 for (cgraph_node *v : cycle_nodes)
ff6686d2
MJ
4362 verify_splitting_accesses (v, true);
4363
4364 cycle_nodes.release ();
4365 }
4366
ff6686d2
MJ
4367 ipa_free_postorder_info ();
4368 free (order);
4369
4370 if (dump_file)
4371 {
4372 if (dump_flags & TDF_DETAILS)
4373 {
4374 fprintf (dump_file, "\n========== IPA-SRA propagation final state "
4375 " ==========\n");
e3a5cc32 4376 ipa_sra_dump_all_summaries (dump_file, true);
ff6686d2
MJ
4377 }
4378 fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
4379 }
4380
4381 hash_map<const char *, unsigned> *clone_num_suffixes
4382 = new hash_map<const char *, unsigned>;
4383
4384 cgraph_node *node;
4385 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
4386 process_isra_node_results (node, clone_num_suffixes);
4387
4388 delete clone_num_suffixes;
ddf628e4 4389 ggc_delete (func_sums);
ff6686d2 4390 func_sums = NULL;
78cd68c0 4391 delete call_sums;
ff6686d2
MJ
4392 call_sums = NULL;
4393
4394 if (dump_file)
4395 fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
4396 "==========\n\n");
4397 return 0;
4398}
4399
4400
4401const pass_data pass_data_ipa_sra =
4402{
4403 IPA_PASS, /* type */
4404 "sra", /* name */
4405 OPTGROUP_NONE, /* optinfo_flags */
4406 TV_IPA_SRA, /* tv_id */
4407 0, /* properties_required */
4408 0, /* properties_provided */
4409 0, /* properties_destroyed */
4410 0, /* todo_flags_start */
4411 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
4412};
4413
4414class pass_ipa_sra : public ipa_opt_pass_d
4415{
4416public:
4417 pass_ipa_sra (gcc::context *ctxt)
4418 : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
4419 ipa_sra_generate_summary, /* generate_summary */
4420 ipa_sra_write_summary, /* write_summary */
4421 ipa_sra_read_summary, /* read_summary */
4422 NULL , /* write_optimization_summary */
4423 NULL, /* read_optimization_summary */
4424 NULL, /* stmt_fixup */
4425 0, /* function_transform_todo_flags_start */
4426 NULL, /* function_transform */
4427 NULL) /* variable_transform */
4428 {}
4429
4430 /* opt_pass methods: */
725793af 4431 bool gate (function *) final override
ff6686d2
MJ
4432 {
4433 /* TODO: We should remove the optimize check after we ensure we never run
4434 IPA passes when not optimizing. */
4435 return (flag_ipa_sra && optimize);
4436 }
4437
725793af
DM
4438 unsigned int execute (function *) final override
4439 {
4440 return ipa_sra_analysis ();
4441 }
ff6686d2
MJ
4442
4443}; // class pass_ipa_sra
4444
4445} // anon namespace
4446
40e67ab8
JH
4447/* Intraprocedural part of IPA-SRA analysis. Scan function body of NODE and
4448 create a summary structure describing IPA-SRA opportunities and constraints
4449 in it. */
4450
4451static void
4452ipa_sra_summarize_function (cgraph_node *node)
4453{
4454 if (dump_file)
4455 fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
4456 node->order);
40e67ab8 4457 gcc_obstack_init (&gensum_obstack);
e3a5cc32 4458 loaded_decls = new hash_set<tree>;
40e67ab8 4459
e3a5cc32 4460 isra_func_summary *ifs = NULL;
40e67ab8 4461 unsigned count = 0;
e3a5cc32
MJ
4462 if (ipa_sra_preliminary_function_checks (node))
4463 {
4464 ifs = func_sums->get_create (node);
4465 ifs->m_candidate = true;
4466 tree ret = TREE_TYPE (TREE_TYPE (node->decl));
4467 ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
4468 for (tree parm = DECL_ARGUMENTS (node->decl);
4469 parm;
4470 parm = DECL_CHAIN (parm))
4471 count++;
4472 }
4473 auto_vec<gensum_param_desc, 16> param_descriptions (count);
40e67ab8 4474
e3a5cc32
MJ
4475 struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
4476 bool cfun_pushed = false;
40e67ab8
JH
4477 if (count > 0)
4478 {
e3a5cc32 4479 decl2desc = new hash_map<tree, gensum_param_desc *>;
40e67ab8
JH
4480 param_descriptions.reserve_exact (count);
4481 param_descriptions.quick_grow_cleared (count);
4482
40e67ab8
JH
4483 if (create_parameter_descriptors (node, &param_descriptions))
4484 {
4485 push_cfun (fun);
4486 cfun_pushed = true;
4487 final_bbs = BITMAP_ALLOC (NULL);
4488 bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
10478270 4489 unsafe_by_ref_count
40e67ab8
JH
4490 * last_basic_block_for_fn (fun));
4491 aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
e3a5cc32
MJ
4492 }
4493 }
4494 /* Scan function is run even when there are no removal or splitting
4495 candidates so that we can calculate hints on call edges which can be
4496 useful in callees. */
4497 scan_function (node, fun);
40e67ab8 4498
e3a5cc32
MJ
4499 if (count > 0)
4500 {
4501 if (dump_file)
4502 {
4503 dump_gensum_param_descriptors (dump_file, node->decl,
4504 &param_descriptions);
4505 fprintf (dump_file, "----------------------------------------\n");
40e67ab8 4506 }
e3a5cc32 4507
40e67ab8
JH
4508 process_scan_results (node, fun, ifs, &param_descriptions);
4509
4510 if (cfun_pushed)
4511 pop_cfun ();
4512 if (bb_dereferences)
4513 {
4514 free (bb_dereferences);
4515 bb_dereferences = NULL;
4516 BITMAP_FREE (final_bbs);
4517 final_bbs = NULL;
4518 }
4519 }
4520 isra_analyze_all_outgoing_calls (node);
4521
e3a5cc32
MJ
4522 delete loaded_decls;
4523 loaded_decls = NULL;
4524 if (decl2desc)
4525 {
4526 delete decl2desc;
4527 decl2desc = NULL;
4528 }
40e67ab8
JH
4529 obstack_free (&gensum_obstack, NULL);
4530 if (dump_file)
4531 fprintf (dump_file, "\n\n");
4532 if (flag_checking)
4533 verify_splitting_accesses (node, false);
4534 return;
4535}
4536
ff6686d2
MJ
4537ipa_opt_pass_d *
4538make_pass_ipa_sra (gcc::context *ctxt)
4539{
4540 return new pass_ipa_sra (ctxt);
4541}
4542
4543
4544#include "gt-ipa-sra.h"