]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-sra.cc
check undefine_p for one more vr
[thirdparty/gcc.git] / gcc / ipa-sra.cc
CommitLineData
ff6686d2 1/* Interprocedural scalar replacement of aggregates
aeee4812 2 Copyright (C) 2019-2023 Free Software Foundation, Inc.
ff6686d2
MJ
3 Contributed by Martin Jambor <mjambor@suse.cz>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* IPA-SRA is an interprocedural pass that removes unused function return
22 values (turning functions returning a value which is never used into void
c76b3bc5 23 functions) and removes unused function parameters. It can also replace an
ff6686d2
MJ
24 aggregate parameter by a set of other parameters representing part of the
25 original, turning those passed by reference into new ones which pass the
26 value directly.
27
28 The pass is a true IPA one, which means that it works in three stages in
29 order to be able to take advantage of LTO. First, summaries about functions
30 and calls are generated. Function summaries (often called call graph
31 node summaries) contain mainly information about which parameters are
32 potential transformation candidates and which bits of candidates are
33 accessed. We differentiate between accesses done as a part of a call
34 statement (which might be not necessary if the callee is also transformed)
35 and others (which are mandatory). Call summaries (often called call graph
36 edge summaries) contain information about which function formal parameters
37 feed into which actual call arguments so that if two parameters are only
38 used in a sum which is then passed to another function which then however
39 does not use this parameter, all three parameters of the two functions can
40 be eliminated. Edge summaries also have flags whether the return value is
41 used or if it is only returned in the caller too. In LTO mode these
42 summaries are then streamed to the object file in the compilation phase and
43 streamed back in in the WPA analysis stage.
44
45 The interprocedural analysis phase traverses the graph in topological order
46 in two sweeps, one in each direction. First, from callees to callers for
47 parameter removal and splitting. Each strongly-connected component is
48 processed iteratively until the situation in it stabilizes. The pass from
49 callers to callees is then carried out to remove unused return values in a
50 very similar fashion.
51
52 Because parameter manipulation has big implications for call redirection
53 which is done only after all call graph nodes materialize, the
54 transformation phase is not part of this patch but is carried out by the
55 clone materialization and edge redirection itself, see comments in
56 ipa-param-manipulation.h for more details. */
57
58
ff6686d2
MJ
59#include "config.h"
60#include "system.h"
61#include "coretypes.h"
62#include "backend.h"
63#include "tree.h"
64#include "gimple.h"
65#include "predict.h"
66#include "tree-pass.h"
67#include "ssa.h"
68#include "cgraph.h"
69#include "gimple-pretty-print.h"
70#include "alias.h"
71#include "tree-eh.h"
72#include "gimple-iterator.h"
73#include "gimple-walk.h"
74#include "tree-dfa.h"
75#include "tree-sra.h"
a895e6d7 76#include "alloc-pool.h"
ff6686d2 77#include "symbol-summary.h"
ff6686d2
MJ
78#include "dbgcnt.h"
79#include "tree-inline.h"
80#include "ipa-utils.h"
81#include "builtins.h"
82#include "cfganal.h"
83#include "tree-streamer.h"
9707b593 84#include "internal-fn.h"
ae7a23a3 85#include "symtab-clones.h"
ecdf414b 86#include "attribs.h"
4834e936 87#include "ipa-prop.h"
ff6686d2 88
40e67ab8
JH
89static void ipa_sra_summarize_function (cgraph_node *);
90
ff6686d2
MJ
91/* Bits used to track size of an aggregate in bytes interprocedurally. */
92#define ISRA_ARG_SIZE_LIMIT_BITS 16
93#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
94/* How many parameters can feed into a call actual argument and still be
c76b3bc5 95 tracked. */
ff6686d2
MJ
96#define IPA_SRA_MAX_PARAM_FLOW_LEN 7
97
98/* Structure describing accesses to a specific portion of an aggregate
99 parameter, as given by the offset and size. Any smaller accesses that occur
100 within a function that fall within another access form a tree. The pass
101 cannot analyze parameters with only partially overlapping accesses. */
102
struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal parameters
     that are only used as actual function arguments passed to callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};
127
dfea3d6f 128/* This structure has the same purpose as the one above and additionally it
ff6686d2
MJ
129 contains some fields that are only necessary in the summary generation
130 phase. */
131
struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Cumulative count of all loads.  */
  profile_count load_count;
  /* Have there been writes to or reads from this exact location except for as
     arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};
163
164/* Summary describing a parameter in the IPA stages. */
165
struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameters, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Minimum offset that is known to be safe to dereference because of callers
     pass pointers to DECLs of at least this size or because of dereferences in
     callers.  */
  unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
  /* Parameter hint set during IPA analysis when there is a caller which does
     not construct the argument just to pass it to calls.  Only meaningful for
     by_ref parameters.  */
  unsigned not_specially_constructed : 1;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only be
     a split candidate if all callers pass pointers that are known to point to
     a chunk of memory large enough to contain all accesses.  */
  unsigned conditionally_dereferenceable : 1;
  /* Set when safe_size has been updated from at least one caller.  */
  unsigned safe_size_set : 1;
};
199
200/* Structure used when generating summaries that describes a parameter. */
201
struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to.  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing data
     by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference (either a pointer or a
     source language reference type)?  */
  bool by_ref;
  /* If this parameter passes stuff by reference, can it be safely dereferenced
     without performing further checks (for example because it is a
     REFERENCE_TYPE)?  */
  bool safe_ref;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only be
     a split candidate if all callers pass pointers that are known to point to
     a chunk of memory large enough to contain all accesses.  */
  bool conditionally_dereferenceable;

  /* The number of this parameter as they are ordered in function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is parameter index to
     compute indices to bb_dereferences.  */
  int deref_index;
};
244
dfea3d6f 245/* Properly deallocate accesses of DESC. TODO: Since this data structure is
c76b3bc5
EB
246 allocated in GC memory, this is not necessary and we can consider removing
247 the function. */
ff6686d2
MJ
248
249static void
250free_param_decl_accesses (isra_param_desc *desc)
251{
252 unsigned len = vec_safe_length (desc->accesses);
253 for (unsigned i = 0; i < len; ++i)
254 ggc_free ((*desc->accesses)[i]);
255 vec_free (desc->accesses);
256}
257
258/* Class used to convey information about functions from the
dfea3d6f 259 intra-procedural analysis stage to inter-procedural one. */
ff6686d2
MJ
260
class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};
301
c76b3bc5
EB
302/* Deallocate the memory pointed to by isra_func_summary. TODO: Since this
303 data structure is allocated in GC memory, this is not necessary and we can
304 consider removing the destructor. */
ff6686d2
MJ
305
306isra_func_summary::~isra_func_summary ()
307{
308 unsigned len = vec_safe_length (m_parameters);
309 for (unsigned i = 0; i < len; ++i)
310 free_param_decl_accesses (&(*m_parameters)[i]);
311 vec_free (m_parameters);
312}
313
dfea3d6f 314/* Mark the function as not a candidate for any IPA-SRA transformation. Return
ff6686d2
MJ
315 true if it was a candidate until now. */
316
317bool
318isra_func_summary::zap ()
319{
320 bool ret = m_candidate;
321 m_candidate = false;
322
c76b3bc5 323 /* TODO: see the destructor above. */
ff6686d2
MJ
324 unsigned len = vec_safe_length (m_parameters);
325 for (unsigned i = 0; i < len; ++i)
326 free_param_decl_accesses (&(*m_parameters)[i]);
327 vec_free (m_parameters);
328
329 return ret;
330}
331
332/* Structure to describe which formal parameters feed into a particular actual
c76b3bc5 333 argument. */
ff6686d2
MJ
334
struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual argument.
     If aggregate_pass_through or pointer_pass_through below are true, it must
     contain exactly one element which is passed through from a formal
     parameter if the given number.  Otherwise, the array contains indices of
     callee's formal parameters which are used to calculate value of this
     actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* When aggregate_pass_through is set, this is the size of the portion of an
     aggregate formal parameter that is being passed.  Otherwise, this is size
     of pointed to memory that is known to be valid to be dereferenced.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual copy is a verbatim pass through of an
     obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee, which
     would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
  /* True when the passed value is an address of a structure that has been
     constructed in the caller just to be passed by reference to functions
     (i.e. is never read).  */
  unsigned constructed_for_calls : 1;
};
368
dfea3d6f 369/* Structure used to convey information about calls from the intra-procedural
ff6686d2
MJ
370 analysis stage to inter-procedural one. */
371
class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to compute
     individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in the
     caller.  */
  unsigned m_before_any_store : 1;
};
401
402/* Class to manage function summaries. */
403
/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  /* Copy a summary when a cgraph node is duplicated.  */
  void duplicate (cgraph_node *, cgraph_node *,
		  isra_func_summary *old_sum,
		  isra_func_summary *new_sum) final override;
  /* Compute a summary when a new cgraph node appears.  */
  void insert (cgraph_node *, isra_func_summary *) final override;
};
416
417/* Hook that is called by summary when a node is duplicated. */
418
/* Hook called by the summary infrastructure when a node is duplicated.
   Deep-copies all parameter descriptors including their access vectors.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->safe_size = s->safe_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->not_specially_constructed = s->not_specially_constructed;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;
      d->safe_size_set = s->safe_size_set;

      /* Each param_access is individually allocated in GC memory, so the
	 vector elements must be cloned one by one.  */
      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  to->reverse = from->reverse;
	  d->accesses->quick_push (to);
	}
    }
}
468
469/* Pointer to the pass function summary holder. */
470
471static GTY(()) ipa_sra_function_summaries *func_sums;
472
40e67ab8
JH
473/* Hook that is called by summary when new node appears. */
474
475void
476ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
477{
478 if (opt_for_fn (node->decl, flag_ipa_sra))
479 {
480 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
481 ipa_sra_summarize_function (node);
482 pop_cfun ();
483 }
484 else
485 func_sums->remove (node);
486}
487
ff6686d2
MJ
488/* Class to manage call summaries. */
489
/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  void duplicate (cgraph_edge *, cgraph_edge *,
		  isra_call_summary *old_sum,
		  isra_call_summary *new_sum) final override;
};
501
502static ipa_sra_call_summaries *call_sums;
503
504
505/* Initialize m_arg_flow of a particular instance of isra_call_summary.
506 ARG_COUNT is the number of actual arguments passed. */
507
508void
509isra_call_summary::init_inputs (unsigned arg_count)
510{
511 if (arg_count == 0)
512 {
513 gcc_checking_assert (m_arg_flow.length () == 0);
514 return;
515 }
516 if (m_arg_flow.length () == 0)
517 {
518 m_arg_flow.reserve_exact (arg_count);
519 m_arg_flow.quick_grow_cleared (arg_count);
520 }
521 else
522 gcc_checking_assert (arg_count == m_arg_flow.length ());
523}
524
525/* Dump all information in call summary to F. */
526
/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, "    return value ignored\n");
  if (m_return_returned)
    fprintf (f, "    return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, "    happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, "    Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, "      Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, "      Aggregate pass through from the param given above, "
		 "unit offset: %u , unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      else if (ipf->unit_size > 0)
	fprintf (f, "      Known dereferenceable size: %u\n", ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, "      Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
      if (ipf->constructed_for_calls)
	fprintf (f, "      Variable constructed just to be passed to "
		 "calls.\n");
    }
}
569
dfea3d6f 570/* Duplicate edge summary when an edge is cloned. */
ff6686d2
MJ
571
572void
573ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
574 isra_call_summary *old_sum,
575 isra_call_summary *new_sum)
576{
577 unsigned arg_count = old_sum->m_arg_flow.length ();
578 new_sum->init_inputs (arg_count);
579 for (unsigned i = 0; i < arg_count; i++)
580 new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];
581
582 new_sum->m_return_ignored = old_sum->m_return_ignored;
583 new_sum->m_return_returned = old_sum->m_return_returned;
584 new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
763121cc 585 new_sum->m_before_any_store = old_sum->m_before_any_store;
ff6686d2
MJ
586}
587
588
589/* With all GTY stuff done, we can move to anonymous namespace. */
590namespace {
591/* Quick mapping from a decl to its param descriptor. */
592
593hash_map<tree, gensum_param_desc *> *decl2desc;
594
6539bbc1
MJ
595/* All local DECLs ever loaded from of and of those that have their address
596 assigned to a variable. */
e3a5cc32
MJ
597
598hash_set <tree> *loaded_decls;
599
c76b3bc5 600/* Countdown of allowed Alias Analysis steps during summary building. */
ff6686d2
MJ
601
602int aa_walking_limit;
603
604/* This is a table in which for each basic block and parameter there is a
605 distance (offset + size) in that parameter which is dereferenced and
606 accessed in that BB. */
607HOST_WIDE_INT *bb_dereferences = NULL;
608/* How many by-reference parameters there are in the current function. */
10478270 609int unsafe_by_ref_count;
ff6686d2
MJ
610
611/* Bitmap of BBs that can cause the function to "stop" progressing by
612 returning, throwing externally, looping infinitely or calling a function
613 which might abort etc.. */
614bitmap final_bbs;
615
616/* Obstack to allocate various small structures required only when generating
617 summary for a function. */
618struct obstack gensum_obstack;
619
620/* Return false the function is apparently unsuitable for IPA-SRA based on it's
621 attributes, return true otherwise. NODE is the cgraph node of the current
622 function. */
623
/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg. \n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway. \n");
      return false;
    }

  return true;
}
675
676/* Print access tree starting at ACCESS to F. */
677
/* Print access tree starting at ACCESS to F, indented by INDENT extra
   spaces; recurses into children with a deeper indent.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, "  ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, "  * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", load_count: ");
  access->load_count.dump (f);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}
699
700
701/* Print access tree starting at ACCESS to F. */
702
/* Print the IPA-stage representation of ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, "    * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}
720
721/* Dump access tree starting at ACCESS to stderr. */
722
/* Dump access tree starting at ACCESS to stderr; convenience wrapper for use
   from a debugger.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}
728
729/* Dump DESC to F. */
730
/* Dump summary-generation-stage parameter descriptor DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, "    unused with %i call_uses\n", desc->call_uses);
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, "    %s%s by_ref with %u pass throughs\n",
	     desc->safe_ref ? "safe" : "unsafe",
	     desc->conditionally_dereferenceable
	     ? " conditionally_dereferenceable" : " ok",
	     desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}
751
dfea3d6f 752/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to
ff6686d2
MJ
753 F. */
754
/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they
   describe FNDECL, to F.  Walks DECL_ARGUMENTS in parallel with the
   descriptor vector.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}
770
771
e3a5cc32
MJ
772/* Dump DESC to F. If HINTS is true, also dump IPA-analysis computed
773 hints. */
ff6686d2
MJ
774
/* Dump DESC to F.  If HINTS is true, also dump IPA-analysis computed
   hints.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc, bool hints)
{
  if (desc->locally_unused)
    {
      fprintf (f, "    (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, "    not a candidate for splitting");
      if (hints && desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
      fprintf (f, "\n");
      return;
    }
  fprintf (f, "    param_size_limit: %u, size_reached: %u%s",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");
  if (desc->by_ref && desc->conditionally_dereferenceable)
    fprintf (f, ", conditionally_dereferenceable");
  if (hints)
    {
      if (desc->by_ref && !desc->not_specially_constructed)
	fprintf (f, ", args_specially_constructed");
      if (desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
    }
  fprintf (f, "\n");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}
810
e3a5cc32
MJ
811/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
812 If HINTS is true, also dump IPA-analysis computed hints. */
ff6686d2
MJ
813
/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
   If HINTS is true, also dump IPA-analysis computed hints.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl, isra_func_summary *ifs,
			     bool hints)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, "  parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, "    Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
    }
}
835
836/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage. If the
837 function fails return false, otherwise return true. SRC must fit into an
838 unsigned char. Used for purposes of transitive unused parameter
839 removal. */
840
841static bool
842add_src_to_param_flow (isra_param_flow *param_flow, int src)
843{
844 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
845 if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
846 return false;
847
848 param_flow->inputs[(int) param_flow->length] = src;
849 param_flow->length++;
850 return true;
851}
852
853/* Add a SRC to the inputs of PARAM_FLOW unless it is already there and assert
854 it is the only input. Used for purposes of transitive parameter
855 splitting. */
856
857static void
858set_single_param_flow_source (isra_param_flow *param_flow, int src)
859{
860 gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
861 if (param_flow->length == 0)
862 {
863 param_flow->inputs[0] = src;
864 param_flow->length = 1;
865 }
866 else if (param_flow->length == 1)
867 gcc_assert (param_flow->inputs[0] == src);
868 else
869 gcc_unreachable ();
870}
871
872/* Assert that there is only a single value in PARAM_FLOW's inputs and return
873 it. */
874
/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}
881
/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for something else than either an actual call argument, simple
   arithmetic operation or debug statement.  If there are no such uses, return
   the number of actual arguments that this parameter eventually feeds to (or
   zero if there is none).  For any such parameter, mark PARM_NUM as one of its
   sources.  ANALYZED is a bitmap that tracks which SSA names we have already
   started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
			      int parm_num, bitmap analyzed)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      /* Debug statements and clobbers are irrelevant for removal
	 purposes.  */
      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like arithmetic
	 operations below.  */
      if (is_gimple_call (stmt))
	{
	  /* Count all uses of NAME in this statement so we can later verify
	     that every one of them was a plain argument.  */
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      /* Internal or zero-argument call: NAME is used in some other
		 role (e.g. the called address), give up.  */
	      res = -1;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  /* Record NAME's parameter as a source of every argument slot it
	     feeds and count those "simple" uses.  */
	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      /* Either the flow record overflowed or NAME also appears in a
		 non-argument position of the call.  */
	      res = -1;
	      break;
	    }
	  res += all_uses;
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  /* Simple arithmetic/copy or PHI: follow the defined SSA name
	     recursively.  */
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  /* bitmap_set_bit returns true only if the bit was newly set, which
	     guards against revisiting SSA names (e.g. via PHI cycles).  */
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
						      analyzed);
	      if (tmp < 0)
		{
		  res = tmp;
		  break;
		}
	      res += tmp;
	    }
	}
      else
	{
	  /* Any other kind of use disqualifies the parameter.  */
	  res = -1;
	  break;
	}
    }
  return res;
}
984
985/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
986 also described by NODE) and simple arithmetic calculations involving PARM
987 and return false if any of them is used for something else than either an
988 actual call argument, simple arithmetic operation or debug statement. If
989 there are no such uses, return true and store the number of actual arguments
990 that this parameter eventually feeds to (or zero if there is none) to
991 *CALL_USES_P. For any such parameter, mark PARM_NUM as one of its
992 sources.
993
994 This function is similar to ptr_parm_has_nonarg_uses but its results are
995 meant for unused parameter removal, as opposed to splitting of parameters
c76b3bc5 996 passed by reference or converting them to passed by value. */
ff6686d2
MJ
997
998static bool
999isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
1000 int parm_num, int *call_uses_p)
1001{
1002 gcc_checking_assert (is_gimple_reg (parm));
1003
1004 tree name = ssa_default_def (fun, parm);
1005 if (!name || has_zero_uses (name))
1006 {
1007 *call_uses_p = 0;
1008 return false;
1009 }
1010
1011 /* Edge summaries can only handle callers with fewer than 256 parameters. */
1012 if (parm_num > UCHAR_MAX)
1013 return true;
1014
1015 bitmap analyzed = BITMAP_ALLOC (NULL);
1980ffec
MJ
1016 int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
1017 analyzed);
ff6686d2
MJ
1018 BITMAP_FREE (analyzed);
1019 if (call_uses < 0)
1020 return true;
1021 *call_uses_p = call_uses;
1022 return false;
1023}
1024
/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as argument calls
   and if returning false, store their number into *PT_COUNT_P.  NODE and FUN
   must represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, unsigned *pt_count_p)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      /* Number of uses of NAME in STMT that are acceptable; compared with the
	 total number of uses at the end of the loop body.  */
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (!TREE_THIS_VOLATILE (rhs))
	    {
	      /* Strip component references and accept a load of *NAME at
		 offset zero with a compatible type.  */
	      while (handled_component_p (rhs))
		rhs = TREE_OPERAND (rhs, 0);
	      if (TREE_CODE (rhs) == MEM_REF
		  && TREE_OPERAND (rhs, 0) == name
		  && integer_zerop (TREE_OPERAND (rhs, 1))
		  && types_compatible_p (TREE_TYPE (rhs),
					 TREE_TYPE (TREE_TYPE (name))))
		uses_ok++;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      if (!TREE_THIS_VOLATILE (arg))
		{
		  /* Same pattern as in the load case above: dereference of
		     NAME at offset zero with a compatible type is fine.  */
		  while (handled_component_p (arg))
		    arg = TREE_OPERAND (arg, 0);
		  if (TREE_CODE (arg) == MEM_REF
		      && TREE_OPERAND (arg, 0) == name
		      && integer_zerop (TREE_OPERAND (arg, 1))
		      && types_compatible_p (TREE_TYPE (arg),
					     TREE_TYPE (TREE_TYPE (name))))
		    uses_ok++;
		}
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  break;
	}
    }

  *pt_count_p = pt_count;
  return ret;
}
1142
/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters).  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      int scalar_call_uses;
      tree type = TREE_TYPE (parm);
      /* The following checks disqualify the parameter in order of increasing
	 analysis cost; each prints its reason to the dump file.  */
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      /* Register parameters can at least be checked for transitive unused
	 parameter removal even when they cannot be split.  */
      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num,
						  &scalar_call_uses))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses\n",
		     scalar_call_uses);

	  desc->locally_unused = true;
	  desc->call_uses = scalar_call_uses;
	}

      if (POINTER_TYPE_P (type))
	{
	  desc->by_ref = true;
	  /* C++ references and the implicit this pointer are known to always
	     point to valid, readable memory.  */
	  if (TREE_CODE (type) == REFERENCE_TYPE
	      || (num == 0
		  && TREE_CODE (TREE_TYPE (node->decl)) == METHOD_TYPE))
	    desc->safe_ref = true;
	  else
	    desc->safe_ref = false;
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num,
					&desc->ptr_pt_count))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      /* Unsafe by-reference candidates need a slot in the per-BB dereference
	 distance table; safe ones are dereferenced unconditionally.  */
      if (desc->by_ref && !desc->safe_ref)
	desc->deref_index = unsafe_by_ref_count++;
    }
  return ret;
}
1312
1313/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
1314 found, which happens if DECL is for a static chain. */
1315
1316static gensum_param_desc *
1317get_gensum_param_desc (tree decl)
1318{
e3a5cc32
MJ
1319 if (!decl2desc)
1320 return NULL;
ff6686d2
MJ
1321 gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
1322 gensum_param_desc **slot = decl2desc->get (decl);
1323 if (!slot)
1324 /* This can happen for static chains which we cannot handle so far. */
1325 return NULL;
1326 gcc_checking_assert (*slot);
1327 return *slot;
1328}
1329
1330
1331/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
1332 write REASON to the dump file if there is one. */
1333
1334static void
1335disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
1336{
1337 if (!desc->split_candidate)
1338 return;
1339
1340 if (dump_file && (dump_flags & TDF_DETAILS))
1341 fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
1342 desc->param_number, reason);
1343
1344 desc->split_candidate = false;
1345}
1346
1347/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
1348 there is one. */
1349
1350static void
1351disqualify_split_candidate (tree decl, const char *reason)
1352{
1353 gensum_param_desc *desc = get_gensum_param_desc (decl);
1354 if (desc)
1355 disqualify_split_candidate (desc, reason);
1356}
1357
1358/* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But
1359 first, check that there are not too many of them already. If so, do not
1360 allocate anything and return NULL. */
1361
1362static gensum_param_access *
1363allocate_access (gensum_param_desc *desc,
1364 HOST_WIDE_INT offset, HOST_WIDE_INT size)
1365{
1366 if (desc->access_count
028d4092 1367 == (unsigned) param_ipa_sra_max_replacements)
ff6686d2
MJ
1368 {
1369 disqualify_split_candidate (desc, "Too many replacement candidates");
1370 return NULL;
1371 }
1372
1373 gensum_param_access *access
1374 = (gensum_param_access *) obstack_alloc (&gensum_obstack,
1375 sizeof (gensum_param_access));
1376 memset (access, 0, sizeof (*access));
1377 access->offset = offset;
1378 access->size = size;
10478270 1379 access->load_count = profile_count::zero ();
ff6686d2
MJ
1380 return access;
1381}
1382
/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};
1389
/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put into the
   appropriate place in the access tree if does not exist, but fail and return
   NULL if there are already too many accesses, if it would create a partially
   overlapping access or if an access would end up within a pre-existing
   non-call access.

   Siblings on each tree level are kept sorted by offset and must not
   partially overlap; a child is always fully contained in its parent.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
	 something below access.  But we only allow nesting in call
	 arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      /* Collect all siblings covered by the new access as its children.  */
      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}
1515
1516/* Return an access describing memory access to the variable described by DESC
1517 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1518 to create if it does not exist, but fail and return NULL if there are
1519 already too many accesses, if it would create a partially overlapping access
dfea3d6f 1520 or if an access would end up in a non-call access. */
ff6686d2
MJ
1521
1522static gensum_param_access *
1523get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
1524 isra_scan_context ctx)
1525{
1526 gcc_checking_assert (desc->split_candidate);
1527
1528 gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
1529 size, ctx);
1530 if (!access)
1531 {
1532 disqualify_split_candidate (desc,
1533 "Bad access overlap or too many accesses");
1534 return NULL;
1535 }
1536
1537 switch (ctx)
1538 {
1539 case ISRA_CTX_STORE:
1540 gcc_assert (!desc->by_ref);
1541 /* Fall-through */
1542 case ISRA_CTX_LOAD:
1543 access->nonarg = true;
1544 break;
1545 case ISRA_CTX_ARG:
1546 break;
1547 }
1548
1549 return access;
1550}
1551
1552/* Verify that parameter access tree starting with ACCESS is in good shape.
dfea3d6f 1553 PARENT_OFFSET and PARENT_SIZE are the corresponding fields of parent of
ff6686d2
MJ
1554 ACCESS or zero if there is none. */
1555
1556static bool
1557verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
1558 HOST_WIDE_INT parent_size)
1559{
1560 while (access)
1561 {
241a2c49 1562 gcc_assert (access->offset >= 0 && access->size >= 0);
ff6686d2
MJ
1563
1564 if (parent_size != 0)
1565 {
1566 if (access->offset < parent_offset)
1567 {
1568 error ("Access offset before parent offset");
1569 return true;
1570 }
1571 if (access->size >= parent_size)
1572 {
1573 error ("Access size greater or equal to its parent size");
1574 return true;
1575 }
1576 if (access->offset + access->size > parent_offset + parent_size)
1577 {
1578 error ("Access terminates outside of its parent");
1579 return true;
1580 }
1581 }
1582
1583 if (verify_access_tree_1 (access->first_child, access->offset,
1584 access->size))
1585 return true;
1586
1587 if (access->next_sibling
1588 && (access->next_sibling->offset < access->offset + access->size))
1589 {
1590 error ("Access overlaps with its sibling");
1591 return true;
1592 }
1593
1594 access = access->next_sibling;
1595 }
1596 return false;
1597}
1598
1599/* Verify that parameter access tree starting with ACCESS is in good shape,
1600 halt compilation and dump the tree to stderr if not. */
1601
1602DEBUG_FUNCTION void
1603isra_verify_access_tree (gensum_param_access *access)
1604{
1605 if (verify_access_tree_1 (access, 0, 0))
1606 {
1607 for (; access; access = access->next_sibling)
1608 dump_gensum_access (stderr, access, 2);
1609 internal_error ("IPA-SRA access verification failed");
1610 }
1611}
1612
1613
1614/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
1615 GIMPLE_ASM operands with memory constrains which cannot be scalarized. */
1616
1617static bool
1618asm_visit_addr (gimple *, tree op, tree, void *)
1619{
1620 op = get_base_address (op);
1621 if (op
1622 && TREE_CODE (op) == PARM_DECL)
1623 disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");
1624
1625 return false;
1626}
1627
1628/* Mark a dereference of parameter identified by DESC of distance DIST in a
1629 basic block BB, unless the BB has already been marked as a potentially
1630 final. */
1631
1632static void
1633mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
10478270 1634 basic_block bb)
ff6686d2
MJ
1635{
1636 gcc_assert (desc->by_ref);
1637 gcc_checking_assert (desc->split_candidate);
1638
10478270
MJ
1639 if (desc->safe_ref
1640 || bitmap_bit_p (final_bbs, bb->index))
ff6686d2
MJ
1641 return;
1642
10478270 1643 int idx = bb->index * unsafe_by_ref_count + desc->deref_index;
ff6686d2
MJ
1644 if (bb_dereferences[idx] < dist)
1645 bb_dereferences[idx] = dist;
1646}
1647
/* Return true, if any potential replacements should use NEW_TYPE as opposed to
   previously recorded OLD_TYPE.  The checks below form a priority order and
   must be evaluated in sequence.  */

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
	  || VECTOR_TYPE_P (new_type)))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || VECTOR_TYPE_P (old_type))
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
	  != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
	  != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}
1694
/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};
1707
1708/* Record use of ACCESS which belongs to a parameter described by DESC in a
1709 call argument described by CALL_INFO. */
1710
1711static void
1712record_nonregister_call_use (gensum_param_desc *desc,
1713 scan_call_info *call_info,
1714 unsigned unit_offset, unsigned unit_size)
1715{
1716 isra_call_summary *csum = call_sums->get_create (call_info->cs);
1717 csum->init_inputs (call_info->argument_count);
1718
1719 isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
1720 param_flow->aggregate_pass_through = true;
1721 set_single_param_flow_source (param_flow, desc->param_number);
1722 param_flow->unit_offset = unit_offset;
1723 param_flow->unit_size = unit_size;
1724 desc->call_uses++;
1725}
1726
1727/* Callback of walk_aliased_vdefs, just mark that there was a possible
c76b3bc5 1728 modification. */
ff6686d2
MJ
1729
1730static bool
1731mark_maybe_modified (ao_ref *, tree, void *data)
1732{
1733 bool *maybe_modified = (bool *) data;
1734 *maybe_modified = true;
1735 return true;
1736}
1737
/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, store or as an argument call.  BB
   must be the basic block in which expr resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */

static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
		  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == ADDR_EXPR)
    {
      if (ctx == ISRA_CTX_ARG)
	return;
      /* Taking the address of a local variable makes it "loaded" for the
	 purposes of this analysis.  */
      tree t = get_base_address (TREE_OPERAND (expr, 0));
      if (VAR_P (t) && !TREE_STATIC (t))
	loaded_decls->add (t);
      return;
    }
  if (TREE_CODE (expr) == SSA_NAME
      || CONSTANT_CLASS_P (expr))
    return;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      /* Only dereferences of the default definition of a parameter are
	 interesting; anything else cannot be attributed to a parameter.  */
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
	  || !SSA_NAME_IS_DEFAULT_DEF (op))
	return;
      base = SSA_NAME_VAR (op);
      if (!base)
	return;
      deref = true;
    }
  else if (VAR_P (base)
	   && !TREE_STATIC (base)
	   && (ctx == ISRA_CTX_ARG
	       || ctx == ISRA_CTX_LOAD))
    loaded_decls->add (base);

  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  /* The checks below disqualify the parameter from splitting for various
     forms of accesses we cannot represent.  */
  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
				  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
				  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
				  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
	{
	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
	  return;
	}
      else if (ctx == ISRA_CTX_STORE)
	{
	  disqualify_split_candidate (desc, "Storing to data passed by "
				      "reference.");
	  return;
	}

      if (!aa_walking_limit)
	{
	  disqualify_split_candidate (desc, "Out of alias analysis step "
				      "limit.");
	  return;
	}

      /* Walk the virtual use-def chain to verify the pointed-to data cannot
	 be modified through an alias before this load; AA_WALKING_LIMIT
	 bounds the total work done across the pass.  */
      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
				       mark_maybe_modified, &maybe_modified,
				       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
	{
	  gcc_assert (aa_walking_limit > walked);
	  aa_walking_limit = aa_walking_limit - walked;
	}
      if (walked < 0)
	aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
	{
	  disqualify_split_candidate (desc, "Data passed by reference possibly "
				      "modified through an alias.");
	  return;
	}
      else
	mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);

      if (!deref)
	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
				     size / BITS_PER_UNIT);
      else
	/* This is not a pass-through of a pointer, this is a use like any
	   other.  */
	access->nonarg = true;
    }
  else if (ctx == ISRA_CTX_LOAD && bb->count.initialized_p ())
    access->load_count += bb->count;

  if (!access->type)
    {
      /* First access at this offset/size; remember its properties so later
	 accesses can be checked for consistency against them.  */
      access->type = type;
      access->alias_ptr_type = reference_alias_ptr_type (expr);
      access->reverse = reverse;
    }
  else
    {
      if (exp_align < TYPE_ALIGN (access->type))
	{
	  disqualify_split_candidate (desc, "Reference has lower alignment "
				      "than a previous one.");
	  return;
	}
      if (access->alias_ptr_type != reference_alias_ptr_type (expr))
	{
	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
	  return;
	}
      if (access->reverse != reverse)
	{
	  disqualify_split_candidate (desc, "Both normal and reverse "
				      "scalar storage order.");
	  return;
	}
      if (!deref
	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
	{
	  /* We need the same aggregate type on all accesses to be able to
	     distinguish transformation spots from pass-through arguments in
	     the transformation phase.  */
	  disqualify_split_candidate (desc, "We do not support aggregate "
				      "type punning.");
	  return;
	}

      if (type_prevails_p (access->type, type))
	 access->type = type;
    }
}
1954
1955/* Scan body function described by NODE and FUN and create access trees for
1956 parameters. */
1957
1958static void
1959scan_function (cgraph_node *node, struct function *fun)
1960{
1961 basic_block bb;
1962
1963 FOR_EACH_BB_FN (bb, fun)
1964 {
1965 gimple_stmt_iterator gsi;
1966 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1967 {
1968 gimple *stmt = gsi_stmt (gsi);
1969
e3a5cc32 1970 if (final_bbs && stmt_can_throw_external (fun, stmt))
ff6686d2
MJ
1971 bitmap_set_bit (final_bbs, bb->index);
1972 switch (gimple_code (stmt))
1973 {
1974 case GIMPLE_RETURN:
1975 {
1976 tree t = gimple_return_retval (as_a <greturn *> (stmt));
1977 if (t != NULL_TREE)
1978 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
e3a5cc32
MJ
1979 if (final_bbs)
1980 bitmap_set_bit (final_bbs, bb->index);
ff6686d2
MJ
1981 }
1982 break;
1983
1984 case GIMPLE_ASSIGN:
1985 if (gimple_assign_single_p (stmt)
1986 && !gimple_clobber_p (stmt))
1987 {
1988 tree rhs = gimple_assign_rhs1 (stmt);
1989 scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
1990 tree lhs = gimple_assign_lhs (stmt);
1991 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
1992 }
1993 break;
1994
1995 case GIMPLE_CALL:
1996 {
1997 unsigned argument_count = gimple_call_num_args (stmt);
9707b593
MJ
1998 isra_scan_context ctx = ISRA_CTX_ARG;
1999 scan_call_info call_info, *call_info_p = &call_info;
2000 if (gimple_call_internal_p (stmt))
2001 {
2002 call_info_p = NULL;
2003 ctx = ISRA_CTX_LOAD;
2004 internal_fn ifn = gimple_call_internal_fn (stmt);
2005 if (internal_store_fn_p (ifn))
2006 ctx = ISRA_CTX_STORE;
2007 }
2008 else
2009 {
2010 call_info.cs = node->get_edge (stmt);
2011 call_info.argument_count = argument_count;
2012 }
ff6686d2
MJ
2013
2014 for (unsigned i = 0; i < argument_count; i++)
2015 {
2016 call_info.arg_idx = i;
2017 scan_expr_access (gimple_call_arg (stmt, i), stmt,
9707b593 2018 ctx, bb, call_info_p);
ff6686d2
MJ
2019 }
2020
2021 tree lhs = gimple_call_lhs (stmt);
2022 if (lhs)
2023 scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
2024 int flags = gimple_call_flags (stmt);
e3a5cc32
MJ
2025 if (final_bbs
2026 && (((flags & (ECF_CONST | ECF_PURE)) == 0)
2027 || (flags & ECF_LOOPING_CONST_OR_PURE)))
ff6686d2
MJ
2028 bitmap_set_bit (final_bbs, bb->index);
2029 }
2030 break;
2031
2032 case GIMPLE_ASM:
2033 {
2034 gasm *asm_stmt = as_a <gasm *> (stmt);
2035 walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
2036 asm_visit_addr);
e3a5cc32
MJ
2037 if (final_bbs)
2038 bitmap_set_bit (final_bbs, bb->index);
ff6686d2
MJ
2039
2040 for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
2041 {
2042 tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
2043 scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
2044 }
2045 for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
2046 {
2047 tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
2048 scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
2049 }
2050 }
2051 break;
2052
2053 default:
2054 break;
2055 }
2056 }
2057 }
2058}
2059
9b8741c9
MJ
2060/* Return true if SSA_NAME NAME of function described by FUN is only used in
2061 return statements, or if results of any operations it is involved in are
2062 only used in return statements. ANALYZED is a bitmap that tracks which SSA
2063 names we have already started investigating. */
ff6686d2
MJ
2064
2065static bool
9b8741c9 2066ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
ff6686d2
MJ
2067{
2068 bool res = true;
2069 imm_use_iterator imm_iter;
2070 gimple *stmt;
2071
2072 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
2073 {
2074 if (is_gimple_debug (stmt))
2075 continue;
2076
2077 if (gimple_code (stmt) == GIMPLE_RETURN)
2078 {
2079 tree t = gimple_return_retval (as_a <greturn *> (stmt));
2080 if (t != name)
2081 {
2082 res = false;
640296c3 2083 break;
ff6686d2
MJ
2084 }
2085 }
9b8741c9
MJ
2086 else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
2087 && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
2088 || gimple_code (stmt) == GIMPLE_PHI))
ff6686d2
MJ
2089 {
2090 /* TODO: And perhaps for const function calls too? */
2091 tree lhs;
2092 if (gimple_code (stmt) == GIMPLE_PHI)
2093 lhs = gimple_phi_result (stmt);
2094 else
2095 lhs = gimple_assign_lhs (stmt);
2096
2097 if (TREE_CODE (lhs) != SSA_NAME)
2098 {
2099 res = false;
640296c3 2100 break;
ff6686d2
MJ
2101 }
2102 gcc_assert (!gimple_vdef (stmt));
2103 if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
9b8741c9 2104 && !ssa_name_only_returned_p (fun, lhs, analyzed))
ff6686d2
MJ
2105 {
2106 res = false;
640296c3 2107 break;
ff6686d2
MJ
2108 }
2109 }
2110 else
2111 {
2112 res = false;
640296c3 2113 break;
ff6686d2
MJ
2114 }
2115 }
2116 return res;
2117}
2118
2119/* Inspect the uses of the return value of the call associated with CS, and if
2120 it is not used or if it is only used to construct the return value of the
2121 caller, mark it as such in call or caller summary. Also check for
2122 misaligned arguments. */
2123
2124static void
2125isra_analyze_call (cgraph_edge *cs)
2126{
2127 gcall *call_stmt = cs->call_stmt;
2128 unsigned count = gimple_call_num_args (call_stmt);
2129 isra_call_summary *csum = call_sums->get_create (cs);
2130
2131 for (unsigned i = 0; i < count; i++)
2132 {
2133 tree arg = gimple_call_arg (call_stmt, i);
e3a5cc32
MJ
2134 if (TREE_CODE (arg) == ADDR_EXPR)
2135 {
2136 poly_int64 poffset, psize, pmax_size;
2137 bool reverse;
2138 tree base = get_ref_base_and_extent (TREE_OPERAND (arg, 0), &poffset,
2139 &psize, &pmax_size, &reverse);
f2cf4c61
MJ
2140 HOST_WIDE_INT offset;
2141 unsigned HOST_WIDE_INT ds;
2142 if (DECL_P (base)
2143 && (poffset.is_constant (&offset))
2144 && tree_fits_uhwi_p (DECL_SIZE (base))
2145 && ((ds = tree_to_uhwi (DECL_SIZE (base)) - offset)
2146 < ISRA_ARG_SIZE_LIMIT * BITS_PER_UNIT))
2147 {
2148 csum->init_inputs (count);
2149 gcc_assert (!csum->m_arg_flow[i].aggregate_pass_through);
2150 csum->m_arg_flow[i].unit_size = ds / BITS_PER_UNIT;
2151 }
2152
e3a5cc32
MJ
2153 if (TREE_CODE (base) == VAR_DECL
2154 && !TREE_STATIC (base)
2155 && !loaded_decls->contains (base))
2156 {
2157 csum->init_inputs (count);
2158 csum->m_arg_flow[i].constructed_for_calls = true;
2159 }
2160 }
2161
ff6686d2
MJ
2162 if (is_gimple_reg (arg))
2163 continue;
2164
2165 tree offset;
2166 poly_int64 bitsize, bitpos;
2167 machine_mode mode;
2168 int unsignedp, reversep, volatilep = 0;
2169 get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
2170 &unsignedp, &reversep, &volatilep);
2171 if (!multiple_p (bitpos, BITS_PER_UNIT))
2172 {
2173 csum->m_bit_aligned_arg = true;
2174 break;
2175 }
2176 }
2177
2178 tree lhs = gimple_call_lhs (call_stmt);
2179 if (lhs)
2180 {
2181 /* TODO: Also detect aggregates on a LHS of a call that are only returned
2182 from this function (without being read anywhere). */
2183 if (TREE_CODE (lhs) == SSA_NAME)
2184 {
2185 bitmap analyzed = BITMAP_ALLOC (NULL);
9b8741c9
MJ
2186 if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
2187 lhs, analyzed))
ff6686d2
MJ
2188 csum->m_return_returned = true;
2189 BITMAP_FREE (analyzed);
2190 }
2191 }
2192 else
2193 csum->m_return_ignored = true;
2194}
2195
2196/* Look at all calls going out of NODE, described also by IFS and perform all
2197 analyses necessary for IPA-SRA that are not done at body scan time or done
2198 even when body is not scanned because the function is not a candidate. */
2199
2200static void
2201isra_analyze_all_outgoing_calls (cgraph_node *node)
2202{
2203 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
2204 isra_analyze_call (cs);
2205 for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
2206 isra_analyze_call (cs);
2207}
2208
2209/* Dump a dereferences table with heading STR to file F. */
2210
2211static void
2212dump_dereferences_table (FILE *f, struct function *fun, const char *str)
2213{
2214 basic_block bb;
2215
2216 fprintf (dump_file, "%s", str);
2217 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
2218 EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
2219 {
2220 fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
2221 if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2222 {
2223 int i;
10478270 2224 for (i = 0; i < unsafe_by_ref_count; i++)
ff6686d2 2225 {
10478270 2226 int idx = bb->index * unsafe_by_ref_count + i;
ff6686d2
MJ
2227 fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
2228 }
2229 }
2230 fprintf (f, "\n");
2231 }
2232 fprintf (dump_file, "\n");
2233}
2234
2235/* Propagate distances in bb_dereferences in the opposite direction than the
2236 control flow edges, in each step storing the maximum of the current value
2237 and the minimum of all successors. These steps are repeated until the table
2238 stabilizes. Note that BBs which might terminate the functions (according to
2239 final_bbs bitmap) never updated in this way. */
2240
2241static void
2242propagate_dereference_distances (struct function *fun)
2243{
2244 basic_block bb;
2245
2246 if (dump_file && (dump_flags & TDF_DETAILS))
2247 dump_dereferences_table (dump_file, fun,
2248 "Dereference table before propagation:\n");
2249
2250 auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
2251 queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
2252 FOR_EACH_BB_FN (bb, fun)
2253 {
2254 queue.quick_push (bb);
2255 bb->aux = bb;
2256 }
2257
2258 while (!queue.is_empty ())
2259 {
2260 edge_iterator ei;
2261 edge e;
2262 bool change = false;
2263 int i;
2264
2265 bb = queue.pop ();
2266 bb->aux = NULL;
2267
2268 if (bitmap_bit_p (final_bbs, bb->index))
2269 continue;
2270
10478270 2271 for (i = 0; i < unsafe_by_ref_count; i++)
ff6686d2 2272 {
10478270 2273 int idx = bb->index * unsafe_by_ref_count + i;
ff6686d2
MJ
2274 bool first = true;
2275 HOST_WIDE_INT inh = 0;
2276
2277 FOR_EACH_EDGE (e, ei, bb->succs)
2278 {
10478270 2279 int succ_idx = e->dest->index * unsafe_by_ref_count + i;
ff6686d2
MJ
2280
2281 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
2282 continue;
2283
2284 if (first)
2285 {
2286 first = false;
2287 inh = bb_dereferences [succ_idx];
2288 }
2289 else if (bb_dereferences [succ_idx] < inh)
2290 inh = bb_dereferences [succ_idx];
2291 }
2292
2293 if (!first && bb_dereferences[idx] < inh)
2294 {
2295 bb_dereferences[idx] = inh;
2296 change = true;
2297 }
2298 }
2299
2300 if (change)
2301 FOR_EACH_EDGE (e, ei, bb->preds)
2302 {
2303 if (e->src->aux)
2304 continue;
2305
2306 e->src->aux = e->src;
2307 queue.quick_push (e->src);
2308 }
2309 }
2310
2311 if (dump_file && (dump_flags & TDF_DETAILS))
2312 dump_dereferences_table (dump_file, fun,
2313 "Dereference table after propagation:\n");
2314}
2315
10478270
MJ
2316/* Return true if the ACCESS loads happen frequently enough in FUN to risk
2317 moving them to the caller and only pass the result. */
ff6686d2
MJ
2318
2319static bool
10478270
MJ
2320dereference_probable_p (struct function *fun, gensum_param_access *access)
2321{
2322 int threshold = opt_for_fn (fun->decl, param_ipa_sra_deref_prob_threshold);
2323 return access->load_count
2324 >= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (threshold, 100);
2325}
2326
2327/* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
2328 its children, return true if the parameter cannot be split, otherwise return
2329 false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be
2330 the index of the entry BB in the function of PARM. */
2331
2332static bool
2333check_gensum_access (struct function *fun, tree parm, gensum_param_desc *desc,
ff6686d2
MJ
2334 gensum_param_access *access,
2335 HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
c76b3bc5 2336 int entry_bb_index)
ff6686d2
MJ
2337{
2338 if (access->nonarg)
2339 {
2340 *only_calls = false;
2341 *nonarg_acc_size += access->size;
2342
2343 if (access->first_child)
2344 {
2345 disqualify_split_candidate (desc, "Overlapping non-call uses.");
2346 return true;
2347 }
2348 }
2349 /* Do not decompose a non-BLKmode param in a way that would create
2350 BLKmode params. Especially for by-reference passing (thus,
2351 pointer-type param) this is hardly worthwhile. */
2352 if (DECL_MODE (parm) != BLKmode
2353 && TYPE_MODE (access->type) == BLKmode)
2354 {
2355 disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
2356 return true;
2357 }
2358
2359 if (desc->by_ref)
2360 {
10478270 2361 if (desc->safe_ref)
ff6686d2 2362 {
10478270
MJ
2363 if (!dereference_probable_p (fun, access))
2364 {
2365 disqualify_split_candidate (desc, "Dereferences in callers "
2366 "would happen much more frequently.");
2367 return true;
2368 }
2369 }
2370 else
2371 {
2372 int idx = (entry_bb_index * unsafe_by_ref_count + desc->deref_index);
2373 if ((access->offset + access->size) > bb_dereferences[idx])
2374 {
f2cf4c61
MJ
2375 if (!dereference_probable_p (fun, access))
2376 {
2377 disqualify_split_candidate (desc, "Would create a possibly "
2378 "illegal dereference in a "
2379 "caller.");
2380 return true;
2381 }
2382 desc->conditionally_dereferenceable = true;
10478270 2383 }
ff6686d2
MJ
2384 }
2385 }
2386
2387 for (gensum_param_access *ch = access->first_child;
2388 ch;
2389 ch = ch->next_sibling)
10478270 2390 if (check_gensum_access (fun, parm, desc, ch, nonarg_acc_size, only_calls,
ff6686d2
MJ
2391 entry_bb_index))
2392 return true;
2393
2394 return false;
2395}
2396
2397/* Copy data from FROM and all of its children to a vector of accesses in IPA
2398 descriptor DESC. */
2399
2400static void
2401copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
2402{
2403 param_access *to = ggc_cleared_alloc<param_access> ();
2404 gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
2405 gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
2406 to->unit_offset = from->offset / BITS_PER_UNIT;
2407 to->unit_size = from->size / BITS_PER_UNIT;
2408 to->type = from->type;
2409 to->alias_ptr_type = from->alias_ptr_type;
2410 to->certain = from->nonarg;
2411 to->reverse = from->reverse;
2412 vec_safe_push (desc->accesses, to);
2413
2414 for (gensum_param_access *ch = from->first_child;
2415 ch;
2416 ch = ch->next_sibling)
2417 copy_accesses_to_ipa_desc (ch, desc);
2418}
2419
/* Analyze function body scan results stored in param_accesses and
   param_accesses, detect possible transformations and store information of
   those in function summary.  NODE, FUN and IFS are all various structures
   describing the currently analyzed function.  */

static void
process_scan_results (cgraph_node *node, struct function *fun,
		      isra_func_summary *ifs,
		      vec<gensum_param_desc> *param_descriptions)
{
  bool check_pass_throughs = false;
  bool dereferences_propagated = false;
  tree parm = DECL_ARGUMENTS (node->decl);
  unsigned param_count = param_descriptions->length();

  /* First pass: validate the access tree of every splitting candidate and
     compute its size limits, disqualifying those that fail the checks.  */
  for (unsigned desc_index = 0;
       desc_index < param_count;
       desc_index++, parm = DECL_CHAIN (parm))
    {
      gensum_param_desc *desc = &(*param_descriptions)[desc_index];
      if (!desc->split_candidate)
	continue;

      if (flag_checking)
	isra_verify_access_tree (desc->accesses);

      /* The dereference-distance table is only needed for by-reference
	 parameters that are not known-safe; compute it lazily, once.  */
      if (!dereferences_propagated
	  && desc->by_ref
	  && !desc->safe_ref
	  && desc->accesses)
	{
	  propagate_dereference_distances (fun);
	  dereferences_propagated = true;
	}

      HOST_WIDE_INT nonarg_acc_size = 0;
      bool only_calls = true;
      bool check_failed = false;

      int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
      for (gensum_param_access *acc = desc->accesses;
	   acc;
	   acc = acc->next_sibling)
	if (check_gensum_access (fun, parm, desc, acc, &nonarg_acc_size,
				 &only_calls, entry_bb_index))
	  {
	    check_failed = true;
	    break;
	  }
      if (check_failed)
	continue;

      /* If every access only feeds call arguments, the parameter is unused
	 within this function itself.  */
      if (only_calls)
	desc->locally_unused = true;

      HOST_WIDE_INT cur_param_size
	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
      HOST_WIDE_INT param_size_limit, optimistic_limit;
      if (!desc->by_ref || optimize_function_for_size_p (fun))
	{
	  param_size_limit = cur_param_size;
	  optimistic_limit = cur_param_size;
	}
      else
	{
	  param_size_limit
	    = opt_for_fn (node->decl,
			  param_ipa_sra_ptr_growth_factor) * cur_param_size;
	  optimistic_limit
	    = (opt_for_fn (node->decl, param_ipa_sra_ptrwrap_growth_factor)
	       * param_size_limit);
	}

      if (nonarg_acc_size > optimistic_limit
	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
	{
	  disqualify_split_candidate (desc, "Would result into a too big set "
				      "of replacements even in best "
				      "scenarios.");
	}
      else
	{
	  /* create_parameter_descriptors makes sure unit sizes of all
	     candidate parameters fit unsigned integers restricted to
	     ISRA_ARG_SIZE_LIMIT.  */
	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
	  if (desc->split_candidate && desc->ptr_pt_count)
	    {
	      gcc_assert (desc->by_ref);
	      check_pass_throughs = true;
	    }
	}
    }

  /* When a pointer parameter is passed-through to a callee, in which it is
     only used to read only one or a few items, we can attempt to transform it
     to obtaining and passing through the items instead of the pointer.  But we
     must take extra care that 1) we do not introduce any segfault by moving
     dereferences above control flow and that 2) the data is not modified
     through an alias in this function.  The IPA analysis must not introduce
     any accesses candidates unless it can prove both.

     The current solution is very crude as it consists of ensuring that the
     call postdominates entry BB and that the definition of VUSE of the call is
     default definition.  TODO: For non-recursive callees in the same
     compilation unit we could do better by doing analysis in topological order
     an looking into access candidates of callees, using their alias_ptr_types
     to attempt real AA.  We could also use the maximum known dereferenced
     offset in this function at IPA level.

     TODO: Measure the overhead and the effect of just being pessimistic.
     Maybe this is only -O3 material?  */

  hash_map<gimple *, bool> analyzed_stmts;
  bitmap always_executed_bbs = NULL;
  if (check_pass_throughs)
    for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
      {
	gcall *call_stmt = cs->call_stmt;
	tree vuse = gimple_vuse (call_stmt);

	/* If the callee is a const function, we don't get a VUSE.  In such
	   case there will be no memory accesses in the called function (or the
	   const attribute is wrong) and then we just don't care.  */
	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);

	unsigned count = gimple_call_num_args (call_stmt);
	isra_call_summary *csum = call_sums->get_create (cs);
	csum->init_inputs (count);
	csum->m_before_any_store = uses_memory_as_obtained;
	for (unsigned argidx = 0; argidx < count; argidx++)
	  {
	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
	      continue;
	    unsigned pidx
	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
	    if (!desc->split_candidate)
	      {
		csum->m_arg_flow[argidx].pointer_pass_through = false;
		continue;
	      }
	    if (!uses_memory_as_obtained)
	      continue;

	    /* Known-safe references need no flow analysis at all.  */
	    if (desc->safe_ref)
	      {
		csum->m_arg_flow[argidx].safe_to_import_accesses = true;
		continue;
	      }

	    /* Walk basic block and see if its execution can terminate earlier.
	       Keep the info for later re-use to avoid quadratic behavoiur here.  */
	    gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
	    bool safe = true;
	    int n = 0;
	    for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
	      {
		bool *b = analyzed_stmts.get (gsi_stmt (gsi));
		if (b)
		  {
		    /* Hit a statement analyzed for an earlier call in this
		       BB; reuse its verdict.  */
		    safe = *b;
		    gsi_next (&gsi);
		    break;
		  }
		n++;
		if (stmt_may_terminate_function_p (fun, gsi_stmt (gsi), false))
		  {
		    safe = false;
		    break;
		  }
	      }
	    /* Cache the verdict for all newly visited statements.  */
	    if (n)
	      {
		if (gsi_end_p (gsi))
		  gsi = gsi_start_bb (gimple_bb (call_stmt));
		for (; gsi_stmt (gsi) != call_stmt; gsi_next (&gsi))
		  analyzed_stmts.get_or_insert (gsi_stmt (gsi)) = safe;
	      }

	    /* Compute the always-executed BBs lazily, only when some call
	       actually passed the intra-BB check.  */
	    if (safe && !always_executed_bbs)
	      {
		mark_dfs_back_edges ();
		always_executed_bbs = find_always_executed_bbs (fun, false);
	      }
	    if (safe && bitmap_bit_p (always_executed_bbs, gimple_bb (call_stmt)->index))
	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
	  }

      }
  BITMAP_FREE (always_executed_bbs);

  /* TODO: Add early exit if we disqualified everything.  This also requires
     that we either relax the restriction that
     ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
     or store the number of parameters to IPA-SRA function summary and use that
     when just removing params.  */

  /* Finally, copy the surviving gensum descriptors into the GC-managed
     function summary.  */
  vec_safe_reserve_exact (ifs->m_parameters, param_count);
  ifs->m_parameters->quick_grow_cleared (param_count);
  for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
    {
      gensum_param_desc *s = &(*param_descriptions)[desc_index];
      isra_param_desc *d = &(*ifs->m_parameters)[desc_index];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->nonarg_acc_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;

      for (gensum_param_access *acc = s->accesses;
	   acc;
	   acc = acc->next_sibling)
	copy_accesses_to_ipa_desc (acc, d);
    }

  if (dump_file)
    dump_isra_param_descriptors (dump_file, node->decl, ifs, false);
}
2642
2643/* Return true if there are any overlaps among certain accesses of DESC. If
dfea3d6f 2644 non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
ff6686d2
MJ
2645 too. DESC is assumed to be a split candidate that is not locally
2646 unused. */
2647
2648static bool
2649overlapping_certain_accesses_p (isra_param_desc *desc,
2650 bool *certain_access_present_p)
2651{
2652 unsigned pclen = vec_safe_length (desc->accesses);
2653 for (unsigned i = 0; i < pclen; i++)
2654 {
2655 param_access *a1 = (*desc->accesses)[i];
2656
2657 if (!a1->certain)
2658 continue;
2659 if (certain_access_present_p)
2660 *certain_access_present_p = true;
2661 for (unsigned j = i + 1; j < pclen; j++)
2662 {
2663 param_access *a2 = (*desc->accesses)[j];
2664 if (a2->certain
2665 && a1->unit_offset < a2->unit_offset + a2->unit_size
2666 && a1->unit_offset + a1->unit_size > a2->unit_offset)
2667 return true;
2668 }
2669 }
2670 return false;
2671}
2672
2673/* Check for any overlaps of certain param accesses among splitting candidates
2674 and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also
2675 check that used splitting candidates have at least one certain access. */
2676
2677static void
2678verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
2679{
2680 isra_func_summary *ifs = func_sums->get (node);
2681 if (!ifs || !ifs->m_candidate)
2682 return;
2683 unsigned param_count = vec_safe_length (ifs->m_parameters);
2684 for (unsigned pidx = 0; pidx < param_count; pidx++)
2685 {
2686 isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
2687 if (!desc->split_candidate || desc->locally_unused)
2688 continue;
2689
2690 bool certain_access_present = !certain_must_exist;
2691 if (overlapping_certain_accesses_p (desc, &certain_access_present))
206222e0 2692 internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
ff6686d2
MJ
2693 "which overlap", node->dump_name (), pidx);
2694 if (!certain_access_present)
206222e0 2695 internal_error ("function %qs, parameter %u, is used but does not "
ff6686d2
MJ
2696 "have any certain IPA-SRA access",
2697 node->dump_name (), pidx);
2698 }
2699}
2700
ff6686d2
MJ
2701/* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in
2702 this compilation unit and create summary structures describing IPA-SRA
2703 opportunities and constraints in them. */
2704
2705static void
2706ipa_sra_generate_summary (void)
2707{
2708 struct cgraph_node *node;
2709
2710 gcc_checking_assert (!func_sums);
2711 gcc_checking_assert (!call_sums);
2712 func_sums
78cd68c0 2713 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2714 ipa_sra_function_summaries (symtab, true));
2715 call_sums = new ipa_sra_call_summaries (symtab);
2716
2717 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
2718 ipa_sra_summarize_function (node);
2719 return;
2720}
2721
/* Write intraprocedural analysis information about call graph edge E into a
   stream for LTO WPA.  The format must be kept in sync with
   isra_read_edge_summary.  */

static void
isra_write_edge_summary (output_block *ob, cgraph_edge *e)
{
  isra_call_summary *csum = call_sums->get (e);
  unsigned input_count = csum->m_arg_flow.length ();
  streamer_write_uhwi (ob, input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      streamer_write_hwi (ob, ipf->length);
      /* Pack the input indices and the per-argument flags into one
	 bit-pack.  */
      bitpack_d bp = bitpack_create (ob->main_stream);
      for (int j = 0; j < ipf->length; j++)
	bp_pack_value (&bp, ipf->inputs[j], 8);
      bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
      bp_pack_value (&bp, ipf->pointer_pass_through, 1);
      bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
      bp_pack_value (&bp, ipf->constructed_for_calls, 1);
      streamer_write_bitpack (&bp);
      streamer_write_uhwi (ob, ipf->unit_offset);
      streamer_write_uhwi (ob, ipf->unit_size);
    }
  /* Per-call flags follow the per-argument records.  */
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, csum->m_return_ignored, 1);
  bp_pack_value (&bp, csum->m_return_returned, 1);
  bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
  bp_pack_value (&bp, csum->m_before_any_store, 1);
  streamer_write_bitpack (&bp);
}
2753
/* Write intraprocedural analysis information about NODE and all of its outgoing
   edges into a stream for LTO WPA.  The format must be kept in sync with
   isra_read_node_info.  */

static void
isra_write_node_summary (output_block *ob, cgraph_node *node)
{
  isra_func_summary *ifs = func_sums->get (node);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  int node_ref = lto_symtab_encoder_encode (encoder, node);
  streamer_write_uhwi (ob, node_ref);

  /* One record per parameter descriptor, each containing its accesses.  */
  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
  streamer_write_uhwi (ob, param_desc_count);
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = vec_safe_length (desc->accesses);
      streamer_write_uhwi (ob, access_count);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = (*desc->accesses)[j];
	  stream_write_tree (ob, acc->type, true);
	  stream_write_tree (ob, acc->alias_ptr_type, true);
	  streamer_write_uhwi (ob, acc->unit_offset);
	  streamer_write_uhwi (ob, acc->unit_size);
	  bitpack_d bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, acc->certain, 1);
	  bp_pack_value (&bp, acc->reverse, 1);
	  streamer_write_bitpack (&bp);
	}
      streamer_write_uhwi (ob, desc->param_size_limit);
      streamer_write_uhwi (ob, desc->size_reached);
      /* Fields which are only computed at WPA time must still be unset at
	 the point summaries are streamed out; assert rather than stream
	 them.  */
      gcc_assert (desc->safe_size == 0);
      bitpack_d bp = bitpack_create (ob->main_stream);
      bp_pack_value (&bp, desc->locally_unused, 1);
      bp_pack_value (&bp, desc->split_candidate, 1);
      bp_pack_value (&bp, desc->by_ref, 1);
      gcc_assert (!desc->not_specially_constructed);
      bp_pack_value (&bp, desc->conditionally_dereferenceable, 1);
      gcc_assert (!desc->safe_size_set);
      streamer_write_bitpack (&bp);
    }
  /* Function-wide flags.  */
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, ifs->m_candidate, 1);
  bp_pack_value (&bp, ifs->m_returns_value, 1);
  bp_pack_value (&bp, ifs->m_return_ignored, 1);
  gcc_assert (!ifs->m_queued);
  streamer_write_bitpack (&bp);

  /* Stream summaries of all outgoing edges, direct ones first.  */
  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
}
2808
dfea3d6f 2809/* Write intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2810
2811static void
2812ipa_sra_write_summary (void)
2813{
2814 if (!func_sums || !call_sums)
2815 return;
2816
2817 struct output_block *ob = create_output_block (LTO_section_ipa_sra);
2818 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
2819 ob->symbol = NULL;
2820
2821 unsigned int count = 0;
2822 lto_symtab_encoder_iterator lsei;
2823 for (lsei = lsei_start_function_in_partition (encoder);
2824 !lsei_end_p (lsei);
2825 lsei_next_function_in_partition (&lsei))
2826 {
2827 cgraph_node *node = lsei_cgraph_node (lsei);
2828 if (node->has_gimple_body_p ()
2829 && func_sums->get (node) != NULL)
2830 count++;
2831 }
2832 streamer_write_uhwi (ob, count);
2833
2834 /* Process all of the functions. */
2835 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
2836 lsei_next_function_in_partition (&lsei))
2837 {
2838 cgraph_node *node = lsei_cgraph_node (lsei);
2839 if (node->has_gimple_body_p ()
2840 && func_sums->get (node) != NULL)
2841 isra_write_node_summary (ob, node);
2842 }
2843 streamer_write_char_stream (ob->main_stream, 0);
2844 produce_asm (ob, NULL);
2845 destroy_output_block (ob);
2846}
2847
dfea3d6f 2848/* Read intraprocedural analysis information about E and all of its outgoing
ff6686d2
MJ
2849 edges into a stream for LTO WPA. */
2850
2851static void
2852isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
2853{
2854 isra_call_summary *csum = call_sums->get_create (cs);
2855 unsigned input_count = streamer_read_uhwi (ib);
2856 csum->init_inputs (input_count);
2857 for (unsigned i = 0; i < input_count; i++)
2858 {
2859 isra_param_flow *ipf = &csum->m_arg_flow[i];
2860 ipf->length = streamer_read_hwi (ib);
2861 bitpack_d bp = streamer_read_bitpack (ib);
2862 for (int j = 0; j < ipf->length; j++)
2863 ipf->inputs[j] = bp_unpack_value (&bp, 8);
2864 ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
2865 ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
2866 ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
e3a5cc32 2867 ipf->constructed_for_calls = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2868 ipf->unit_offset = streamer_read_uhwi (ib);
2869 ipf->unit_size = streamer_read_uhwi (ib);
2870 }
2871 bitpack_d bp = streamer_read_bitpack (ib);
2872 csum->m_return_ignored = bp_unpack_value (&bp, 1);
2873 csum->m_return_returned = bp_unpack_value (&bp, 1);
2874 csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
763121cc 2875 csum->m_before_any_store = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2876}
2877
dfea3d6f 2878/* Read intraprocedural analysis information about NODE and all of its outgoing
ff6686d2
MJ
2879 edges into a stream for LTO WPA. */
2880
2881static void
2882isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
2883 struct data_in *data_in)
2884{
2885 isra_func_summary *ifs = func_sums->get_create (node);
2886 unsigned param_desc_count = streamer_read_uhwi (ib);
2887 if (param_desc_count > 0)
2888 {
2889 vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
2890 ifs->m_parameters->quick_grow_cleared (param_desc_count);
2891 }
2892 for (unsigned i = 0; i < param_desc_count; i++)
2893 {
2894 isra_param_desc *desc = &(*ifs->m_parameters)[i];
2895 unsigned access_count = streamer_read_uhwi (ib);
2896 for (unsigned j = 0; j < access_count; j++)
2897 {
2898 param_access *acc = ggc_cleared_alloc<param_access> ();
2899 acc->type = stream_read_tree (ib, data_in);
2900 acc->alias_ptr_type = stream_read_tree (ib, data_in);
2901 acc->unit_offset = streamer_read_uhwi (ib);
2902 acc->unit_size = streamer_read_uhwi (ib);
2903 bitpack_d bp = streamer_read_bitpack (ib);
2904 acc->certain = bp_unpack_value (&bp, 1);
c76b3bc5 2905 acc->reverse = bp_unpack_value (&bp, 1);
ff6686d2
MJ
2906 vec_safe_push (desc->accesses, acc);
2907 }
2908 desc->param_size_limit = streamer_read_uhwi (ib);
2909 desc->size_reached = streamer_read_uhwi (ib);
f2cf4c61 2910 desc->safe_size = 0;
ff6686d2
MJ
2911 bitpack_d bp = streamer_read_bitpack (ib);
2912 desc->locally_unused = bp_unpack_value (&bp, 1);
2913 desc->split_candidate = bp_unpack_value (&bp, 1);
2914 desc->by_ref = bp_unpack_value (&bp, 1);
e3a5cc32 2915 desc->not_specially_constructed = 0;
f2cf4c61
MJ
2916 desc->conditionally_dereferenceable = bp_unpack_value (&bp, 1);
2917 desc->safe_size_set = 0;
ff6686d2
MJ
2918 }
2919 bitpack_d bp = streamer_read_bitpack (ib);
2920 ifs->m_candidate = bp_unpack_value (&bp, 1);
2921 ifs->m_returns_value = bp_unpack_value (&bp, 1);
2922 ifs->m_return_ignored = bp_unpack_value (&bp, 1);
2923 ifs->m_queued = 0;
2924
2925 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2926 isra_read_edge_summary (ib, e);
2927 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2928 isra_read_edge_summary (ib, e);
2929}
2930
2931/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
e53b6e56 2932 data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc,
ff6686d2
MJ
2933 it should be possible to unify them somehow. */
2934
2935static void
2936isra_read_summary_section (struct lto_file_decl_data *file_data,
2937 const char *data, size_t len)
2938{
2939 const struct lto_function_header *header =
2940 (const struct lto_function_header *) data;
2941 const int cfg_offset = sizeof (struct lto_function_header);
2942 const int main_offset = cfg_offset + header->cfg_size;
2943 const int string_offset = main_offset + header->main_size;
2944 struct data_in *data_in;
2945 unsigned int i;
2946 unsigned int count;
2947
2948 lto_input_block ib_main ((const char *) data + main_offset,
d7faf7a5 2949 header->main_size, file_data);
ff6686d2
MJ
2950
2951 data_in =
2952 lto_data_in_create (file_data, (const char *) data + string_offset,
2953 header->string_size, vNULL);
2954 count = streamer_read_uhwi (&ib_main);
2955
2956 for (i = 0; i < count; i++)
2957 {
2958 unsigned int index;
2959 struct cgraph_node *node;
2960 lto_symtab_encoder_t encoder;
2961
2962 index = streamer_read_uhwi (&ib_main);
2963 encoder = file_data->symtab_node_encoder;
2964 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
2965 index));
2966 gcc_assert (node->definition);
2967 isra_read_node_info (&ib_main, node, data_in);
2968 }
2969 lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
2970 len);
2971 lto_data_in_delete (data_in);
2972}
2973
dfea3d6f 2974/* Read intraprocedural analysis information into a stream for LTO WPA. */
ff6686d2
MJ
2975
2976static void
2977ipa_sra_read_summary (void)
2978{
2979 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
2980 struct lto_file_decl_data *file_data;
2981 unsigned int j = 0;
2982
2983 gcc_checking_assert (!func_sums);
2984 gcc_checking_assert (!call_sums);
2985 func_sums
78cd68c0 2986 = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
ff6686d2
MJ
2987 ipa_sra_function_summaries (symtab, true));
2988 call_sums = new ipa_sra_call_summaries (symtab);
2989
2990 while ((file_data = file_data_vec[j++]))
2991 {
2992 size_t len;
3c56d8d8
ML
2993 const char *data
2994 = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
ff6686d2
MJ
2995 if (data)
2996 isra_read_summary_section (file_data, data, len);
2997 }
2998}
2999
e3a5cc32
MJ
3000/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. If
3001 HINTS is true, also dump IPA-analysis computed hints. */
ff6686d2
MJ
3002
3003static void
e3a5cc32 3004ipa_sra_dump_all_summaries (FILE *f, bool hints)
ff6686d2
MJ
3005{
3006 cgraph_node *node;
3007 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
3008 {
3009 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
3010
3011 isra_func_summary *ifs = func_sums->get (node);
3012 if (!ifs)
717d278a
MJ
3013 fprintf (f, " Function does not have any associated IPA-SRA "
3014 "summary\n");
95489a2a
MJ
3015 else if (!ifs->m_candidate)
3016 fprintf (f, " Not a candidate function\n");
717d278a 3017 else
ff6686d2 3018 {
717d278a
MJ
3019 if (ifs->m_returns_value)
3020 fprintf (f, " Returns value\n");
3021 if (vec_safe_is_empty (ifs->m_parameters))
3022 fprintf (f, " No parameter information. \n");
3023 else
3024 for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
3025 {
3026 fprintf (f, " Descriptor for parameter %i:\n", i);
e3a5cc32 3027 dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
717d278a
MJ
3028 }
3029 fprintf (f, "\n");
ff6686d2 3030 }
ff6686d2
MJ
3031
3032 struct cgraph_edge *cs;
3033 for (cs = node->callees; cs; cs = cs->next_callee)
3034 {
3035 fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (),
3036 cs->callee->dump_name ());
3037 isra_call_summary *csum = call_sums->get (cs);
3038 if (csum)
3039 csum->dump (f);
3040 else
3041 fprintf (f, " Call summary is MISSING!\n");
3042 }
3043
3044 }
3045 fprintf (f, "\n\n");
3046}
3047
3048/* Perform function-scope viability tests that can be only made at IPA level
3049 and return false if the function is deemed unsuitable for IPA-SRA. */
3050
3051static bool
3052ipa_sra_ipa_function_checks (cgraph_node *node)
3053{
3054 if (!node->can_be_local_p ())
3055 {
3056 if (dump_file)
3057 fprintf (dump_file, "Function %s disqualified because it cannot be "
3058 "made local.\n", node->dump_name ());
3059 return false;
3060 }
87f94429 3061 if (!node->can_change_signature)
ff6686d2
MJ
3062 {
3063 if (dump_file)
3064 fprintf (dump_file, "Function can not change signature.\n");
3065 return false;
3066 }
3067
3068 return true;
3069}
3070
3071/* Issues found out by check_callers_for_issues. */
3072
3073struct caller_issues
3074{
b90061c6
MJ
3075 /* The candidate being considered. */
3076 cgraph_node *candidate;
ff6686d2
MJ
3077 /* There is a thunk among callers. */
3078 bool thunk;
0be3a051
MJ
3079 /* Set if there is at least one caller that is OK. */
3080 bool there_is_one;
ff6686d2
MJ
3081 /* Call site with no available information. */
3082 bool unknown_callsite;
027e3041 3083 /* Call from outside the candidate's comdat group. */
b90061c6 3084 bool call_from_outside_comdat;
ff6686d2
MJ
3085 /* There is a bit-aligned load into one of non-gimple-typed arguments. */
3086 bool bit_aligned_aggregate_argument;
3087};
3088
3089/* Worker for call_for_symbol_and_aliases, set any flags of passed caller_issues
3090 that apply. */
3091
3092static bool
3093check_for_caller_issues (struct cgraph_node *node, void *data)
3094{
3095 struct caller_issues *issues = (struct caller_issues *) data;
3096
3097 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3098 {
67f3791f 3099 if (cs->caller->thunk)
ff6686d2
MJ
3100 {
3101 issues->thunk = true;
3102 /* TODO: We should be able to process at least some types of
3103 thunks. */
3104 return true;
3105 }
b90061c6
MJ
3106 if (issues->candidate->calls_comdat_local
3107 && issues->candidate->same_comdat_group
3108 && !issues->candidate->in_same_comdat_group_p (cs->caller))
3109 {
3110 issues->call_from_outside_comdat = true;
3111 return true;
3112 }
ff6686d2
MJ
3113
3114 isra_call_summary *csum = call_sums->get (cs);
3115 if (!csum)
3116 {
3117 issues->unknown_callsite = true;
3118 return true;
3119 }
3120
3121 if (csum->m_bit_aligned_arg)
3122 issues->bit_aligned_aggregate_argument = true;
0be3a051
MJ
3123
3124 issues->there_is_one = true;
ff6686d2
MJ
3125 }
3126 return false;
3127}
3128
3129/* Look at all incoming edges to NODE, including aliases and thunks and look
3130 for problems. Return true if NODE type should not be modified at all. */
3131
3132static bool
3133check_all_callers_for_issues (cgraph_node *node)
3134{
3135 struct caller_issues issues;
3136 memset (&issues, 0, sizeof (issues));
b90061c6 3137 issues.candidate = node;
ff6686d2
MJ
3138
3139 node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
3140 if (issues.unknown_callsite)
3141 {
3142 if (dump_file && (dump_flags & TDF_DETAILS))
3143 fprintf (dump_file, "A call of %s has not been analyzed. Disabling "
3144 "all modifications.\n", node->dump_name ());
3145 return true;
3146 }
3147 /* TODO: We should be able to process at least some types of thunks. */
3148 if (issues.thunk)
3149 {
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "A call of %s is through thunk, which are not"
3152 " handled yet. Disabling all modifications.\n",
3153 node->dump_name ());
3154 return true;
3155 }
b90061c6
MJ
3156 if (issues.call_from_outside_comdat)
3157 {
3158 if (dump_file)
3159 fprintf (dump_file, "Function would become private comdat called "
3160 "outside of its comdat group.\n");
3161 return true;
3162 }
ff6686d2
MJ
3163
3164 if (issues.bit_aligned_aggregate_argument)
3165 {
3166 /* Let's only remove parameters/return values from such functions.
3167 TODO: We could only prevent splitting the problematic parameters if
3168 anybody thinks it is worth it. */
3169 if (dump_file && (dump_flags & TDF_DETAILS))
dfea3d6f 3170 fprintf (dump_file, "A call of %s has bit-aligned aggregate argument,"
ff6686d2
MJ
3171 " disabling parameter splitting.\n", node->dump_name ());
3172
3173 isra_func_summary *ifs = func_sums->get (node);
3174 gcc_checking_assert (ifs);
3175 unsigned param_count = vec_safe_length (ifs->m_parameters);
3176 for (unsigned i = 0; i < param_count; i++)
3177 (*ifs->m_parameters)[i].split_candidate = false;
3178 }
0be3a051
MJ
3179 if (!issues.there_is_one)
3180 {
3181 if (dump_file && (dump_flags & TDF_DETAILS))
3182 fprintf (dump_file, "There is no call to %s that we can modify. "
3183 "Disabling all modifications.\n", node->dump_name ());
3184 return true;
3185 }
ff6686d2
MJ
3186 return false;
3187}
3188
3189/* Find the access with corresponding OFFSET and SIZE among accesses in
3190 PARAM_DESC and return it or NULL if such an access is not there. */
3191
3192static param_access *
3193find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
3194{
3195 unsigned pclen = vec_safe_length (param_desc->accesses);
3196
3197 /* The search is linear but the number of stored accesses is bound by
3198 PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */
3199
3200 for (unsigned i = 0; i < pclen; i++)
3201 if ((*param_desc->accesses)[i]->unit_offset == offset
3202 && (*param_desc->accesses)[i]->unit_size == size)
3203 return (*param_desc->accesses)[i];
3204
3205 return NULL;
3206}
3207
3208/* Return iff the total size of definite replacement SIZE would violate the
3209 limit set for it in PARAM. */
3210
3211static bool
3212size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3213{
3214 unsigned limit = desc->param_size_limit;
3215 if (size > limit
3216 || (!desc->by_ref && size == limit))
3217 return true;
3218 return false;
3219}
3220
3221/* Increase reached size of DESC by SIZE or disqualify it if it would violate
3222 the set limit. IDX is the parameter number which is dumped when
3223 disqualifying. */
3224
3225static void
3226bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3227{
3228 unsigned after = desc->size_reached + size;
3229 if (size_would_violate_limit_p (desc, after))
3230 {
3231 if (dump_file && (dump_flags & TDF_DETAILS))
3232 fprintf (dump_file, " ...size limit reached, disqualifying "
3233 "candidate parameter %u\n", idx);
3234 desc->split_candidate = false;
3235 return;
3236 }
3237 desc->size_reached = after;
3238}
3239
3240/* Take all actions required to deal with an edge CS that represents a call to
3241 an unknown or un-analyzed function, for both parameter removal and
3242 splitting. */
3243
3244static void
3245process_edge_to_unknown_caller (cgraph_edge *cs)
3246{
3247 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3248 gcc_checking_assert (from_ifs);
3249 isra_call_summary *csum = call_sums->get (cs);
3250
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, "Processing an edge to an unknown caller from %s:\n",
3253 cs->caller->dump_name ());
3254
3255 unsigned args_count = csum->m_arg_flow.length ();
3256 for (unsigned i = 0; i < args_count; i++)
3257 {
3258 isra_param_flow *ipf = &csum->m_arg_flow[i];
3259
3260 if (ipf->pointer_pass_through)
3261 {
3262 isra_param_desc *param_desc
3263 = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
3264 param_desc->locally_unused = false;
3265 param_desc->split_candidate = false;
3266 continue;
3267 }
3268 if (ipf->aggregate_pass_through)
3269 {
3270 unsigned idx = get_single_param_flow_source (ipf);
3271 isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
3272
3273 param_desc->locally_unused = false;
3274 if (!param_desc->split_candidate)
3275 continue;
3276 gcc_assert (!param_desc->by_ref);
3277 param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
3278 ipf->unit_size);
3279 gcc_checking_assert (pacc);
3280 pacc->certain = true;
3281 if (overlapping_certain_accesses_p (param_desc, NULL))
3282 {
3283 if (dump_file && (dump_flags & TDF_DETAILS))
3284 fprintf (dump_file, " ...leading to overlap, "
3285 " disqualifying candidate parameter %u\n",
3286 idx);
3287 param_desc->split_candidate = false;
3288 }
3289 else
3290 bump_reached_size (param_desc, pacc->unit_size, idx);
3291 ipf->aggregate_pass_through = false;
3292 continue;
3293 }
3294
3295 for (int j = 0; j < ipf->length; j++)
3296 {
3297 int input_idx = ipf->inputs[j];
3298 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3299 }
3300 }
3301}
3302
3303/* Propagate parameter removal information through cross-SCC edge CS,
3304 i.e. decrease the use count in the caller parameter descriptor for each use
3305 in this call. */
3306
3307static void
3308param_removal_cross_scc_edge (cgraph_edge *cs)
3309{
3310 enum availability availability;
3311 cgraph_node *callee = cs->callee->function_symbol (&availability);
3312 isra_func_summary *to_ifs = func_sums->get (callee);
3313 if (!to_ifs || !to_ifs->m_candidate
3314 || (availability < AVAIL_AVAILABLE)
3315 || vec_safe_is_empty (to_ifs->m_parameters))
3316 {
3317 process_edge_to_unknown_caller (cs);
3318 return;
3319 }
3320 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3321 gcc_checking_assert (from_ifs);
3322
3323 isra_call_summary *csum = call_sums->get (cs);
3324 unsigned args_count = csum->m_arg_flow.length ();
3325 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3326
3327 for (unsigned i = 0; i < args_count; i++)
3328 {
3329 bool unused_in_callee;
3330 if (i < param_count)
3331 unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
3332 else
3333 unused_in_callee = false;
3334
3335 if (!unused_in_callee)
3336 {
3337 isra_param_flow *ipf = &csum->m_arg_flow[i];
3338 for (int j = 0; j < ipf->length; j++)
3339 {
3340 int input_idx = ipf->inputs[j];
3341 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3342 }
3343 }
3344 }
3345}
3346
3347/* Unless it is already there, push NODE which is also described by IFS to
3348 STACK. */
3349
3350static void
3351isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
3352 vec<cgraph_node *> *stack)
3353{
3354 if (!ifs->m_queued)
3355 {
3356 ifs->m_queued = true;
3357 stack->safe_push (node);
3358 }
3359}
3360
3361/* If parameter with index INPUT_IDX is marked as locally unused, mark it as
3362 used and push CALLER on STACK. */
3363
3364static void
3365isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
3366 cgraph_node *caller, vec<cgraph_node *> *stack)
3367{
3368 if ((*from_ifs->m_parameters)[input_idx].locally_unused)
3369 {
3370 (*from_ifs->m_parameters)[input_idx].locally_unused = false;
3371 isra_push_node_to_stack (caller, from_ifs, stack);
3372 }
3373}
3374
f2cf4c61 3375/* Combine safe_size of DESC with SIZE and return true if it has changed. */
e3a5cc32 3376
f2cf4c61
MJ
3377static bool
3378update_safe_size (isra_param_desc *desc, unsigned size)
3379{
3380 if (!desc->safe_size_set)
3381 {
3382 desc->safe_size_set = 1;
3383 desc->safe_size = size;
3384 return true;
3385 }
3386 if (desc->safe_size <= size)
3387 return false;
3388 desc->safe_size = size;
3389 return true;
3390}
3391
3392/* Set all param hints in DESC to the pessimistic values. Return true if any
3393 hints that need to potentially trigger further propagation have changed. */
3394
3395static bool
e3a5cc32
MJ
3396flip_all_hints_pessimistic (isra_param_desc *desc)
3397{
3398 desc->not_specially_constructed = true;
f2cf4c61 3399 return update_safe_size (desc, 0);
e3a5cc32
MJ
3400}
3401
f2cf4c61
MJ
3402/* Because we have not analyzed or otherwise problematic caller, go over all
3403 parameter int flags of IFS describing a call graph node of a calllee and
3404 turn them pessimistic. Return true if any hints that need to potentially
3405 trigger further propagation have changed. */
e3a5cc32 3406
f2cf4c61
MJ
3407static bool
3408flip_all_param_hints_pessimistic (isra_func_summary *ifs)
e3a5cc32 3409{
e3a5cc32 3410 if (!ifs || !ifs->m_candidate)
f2cf4c61 3411 return false;
e3a5cc32 3412
f2cf4c61 3413 bool ret = false;
e3a5cc32
MJ
3414 unsigned param_count = vec_safe_length (ifs->m_parameters);
3415
3416 for (unsigned i = 0; i < param_count; i++)
f2cf4c61 3417 ret |= flip_all_hints_pessimistic (&(*ifs->m_parameters)[i]);
e3a5cc32 3418
f2cf4c61 3419 return ret;
e3a5cc32
MJ
3420}
3421
f2cf4c61
MJ
3422/* Propagate hints accross edge CS which ultimately leads to a node described
3423 by TO_IFS. Return true if any hints of the callee which should potentially
3424 trigger further propagation have changed. */
e3a5cc32 3425
f2cf4c61
MJ
3426static bool
3427propagate_param_hints_accross_call (cgraph_edge *cs, isra_func_summary *to_ifs)
e3a5cc32 3428{
e3a5cc32 3429 if (!to_ifs || !to_ifs->m_candidate)
f2cf4c61 3430 return false;
e3a5cc32 3431
f2cf4c61
MJ
3432 isra_call_summary *csum = call_sums->get (cs);
3433 bool ret = false;
e3a5cc32
MJ
3434 unsigned args_count = csum->m_arg_flow.length ();
3435 unsigned param_count = vec_safe_length (to_ifs->m_parameters);
3436
3437 for (unsigned i = 0; i < param_count; i++)
3438 {
3439 isra_param_desc *desc = &(*to_ifs->m_parameters)[i];
3440 if (i >= args_count)
3441 {
f2cf4c61 3442 ret |= flip_all_hints_pessimistic (desc);
e3a5cc32
MJ
3443 continue;
3444 }
3445
f2cf4c61
MJ
3446 if (desc->by_ref)
3447 {
3448 isra_param_flow *ipf = &csum->m_arg_flow[i];
3449
3450 if (!ipf->constructed_for_calls)
3451 desc->not_specially_constructed = true;
3452
3453 if (ipf->pointer_pass_through)
3454 {
3455 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3456 int srcidx = get_single_param_flow_source (ipf);
3457 if (vec_safe_length (from_ifs->m_parameters) > (unsigned) srcidx)
3458 {
3459 isra_param_desc *src_d = &(*from_ifs->m_parameters)[srcidx];
3460 if (src_d->safe_size_set)
3461 ret |= update_safe_size (desc, src_d->safe_size);
3462 }
3463 else
3464 ret |= update_safe_size (desc, 0);
3465 }
3466 else if (!ipf->aggregate_pass_through)
3467 ret |= update_safe_size (desc, ipf->unit_size);
3468 else
3469 /* LTOing type-mismatched programs can end up here. */
3470 ret |= update_safe_size (desc, 0);
3471 }
3472 }
3473 return ret;
3474}
3475
3476/* Propagate hints from NODE described by FROM_IFS to all its (dorect) callees,
3477 push those that may need re-visiting onto STACK. */
3478
3479static void
3480propagate_hints_to_all_callees (cgraph_node *node, isra_func_summary *from_ifs,
3481 vec<cgraph_node *> *stack)
3482{
3483 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
3484 {
3485 enum availability availability;
3486 cgraph_node *callee = cs->callee->function_symbol (&availability);
3487 isra_func_summary *to_ifs = func_sums->get (callee);
3488 if (!from_ifs)
3489 {
3490 if (flip_all_param_hints_pessimistic (to_ifs)
3491 && ipa_edge_within_scc (cs))
3492 isra_push_node_to_stack (callee, to_ifs, stack);
3493 }
3494 else if (propagate_param_hints_accross_call (cs, to_ifs)
3495 && ipa_edge_within_scc (cs))
3496 isra_push_node_to_stack (callee, to_ifs, stack);
e3a5cc32 3497 }
e3a5cc32 3498}
ff6686d2
MJ
3499
3500/* Propagate information that any parameter is not used only locally within a
6b6a8065 3501 SCC across CS to the caller, which must be in the same SCC as the
c76b3bc5 3502 callee. Push any callers that need to be re-processed to STACK. */
ff6686d2
MJ
3503
3504static void
3505propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
3506{
3507 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3508 if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
3509 return;
3510
3511 isra_call_summary *csum = call_sums->get (cs);
3512 gcc_checking_assert (csum);
3513 unsigned args_count = csum->m_arg_flow.length ();
3514 enum availability availability;
3515 cgraph_node *callee = cs->callee->function_symbol (&availability);
3516 isra_func_summary *to_ifs = func_sums->get (callee);
3517
3518 unsigned param_count
3519 = (to_ifs && (availability >= AVAIL_AVAILABLE))
3520 ? vec_safe_length (to_ifs->m_parameters) : 0;
3521 for (unsigned i = 0; i < args_count; i++)
3522 {
3523 if (i < param_count
3524 && (*to_ifs->m_parameters)[i].locally_unused)
3525 continue;
3526
3527 /* The argument is needed in the callee it, we must mark the parameter as
3528 used also in the caller and its callers within this SCC. */
3529 isra_param_flow *ipf = &csum->m_arg_flow[i];
3530 for (int j = 0; j < ipf->length; j++)
3531 {
3532 int input_idx = ipf->inputs[j];
3533 isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
3534 }
3535 }
3536}
3537
3538/* Propagate information that any parameter is not used only locally within a
3539 SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
c76b3bc5 3540 same SCC. Push any callers that need to be re-processed to STACK. */
ff6686d2
MJ
3541
3542static bool
3543propagate_used_to_scc_callers (cgraph_node *node, void *data)
3544{
3545 vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
3546 cgraph_edge *cs;
3547 for (cs = node->callers; cs; cs = cs->next_caller)
3548 if (ipa_edge_within_scc (cs))
3549 propagate_used_across_scc_edge (cs, stack);
3550 return false;
3551}
3552
3553/* Return true iff all certain accesses in ARG_DESC are also present as
3554 certain accesses in PARAM_DESC. */
3555
3556static bool
3557all_callee_accesses_present_p (isra_param_desc *param_desc,
3558 isra_param_desc *arg_desc)
3559{
3560 unsigned aclen = vec_safe_length (arg_desc->accesses);
3561 for (unsigned j = 0; j < aclen; j++)
3562 {
3563 param_access *argacc = (*arg_desc->accesses)[j];
3564 if (!argacc->certain)
3565 continue;
3566 param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
3567 argacc->unit_size);
b9a15a83
MJ
3568 if (!pacc
3569 || !pacc->certain
3570 || !types_compatible_p (argacc->type, pacc->type))
ff6686d2
MJ
3571 return false;
3572 }
3573 return true;
3574}
3575
/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};
3580
3581
1a315435
MJ
3582/* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC,
3583 (which belongs to CALLER) if they would not violate some constraint there.
3584 If successful, return NULL, otherwise return the string reason for failure
3585 (which can be written to the dump file). DELTA_OFFSET is the known offset
3586 of the actual argument withing the formal parameter (so of ARG_DESCS within
3587 PARAM_DESCS), ARG_SIZE is the size of the actual argument or zero, if not
3588 known. In case of success, set *CHANGE_P to true if propagation actually
3589 changed anything. */
ff6686d2
MJ
3590
3591static const char *
1a315435 3592pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
ff6686d2
MJ
3593 isra_param_desc *arg_desc,
3594 unsigned delta_offset, unsigned arg_size,
3595 bool *change_p)
3596{
3597 unsigned pclen = vec_safe_length (param_desc->accesses);
3598 unsigned aclen = vec_safe_length (arg_desc->accesses);
3599 unsigned prop_count = 0;
3600 unsigned prop_size = 0;
3601 bool change = false;
3602
3603 auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
3604 for (unsigned j = 0; j < aclen; j++)
3605 {
3606 param_access *argacc = (*arg_desc->accesses)[j];
3607 prop_kinds.safe_push (ACC_PROP_DONT);
3608
3609 if (arg_size > 0
3610 && argacc->unit_offset + argacc->unit_size > arg_size)
3611 return "callee access outsize size boundary";
3612
3613 if (!argacc->certain)
3614 continue;
3615
3616 unsigned offset = argacc->unit_offset + delta_offset;
3617 /* Given that accesses are initially stored according to increasing
3618 offset and decreasing size in case of equal offsets, the following
3619 searches could be written more efficiently if we kept the ordering
3620 when copying. But the number of accesses is capped at
3621 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
3622 messy quickly, so let's improve on that only if necessary. */
3623
3624 bool exact_match = false;
3625 for (unsigned i = 0; i < pclen; i++)
3626 {
3627 /* Check for overlaps. */
3628 param_access *pacc = (*param_desc->accesses)[i];
3629 if (pacc->unit_offset == offset
3630 && pacc->unit_size == argacc->unit_size)
3631 {
3632 if (argacc->alias_ptr_type != pacc->alias_ptr_type
c76b3bc5
EB
3633 || !types_compatible_p (argacc->type, pacc->type)
3634 || argacc->reverse != pacc->reverse)
ff6686d2
MJ
3635 return "propagated access types would not match existing ones";
3636
3637 exact_match = true;
3638 if (!pacc->certain)
3639 {
3640 prop_kinds[j] = ACC_PROP_CERTAIN;
3641 prop_size += argacc->unit_size;
3642 change = true;
3643 }
3644 continue;
3645 }
3646
3647 if (offset < pacc->unit_offset + pacc->unit_size
3648 && offset + argacc->unit_size > pacc->unit_offset)
3649 {
3650 /* None permissible with load accesses, possible to fit into
3651 argument ones. */
3652 if (pacc->certain
3653 || offset < pacc->unit_offset
3654 || (offset + argacc->unit_size
3655 > pacc->unit_offset + pacc->unit_size))
3656 return "a propagated access would conflict in caller";
3657 }
3658 }
3659
3660 if (!exact_match)
3661 {
3662 prop_kinds[j] = ACC_PROP_COPY;
3663 prop_count++;
3664 prop_size += argacc->unit_size;
3665 change = true;
3666 }
3667 }
3668
3669 if (!change)
3670 return NULL;
3671
3672 if ((prop_count + pclen
1a315435 3673 > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
ff6686d2
MJ
3674 || size_would_violate_limit_p (param_desc,
3675 param_desc->size_reached + prop_size))
3676 return "propagating accesses would violate the count or size limit";
3677
3678 *change_p = true;
3679 for (unsigned j = 0; j < aclen; j++)
3680 {
3681 if (prop_kinds[j] == ACC_PROP_COPY)
3682 {
3683 param_access *argacc = (*arg_desc->accesses)[j];
3684
3685 param_access *copy = ggc_cleared_alloc<param_access> ();
3686 copy->unit_offset = argacc->unit_offset + delta_offset;
3687 copy->unit_size = argacc->unit_size;
3688 copy->type = argacc->type;
3689 copy->alias_ptr_type = argacc->alias_ptr_type;
3690 copy->certain = true;
c76b3bc5 3691 copy->reverse = argacc->reverse;
ff6686d2
MJ
3692 vec_safe_push (param_desc->accesses, copy);
3693 }
3694 else if (prop_kinds[j] == ACC_PROP_CERTAIN)
3695 {
3696 param_access *argacc = (*arg_desc->accesses)[j];
3697 param_access *csp
3698 = find_param_access (param_desc, argacc->unit_offset + delta_offset,
3699 argacc->unit_size);
3700 csp->certain = true;
3701 }
3702 }
3703
3704 param_desc->size_reached += prop_size;
3705
3706 return NULL;
3707}
3708
/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs have
   been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through in call summaries which do not need to be processed
   again if this CS is revisited when iterating while changes are propagated
   within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  /* Only consult callee formal parameters when the callee body (and thus a
     trustworthy summary) is actually available to us.  */
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
	     cs->caller->dump_name (), callee->dump_name ());

  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      /* An argument unused in the callee constrains nothing in the caller.  */
      if (arg_desc->locally_unused)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " ->%u: unused in callee\n", i);
	  ipf->pointer_pass_through = false;
	  continue;
	}

      if (ipf->pointer_pass_through)
	{
	  /* A by-reference parameter of the caller passed along unchanged;
	     splitting it is only possible if the callee side agrees and its
	     accesses can be imported into the caller's descriptor.  */
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (param_desc->by_ref);

	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "  %u->%u: not candidate or not by "
			 "reference in callee\n", idx, i);
	      param_desc->split_candidate = false;
	      ipf->pointer_pass_through = false;
	      res = true;
	    }
	  else if (!ipf->safe_to_import_accesses)
	    {
	      /* We cannot copy the callee's accesses; splitting can survive
	         only if the caller already knows about all of them and the
	         call happens before any store could change the pointee.  */
	      if (!csum->m_before_any_store
		  || !all_callee_accesses_present_p (param_desc, arg_desc))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: cannot import accesses.\n",
			     idx, i);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;

		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: verified callee accesses "
			     "present.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     0, 0, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: by_ref access pull "
			     "failed: %s.\n", idx, i, pull_failure);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "  %u->%u: by_ref access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	}
      else if (ipf->aggregate_pass_through)
	{
	  /* A piece of a caller aggregate parameter passed by value; the
	     corresponding caller access must exist already.  */
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);

	  if (pacc->certain)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: already certain\n", idx, i);
	      ipf->aggregate_pass_through = false;
	    }
	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or by "
			 "reference in callee\n", idx, i);

	      /* Making the access certain may create an overlap which
	         disqualifies the whole parameter from splitting.  */
	      pacc->certain = true;
	      if (overlapping_certain_accesses_p (param_desc, NULL))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "    ...leading to overlap, "
			     " disqualifying candidate parameter %u\n",
			     idx);
		  param_desc->split_candidate = false;
		}
	      else
		bump_reached_size (param_desc, pacc->unit_size, idx);

	      ipf->aggregate_pass_through = false;
	      res = true;
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     ipf->unit_offset,
					     ipf->unit_size, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "failed: %s.\n", idx, i, pull_failure);

		  ipf->aggregate_pass_through = false;
		  pacc->certain = true;

		  if (overlapping_certain_accesses_p (param_desc, NULL))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, "    ...leading to overlap, "
				 " disqualifying candidate parameter %u\n",
				 idx);
		      param_desc->split_candidate = false;
		    }
		  else
		    bump_reached_size (param_desc, pacc->unit_size, idx);

		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->aggregate_pass_through = false;
		}
	    }
	}
    }

  /* Handle argument-parameter count mismatches. */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  ipf->pointer_pass_through = false;
	  ipf->aggregate_pass_through = false;
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  %u->%u: no corresponding formal parameter\n",
		     idx, i);
	  param_desc->split_candidate = false;
	  res = true;
	}
    }
  return res;
}
3917
3918/* Worker for call_for_symbol_and_aliases, look at all callers and if all their
3919 callers ignore the return value, or come from the same SCC and use the
3920 return value only to compute their return value, return false, otherwise
3921 return true. */
3922
3923static bool
3924retval_used_p (cgraph_node *node, void *)
3925{
3926 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
3927 {
3928 isra_call_summary *csum = call_sums->get (cs);
3929 gcc_checking_assert (csum);
3930 if (csum->m_return_ignored)
3931 continue;
3932 if (!csum->m_return_returned)
3933 return true;
3934
3935 isra_func_summary *from_ifs = func_sums->get (cs->caller);
3936 if (!from_ifs || !from_ifs->m_candidate)
3937 return true;
3938
3939 if (!ipa_edge_within_scc (cs)
3940 && !from_ifs->m_return_ignored)
3941 return true;
3942 }
3943
3944 return false;
3945}
3946
/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
   modify parameter which originally had index BASE_INDEX, in the adjustment
   vector of parent clone (if any) had PREV_CLONE_INDEX and was described by
   PREV_ADJUSTMENT.  If IPA-CP has created a transformation summary for the
   original node, it needs to be passed in IPCP_TS, otherwise it should be
   NULL.  If the parent clone is the original function, PREV_ADJUSTMENT is NULL
   and PREV_CLONE_INDEX is equal to BASE_INDEX.  */

static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
				  unsigned prev_clone_index,
				  ipa_adjusted_param *prev_adjustment,
				  ipcp_transformation *ipcp_ts,
				  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  /* An unused parameter is removed simply by not pushing any entry for it.  */
  if (desc->locally_unused)
    {
      if (dump_file)
	fprintf (dump_file, " Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      /* The parameter is kept as-is; either re-use the parent clone's
	 adjustment entry or create a fresh plain copy.  */
      ipa_adjusted_param adj;
      if (prev_adjustment)
	{
	  adj = *prev_adjustment;
	  adj.prev_clone_adjustment = true;
	  adj.prev_clone_index = prev_clone_index;
	}
      else
	{
	  memset (&adj, 0, sizeof (adj));
	  adj.op = IPA_PARAM_OP_COPY;
	  adj.base_index = base_index;
	  adj.prev_clone_index = prev_clone_index;
	}
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, " Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  /* One IPA_PARAM_OP_SPLIT entry is emitted per certain access.  */
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
	continue;

      if (ipcp_ts)
	{
	  /* A scalar component that IPA-CP proved to be a known constant does
	     not need to be passed at all.  */
	  ipa_argagg_value_list avl (ipcp_ts);
	  tree value = avl.get_value (base_index, pa->unit_offset);
	  if (value && !AGGREGATE_TYPE_P (pa->type))
	    {
	      if (dump_file)
		fprintf (dump_file, "    - omitting component at byte "
			 "offset %u which is known to have a constant value\n ",
			 pa->unit_offset);
	      continue;
	    }
	}

      if (dump_file)
	fprintf (dump_file, "    - component at byte offset %u, "
		 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}
4032
b90061c6
MJ
4033/* Worker for all call_for_symbol_thunks_and_aliases. Set calls_comdat_local
4034 flag of all callers of NODE. */
4035
4036static bool
4037mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
4038{
4039 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4040 cs->caller->calls_comdat_local = true;
4041 return false;
4042}
4043
/* Remove any IPA-CP results stored in TS that are associated with removed
   parameters as marked in IFS.  */

static void
zap_useless_ipcp_results (const isra_func_summary *ifs, ipcp_transformation *ts)
{
  unsigned ts_len = vec_safe_length (ts->m_agg_values);

  if (ts_len == 0)
    return;

  /* Compact m_agg_values in place, keeping only entries whose parameter is
     still used.  Copying starts only after the first removal.  */
  bool removed_item = false;
  unsigned dst_index = 0;

  for (unsigned i = 0; i < ts_len; i++)
    {
      ipa_argagg_value *v = &(*ts->m_agg_values)[i];
      const isra_param_desc *desc = &(*ifs->m_parameters)[v->index];

      if (!desc->locally_unused)
	{
	  if (removed_item)
	    (*ts->m_agg_values)[dst_index] = *v;
	  dst_index++;
	}
      else
	removed_item = true;
    }
  if (dst_index == 0)
    {
      /* Nothing survived; release the GC-allocated vector entirely.  */
      ggc_free (ts->m_agg_values);
      ts->m_agg_values = NULL;
    }
  else if (removed_item)
    ts->m_agg_values->truncate (dst_index);

  /* Clear known-bits results of removed parameters; drop the whole vector
     when no useful entry remains.  */
  bool useful_bits = false;
  unsigned count = vec_safe_length (ts->bits);
  for (unsigned i = 0; i < count; i++)
    if ((*ts->bits)[i])
      {
	const isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if (desc->locally_unused)
	  (*ts->bits)[i] = NULL;
	else
	  useful_bits = true;
      }
  if (!useful_bits)
    ts->bits = NULL;

  /* Likewise for value-range results.  */
  bool useful_vr = false;
  count = vec_safe_length (ts->m_vr);
  for (unsigned i = 0; i < count; i++)
    if ((*ts->m_vr)[i].known_p ())
      {
	const isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if (desc->locally_unused)
	  (*ts->m_vr)[i].set_unknown ();
	else
	  useful_vr = true;
      }
  if (!useful_vr)
    ts->m_vr = NULL;
}
ff6686d2 4108
/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
			   hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }

  /* Only clone when there is something to actually change: a removable
     return value or at least one removable/splittable surviving param.  */
  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
	isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if ((desc->locally_unused || desc->split_candidate)
	    /* Make sure we do not clone just to attempt to remove an already
	       removed unused argument. */
	    && (!check_surviving
		|| (i < surviving_params.length ()
		    && surviving_params[i])))
	  {
	    will_change_function = true;
	    break;
	  }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
	       node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
	fprintf (dump_file, "  Will remove return value.\n");
    }

  /* Discard IPA-CP results for parameters we are about to remove before the
     adjustment vector is built from them.  */
  ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
  if (ipcp_ts)
    zap_useless_ipcp_results (ifs, ipcp_ts);
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments
	= cinfo ? cinfo->param_adjustments : NULL)
    {
      /* NODE is already a clone; chain our adjustments onto the parent's.  */
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
					    old_adj, ipcp_ts, &new_params);
	}
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, ipcp_ts, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
			      ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n  Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  /* Per-assembler-name counter keeps ".isra.N" suffixes unique.  */
  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
			       IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
				 node->decl)));
  auto_vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
				  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
					     NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, "  Created new node %s\n", new_node->dump_name ());
  callers.release ();
}
4209
e8109bd8
MJ
4210/* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
4211 followed by the list of numbers in INDICES. */
4212
4213static void
4214dump_list_of_param_indices (const cgraph_node *node, const char* msg,
4215 const vec<unsigned> &indices)
4216{
4217 if (indices.is_empty ())
4218 return;
4219 fprintf (dump_file, "The following parameters of %s %s:", node->dump_name (),
4220 msg);
4221 for (unsigned i : indices)
4222 fprintf (dump_file, " %u", i);
4223 fprintf (dump_file, "\n");
4224}
4225
/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.
   Additionally, also adjust parameter descriptions based on debug counters and
   hints propagated earlier.  */

static bool
adjust_parameter_descriptions (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }
  ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
  /* Indices are only collected for the dump; printed in one line each at the
     end rather than individually.  */
  auto_vec <unsigned> dump_dead_indices;
  auto_vec <unsigned> dump_bad_cond_indices;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
	{
	  /* Debug counter exhausted; disable all transformations of this
	     parameter.  */
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	}
      else if (check_surviving
	       && (i >= surviving_params.length ()
		   || !surviving_params[i]))
	{
	  /* Even if the parameter was removed by a previous IPA pass, we do
	     not clear locally_unused because if it really is unused, this
	     information might be useful in callers. */
	  desc->split_candidate = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    dump_dead_indices.safe_push (i);
	}
      else
	{
	  /* A conditionally dereferenceable parameter may only be split when
	     every certain access fits within the size known to be safe to
	     dereference in all callers.  */
	  if (desc->split_candidate && desc->conditionally_dereferenceable)
	    {
	      gcc_assert (desc->safe_size_set);
	      for (param_access *pa : *desc->accesses)
		if ((pa->unit_offset + pa->unit_size) > desc->safe_size)
		  {
		    if (dump_file && (dump_flags & TDF_DETAILS))
		      dump_bad_cond_indices.safe_push (i);
		    desc->split_candidate = false;
		    break;
		  }
	    }

	  if (desc->split_candidate)
	    {
	      /* Pointers to specially constructed data get a relaxed growth
	         limit before the final size check.  */
	      if (desc->by_ref && !desc->not_specially_constructed)
		{
		  int extra_factor
		    = opt_for_fn (node->decl,
				  param_ipa_sra_ptrwrap_growth_factor);
		  desc->param_size_limit = extra_factor * desc->param_size_limit;
		}
	      if (size_would_violate_limit_p (desc, desc->size_reached))
		desc->split_candidate = false;
	    }

	  /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
	     callees don't agree on types in aggregates and we try to do both
	     IPA-CP and IPA-SRA.  */
	  if (ipcp_ts && desc->split_candidate)
	    {
	      ipa_argagg_value_list avl (ipcp_ts);
	      for (const param_access *pa : desc->accesses)
		{
		  if (!pa->certain)
		    continue;
		  tree value = avl.get_value (i, pa->unit_offset);
		  if (value
		      && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value)))
			   / BITS_PER_UNIT)
			  != pa->unit_size))
		    {
		      desc->split_candidate = false;
		      if (dump_file && (dump_flags & TDF_DETAILS))
			dump_dead_indices.safe_push (i);
		      break;
		    }
		}
	    }

	  if (desc->locally_unused || desc->split_candidate)
	    ret = false;
	}
    }

  dump_list_of_param_indices (node, "are dead on arrival or have a type "
			      "mismatch with IPA-CP", dump_dead_indices);
  dump_list_of_param_indices (node, "are not safe to dereference in all "
			      "callers", dump_bad_cond_indices);

  return ret;
}
4336
4337
/* Run the interprocedural part of IPA-SRA. */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file, false);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);

  /* One sweep from callers to callees for return value removal.  */
  for (int i = node_scc_count - 1; i >= 0 ; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);

      /* Preliminary IPA function level checks.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (!ipa_sra_ipa_function_checks (v)
	      || check_all_callers_for_issues (v))
	    ifs->zap ();
	}

      /* Decide which return values are needed, queueing nodes whose needed
	 return values may transitively force others within the SCC.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  bool return_needed
	    = (ifs->m_returns_value
	       && (!dbg_cnt (ipa_sra_retvalues)
		   || v->call_for_symbol_and_aliases (retval_used_p,
						      NULL, true)))
;
	  ifs->m_return_ignored = !return_needed;
	  if (return_needed)
	    isra_push_node_to_stack (v, ifs, &stack);
	}

      /* Iterate within the SCC until the set of needed return values is
	 stable.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs)
		&& call_sums->get (cs)->m_return_returned)
	      {
		enum availability av;
		cgraph_node *callee = cs->callee->function_symbol (&av);
		isra_func_summary *to_ifs = func_sums->get (callee);
		if (to_ifs && to_ifs->m_return_ignored)
		  {
		    to_ifs->m_return_ignored = false;
		    isra_push_node_to_stack (callee, to_ifs, &stack);
		  }
	      }
	}

      /* Parameter hint propagation.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  propagate_hints_to_all_callees (v, ifs, &stack);
	}

      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;
	  propagate_hints_to_all_callees (node, ifs, &stack);
	}

      cycle_nodes.release ();
    }

  /* One sweep from callees to callers for parameter removal and splitting. */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);

      /* First step of parameter removal.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (adjust_parameter_descriptions (v, ifs))
	    continue;
	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
	    process_edge_to_unknown_caller (cs);
	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
	    if (!ipa_edge_within_scc (cs))
	      param_removal_cross_scc_edge (cs);
	}

      /* Look at edges within the current SCC and propagate used-ness across
	 them, pushing onto the stack all notes which might need to be
	 revisited.  */
      for (cgraph_node *v : cycle_nodes)
	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
					       &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *v = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (v);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
						 &stack, true);
	}

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
	{
	  repeat_scc_access_propagation = false;
	  for (cgraph_node *v : cycle_nodes)
	    {
	      isra_func_summary *ifs = func_sums->get (v);
	      if (!ifs
		  || !ifs->m_candidate
		  || vec_safe_is_empty (ifs->m_parameters))
		continue;
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		if (param_splitting_across_edge (cs))
		  repeat_scc_access_propagation = true;
	    }
	}
      while (repeat_scc_access_propagation);

      if (flag_checking)
	for (cgraph_node *v : cycle_nodes)
	  verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	{
	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
		   " ==========\n");
	  ipa_sra_dump_all_summaries (dump_file, true);
	}
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  /* Materialize the decisions: create clones where anything changed.  */
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA SRA IPA analysis done "
	     "==========\n\n");
  return 0;
}
4526
4527
/* Metadata describing the IPA-SRA pass to the pass manager.  */

const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
4540
/* Pass object wiring IPA-SRA's summary generation, LTO streaming and IPA
   analysis callbacks into the pass manager.  */

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
		      ipa_sra_generate_summary, /* generate_summary */
		      ipa_sra_write_summary, /* write_summary */
		      ipa_sra_read_summary, /* read_summary */
		      NULL , /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
    {}

  /* opt_pass methods: */
  bool gate (function *) final override
    {
      /* TODO: We should remove the optimize check after we ensure we never run
	 IPA passes when not optimizing.  */
      return (flag_ipa_sra && optimize);
    }

  unsigned int execute (function *) final override
    {
      return ipa_sra_analysis ();
    }

}; // class pass_ipa_sra
4571
4572} // anon namespace
4573
/* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
   create a summary structure describing IPA-SRA opportunities and constraints
   in it.  */

static void
ipa_sra_summarize_function (cgraph_node *node)
{
  if (dump_file)
    fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
	     node->order);
  gcc_obstack_init (&gensum_obstack);
  loaded_decls = new hash_set<tree>;

  isra_func_summary *ifs = NULL;
  unsigned count = 0;
  if (ipa_sra_preliminary_function_checks (node))
    {
      ifs = func_sums->get_create (node);
      ifs->m_candidate = true;
      tree ret = TREE_TYPE (TREE_TYPE (node->decl));
      ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
      /* COUNT is the number of formal parameters of the function.  */
      for (tree parm = DECL_ARGUMENTS (node->decl);
	   parm;
	   parm = DECL_CHAIN (parm))
	count++;
    }
  auto_vec<gensum_param_desc, 16> param_descriptions (count);

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool cfun_pushed = false;
  if (count > 0)
    {
      decl2desc = new hash_map<tree, gensum_param_desc *>;
      param_descriptions.reserve_exact (count);
      param_descriptions.quick_grow_cleared (count);

      if (create_parameter_descriptors (node, &param_descriptions))
	{
	  /* Only set up dereference tracking when there is at least one
	     candidate parameter worth scanning for.  */
	  push_cfun (fun);
	  cfun_pushed = true;
	  final_bbs = BITMAP_ALLOC (NULL);
	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
				      unsafe_by_ref_count
				      * last_basic_block_for_fn (fun));
	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
	}
    }
  /* Scan function is run even when there are no removal or splitting
     candidates so that we can calculate hints on call edges which can be
     useful in callees. */
  scan_function (node, fun);

  if (count > 0)
    {
      if (dump_file)
	{
	  dump_gensum_param_descriptors (dump_file, node->decl,
					 &param_descriptions);
	  fprintf (dump_file, "----------------------------------------\n");
	}

      process_scan_results (node, fun, ifs, &param_descriptions);

      if (cfun_pushed)
	pop_cfun ();
      if (bb_dereferences)
	{
	  free (bb_dereferences);
	  bb_dereferences = NULL;
	  BITMAP_FREE (final_bbs);
	  final_bbs = NULL;
	}
    }
  isra_analyze_all_outgoing_calls (node);

  /* Tear down the per-function scratch state set up above.  */
  delete loaded_decls;
  loaded_decls = NULL;
  if (decl2desc)
    {
      delete decl2desc;
      decl2desc = NULL;
    }
  obstack_free (&gensum_obstack, NULL);
  if (dump_file)
    fprintf (dump_file, "\n\n");
  if (flag_checking)
    verify_splitting_accesses (node, false);
  return;
}
4663
ff6686d2
MJ
4664ipa_opt_pass_d *
4665make_pass_ipa_sra (gcc::context *ctxt)
4666{
4667 return new pass_ipa_sra (ctxt);
4668}
4669
4670
4671#include "gt-ipa-sra.h"