1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
103 #define INCLUDE_ALGORITHM
106 #include "coretypes.h"
109 #include "gimple-expr.h"
113 #include "alloc-pool.h"
114 #include "tree-pass.h"
116 #include "diagnostic.h"
117 #include "fold-const.h"
118 #include "gimple-iterator.h"
119 #include "gimple-fold.h"
120 #include "symbol-summary.h"
121 #include "tree-vrp.h"
123 #include "ipa-prop.h"
124 #include "tree-pretty-print.h"
125 #include "tree-inline.h"
126 #include "ipa-fnsummary.h"
127 #include "ipa-utils.h"
128 #include "tree-ssa-ccp.h"
129 #include "stringpool.h"
132 #include "symtab-clones.h"
133 #include "gimple-range.h"
/* Allocation pools for values and their sources in ipa-cp.  */

/* Pool of ipcp_value<tree> objects representing known scalar constants.  */
object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

/* Pool of values representing polymorphic call contexts.  */
object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

/* Pool of source descriptions linking a value to the call graph edge it
   arrived through (see the sources list dumped by ipcp_lattice::print).  */
object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

/* Pool of lattices describing known parts of passed aggregates.  */
object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;
161 /* Return the param lattices structure corresponding to the Ith formal
162 parameter of the function described by INFO. */
163 static inline class ipcp_param_lattices
*
164 ipa_get_parm_lattices (class ipa_node_params
*info
, int i
)
166 gcc_assert (i
>= 0 && i
< ipa_get_param_count (info
));
167 gcc_checking_assert (!info
->ipcp_orig_node
);
168 return &(info
->lattices
[i
]);
171 /* Return the lattice corresponding to the scalar value of the Ith formal
172 parameter of the function described by INFO. */
173 static inline ipcp_lattice
<tree
> *
174 ipa_get_scalar_lat (class ipa_node_params
*info
, int i
)
176 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
177 return &plats
->itself
;
180 /* Return the lattice corresponding to the scalar value of the Ith formal
181 parameter of the function described by INFO. */
182 static inline ipcp_lattice
<ipa_polymorphic_call_context
> *
183 ipa_get_poly_ctx_lat (class ipa_node_params
*info
, int i
)
185 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
186 return &plats
->ctxlat
;
189 /* Return whether LAT is a lattice with a single constant and without an
192 template <typename valtype
>
194 ipcp_lattice
<valtype
>::is_single_const ()
196 if (bottom
|| contains_variable
|| values_count
!= 1)
202 /* Return true iff X and Y should be considered equal values by IPA-CP. */
205 values_equal_for_ipcp_p (tree x
, tree y
)
207 gcc_checking_assert (x
!= NULL_TREE
&& y
!= NULL_TREE
);
212 if (TREE_CODE (x
) == ADDR_EXPR
213 && TREE_CODE (y
) == ADDR_EXPR
214 && (TREE_CODE (TREE_OPERAND (x
, 0)) == CONST_DECL
215 || (TREE_CODE (TREE_OPERAND (x
, 0)) == VAR_DECL
216 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x
, 0))))
217 && (TREE_CODE (TREE_OPERAND (y
, 0)) == CONST_DECL
218 || (TREE_CODE (TREE_OPERAND (y
, 0)) == VAR_DECL
219 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y
, 0)))))
220 return TREE_OPERAND (x
, 0) == TREE_OPERAND (y
, 0)
221 || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x
, 0)),
222 DECL_INITIAL (TREE_OPERAND (y
, 0)), 0);
224 return operand_equal_p (x
, y
, 0);
227 /* Print V which is extracted from a value in a lattice to F. */
230 print_ipcp_constant_value (FILE * f
, ipa_polymorphic_call_context v
)
235 /* Print a lattice LAT to F. */
237 template <typename valtype
>
239 ipcp_lattice
<valtype
>::print (FILE * f
, bool dump_sources
, bool dump_benefits
)
241 ipcp_value
<valtype
> *val
;
246 fprintf (f
, "BOTTOM\n");
250 if (!values_count
&& !contains_variable
)
252 fprintf (f
, "TOP\n");
256 if (contains_variable
)
258 fprintf (f
, "VARIABLE");
264 for (val
= values
; val
; val
= val
->next
)
266 if (dump_benefits
&& prev
)
268 else if (!dump_benefits
&& prev
)
273 print_ipcp_constant_value (f
, val
->value
);
277 ipcp_value_source
<valtype
> *s
;
279 if (val
->self_recursion_generated_p ())
280 fprintf (f
, " [self_gen(%i), from:",
281 val
->self_recursion_generated_level
);
283 fprintf (f
, " [scc: %i, from:", val
->scc_no
);
284 for (s
= val
->sources
; s
; s
= s
->next
)
285 fprintf (f
, " %i(%f)", s
->cs
->caller
->order
,
286 s
->cs
->sreal_frequency ().to_double ());
291 fprintf (f
, " [loc_time: %g, loc_size: %i, "
292 "prop_time: %g, prop_size: %i]\n",
293 val
->local_time_benefit
.to_double (), val
->local_size_cost
,
294 val
->prop_time_benefit
.to_double (), val
->prop_size_cost
);
301 ipcp_bits_lattice::print (FILE *f
)
304 fprintf (f
, " Bits unknown (TOP)\n");
305 else if (bottom_p ())
306 fprintf (f
, " Bits unusable (BOTTOM)\n");
309 fprintf (f
, " Bits: value = "); print_hex (get_value (), f
);
310 fprintf (f
, ", mask = "); print_hex (get_mask (), f
);
315 /* Print value range lattice to F. */
318 ipcp_vr_lattice::print (FILE * f
)
323 /* Print all ipcp_lattices of all functions to F. */
326 print_all_lattices (FILE * f
, bool dump_sources
, bool dump_benefits
)
328 struct cgraph_node
*node
;
331 fprintf (f
, "\nLattices:\n");
332 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
334 class ipa_node_params
*info
;
336 info
= ipa_node_params_sum
->get (node
);
337 /* Skip unoptimized functions and constprop clones since we don't make
338 lattices for them. */
339 if (!info
|| info
->ipcp_orig_node
)
341 fprintf (f
, " Node: %s:\n", node
->dump_name ());
342 count
= ipa_get_param_count (info
);
343 for (i
= 0; i
< count
; i
++)
345 struct ipcp_agg_lattice
*aglat
;
346 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
347 fprintf (f
, " param [%d]: ", i
);
348 plats
->itself
.print (f
, dump_sources
, dump_benefits
);
349 fprintf (f
, " ctxs: ");
350 plats
->ctxlat
.print (f
, dump_sources
, dump_benefits
);
351 plats
->bits_lattice
.print (f
);
353 plats
->m_value_range
.print (f
);
355 if (plats
->virt_call
)
356 fprintf (f
, " virt_call flag set\n");
358 if (plats
->aggs_bottom
)
360 fprintf (f
, " AGGS BOTTOM\n");
363 if (plats
->aggs_contain_variable
)
364 fprintf (f
, " AGGS VARIABLE\n");
365 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
367 fprintf (f
, " %soffset " HOST_WIDE_INT_PRINT_DEC
": ",
368 plats
->aggs_by_ref
? "ref " : "", aglat
->offset
);
369 aglat
->print (f
, dump_sources
, dump_benefits
);
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  NOTE(review): several lines of this definition (the `static
   void' line, braces and an `else' in the built-in scan below) are missing
   from this extract; code tokens that were visible are kept unchanged.  */

determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
  /* REASON stays NULL iff the node can be versioned; otherwise it carries
     the human-readable explanation used for dumping.  */
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    /* Ideally we should clone the SIMD clones themselves and create
       vector copies of them, so IPA-cp and SIMD clones can happily
       coexist, but that may not be worth the effort.  */
    reason = "function has SIMD clones";
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    /* Ideally we should clone the target clones themselves and create
       copies of them, so IPA-cp and target clones can happily
       coexist, but that may not be worth the effort.  */
    reason = "function target_clones attribute";
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    /* TODO: call is versionable if we make sure that all
       callers are inside of a comdat group.  */
    reason = "calls comdat-local function";

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	  /* NOTE(review): braces and an `else' around the two checks below
	     are missing from this extract.  */
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";

  /* Do not spam dumps about aliases/thunks; they are never versionable
     anyway.  */
  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
446 /* Return true if it is at all technically possible to create clones of a
450 ipcp_versionable_function_p (struct cgraph_node
*node
)
452 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
453 return info
&& info
->versionable
;
/* Structure holding accumulated information about callers of a node.
   NOTE(review): some member declarations of this struct (the frequency sum,
   the non-recursive call counter and the `itself' pointer) and the braces
   are missing from this extract; only visible tokens are kept.  */

struct caller_statistics
  /* If requested (see below), self-recursive call counts are summed into
     this field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */

/* Initialize fields of STAT to zeroes and optionally set it up so that edges
   from IGNORED_CALLER are not counted.  */

init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
  /* NOTE(review): the initializations of the call counter and frequency sum
     are missing from this extract.  */
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  /* Remember the node whose own outgoing edges are to be counted as
     self-recursive rather than ordinary calls.  */
  stats->itself = itself;
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  DATA points to a caller_statistics
   structure that is updated in place.  NOTE(review): this extract is missing
   the return type line, braces, a `continue', an `else' and the final
   return statement; visible tokens are kept unchanged.  */

gather_caller_stats (struct cgraph_node *node, void *data)
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	/* Edges from dead nodes are skipped (the `continue' line is missing
	   from this extract).  */
	if (info && info->node_dead)
	/* Profile counts are split between self-recursive edges (when the
	   stats were set up with `itself') and all other edges.  An `else'
	   before the count_sum update is missing from this extract.  */
	if (cs->count.ipa ().initialized_p ())
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	      stats->count_sum += cs->count.ipa ();
	stats->freq_sum += cs->sreal_frequency ();

	/* Count non-self-recursive calls only when requested.  */
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
527 /* Return true if this NODE is viable candidate for cloning. */
530 ipcp_cloning_candidate_p (struct cgraph_node
*node
)
532 struct caller_statistics stats
;
534 gcc_checking_assert (node
->has_gimple_body_p ());
536 if (!opt_for_fn (node
->decl
, flag_ipa_cp_clone
))
539 fprintf (dump_file
, "Not considering %s for cloning; "
540 "-fipa-cp-clone disabled.\n",
545 if (node
->optimize_for_size_p ())
548 fprintf (dump_file
, "Not considering %s for cloning; "
549 "optimizing it for size.\n",
554 init_caller_stats (&stats
);
555 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
, false);
557 if (ipa_size_summaries
->get (node
)->self_size
< stats
.n_calls
)
560 fprintf (dump_file
, "Considering %s for cloning; code might shrink.\n",
565 /* When profile is available and function is hot, propagate into it even if
566 calls seems cold; constant propagation can improve function's speed
568 if (stats
.count_sum
> profile_count::zero ()
569 && node
->count
.ipa ().initialized_p ())
571 if (stats
.count_sum
> node
->count
.ipa ().apply_scale (90, 100))
574 fprintf (dump_file
, "Considering %s for cloning; "
575 "usually called directly.\n",
580 if (!stats
.n_hot_calls
)
583 fprintf (dump_file
, "Not considering %s for cloning; no hot calls.\n",
588 fprintf (dump_file
, "Considering %s for cloning.\n",
593 template <typename valtype
>
594 class value_topo_info
597 /* Head of the linked list of topologically sorted values. */
598 ipcp_value
<valtype
> *values_topo
;
599 /* Stack for creating SCCs, represented by a linked list too. */
600 ipcp_value
<valtype
> *stack
;
601 /* Counter driving the algorithm in add_val_to_toposort. */
604 value_topo_info () : values_topo (NULL
), stack (NULL
), dfs_counter (0)
606 void add_val (ipcp_value
<valtype
> *cur_val
);
607 void propagate_effects ();
610 /* Arrays representing a topological ordering of call graph nodes and a stack
611 of nodes used during constant propagation and also data required to perform
612 topological sort of values and propagation of benefits in the determined
618 /* Array with obtained topological order of cgraph nodes. */
619 struct cgraph_node
**order
;
620 /* Stack of cgraph nodes used during propagation within SCC until all values
621 in the SCC stabilize. */
622 struct cgraph_node
**stack
;
623 int nnodes
, stack_top
;
625 value_topo_info
<tree
> constants
;
626 value_topo_info
<ipa_polymorphic_call_context
> contexts
;
628 ipa_topo_info () : order(NULL
), stack(NULL
), nnodes(0), stack_top(0),
633 /* Skip edges from and to nodes without ipa_cp enabled.
634 Ignore not available symbols. */
637 ignore_edge_p (cgraph_edge
*e
)
639 enum availability avail
;
640 cgraph_node
*ultimate_target
641 = e
->callee
->function_or_virtual_thunk_symbol (&avail
, e
->caller
);
643 return (avail
<= AVAIL_INTERPOSABLE
644 || !opt_for_fn (ultimate_target
->decl
, optimize
)
645 || !opt_for_fn (ultimate_target
->decl
, flag_ipa_cp
));
/* Allocate the arrays in TOPO and topologically sort the nodes into order.
   NOTE(review): the continuation of the ipa_reduced_postorder call (its last
   argument) and the closing of this function are missing from this
   extract.  */

build_toporder_info (class ipa_topo_info *topo)
  /* Both arrays are sized for every node in the call graph.  */
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,

/* Free information about strongly connected components and the arrays in
   TOPO.  NOTE(review): the lines releasing the two arrays are missing from
   this extract.  */

free_toporder_info (class ipa_topo_info *topo)
  ipa_free_postorder_info ();
672 /* Add NODE to the stack in TOPO, unless it is already there. */
675 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
677 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
678 if (info
->node_enqueued
)
680 info
->node_enqueued
= 1;
681 topo
->stack
[topo
->stack_top
++] = node
;
/* Pop a node from the stack in TOPO and return it or return NULL if the
   stack is empty.  NOTE(review): the emptiness test, the stack_top
   decrement and the return statements are missing from this extract; only
   visible tokens are kept.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
      struct cgraph_node *node;
      node = topo->stack[topo->stack_top];
      /* Clear the flag set by push_node_to_stack so the node may be
	 enqueued again later.  */
      ipa_node_params_sum->get (node)->node_enqueued = 0;

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  NOTE(review): the return type line and the whole body of this
   member function are missing from this extract.  */

template <typename valtype>
ipcp_lattice<valtype>::set_to_bottom ()
714 /* Mark lattice as containing an unknown value and return true if it previously
715 was not marked as such. */
717 template <typename valtype
>
719 ipcp_lattice
<valtype
>::set_contains_variable ()
721 bool ret
= !contains_variable
;
722 contains_variable
= true;
726 /* Set all aggregate lattices in PLATS to bottom and return true if they were
727 not previously set as such. */
730 set_agg_lats_to_bottom (class ipcp_param_lattices
*plats
)
732 bool ret
= !plats
->aggs_bottom
;
733 plats
->aggs_bottom
= true;
737 /* Mark all aggregate lattices in PLATS as containing an unknown value and
738 return true if they were not previously marked as such. */
741 set_agg_lats_contain_variable (class ipcp_param_lattices
*plats
)
743 bool ret
= !plats
->aggs_contain_variable
;
744 plats
->aggs_contain_variable
= true;
/* Meet the current value of the lattice with the lattice OTHER, forwarding
   to meet_with_1.  NOTE(review): the return type line and braces are
   missing from this extract.  */
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
  return meet_with_1 (other.m_vr);

/* Meet the current value of the lattice with the range described by
   P_VR.  NOTE(review): the continuation of the original comment, the
   return type line and braces are missing from this extract.  */
ipcp_vr_lattice::meet_with (const vrange &p_vr)
  return meet_with_1 (p_vr);
/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  NOTE(review): the early-out
   checks preceding the varying_p test, the declaration of `res', the
   flag_checking conditional separating the two union_ calls and the final
   return are missing from this extract; visible tokens are kept.  */

ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
  /* Merging with a VARYING range degrades the lattice to bottom.  */
  if (other_vr.varying_p ())
    return set_to_bottom ();

      /* Checking variant: verify that union_'s changed-flag agrees with an
	 explicit before/after comparison.  */
      Value_Range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    /* Non-checking variant of the same union operation.  */
    res = m_vr.union_ (other_vr);
/* Return true if value range information in the lattice is yet unknown.
   NOTE(review): return type lines and braces of the three members below are
   missing from this extract.  */

ipcp_vr_lattice::top_p () const
  return m_vr.undefined_p ();

/* Return true if value range information in the lattice is known to be
   unusable (i.e. the lattice has reached bottom).  */

ipcp_vr_lattice::bottom_p () const
  return m_vr.varying_p ();

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  NOTE(review): the return statements
   of this function are missing from this extract.  */

ipcp_vr_lattice::set_to_bottom ()
  if (m_vr.varying_p ())
  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);
824 /* Set lattice value to bottom, if it already isn't the case. */
827 ipcp_bits_lattice::set_to_bottom ()
831 m_lattice_val
= IPA_BITS_VARYING
;
837 /* Set to constant if it isn't already. Only meant to be called
838 when switching state from TOP. */
841 ipcp_bits_lattice::set_to_constant (widest_int value
, widest_int mask
)
843 gcc_assert (top_p ());
844 m_lattice_val
= IPA_BITS_CONSTANT
;
845 m_value
= wi::bit_and (wi::bit_not (mask
), value
);
850 /* Return true if any of the known bits are non-zero. */
853 ipcp_bits_lattice::known_nonzero_p () const
857 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask
), m_value
), 0);
860 /* Convert operand to value, mask form. */
863 ipcp_bits_lattice::get_value_and_mask (tree operand
, widest_int
*valuep
, widest_int
*maskp
)
865 wide_int
get_nonzero_bits (const_tree
);
867 if (TREE_CODE (operand
) == INTEGER_CST
)
869 *valuep
= wi::to_widest (operand
);
879 /* Meet operation, similar to ccp_lattice_meet, we xor values
880 if this->value, value have different values at same bit positions, we want
881 to drop that bit to varying. Return true if mask is changed.
882 This function assumes that the lattice value is in CONSTANT state. If
883 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
886 ipcp_bits_lattice::meet_with_1 (widest_int value
, widest_int mask
,
887 unsigned precision
, bool drop_all_ones
)
889 gcc_assert (constant_p ());
891 widest_int old_mask
= m_mask
;
892 m_mask
= (m_mask
| mask
) | (m_value
^ value
);
897 if (wi::sext (m_mask
, precision
) == -1)
898 return set_to_bottom ();
900 return m_mask
!= old_mask
;
903 /* Meet the bits lattice with operand
904 described by <value, mask, sgn, precision. */
907 ipcp_bits_lattice::meet_with (widest_int value
, widest_int mask
,
915 if (wi::sext (mask
, precision
) == -1)
916 return set_to_bottom ();
917 return set_to_constant (value
, mask
);
920 return meet_with_1 (value
, mask
, precision
, false);
923 /* Meet bits lattice with the result of bit_value_binop (other, operand)
924 if code is binary operation or bit_value_unop (other) if code is unary op.
925 In the case when code is nop_expr, no adjustment is required. If
926 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
929 ipcp_bits_lattice::meet_with (ipcp_bits_lattice
& other
, unsigned precision
,
930 signop sgn
, enum tree_code code
, tree operand
,
933 if (other
.bottom_p ())
934 return set_to_bottom ();
936 if (bottom_p () || other
.top_p ())
939 widest_int adjusted_value
, adjusted_mask
;
941 if (TREE_CODE_CLASS (code
) == tcc_binary
)
943 tree type
= TREE_TYPE (operand
);
944 widest_int o_value
, o_mask
;
945 get_value_and_mask (operand
, &o_value
, &o_mask
);
947 bit_value_binop (code
, sgn
, precision
, &adjusted_value
, &adjusted_mask
,
948 sgn
, precision
, other
.get_value (), other
.get_mask (),
949 TYPE_SIGN (type
), TYPE_PRECISION (type
), o_value
, o_mask
);
951 if (wi::sext (adjusted_mask
, precision
) == -1)
952 return set_to_bottom ();
955 else if (TREE_CODE_CLASS (code
) == tcc_unary
)
957 bit_value_unop (code
, sgn
, precision
, &adjusted_value
,
958 &adjusted_mask
, sgn
, precision
, other
.get_value (),
961 if (wi::sext (adjusted_mask
, precision
) == -1)
962 return set_to_bottom ();
966 return set_to_bottom ();
972 adjusted_mask
|= adjusted_value
;
973 adjusted_value
&= ~adjusted_mask
;
975 if (wi::sext (adjusted_mask
, precision
) == -1)
976 return set_to_bottom ();
977 return set_to_constant (adjusted_value
, adjusted_mask
);
980 return meet_with_1 (adjusted_value
, adjusted_mask
, precision
,
984 /* Dump the contents of the list to FILE. */
987 ipa_argagg_value_list::dump (FILE *f
)
990 for (const ipa_argagg_value
&av
: m_elts
)
992 fprintf (f
, "%s %i[%u]=", comma
? "," : "",
993 av
.index
, av
.unit_offset
);
994 print_generic_expr (f
, av
.value
);
996 fprintf (f
, "(by_ref)");
998 fprintf (f
, "(killed)");
1004 /* Dump the contents of the list to stderr. */
1007 ipa_argagg_value_list::debug ()
1012 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1013 NULL if there is no such constant. */
1015 const ipa_argagg_value
*
1016 ipa_argagg_value_list::get_elt (int index
, unsigned unit_offset
) const
1018 ipa_argagg_value key
;
1020 key
.unit_offset
= unit_offset
;
1021 const ipa_argagg_value
*res
1022 = std::lower_bound (m_elts
.begin (), m_elts
.end (), key
,
1023 [] (const ipa_argagg_value
&elt
,
1024 const ipa_argagg_value
&val
)
1026 if (elt
.index
< val
.index
)
1028 if (elt
.index
> val
.index
)
1030 if (elt
.unit_offset
< val
.unit_offset
)
1035 if (res
== m_elts
.end ()
1036 || res
->index
!= index
1037 || res
->unit_offset
!= unit_offset
)
1040 /* TODO: perhaps remove the check (that the underlying array is indeed
1041 sorted) if it turns out it can be too slow? */
1045 const ipa_argagg_value
*slow_res
= NULL
;
1046 int prev_index
= -1;
1047 unsigned prev_unit_offset
= 0;
1048 for (const ipa_argagg_value
&av
: m_elts
)
1050 gcc_assert (prev_index
< 0
1051 || prev_index
< av
.index
1052 || prev_unit_offset
< av
.unit_offset
);
1053 prev_index
= av
.index
;
1054 prev_unit_offset
= av
.unit_offset
;
1055 if (av
.index
== index
1056 && av
.unit_offset
== unit_offset
)
1059 gcc_assert (res
== slow_res
);
1064 /* Return the first item describing a constant stored for parameter with INDEX,
1065 regardless of offset or reference, or NULL if there is no such constant. */
1067 const ipa_argagg_value
*
1068 ipa_argagg_value_list::get_elt_for_index (int index
) const
1070 const ipa_argagg_value
*res
1071 = std::lower_bound (m_elts
.begin (), m_elts
.end (), index
,
1072 [] (const ipa_argagg_value
&elt
, unsigned idx
)
1074 return elt
.index
< idx
;
1076 if (res
== m_elts
.end ()
1077 || res
->index
!= index
)
1082 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1083 performing any check of whether value is passed by reference, or NULL_TREE
1084 if there is no such constant. */
1087 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
) const
1089 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1090 return av
? av
->value
: NULL_TREE
;
1093 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1094 passed by reference or not according to BY_REF, or NULL_TREE if there is
1095 no such constant. */
1098 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
,
1101 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1102 if (av
&& av
->by_ref
== by_ref
)
1107 /* Return true if all elements present in OTHER are also present in this
1111 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list
&other
) const
1114 for (unsigned i
= 0; i
< other
.m_elts
.size (); i
++)
1116 unsigned other_index
= other
.m_elts
[i
].index
;
1117 unsigned other_offset
= other
.m_elts
[i
].unit_offset
;
1119 while (j
< m_elts
.size ()
1120 && (m_elts
[j
].index
< other_index
1121 || (m_elts
[j
].index
== other_index
1122 && m_elts
[j
].unit_offset
< other_offset
)))
1125 if (j
>= m_elts
.size ()
1126 || m_elts
[j
].index
!= other_index
1127 || m_elts
[j
].unit_offset
!= other_offset
1128 || m_elts
[j
].by_ref
!= other
.m_elts
[i
].by_ref
1130 || !values_equal_for_ipcp_p (m_elts
[j
].value
, other
.m_elts
[i
].value
))
1136 /* Push all items in this list that describe parameter SRC_INDEX into RES as
1137 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1138 offsets but skip those which would end up with a negative offset. */
1141 ipa_argagg_value_list::push_adjusted_values (unsigned src_index
,
1142 unsigned dest_index
,
1143 unsigned unit_delta
,
1144 vec
<ipa_argagg_value
> *res
) const
1146 const ipa_argagg_value
*av
= get_elt_for_index (src_index
);
1149 unsigned prev_unit_offset
= 0;
1151 for (; av
< m_elts
.end (); ++av
)
1153 if (av
->index
> src_index
)
1155 if (av
->index
== src_index
1156 && (av
->unit_offset
>= unit_delta
)
1159 ipa_argagg_value new_av
;
1160 gcc_checking_assert (av
->value
);
1161 new_av
.value
= av
->value
;
1162 new_av
.unit_offset
= av
->unit_offset
- unit_delta
;
1163 new_av
.index
= dest_index
;
1164 new_av
.by_ref
= av
->by_ref
;
1165 gcc_assert (!av
->killed
);
1166 new_av
.killed
= false;
1168 /* Quick check that the offsets we push are indeed increasing. */
1170 || new_av
.unit_offset
> prev_unit_offset
);
1171 prev_unit_offset
= new_av
.unit_offset
;
1174 res
->safe_push (new_av
);
1179 /* Push to RES information about single lattices describing aggregate values in
1180 PLATS as those describing parameter DEST_INDEX and the original offset minus
1181 UNIT_DELTA. Return true if any item has been pushed to RES. */
1184 push_agg_values_from_plats (ipcp_param_lattices
*plats
, int dest_index
,
1185 unsigned unit_delta
,
1186 vec
<ipa_argagg_value
> *res
)
1188 if (plats
->aggs_contain_variable
)
1191 bool pushed_sth
= false;
1193 unsigned prev_unit_offset
= 0;
1194 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
1195 if (aglat
->is_single_const ()
1196 && (aglat
->offset
/ BITS_PER_UNIT
- unit_delta
) >= 0)
1198 ipa_argagg_value iav
;
1199 iav
.value
= aglat
->values
->value
;
1200 iav
.unit_offset
= aglat
->offset
/ BITS_PER_UNIT
- unit_delta
;
1201 iav
.index
= dest_index
;
1202 iav
.by_ref
= plats
->aggs_by_ref
;
1206 || iav
.unit_offset
> prev_unit_offset
);
1207 prev_unit_offset
= iav
.unit_offset
;
1211 res
->safe_push (iav
);
1216 /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1217 Return the number of remaining valid entries. */
1220 intersect_argaggs_with (vec
<ipa_argagg_value
> &elts
,
1221 const vec
<ipa_argagg_value
> &other
)
1223 unsigned valid_entries
= 0;
1225 for (unsigned i
= 0; i
< elts
.length (); i
++)
1230 unsigned this_index
= elts
[i
].index
;
1231 unsigned this_offset
= elts
[i
].unit_offset
;
1233 while (j
< other
.length ()
1234 && (other
[j
].index
< this_index
1235 || (other
[j
].index
== this_index
1236 && other
[j
].unit_offset
< this_offset
)))
1239 if (j
>= other
.length ())
1241 elts
[i
].value
= NULL_TREE
;
1245 if (other
[j
].index
== this_index
1246 && other
[j
].unit_offset
== this_offset
1247 && other
[j
].by_ref
== elts
[i
].by_ref
1249 && values_equal_for_ipcp_p (other
[j
].value
, elts
[i
].value
))
1252 elts
[i
].value
= NULL_TREE
;
1254 return valid_entries
;
1257 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1258 return true is any of them has not been marked as such so far. */
1261 set_all_contains_variable (class ipcp_param_lattices
*plats
)
1264 ret
= plats
->itself
.set_contains_variable ();
1265 ret
|= plats
->ctxlat
.set_contains_variable ();
1266 ret
|= set_agg_lats_contain_variable (plats
);
1267 ret
|= plats
->bits_lattice
.set_to_bottom ();
1268 ret
|= plats
->m_value_range
.set_to_bottom ();
1272 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1273 points to by the number of callers to NODE. */
1276 count_callers (cgraph_node
*node
, void *data
)
1278 int *caller_count
= (int *) data
;
1280 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
1281 /* Local thunks can be handled transparently, but if the thunk cannot
1282 be optimized out, count it as a real use. */
1283 if (!cs
->caller
->thunk
|| !cs
->caller
->local
)
1288 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1289 the one caller of some other node. Set the caller's corresponding flag. */
1292 set_single_call_flag (cgraph_node
*node
, void *)
1294 cgraph_edge
*cs
= node
->callers
;
1295 /* Local thunks can be handled transparently, skip them. */
1296 while (cs
&& cs
->caller
->thunk
&& cs
->caller
->local
)
1297 cs
= cs
->next_caller
;
1299 if (ipa_node_params
* info
= ipa_node_params_sum
->get (cs
->caller
))
1301 info
->node_calling_single_call
= true;
1307 /* Initialize ipcp_lattices. */
1310 initialize_node_lattices (struct cgraph_node
*node
)
1312 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1313 struct cgraph_edge
*ie
;
1314 bool disable
= false, variable
= false;
1317 gcc_checking_assert (node
->has_gimple_body_p ());
1319 if (!ipa_get_param_count (info
))
1321 else if (node
->local
)
1323 int caller_count
= 0;
1324 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1326 gcc_checking_assert (caller_count
> 0);
1327 if (caller_count
== 1)
1328 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1333 /* When cloning is allowed, we can assume that externally visible
1334 functions are not called. We will compensate this by cloning
1336 if (ipcp_versionable_function_p (node
)
1337 && ipcp_cloning_candidate_p (node
))
1343 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1344 && !node
->alias
&& !node
->thunk
)
1346 fprintf (dump_file
, "Initializing lattices of %s\n",
1347 node
->dump_name ());
1348 if (disable
|| variable
)
1349 fprintf (dump_file
, " Marking all lattices as %s\n",
1350 disable
? "BOTTOM" : "VARIABLE");
1353 auto_vec
<bool, 16> surviving_params
;
1354 bool pre_modified
= false;
1356 clone_info
*cinfo
= clone_info::get (node
);
1358 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1360 /* At the moment all IPA optimizations should use the number of
1361 parameters of the prevailing decl as the m_always_copy_start.
1362 Handling any other value would complicate the code below, so for the
1363 time bing let's only assert it is so. */
1364 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1365 == ipa_get_param_count (info
))
1366 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1368 pre_modified
= true;
1369 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1371 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1372 && !node
->alias
&& !node
->thunk
)
1375 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1377 if (j
< (int) surviving_params
.length ()
1378 && surviving_params
[j
])
1383 " The following parameters are dead on arrival:");
1386 fprintf (dump_file
, " %u", j
);
1389 fprintf (dump_file
, "\n");
1393 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1395 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1396 tree type
= ipa_get_type (info
, i
);
1398 || !ipa_get_type (info
, i
)
1399 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1400 || !surviving_params
[i
])))
1402 plats
->itself
.set_to_bottom ();
1403 plats
->ctxlat
.set_to_bottom ();
1404 set_agg_lats_to_bottom (plats
);
1405 plats
->bits_lattice
.set_to_bottom ();
1406 plats
->m_value_range
.init (type
);
1407 plats
->m_value_range
.set_to_bottom ();
1411 plats
->m_value_range
.init (type
);
1413 set_all_contains_variable (plats
);
1417 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1418 if (ie
->indirect_info
->polymorphic
1419 && ie
->indirect_info
->param_index
>= 0)
1421 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1422 ipa_get_parm_lattices (info
,
1423 ie
->indirect_info
->param_index
)->virt_call
= 1;
1427 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1431 ipacp_value_safe_for_type (tree param_type
, tree value
)
1433 tree val_type
= TREE_TYPE (value
);
1434 if (param_type
== val_type
1435 || useless_type_conversion_p (param_type
, val_type
)
1436 || fold_convertible_p (param_type
, value
))
1442 /* Return the result of a (possibly arithmetic) operation on the constant
1443 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1444 the type of the parameter to which the result is passed. Return
1445 NULL_TREE if that cannot be determined or be considered an
1446 interprocedural invariant. */
1449 ipa_get_jf_arith_result (enum tree_code opcode
, tree input
, tree operand
,
1454 if (opcode
== NOP_EXPR
)
1456 if (!is_gimple_ip_invariant (input
))
1459 if (opcode
== ASSERT_EXPR
)
1461 if (values_equal_for_ipcp_p (input
, operand
))
1469 if (TREE_CODE_CLASS (opcode
) == tcc_comparison
)
1470 res_type
= boolean_type_node
;
1471 else if (expr_type_first_operand_type_p (opcode
))
1472 res_type
= TREE_TYPE (input
);
1477 if (TREE_CODE_CLASS (opcode
) == tcc_unary
)
1478 res
= fold_unary (opcode
, res_type
, input
);
1480 res
= fold_binary (opcode
, res_type
, input
, operand
);
1482 if (res
&& !is_gimple_ip_invariant (res
))
1488 /* Return the result of a (possibly arithmetic) pass through jump function
1489 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1490 to which the result is passed. Return NULL_TREE if that cannot be
1491 determined or be considered an interprocedural invariant. */
1494 ipa_get_jf_pass_through_result (struct ipa_jump_func
*jfunc
, tree input
,
1497 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc
),
1499 ipa_get_jf_pass_through_operand (jfunc
),
1503 /* Return the result of an ancestor jump function JFUNC on the constant value
1504 INPUT. Return NULL_TREE if that cannot be determined. */
1507 ipa_get_jf_ancestor_result (struct ipa_jump_func
*jfunc
, tree input
)
1509 gcc_checking_assert (TREE_CODE (input
) != TREE_BINFO
);
1510 if (TREE_CODE (input
) == ADDR_EXPR
)
1512 gcc_checking_assert (is_gimple_ip_invariant_address (input
));
1513 poly_int64 off
= ipa_get_jf_ancestor_offset (jfunc
);
1514 if (known_eq (off
, 0))
1516 poly_int64 byte_offset
= exact_div (off
, BITS_PER_UNIT
);
1517 return build1 (ADDR_EXPR
, TREE_TYPE (input
),
1518 fold_build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (input
)), input
,
1519 build_int_cst (ptr_type_node
, byte_offset
)));
1521 else if (ipa_get_jf_ancestor_keep_null (jfunc
)
1528 /* Determine whether JFUNC evaluates to a single known constant value and if
1529 so, return it. Otherwise return NULL. INFO describes the caller node or
1530 the one it is inlined to, so that pass-through jump functions can be
1531 evaluated. PARM_TYPE is the type of the parameter to which the result is
1535 ipa_value_from_jfunc (class ipa_node_params
*info
, struct ipa_jump_func
*jfunc
,
1538 if (jfunc
->type
== IPA_JF_CONST
)
1539 return ipa_get_jf_constant (jfunc
);
1540 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
1541 || jfunc
->type
== IPA_JF_ANCESTOR
)
1546 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1547 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1549 idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1551 if (info
->ipcp_orig_node
)
1552 input
= info
->known_csts
[idx
];
1555 ipcp_lattice
<tree
> *lat
;
1557 if (info
->lattices
.is_empty ()
1558 || idx
>= ipa_get_param_count (info
))
1560 lat
= ipa_get_scalar_lat (info
, idx
);
1561 if (!lat
->is_single_const ())
1563 input
= lat
->values
->value
;
1569 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1570 return ipa_get_jf_pass_through_result (jfunc
, input
, parm_type
);
1572 return ipa_get_jf_ancestor_result (jfunc
, input
);
1578 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1579 that INFO describes the caller node or the one it is inlined to, CS is the
1580 call graph edge corresponding to JFUNC and CSIDX index of the described
1583 ipa_polymorphic_call_context
1584 ipa_context_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
, int csidx
,
1585 ipa_jump_func
*jfunc
)
1587 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
1588 ipa_polymorphic_call_context ctx
;
1589 ipa_polymorphic_call_context
*edge_ctx
1590 = cs
? ipa_get_ith_polymorhic_call_context (args
, csidx
) : NULL
;
1592 if (edge_ctx
&& !edge_ctx
->useless_p ())
1595 if (jfunc
->type
== IPA_JF_PASS_THROUGH
1596 || jfunc
->type
== IPA_JF_ANCESTOR
)
1598 ipa_polymorphic_call_context srcctx
;
1600 bool type_preserved
= true;
1601 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1603 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
1605 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
1606 srcidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1610 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
1611 srcidx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1613 if (info
->ipcp_orig_node
)
1615 if (info
->known_contexts
.exists ())
1616 srcctx
= info
->known_contexts
[srcidx
];
1620 if (info
->lattices
.is_empty ()
1621 || srcidx
>= ipa_get_param_count (info
))
1623 ipcp_lattice
<ipa_polymorphic_call_context
> *lat
;
1624 lat
= ipa_get_poly_ctx_lat (info
, srcidx
);
1625 if (!lat
->is_single_const ())
1627 srcctx
= lat
->values
->value
;
1629 if (srcctx
.useless_p ())
1631 if (jfunc
->type
== IPA_JF_ANCESTOR
)
1632 srcctx
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
1633 if (!type_preserved
)
1634 srcctx
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
1635 srcctx
.combine_with (ctx
);
1642 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1643 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1644 the result is a range that is not VARYING nor UNDEFINED. */
1647 ipa_vr_operation_and_type_effects (vrange
&dst_vr
,
1648 const vrange
&src_vr
,
1649 enum tree_code operation
,
1650 tree dst_type
, tree src_type
)
1652 if (!ipa_supports_p (dst_type
) || !ipa_supports_p (src_type
))
1655 range_op_handler
handler (operation
);
1659 Value_Range
varying (dst_type
);
1660 varying
.set_varying (dst_type
);
1662 return (handler
.operand_check_p (dst_type
, src_type
, dst_type
)
1663 && handler
.fold_range (dst_vr
, dst_type
, src_vr
, varying
)
1664 && !dst_vr
.varying_p ()
1665 && !dst_vr
.undefined_p ());
1668 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1669 first be extracted onto a vrange. */
1672 ipa_vr_operation_and_type_effects (vrange
&dst_vr
,
1673 const ipa_vr
&src_vr
,
1674 enum tree_code operation
,
1675 tree dst_type
, tree src_type
)
1678 src_vr
.get_vrange (tmp
);
1679 return ipa_vr_operation_and_type_effects (dst_vr
, tmp
, operation
,
1680 dst_type
, src_type
);
1683 /* Determine range of JFUNC given that INFO describes the caller node or
1684 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1685 and PARM_TYPE of the parameter. */
1688 ipa_value_range_from_jfunc (vrange
&vr
,
1689 ipa_node_params
*info
, cgraph_edge
*cs
,
1690 ipa_jump_func
*jfunc
, tree parm_type
)
1692 vr
.set_undefined ();
1695 ipa_vr_operation_and_type_effects (vr
,
1697 NOP_EXPR
, parm_type
,
1698 jfunc
->m_vr
->type ());
1699 if (vr
.singleton_p ())
1701 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1704 ipcp_transformation
*sum
1705 = ipcp_get_transformation_summary (cs
->caller
->inlined_to
1706 ? cs
->caller
->inlined_to
1708 if (!sum
|| !sum
->m_vr
)
1711 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1713 if (!(*sum
->m_vr
)[idx
].known_p ())
1715 tree vr_type
= ipa_get_type (info
, idx
);
1717 (*sum
->m_vr
)[idx
].get_vrange (srcvr
);
1719 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
1721 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
1723 Value_Range
res (parm_type
);
1725 if (ipa_vr_operation_and_type_effects (res
,
1727 operation
, parm_type
,
1733 Value_Range
op_res (vr_type
);
1734 Value_Range
res (vr_type
);
1735 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
1736 Value_Range
op_vr (TREE_TYPE (op
));
1737 range_op_handler
handler (operation
);
1739 ipa_range_set_and_normalize (op_vr
, op
);
1742 || !op_res
.supports_type_p (vr_type
)
1743 || !handler
.fold_range (op_res
, vr_type
, srcvr
, op_vr
))
1744 op_res
.set_varying (vr_type
);
1746 if (ipa_vr_operation_and_type_effects (res
,
1748 NOP_EXPR
, parm_type
,
1755 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1756 single known constant value and if so, return it. Otherwise return NULL.
1757 NODE and INFO describes the caller node or the one it is inlined to, and
1758 its related info. */
1761 ipa_agg_value_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
1762 const ipa_agg_jf_item
*item
)
1764 tree value
= NULL_TREE
;
1767 if (item
->offset
< 0
1768 || item
->jftype
== IPA_JF_UNKNOWN
1769 || item
->offset
>= (HOST_WIDE_INT
) UINT_MAX
* BITS_PER_UNIT
)
1772 if (item
->jftype
== IPA_JF_CONST
)
1773 return item
->value
.constant
;
1775 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
1776 || item
->jftype
== IPA_JF_LOAD_AGG
);
1778 src_idx
= item
->value
.pass_through
.formal_id
;
1780 if (info
->ipcp_orig_node
)
1782 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1783 value
= info
->known_csts
[src_idx
];
1784 else if (ipcp_transformation
*ts
= ipcp_get_transformation_summary (node
))
1786 ipa_argagg_value_list
avl (ts
);
1787 value
= avl
.get_value (src_idx
,
1788 item
->value
.load_agg
.offset
/ BITS_PER_UNIT
,
1789 item
->value
.load_agg
.by_ref
);
1792 else if (!info
->lattices
.is_empty ())
1794 class ipcp_param_lattices
*src_plats
1795 = ipa_get_parm_lattices (info
, src_idx
);
1797 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1799 struct ipcp_lattice
<tree
> *lat
= &src_plats
->itself
;
1801 if (!lat
->is_single_const ())
1804 value
= lat
->values
->value
;
1806 else if (src_plats
->aggs
1807 && !src_plats
->aggs_bottom
1808 && !src_plats
->aggs_contain_variable
1809 && src_plats
->aggs_by_ref
== item
->value
.load_agg
.by_ref
)
1811 struct ipcp_agg_lattice
*aglat
;
1813 for (aglat
= src_plats
->aggs
; aglat
; aglat
= aglat
->next
)
1815 if (aglat
->offset
> item
->value
.load_agg
.offset
)
1818 if (aglat
->offset
== item
->value
.load_agg
.offset
)
1820 if (aglat
->is_single_const ())
1821 value
= aglat
->values
->value
;
1831 if (item
->jftype
== IPA_JF_LOAD_AGG
)
1833 tree load_type
= item
->value
.load_agg
.type
;
1834 tree value_type
= TREE_TYPE (value
);
1836 /* Ensure value type is compatible with load type. */
1837 if (!useless_type_conversion_p (load_type
, value_type
))
1841 return ipa_get_jf_arith_result (item
->value
.pass_through
.operation
,
1843 item
->value
.pass_through
.operand
,
1847 /* Process all items in AGG_JFUNC relative to caller (or the node the original
1848 caller is inlined to) NODE which described by INFO and push the results to
1849 RES as describing values passed in parameter DST_INDEX. */
1852 ipa_push_agg_values_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
1853 ipa_agg_jump_function
*agg_jfunc
,
1855 vec
<ipa_argagg_value
> *res
)
1857 unsigned prev_unit_offset
= 0;
1860 for (const ipa_agg_jf_item
&item
: agg_jfunc
->items
)
1862 tree value
= ipa_agg_value_from_jfunc (info
, node
, &item
);
1866 ipa_argagg_value iav
;
1868 iav
.unit_offset
= item
.offset
/ BITS_PER_UNIT
;
1869 iav
.index
= dst_index
;
1870 iav
.by_ref
= agg_jfunc
->by_ref
;
1874 || iav
.unit_offset
> prev_unit_offset
);
1875 prev_unit_offset
= iav
.unit_offset
;
1878 res
->safe_push (iav
);
1882 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1883 bottom, not containing a variable component and without any known value at
1887 ipcp_verify_propagated_values (void)
1889 struct cgraph_node
*node
;
1891 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
1893 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1894 if (!opt_for_fn (node
->decl
, flag_ipa_cp
)
1895 || !opt_for_fn (node
->decl
, optimize
))
1897 int i
, count
= ipa_get_param_count (info
);
1899 for (i
= 0; i
< count
; i
++)
1901 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (info
, i
);
1904 && !lat
->contains_variable
1905 && lat
->values_count
== 0)
1909 symtab
->dump (dump_file
);
1910 fprintf (dump_file
, "\nIPA lattices after constant "
1911 "propagation, before gcc_unreachable:\n");
1912 print_all_lattices (dump_file
, true, false);
1921 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1924 values_equal_for_ipcp_p (ipa_polymorphic_call_context x
,
1925 ipa_polymorphic_call_context y
)
1927 return x
.equal_to (y
);
1931 /* Add a new value source to the value represented by THIS, marking that a
1932 value comes from edge CS and (if the underlying jump function is a
1933 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1934 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1935 scalar value of the parameter itself or the offset within an aggregate. */
1937 template <typename valtype
>
1939 ipcp_value
<valtype
>::add_source (cgraph_edge
*cs
, ipcp_value
*src_val
,
1940 int src_idx
, HOST_WIDE_INT offset
)
1942 ipcp_value_source
<valtype
> *src
;
1944 src
= new (ipcp_sources_pool
.allocate ()) ipcp_value_source
<valtype
>;
1945 src
->offset
= offset
;
1948 src
->index
= src_idx
;
1950 src
->next
= sources
;
1954 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1955 SOURCE and clear all other fields. */
1957 static ipcp_value
<tree
> *
1958 allocate_and_init_ipcp_value (tree cst
, unsigned same_lat_gen_level
)
1960 ipcp_value
<tree
> *val
;
1962 val
= new (ipcp_cst_values_pool
.allocate ()) ipcp_value
<tree
>();
1964 val
->self_recursion_generated_level
= same_lat_gen_level
;
1968 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1969 value to SOURCE and clear all other fields. */
1971 static ipcp_value
<ipa_polymorphic_call_context
> *
1972 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx
,
1973 unsigned same_lat_gen_level
)
1975 ipcp_value
<ipa_polymorphic_call_context
> *val
;
1977 val
= new (ipcp_poly_ctx_values_pool
.allocate ())
1978 ipcp_value
<ipa_polymorphic_call_context
>();
1980 val
->self_recursion_generated_level
= same_lat_gen_level
;
1984 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1985 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1986 meaning. OFFSET -1 means the source is scalar and not a part of an
1987 aggregate. If non-NULL, VAL_P records address of existing or newly added
1990 If the value is generated for a self-recursive call as a result of an
1991 arithmetic pass-through jump-function acting on a value in the same lattice,
1992 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1993 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1995 template <typename valtype
>
1997 ipcp_lattice
<valtype
>::add_value (valtype newval
, cgraph_edge
*cs
,
1998 ipcp_value
<valtype
> *src_val
,
1999 int src_idx
, HOST_WIDE_INT offset
,
2000 ipcp_value
<valtype
> **val_p
,
2001 unsigned same_lat_gen_level
)
2003 ipcp_value
<valtype
> *val
, *last_val
= NULL
;
2011 for (val
= values
; val
; last_val
= val
, val
= val
->next
)
2012 if (values_equal_for_ipcp_p (val
->value
, newval
))
2017 if (val
->self_recursion_generated_level
< same_lat_gen_level
)
2018 val
->self_recursion_generated_level
= same_lat_gen_level
;
2020 if (ipa_edge_within_scc (cs
))
2022 ipcp_value_source
<valtype
> *s
;
2023 for (s
= val
->sources
; s
; s
= s
->next
)
2024 if (s
->cs
== cs
&& s
->val
== src_val
)
2030 val
->add_source (cs
, src_val
, src_idx
, offset
);
2034 if (!same_lat_gen_level
&& values_count
>= opt_for_fn (cs
->callee
->decl
,
2035 param_ipa_cp_value_list_size
))
2037 /* We can only free sources, not the values themselves, because sources
2038 of other values in this SCC might point to them. */
2039 for (val
= values
; val
; val
= val
->next
)
2041 while (val
->sources
)
2043 ipcp_value_source
<valtype
> *src
= val
->sources
;
2044 val
->sources
= src
->next
;
2045 ipcp_sources_pool
.remove ((ipcp_value_source
<tree
>*)src
);
2049 return set_to_bottom ();
2053 val
= allocate_and_init_ipcp_value (newval
, same_lat_gen_level
);
2054 val
->add_source (cs
, src_val
, src_idx
, offset
);
2057 /* Add the new value to end of value list, which can reduce iterations
2058 of propagation stage for recursive function. */
2060 last_val
->next
= val
;
2070 /* A helper function that returns result of operation specified by OPCODE on
2071 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2072 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2073 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2077 get_val_across_arith_op (enum tree_code opcode
,
2080 ipcp_value
<tree
> *src_val
,
2083 tree opnd1
= src_val
->value
;
2085 /* Skip source values that is incompatible with specified type. */
2087 && !useless_type_conversion_p (opnd1_type
, TREE_TYPE (opnd1
)))
2090 return ipa_get_jf_arith_result (opcode
, opnd1
, opnd2
, res_type
);
2093 /* Propagate values through an arithmetic transformation described by a jump
2094 function associated with edge CS, taking values from SRC_LAT and putting
2095 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2096 OPND2 is a constant value if transformation is a binary operation.
2097 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2098 a part of the aggregate. SRC_IDX is the index of the source parameter.
2099 RES_TYPE is the value type of result being propagated into. Return true if
2100 DEST_LAT changed. */
2103 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2104 enum tree_code opcode
,
2107 ipcp_lattice
<tree
> *src_lat
,
2108 ipcp_lattice
<tree
> *dest_lat
,
2109 HOST_WIDE_INT src_offset
,
2113 ipcp_value
<tree
> *src_val
;
2116 /* Due to circular dependencies, propagating within an SCC through arithmetic
2117 transformation would create infinite number of values. But for
2118 self-feeding recursive function, we could allow propagation in a limited
2119 count, and this can enable a simple kind of recursive function versioning.
2120 For other scenario, we would just make lattices bottom. */
2121 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2125 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2126 param_ipa_cp_max_recursive_depth
);
2127 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2128 return dest_lat
->set_contains_variable ();
2130 /* No benefit if recursive execution is in low probability. */
2131 if (cs
->sreal_frequency () * 100
2132 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2133 param_ipa_cp_min_recursive_probability
))
2134 return dest_lat
->set_contains_variable ();
2136 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
2138 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2140 /* Now we do not use self-recursively generated value as propagation
2141 source, this is absolutely conservative, but could avoid explosion
2142 of lattice's value space, especially when one recursive function
2143 calls another recursive. */
2144 if (src_val
->self_recursion_generated_p ())
2146 ipcp_value_source
<tree
> *s
;
2148 /* If the lattice has already been propagated for the call site,
2149 no need to do that again. */
2150 for (s
= src_val
->sources
; s
; s
= s
->next
)
2152 return dest_lat
->set_contains_variable ();
2155 val_seeds
.safe_push (src_val
);
2158 gcc_assert ((int) val_seeds
.length () <= param_ipa_cp_value_list_size
);
2160 /* Recursively generate lattice values with a limited count. */
2161 FOR_EACH_VEC_ELT (val_seeds
, i
, src_val
)
2163 for (int j
= 1; j
< max_recursive_depth
; j
++)
2165 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2168 || !ipacp_value_safe_for_type (res_type
, cstval
))
2171 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2172 src_offset
, &src_val
, j
);
2173 gcc_checking_assert (src_val
);
2176 ret
|= dest_lat
->set_contains_variable ();
2179 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2181 /* Now we do not use self-recursively generated value as propagation
2182 source, otherwise it is easy to make value space of normal lattice
2184 if (src_val
->self_recursion_generated_p ())
2186 ret
|= dest_lat
->set_contains_variable ();
2190 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2193 && ipacp_value_safe_for_type (res_type
, cstval
))
2194 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2197 ret
|= dest_lat
->set_contains_variable ();
2203 /* Propagate values through a pass-through jump function JFUNC associated with
2204 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2205 is the index of the source parameter. PARM_TYPE is the type of the
2206 parameter to which the result is passed. */
2209 propagate_vals_across_pass_through (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2210 ipcp_lattice
<tree
> *src_lat
,
2211 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2214 return propagate_vals_across_arith_jfunc (cs
,
2215 ipa_get_jf_pass_through_operation (jfunc
),
2217 ipa_get_jf_pass_through_operand (jfunc
),
2218 src_lat
, dest_lat
, -1, src_idx
, parm_type
);
2221 /* Propagate values through an ancestor jump function JFUNC associated with
2222 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2223 is the index of the source parameter. */
2226 propagate_vals_across_ancestor (struct cgraph_edge
*cs
,
2227 struct ipa_jump_func
*jfunc
,
2228 ipcp_lattice
<tree
> *src_lat
,
2229 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2232 ipcp_value
<tree
> *src_val
;
2235 if (ipa_edge_within_scc (cs
))
2236 return dest_lat
->set_contains_variable ();
2238 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2240 tree t
= ipa_get_jf_ancestor_result (jfunc
, src_val
->value
);
2242 if (t
&& ipacp_value_safe_for_type (param_type
, t
))
2243 ret
|= dest_lat
->add_value (t
, cs
, src_val
, src_idx
);
2245 ret
|= dest_lat
->set_contains_variable ();
2251 /* Propagate scalar values across jump function JFUNC that is associated with
2252 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2253 parameter to which the result is passed. */
2256 propagate_scalar_across_jump_function (struct cgraph_edge
*cs
,
2257 struct ipa_jump_func
*jfunc
,
2258 ipcp_lattice
<tree
> *dest_lat
,
2261 if (dest_lat
->bottom
)
2264 if (jfunc
->type
== IPA_JF_CONST
)
2266 tree val
= ipa_get_jf_constant (jfunc
);
2267 if (ipacp_value_safe_for_type (param_type
, val
))
2268 return dest_lat
->add_value (val
, cs
, NULL
, 0);
2270 return dest_lat
->set_contains_variable ();
2272 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
2273 || jfunc
->type
== IPA_JF_ANCESTOR
)
2275 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2276 ipcp_lattice
<tree
> *src_lat
;
2280 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2281 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2283 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2285 src_lat
= ipa_get_scalar_lat (caller_info
, src_idx
);
2286 if (src_lat
->bottom
)
2287 return dest_lat
->set_contains_variable ();
2289 /* If we would need to clone the caller and cannot, do not propagate. */
2290 if (!ipcp_versionable_function_p (cs
->caller
)
2291 && (src_lat
->contains_variable
2292 || (src_lat
->values_count
> 1)))
2293 return dest_lat
->set_contains_variable ();
2295 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2296 ret
= propagate_vals_across_pass_through (cs
, jfunc
, src_lat
,
2300 ret
= propagate_vals_across_ancestor (cs
, jfunc
, src_lat
, dest_lat
,
2301 src_idx
, param_type
);
2303 if (src_lat
->contains_variable
)
2304 ret
|= dest_lat
->set_contains_variable ();
2309 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2310 use it for indirect inlining), we should propagate them too. */
2311 return dest_lat
->set_contains_variable ();
2314 /* Propagate scalar values across jump function JFUNC that is associated with
2315 edge CS and describes argument IDX and put the values into DEST_LAT. */
2318 propagate_context_across_jump_function (cgraph_edge
*cs
,
2319 ipa_jump_func
*jfunc
, int idx
,
2320 ipcp_lattice
<ipa_polymorphic_call_context
> *dest_lat
)
2322 if (dest_lat
->bottom
)
2324 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
2326 bool added_sth
= false;
2327 bool type_preserved
= true;
2329 ipa_polymorphic_call_context edge_ctx
, *edge_ctx_ptr
2330 = ipa_get_ith_polymorhic_call_context (args
, idx
);
2333 edge_ctx
= *edge_ctx_ptr
;
2335 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2336 || jfunc
->type
== IPA_JF_ANCESTOR
)
2338 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2340 ipcp_lattice
<ipa_polymorphic_call_context
> *src_lat
;
2342 /* TODO: Once we figure out how to propagate speculations, it will
2343 probably be a good idea to switch to speculation if type_preserved is
2344 not set instead of punting. */
2345 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2347 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
2349 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
2350 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2354 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
2355 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2358 src_lat
= ipa_get_poly_ctx_lat (caller_info
, src_idx
);
2359 /* If we would need to clone the caller and cannot, do not propagate. */
2360 if (!ipcp_versionable_function_p (cs
->caller
)
2361 && (src_lat
->contains_variable
2362 || (src_lat
->values_count
> 1)))
2365 ipcp_value
<ipa_polymorphic_call_context
> *src_val
;
2366 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2368 ipa_polymorphic_call_context cur
= src_val
->value
;
2370 if (!type_preserved
)
2371 cur
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
2372 if (jfunc
->type
== IPA_JF_ANCESTOR
)
2373 cur
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
2374 /* TODO: In cases we know how the context is going to be used,
2375 we can improve the result by passing proper OTR_TYPE. */
2376 cur
.combine_with (edge_ctx
);
2377 if (!cur
.useless_p ())
2379 if (src_lat
->contains_variable
2380 && !edge_ctx
.equal_to (cur
))
2381 ret
|= dest_lat
->set_contains_variable ();
2382 ret
|= dest_lat
->add_value (cur
, cs
, src_val
, src_idx
);
2391 if (!edge_ctx
.useless_p ())
2392 ret
|= dest_lat
->add_value (edge_ctx
, cs
);
2394 ret
|= dest_lat
->set_contains_variable ();
2400 /* Propagate bits across jfunc that is associated with
2401 edge cs and update dest_lattice accordingly. */
2404 propagate_bits_across_jump_function (cgraph_edge
*cs
, int idx
,
2405 ipa_jump_func
*jfunc
,
2406 ipcp_bits_lattice
*dest_lattice
)
2408 if (dest_lattice
->bottom_p ())
2411 enum availability availability
;
2412 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
2413 ipa_node_params
*callee_info
= ipa_node_params_sum
->get (callee
);
2414 tree parm_type
= ipa_get_type (callee_info
, idx
);
2416 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2417 transform for these cases. Similarly, we can have bad type mismatches
2418 with LTO, avoid doing anything with those too. */
2420 || (!INTEGRAL_TYPE_P (parm_type
) && !POINTER_TYPE_P (parm_type
)))
2422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2423 fprintf (dump_file
, "Setting dest_lattice to bottom, because type of "
2424 "param %i of %s is NULL or unsuitable for bits propagation\n",
2425 idx
, cs
->callee
->dump_name ());
2427 return dest_lattice
->set_to_bottom ();
2430 unsigned precision
= TYPE_PRECISION (parm_type
);
2431 signop sgn
= TYPE_SIGN (parm_type
);
2433 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2434 || jfunc
->type
== IPA_JF_ANCESTOR
)
2436 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2437 tree operand
= NULL_TREE
;
2438 enum tree_code code
;
2440 bool keep_null
= false;
2442 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2444 code
= ipa_get_jf_pass_through_operation (jfunc
);
2445 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2446 if (code
!= NOP_EXPR
)
2447 operand
= ipa_get_jf_pass_through_operand (jfunc
);
2451 code
= POINTER_PLUS_EXPR
;
2452 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2453 unsigned HOST_WIDE_INT offset
2454 = ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
2455 keep_null
= (ipa_get_jf_ancestor_keep_null (jfunc
) || !offset
);
2456 operand
= build_int_cstu (size_type_node
, offset
);
2459 class ipcp_param_lattices
*src_lats
2460 = ipa_get_parm_lattices (caller_info
, src_idx
);
2462 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2468 Assume lattice for x is bottom, however we can still propagate
2469 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2470 and we store it in jump function during analysis stage. */
2472 if (!src_lats
->bits_lattice
.bottom_p ())
2475 = keep_null
&& !src_lats
->bits_lattice
.known_nonzero_p ();
2477 return dest_lattice
->meet_with (src_lats
->bits_lattice
, precision
,
2478 sgn
, code
, operand
, drop_all_ones
);
2482 Value_Range
vr (parm_type
);
2485 jfunc
->m_vr
->get_vrange (vr
);
2486 if (!vr
.undefined_p () && !vr
.varying_p ())
2488 irange_bitmask bm
= vr
.get_bitmask ();
2490 = widest_int::from (bm
.mask (), TYPE_SIGN (parm_type
));
2492 = widest_int::from (bm
.value (), TYPE_SIGN (parm_type
));
2493 return dest_lattice
->meet_with (value
, mask
, precision
);
2496 return dest_lattice
->set_to_bottom ();
2499 /* Propagate value range across jump function JFUNC that is associated with
2500 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2504 propagate_vr_across_jump_function (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2505 class ipcp_param_lattices
*dest_plats
,
2508 ipcp_vr_lattice
*dest_lat
= &dest_plats
->m_value_range
;
2510 if (dest_lat
->bottom_p ())
2514 || (!INTEGRAL_TYPE_P (param_type
)
2515 && !POINTER_TYPE_P (param_type
)))
2516 return dest_lat
->set_to_bottom ();
2518 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2520 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
2521 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2522 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2523 class ipcp_param_lattices
*src_lats
2524 = ipa_get_parm_lattices (caller_info
, src_idx
);
2525 tree operand_type
= ipa_get_type (caller_info
, src_idx
);
2527 if (src_lats
->m_value_range
.bottom_p ())
2528 return dest_lat
->set_to_bottom ();
2530 Value_Range
vr (param_type
);
2531 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
2532 ipa_vr_operation_and_type_effects (vr
,
2533 src_lats
->m_value_range
.m_vr
,
2534 operation
, param_type
,
2536 /* A crude way to prevent unbounded number of value range updates
2537 in SCC components. We should allow limited number of updates within
2539 else if (!ipa_edge_within_scc (cs
))
2541 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
2542 Value_Range
op_vr (TREE_TYPE (op
));
2543 Value_Range
op_res (param_type
);
2544 range_op_handler
handler (operation
);
2546 ipa_range_set_and_normalize (op_vr
, op
);
2549 || !ipa_supports_p (operand_type
)
2550 || !handler
.fold_range (op_res
, operand_type
,
2551 src_lats
->m_value_range
.m_vr
, op_vr
))
2552 op_res
.set_varying (param_type
);
2554 ipa_vr_operation_and_type_effects (vr
,
2556 NOP_EXPR
, param_type
,
2559 if (!vr
.undefined_p () && !vr
.varying_p ())
2563 Value_Range
jvr (param_type
);
2564 if (ipa_vr_operation_and_type_effects (jvr
, *jfunc
->m_vr
,
2567 jfunc
->m_vr
->type ()))
2570 return dest_lat
->meet_with (vr
);
2573 else if (jfunc
->type
== IPA_JF_CONST
)
2575 tree val
= ipa_get_jf_constant (jfunc
);
2576 if (TREE_CODE (val
) == INTEGER_CST
)
2578 val
= fold_convert (param_type
, val
);
2579 if (TREE_OVERFLOW_P (val
))
2580 val
= drop_tree_overflow (val
);
2582 Value_Range
tmpvr (val
, val
);
2583 return dest_lat
->meet_with (tmpvr
);
2587 Value_Range
vr (param_type
);
2589 && ipa_vr_operation_and_type_effects (vr
, *jfunc
->m_vr
, NOP_EXPR
,
2591 jfunc
->m_vr
->type ()))
2592 return dest_lat
->meet_with (vr
);
2594 return dest_lat
->set_to_bottom ();
2597 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2598 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2599 other cases, return false). If there are no aggregate items, set
2600 aggs_by_ref to NEW_AGGS_BY_REF. */
2603 set_check_aggs_by_ref (class ipcp_param_lattices
*dest_plats
,
2604 bool new_aggs_by_ref
)
2606 if (dest_plats
->aggs
)
2608 if (dest_plats
->aggs_by_ref
!= new_aggs_by_ref
)
2610 set_agg_lats_to_bottom (dest_plats
);
2615 dest_plats
->aggs_by_ref
= new_aggs_by_ref
;
2619 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2620 already existing lattice for the given OFFSET and SIZE, marking all skipped
2621 lattices as containing variable and checking for overlaps. If there is no
2622 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2623 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2624 unless there are too many already. If there are two many, return false. If
2625 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2626 skipped lattices were newly marked as containing variable, set *CHANGE to
2627 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2630 merge_agg_lats_step (class ipcp_param_lattices
*dest_plats
,
2631 HOST_WIDE_INT offset
, HOST_WIDE_INT val_size
,
2632 struct ipcp_agg_lattice
***aglat
,
2633 bool pre_existing
, bool *change
, int max_agg_items
)
2635 gcc_checking_assert (offset
>= 0);
2637 while (**aglat
&& (**aglat
)->offset
< offset
)
2639 if ((**aglat
)->offset
+ (**aglat
)->size
> offset
)
2641 set_agg_lats_to_bottom (dest_plats
);
2644 *change
|= (**aglat
)->set_contains_variable ();
2645 *aglat
= &(**aglat
)->next
;
2648 if (**aglat
&& (**aglat
)->offset
== offset
)
2650 if ((**aglat
)->size
!= val_size
)
2652 set_agg_lats_to_bottom (dest_plats
);
2655 gcc_assert (!(**aglat
)->next
2656 || (**aglat
)->next
->offset
>= offset
+ val_size
);
2661 struct ipcp_agg_lattice
*new_al
;
2663 if (**aglat
&& (**aglat
)->offset
< offset
+ val_size
)
2665 set_agg_lats_to_bottom (dest_plats
);
2668 if (dest_plats
->aggs_count
== max_agg_items
)
2670 dest_plats
->aggs_count
++;
2671 new_al
= ipcp_agg_lattice_pool
.allocate ();
2673 new_al
->offset
= offset
;
2674 new_al
->size
= val_size
;
2675 new_al
->contains_variable
= pre_existing
;
2677 new_al
->next
= **aglat
;
2683 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2684 containing an unknown value. */
2687 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice
*aglat
)
2692 ret
|= aglat
->set_contains_variable ();
2693 aglat
= aglat
->next
;
2698 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2699 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2700 parameter used for lattice value sources. Return true if DEST_PLATS changed
2704 merge_aggregate_lattices (struct cgraph_edge
*cs
,
2705 class ipcp_param_lattices
*dest_plats
,
2706 class ipcp_param_lattices
*src_plats
,
2707 int src_idx
, HOST_WIDE_INT offset_delta
)
2709 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2710 struct ipcp_agg_lattice
**dst_aglat
;
2713 if (set_check_aggs_by_ref (dest_plats
, src_plats
->aggs_by_ref
))
2715 if (src_plats
->aggs_bottom
)
2716 return set_agg_lats_contain_variable (dest_plats
);
2717 if (src_plats
->aggs_contain_variable
)
2718 ret
|= set_agg_lats_contain_variable (dest_plats
);
2719 dst_aglat
= &dest_plats
->aggs
;
2721 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2722 param_ipa_max_agg_items
);
2723 for (struct ipcp_agg_lattice
*src_aglat
= src_plats
->aggs
;
2725 src_aglat
= src_aglat
->next
)
2727 HOST_WIDE_INT new_offset
= src_aglat
->offset
- offset_delta
;
2731 if (merge_agg_lats_step (dest_plats
, new_offset
, src_aglat
->size
,
2732 &dst_aglat
, pre_existing
, &ret
, max_agg_items
))
2734 struct ipcp_agg_lattice
*new_al
= *dst_aglat
;
2736 dst_aglat
= &(*dst_aglat
)->next
;
2737 if (src_aglat
->bottom
)
2739 ret
|= new_al
->set_contains_variable ();
2742 if (src_aglat
->contains_variable
)
2743 ret
|= new_al
->set_contains_variable ();
2744 for (ipcp_value
<tree
> *val
= src_aglat
->values
;
2747 ret
|= new_al
->add_value (val
->value
, cs
, val
, src_idx
,
2750 else if (dest_plats
->aggs_bottom
)
2753 ret
|= set_chain_of_aglats_contains_variable (*dst_aglat
);
2757 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2758 pass-through JFUNC and if so, whether it has conform and conforms to the
2759 rules about propagating values passed by reference. */
2762 agg_pass_through_permissible_p (class ipcp_param_lattices
*src_plats
,
2763 struct ipa_jump_func
*jfunc
)
2765 return src_plats
->aggs
2766 && (!src_plats
->aggs_by_ref
2767 || ipa_get_jf_pass_through_agg_preserved (jfunc
));
2770 /* Propagate values through ITEM, jump function for a part of an aggregate,
2771 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2772 associated with the jump function. Return true if AGLAT changed in any
2776 propagate_aggregate_lattice (struct cgraph_edge
*cs
,
2777 struct ipa_agg_jf_item
*item
,
2778 struct ipcp_agg_lattice
*aglat
)
2780 class ipa_node_params
*caller_info
;
2781 class ipcp_param_lattices
*src_plats
;
2782 struct ipcp_lattice
<tree
> *src_lat
;
2783 HOST_WIDE_INT src_offset
;
2788 if (item
->jftype
== IPA_JF_CONST
)
2790 tree value
= item
->value
.constant
;
2792 gcc_checking_assert (is_gimple_ip_invariant (value
));
2793 return aglat
->add_value (value
, cs
, NULL
, 0);
2796 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
2797 || item
->jftype
== IPA_JF_LOAD_AGG
);
2799 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2800 src_idx
= item
->value
.pass_through
.formal_id
;
2801 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2803 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2805 load_type
= NULL_TREE
;
2806 src_lat
= &src_plats
->itself
;
2811 HOST_WIDE_INT load_offset
= item
->value
.load_agg
.offset
;
2812 struct ipcp_agg_lattice
*src_aglat
;
2814 for (src_aglat
= src_plats
->aggs
; src_aglat
; src_aglat
= src_aglat
->next
)
2815 if (src_aglat
->offset
>= load_offset
)
2818 load_type
= item
->value
.load_agg
.type
;
2820 || src_aglat
->offset
> load_offset
2821 || src_aglat
->size
!= tree_to_shwi (TYPE_SIZE (load_type
))
2822 || src_plats
->aggs_by_ref
!= item
->value
.load_agg
.by_ref
)
2823 return aglat
->set_contains_variable ();
2825 src_lat
= src_aglat
;
2826 src_offset
= load_offset
;
2830 || (!ipcp_versionable_function_p (cs
->caller
)
2831 && !src_lat
->is_single_const ()))
2832 return aglat
->set_contains_variable ();
2834 ret
= propagate_vals_across_arith_jfunc (cs
,
2835 item
->value
.pass_through
.operation
,
2837 item
->value
.pass_through
.operand
,
2843 if (src_lat
->contains_variable
)
2844 ret
|= aglat
->set_contains_variable ();
2849 /* Propagate scalar values across jump function JFUNC that is associated with
2850 edge CS and put the values into DEST_LAT. */
2853 propagate_aggs_across_jump_function (struct cgraph_edge
*cs
,
2854 struct ipa_jump_func
*jfunc
,
2855 class ipcp_param_lattices
*dest_plats
)
2859 if (dest_plats
->aggs_bottom
)
2862 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2863 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
2865 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2866 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2867 class ipcp_param_lattices
*src_plats
;
2869 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2870 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
2872 /* Currently we do not produce clobber aggregate jump
2873 functions, replace with merging when we do. */
2874 gcc_assert (!jfunc
->agg
.items
);
2875 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
,
2880 else if (jfunc
->type
== IPA_JF_ANCESTOR
2881 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
2883 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2884 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2885 class ipcp_param_lattices
*src_plats
;
2887 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2888 if (src_plats
->aggs
&& src_plats
->aggs_by_ref
)
2890 /* Currently we do not produce clobber aggregate jump
2891 functions, replace with merging when we do. */
2892 gcc_assert (!jfunc
->agg
.items
);
2893 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
, src_idx
,
2894 ipa_get_jf_ancestor_offset (jfunc
));
2896 else if (!src_plats
->aggs_by_ref
)
2897 ret
|= set_agg_lats_to_bottom (dest_plats
);
2899 ret
|= set_agg_lats_contain_variable (dest_plats
);
2903 if (jfunc
->agg
.items
)
2905 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2906 struct ipcp_agg_lattice
**aglat
= &dest_plats
->aggs
;
2907 struct ipa_agg_jf_item
*item
;
2910 if (set_check_aggs_by_ref (dest_plats
, jfunc
->agg
.by_ref
))
2913 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2914 param_ipa_max_agg_items
);
2915 FOR_EACH_VEC_ELT (*jfunc
->agg
.items
, i
, item
)
2917 HOST_WIDE_INT val_size
;
2919 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
2921 val_size
= tree_to_shwi (TYPE_SIZE (item
->type
));
2923 if (merge_agg_lats_step (dest_plats
, item
->offset
, val_size
,
2924 &aglat
, pre_existing
, &ret
, max_agg_items
))
2926 ret
|= propagate_aggregate_lattice (cs
, item
, *aglat
);
2927 aglat
= &(*aglat
)->next
;
2929 else if (dest_plats
->aggs_bottom
)
2933 ret
|= set_chain_of_aglats_contains_variable (*aglat
);
2936 ret
|= set_agg_lats_contain_variable (dest_plats
);
2941 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2942 non-thunk) destination, the call passes through a thunk. */
2945 call_passes_through_thunk (cgraph_edge
*cs
)
2947 cgraph_node
*alias_or_thunk
= cs
->callee
;
2948 while (alias_or_thunk
->alias
)
2949 alias_or_thunk
= alias_or_thunk
->get_alias_target ();
2950 return alias_or_thunk
->thunk
;
2953 /* Propagate constants from the caller to the callee of CS. INFO describes the
2957 propagate_constants_across_call (struct cgraph_edge
*cs
)
2959 class ipa_node_params
*callee_info
;
2960 enum availability availability
;
2961 cgraph_node
*callee
;
2962 class ipa_edge_args
*args
;
2964 int i
, args_count
, parms_count
;
2966 callee
= cs
->callee
->function_symbol (&availability
);
2967 if (!callee
->definition
)
2969 gcc_checking_assert (callee
->has_gimple_body_p ());
2970 callee_info
= ipa_node_params_sum
->get (callee
);
2974 args
= ipa_edge_args_sum
->get (cs
);
2975 parms_count
= ipa_get_param_count (callee_info
);
2976 if (parms_count
== 0)
2979 || !opt_for_fn (cs
->caller
->decl
, flag_ipa_cp
)
2980 || !opt_for_fn (cs
->caller
->decl
, optimize
))
2982 for (i
= 0; i
< parms_count
; i
++)
2983 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
2987 args_count
= ipa_get_cs_argument_count (args
);
2989 /* If this call goes through a thunk we must not propagate to the first (0th)
2990 parameter. However, we might need to uncover a thunk from below a series
2991 of aliases first. */
2992 if (call_passes_through_thunk (cs
))
2994 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
3001 for (; (i
< args_count
) && (i
< parms_count
); i
++)
3003 struct ipa_jump_func
*jump_func
= ipa_get_ith_jump_func (args
, i
);
3004 class ipcp_param_lattices
*dest_plats
;
3005 tree param_type
= ipa_get_type (callee_info
, i
);
3007 dest_plats
= ipa_get_parm_lattices (callee_info
, i
);
3008 if (availability
== AVAIL_INTERPOSABLE
)
3009 ret
|= set_all_contains_variable (dest_plats
);
3012 ret
|= propagate_scalar_across_jump_function (cs
, jump_func
,
3013 &dest_plats
->itself
,
3015 ret
|= propagate_context_across_jump_function (cs
, jump_func
, i
,
3016 &dest_plats
->ctxlat
);
3018 |= propagate_bits_across_jump_function (cs
, i
, jump_func
,
3019 &dest_plats
->bits_lattice
);
3020 ret
|= propagate_aggs_across_jump_function (cs
, jump_func
,
3022 if (opt_for_fn (callee
->decl
, flag_ipa_vrp
))
3023 ret
|= propagate_vr_across_jump_function (cs
, jump_func
,
3024 dest_plats
, param_type
);
3026 ret
|= dest_plats
->m_value_range
.set_to_bottom ();
3029 for (; i
< parms_count
; i
++)
3030 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
, i
));
3035 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
3036 KNOWN_CONTEXTS, and known aggregates either in AVS or KNOWN_AGGS return
3037 the destination. The latter three can be NULL. If AGG_REPS is not NULL,
3038 KNOWN_AGGS is ignored. */
3041 ipa_get_indirect_edge_target_1 (struct cgraph_edge
*ie
,
3042 const vec
<tree
> &known_csts
,
3043 const vec
<ipa_polymorphic_call_context
> &known_contexts
,
3044 const ipa_argagg_value_list
&avs
,
3047 int param_index
= ie
->indirect_info
->param_index
;
3048 HOST_WIDE_INT anc_offset
;
3052 *speculative
= false;
3054 if (param_index
== -1)
3057 if (!ie
->indirect_info
->polymorphic
)
3061 if (ie
->indirect_info
->agg_contents
)
3064 if ((unsigned) param_index
< known_csts
.length ()
3065 && known_csts
[param_index
])
3066 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3067 ie
->indirect_info
->offset
,
3068 ie
->indirect_info
->by_ref
);
3070 if (!t
&& ie
->indirect_info
->guaranteed_unmodified
)
3071 t
= avs
.get_value (param_index
,
3072 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3073 ie
->indirect_info
->by_ref
);
3075 else if ((unsigned) param_index
< known_csts
.length ())
3076 t
= known_csts
[param_index
];
3079 && TREE_CODE (t
) == ADDR_EXPR
3080 && TREE_CODE (TREE_OPERAND (t
, 0)) == FUNCTION_DECL
)
3081 return TREE_OPERAND (t
, 0);
3086 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize
))
3089 gcc_assert (!ie
->indirect_info
->agg_contents
);
3090 gcc_assert (!ie
->indirect_info
->by_ref
);
3091 anc_offset
= ie
->indirect_info
->offset
;
3095 if ((unsigned) param_index
< known_csts
.length ()
3096 && known_csts
[param_index
])
3097 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3098 ie
->indirect_info
->offset
, true);
3100 /* Try to work out value of virtual table pointer value in replacements. */
3101 /* or known aggregate values. */
3103 t
= avs
.get_value (param_index
,
3104 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3107 /* If we found the virtual table pointer, lookup the target. */
3111 unsigned HOST_WIDE_INT offset
;
3112 if (vtable_pointer_value_to_vtable (t
, &vtable
, &offset
))
3115 target
= gimple_get_virt_method_for_vtable (ie
->indirect_info
->otr_token
,
3116 vtable
, offset
, &can_refer
);
3120 || fndecl_built_in_p (target
, BUILT_IN_UNREACHABLE
)
3121 || !possible_polymorphic_call_target_p
3122 (ie
, cgraph_node::get (target
)))
3124 /* Do not speculate builtin_unreachable, it is stupid! */
3125 if (ie
->indirect_info
->vptr_changed
)
3127 target
= ipa_impossible_devirt_target (ie
, target
);
3129 *speculative
= ie
->indirect_info
->vptr_changed
;
3136 /* Do we know the constant value of pointer? */
3137 if (!t
&& (unsigned) param_index
< known_csts
.length ())
3138 t
= known_csts
[param_index
];
3140 gcc_checking_assert (!t
|| TREE_CODE (t
) != TREE_BINFO
);
3142 ipa_polymorphic_call_context context
;
3143 if (known_contexts
.length () > (unsigned int) param_index
)
3145 context
= known_contexts
[param_index
];
3146 context
.offset_by (anc_offset
);
3147 if (ie
->indirect_info
->vptr_changed
)
3148 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3149 ie
->indirect_info
->otr_type
);
3152 ipa_polymorphic_call_context ctx2
= ipa_polymorphic_call_context
3153 (t
, ie
->indirect_info
->otr_type
, anc_offset
);
3154 if (!ctx2
.useless_p ())
3155 context
.combine_with (ctx2
, ie
->indirect_info
->otr_type
);
3160 context
= ipa_polymorphic_call_context (t
, ie
->indirect_info
->otr_type
,
3162 if (ie
->indirect_info
->vptr_changed
)
3163 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3164 ie
->indirect_info
->otr_type
);
3169 vec
<cgraph_node
*>targets
;
3172 targets
= possible_polymorphic_call_targets
3173 (ie
->indirect_info
->otr_type
,
3174 ie
->indirect_info
->otr_token
,
3176 if (!final
|| targets
.length () > 1)
3178 struct cgraph_node
*node
;
3181 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize_speculatively
)
3182 || ie
->speculative
|| !ie
->maybe_hot_p ())
3184 node
= try_speculative_devirtualization (ie
->indirect_info
->otr_type
,
3185 ie
->indirect_info
->otr_token
,
3189 *speculative
= true;
3190 target
= node
->decl
;
3197 *speculative
= false;
3198 if (targets
.length () == 1)
3199 target
= targets
[0]->decl
;
3201 target
= ipa_impossible_devirt_target (ie
, NULL_TREE
);
3204 if (target
&& !possible_polymorphic_call_target_p (ie
,
3205 cgraph_node::get (target
)))
3209 target
= ipa_impossible_devirt_target (ie
, target
);
3215 /* If an indirect edge IE can be turned into a direct one based on data in
3216 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3217 whether the discovered target is only speculative guess. */
3220 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3221 ipa_call_arg_values
*avals
,
3224 ipa_argagg_value_list
avl (avals
);
3225 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3226 avals
->m_known_contexts
,
3230 /* Calculate devirtualization time bonus for NODE, assuming we know information
3231 about arguments stored in AVALS. */
3234 devirtualization_time_bonus (struct cgraph_node
*node
,
3235 ipa_auto_call_arg_values
*avals
)
3237 struct cgraph_edge
*ie
;
3240 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
3242 struct cgraph_node
*callee
;
3243 class ipa_fn_summary
*isummary
;
3244 enum availability avail
;
3248 ipa_argagg_value_list
avl (avals
);
3249 target
= ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3250 avals
->m_known_contexts
,
3255 /* Only bare minimum benefit for clearly un-inlineable targets. */
3257 callee
= cgraph_node::get (target
);
3258 if (!callee
|| !callee
->definition
)
3260 callee
= callee
->function_symbol (&avail
);
3261 if (avail
< AVAIL_AVAILABLE
)
3263 isummary
= ipa_fn_summaries
->get (callee
);
3264 if (!isummary
|| !isummary
->inlinable
)
3267 int size
= ipa_size_summaries
->get (callee
)->size
;
3268 /* FIXME: The values below need re-considering and perhaps also
3269 integrating into the cost metrics, at lest in some very basic way. */
3270 int max_inline_insns_auto
3271 = opt_for_fn (callee
->decl
, param_max_inline_insns_auto
);
3272 if (size
<= max_inline_insns_auto
/ 4)
3273 res
+= 31 / ((int)speculative
+ 1);
3274 else if (size
<= max_inline_insns_auto
/ 2)
3275 res
+= 15 / ((int)speculative
+ 1);
3276 else if (size
<= max_inline_insns_auto
3277 || DECL_DECLARED_INLINE_P (callee
->decl
))
3278 res
+= 7 / ((int)speculative
+ 1);
3284 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3287 hint_time_bonus (cgraph_node
*node
, const ipa_call_estimates
&estimates
)
3290 ipa_hints hints
= estimates
.hints
;
3291 if (hints
& (INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride
))
3292 result
+= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3294 sreal bonus_for_one
= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3296 if (hints
& INLINE_HINT_loop_iterations
)
3297 result
+= (estimates
.loops_with_known_iterations
* bonus_for_one
).to_int ();
3299 if (hints
& INLINE_HINT_loop_stride
)
3300 result
+= (estimates
.loops_with_known_strides
* bonus_for_one
).to_int ();
3305 /* If there is a reason to penalize the function described by INFO in the
3306 cloning goodness evaluation, do so. */
3309 incorporate_penalties (cgraph_node
*node
, ipa_node_params
*info
,
3312 if (info
->node_within_scc
&& !info
->node_is_self_scc
)
3313 evaluation
= (evaluation
3314 * (100 - opt_for_fn (node
->decl
,
3315 param_ipa_cp_recursion_penalty
))) / 100;
3317 if (info
->node_calling_single_call
)
3318 evaluation
= (evaluation
3319 * (100 - opt_for_fn (node
->decl
,
3320 param_ipa_cp_single_call_penalty
)))
3326 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3327 and SIZE_COST and with the sum of frequencies of incoming edges to the
3328 potential new clone in FREQUENCIES. */
3331 good_cloning_opportunity_p (struct cgraph_node
*node
, sreal time_benefit
,
3332 sreal freq_sum
, profile_count count_sum
,
3335 if (time_benefit
== 0
3336 || !opt_for_fn (node
->decl
, flag_ipa_cp_clone
)
3337 || node
->optimize_for_size_p ())
3340 gcc_assert (size_cost
> 0);
3342 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3343 int eval_threshold
= opt_for_fn (node
->decl
, param_ipa_cp_eval_threshold
);
3344 if (count_sum
.nonzero_p ())
3346 gcc_assert (base_count
.nonzero_p ());
3347 sreal factor
= count_sum
.probability_in (base_count
).to_sreal ();
3348 sreal evaluation
= (time_benefit
* factor
) / size_cost
;
3349 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3352 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3354 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3355 "size: %i, count_sum: ", time_benefit
.to_double (),
3357 count_sum
.dump (dump_file
);
3358 fprintf (dump_file
, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3359 info
->node_within_scc
3360 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3361 info
->node_calling_single_call
? ", single_call" : "",
3362 evaluation
.to_double (), eval_threshold
);
3365 return evaluation
.to_int () >= eval_threshold
;
3369 sreal evaluation
= (time_benefit
* freq_sum
) / size_cost
;
3370 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3373 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3374 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3375 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3377 time_benefit
.to_double (), size_cost
, freq_sum
.to_double (),
3378 info
->node_within_scc
3379 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3380 info
->node_calling_single_call
? ", single_call" : "",
3381 evaluation
.to_double (), eval_threshold
);
3383 return evaluation
.to_int () >= eval_threshold
;
3387 /* Grow vectors in AVALS and fill them with information about values of
3388 parameters that are known to be independent of the context. Only calculate
3389 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3390 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3391 parameters will be stored in it.
3393 TODO: Also grow context independent value range vectors. */
3396 gather_context_independent_values (class ipa_node_params
*info
,
3397 ipa_auto_call_arg_values
*avals
,
3398 bool calculate_aggs
,
3399 int *removable_params_cost
)
3401 int i
, count
= ipa_get_param_count (info
);
3404 avals
->m_known_vals
.safe_grow_cleared (count
, true);
3405 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
3407 if (removable_params_cost
)
3408 *removable_params_cost
= 0;
3410 for (i
= 0; i
< count
; i
++)
3412 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3413 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3415 if (lat
->is_single_const ())
3417 ipcp_value
<tree
> *val
= lat
->values
;
3418 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3419 avals
->m_known_vals
[i
] = val
->value
;
3420 if (removable_params_cost
)
3421 *removable_params_cost
3422 += estimate_move_cost (TREE_TYPE (val
->value
), false);
3425 else if (removable_params_cost
3426 && !ipa_is_param_used (info
, i
))
3427 *removable_params_cost
3428 += ipa_get_param_move_cost (info
, i
);
3430 if (!ipa_is_param_used (info
, i
))
3433 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3434 /* Do not account known context as reason for cloning. We can see
3435 if it permits devirtualization. */
3436 if (ctxlat
->is_single_const ())
3437 avals
->m_known_contexts
[i
] = ctxlat
->values
->value
;
3440 ret
|= push_agg_values_from_plats (plats
, i
, 0, &avals
->m_known_aggs
);
3446 /* Perform time and size measurement of NODE with the context given in AVALS,
3447 calculate the benefit compared to the node without specialization and store
3448 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3449 context-independent or unused removable parameters and EST_MOVE_COST, the
3450 estimated movement of the considered parameter. */
3453 perform_estimation_of_a_value (cgraph_node
*node
,
3454 ipa_auto_call_arg_values
*avals
,
3455 int removable_params_cost
, int est_move_cost
,
3456 ipcp_value_base
*val
)
3459 ipa_call_estimates estimates
;
3461 estimate_ipcp_clone_size_and_time (node
, avals
, &estimates
);
3463 /* Extern inline functions have no cloning local time benefits because they
3464 will be inlined anyway. The only reason to clone them is if it enables
3465 optimization in any of the functions they call. */
3466 if (DECL_EXTERNAL (node
->decl
) && DECL_DECLARED_INLINE_P (node
->decl
))
3469 time_benefit
= (estimates
.nonspecialized_time
- estimates
.time
)
3470 + (devirtualization_time_bonus (node
, avals
)
3471 + hint_time_bonus (node
, estimates
)
3472 + removable_params_cost
+ est_move_cost
);
3474 int size
= estimates
.size
;
3475 gcc_checking_assert (size
>=0);
3476 /* The inliner-heuristics based estimates may think that in certain
3477 contexts some functions do not have any size at all but we want
3478 all specializations to have at least a tiny cost, not least not to
3483 val
->local_time_benefit
= time_benefit
;
3484 val
->local_size_cost
= size
;
3487 /* Get the overall limit oof growth based on parameters extracted from growth.
3488 it does not really make sense to mix functions with different overall growth
3489 limits but it is possible and if it happens, we do not want to select one
3493 get_max_overall_size (cgraph_node
*node
)
3495 long max_new_size
= orig_overall_size
;
3496 long large_unit
= opt_for_fn (node
->decl
, param_ipa_cp_large_unit_insns
);
3497 if (max_new_size
< large_unit
)
3498 max_new_size
= large_unit
;
3499 int unit_growth
= opt_for_fn (node
->decl
, param_ipa_cp_unit_growth
);
3500 max_new_size
+= max_new_size
* unit_growth
/ 100 + 1;
3501 return max_new_size
;
3504 /* Return true if NODE should be cloned just for a parameter removal, possibly
3505 dumping a reason if not. */
3508 clone_for_param_removal_p (cgraph_node
*node
)
3510 if (!node
->can_change_signature
)
3512 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3513 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3514 "function cannot change signature.\n");
3517 if (node
->can_be_local_p ())
3519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3520 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3521 "IPA-SRA can do it potentially better.\n");
3527 /* Iterate over known values of parameters of NODE and estimate the local
3528 effects in terms of time and size they have. */
3531 estimate_local_effects (struct cgraph_node
*node
)
3533 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3534 int count
= ipa_get_param_count (info
);
3536 int removable_params_cost
;
3538 if (!count
|| !ipcp_versionable_function_p (node
))
3541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3542 fprintf (dump_file
, "\nEstimating effects for %s.\n", node
->dump_name ());
3544 ipa_auto_call_arg_values avals
;
3545 always_const
= gather_context_independent_values (info
, &avals
, true,
3546 &removable_params_cost
);
3547 int devirt_bonus
= devirtualization_time_bonus (node
, &avals
);
3548 if (always_const
|| devirt_bonus
3549 || (removable_params_cost
&& clone_for_param_removal_p (node
)))
3551 struct caller_statistics stats
;
3552 ipa_call_estimates estimates
;
3554 init_caller_stats (&stats
);
3555 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
3557 estimate_ipcp_clone_size_and_time (node
, &avals
, &estimates
);
3558 sreal time
= estimates
.nonspecialized_time
- estimates
.time
;
3559 time
+= devirt_bonus
;
3560 time
+= hint_time_bonus (node
, estimates
);
3561 time
+= removable_params_cost
;
3562 int size
= estimates
.size
- stats
.n_calls
* removable_params_cost
;
3565 fprintf (dump_file
, " - context independent values, size: %i, "
3566 "time_benefit: %f\n", size
, (time
).to_double ());
3568 if (size
<= 0 || node
->local
)
3570 info
->do_clone_for_all_contexts
= true;
3573 fprintf (dump_file
, " Decided to specialize for all "
3574 "known contexts, code not going to grow.\n");
3576 else if (good_cloning_opportunity_p (node
, time
, stats
.freq_sum
,
3577 stats
.count_sum
, size
))
3579 if (size
+ overall_size
<= get_max_overall_size (node
))
3581 info
->do_clone_for_all_contexts
= true;
3582 overall_size
+= size
;
3585 fprintf (dump_file
, " Decided to specialize for all "
3586 "known contexts, growth (to %li) deemed "
3587 "beneficial.\n", overall_size
);
3589 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3590 fprintf (dump_file
, " Not cloning for all contexts because "
3591 "maximum unit size would be reached with %li.\n",
3592 size
+ overall_size
);
3594 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3595 fprintf (dump_file
, " Not cloning for all contexts because "
3596 "!good_cloning_opportunity_p.\n");
3600 for (int i
= 0; i
< count
; i
++)
3602 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3603 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3604 ipcp_value
<tree
> *val
;
3608 || avals
.m_known_vals
[i
])
3611 for (val
= lat
->values
; val
; val
= val
->next
)
3613 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3614 avals
.m_known_vals
[i
] = val
->value
;
3616 int emc
= estimate_move_cost (TREE_TYPE (val
->value
), true);
3617 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3620 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3622 fprintf (dump_file
, " - estimates for value ");
3623 print_ipcp_constant_value (dump_file
, val
->value
);
3624 fprintf (dump_file
, " for ");
3625 ipa_dump_param (dump_file
, info
, i
);
3626 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3627 val
->local_time_benefit
.to_double (),
3628 val
->local_size_cost
);
3631 avals
.m_known_vals
[i
] = NULL_TREE
;
3634 for (int i
= 0; i
< count
; i
++)
3636 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3638 if (!plats
->virt_call
)
3641 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3642 ipcp_value
<ipa_polymorphic_call_context
> *val
;
3646 || !avals
.m_known_contexts
[i
].useless_p ())
3649 for (val
= ctxlat
->values
; val
; val
= val
->next
)
3651 avals
.m_known_contexts
[i
] = val
->value
;
3652 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3655 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3657 fprintf (dump_file
, " - estimates for polymorphic context ");
3658 print_ipcp_constant_value (dump_file
, val
->value
);
3659 fprintf (dump_file
, " for ");
3660 ipa_dump_param (dump_file
, info
, i
);
3661 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3662 val
->local_time_benefit
.to_double (),
3663 val
->local_size_cost
);
3666 avals
.m_known_contexts
[i
] = ipa_polymorphic_call_context ();
3669 unsigned all_ctx_len
= avals
.m_known_aggs
.length ();
3670 auto_vec
<ipa_argagg_value
, 32> all_ctx
;
3671 all_ctx
.reserve_exact (all_ctx_len
);
3672 all_ctx
.splice (avals
.m_known_aggs
);
3673 avals
.m_known_aggs
.safe_grow_cleared (all_ctx_len
+ 1);
3676 for (int index
= 0; index
< count
; index
++)
3678 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, index
);
3680 if (plats
->aggs_bottom
|| !plats
->aggs
)
3683 for (ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3685 ipcp_value
<tree
> *val
;
3686 if (aglat
->bottom
|| !aglat
->values
3687 /* If the following is true, the one value is already part of all
3688 context estimations. */
3689 || (!plats
->aggs_contain_variable
3690 && aglat
->is_single_const ()))
3693 unsigned unit_offset
= aglat
->offset
/ BITS_PER_UNIT
;
3694 while (j
< all_ctx_len
3695 && (all_ctx
[j
].index
< index
3696 || (all_ctx
[j
].index
== index
3697 && all_ctx
[j
].unit_offset
< unit_offset
)))
3699 avals
.m_known_aggs
[j
] = all_ctx
[j
];
3703 for (unsigned k
= j
; k
< all_ctx_len
; k
++)
3704 avals
.m_known_aggs
[k
+1] = all_ctx
[k
];
3706 for (val
= aglat
->values
; val
; val
= val
->next
)
3708 avals
.m_known_aggs
[j
].value
= val
->value
;
3709 avals
.m_known_aggs
[j
].unit_offset
= unit_offset
;
3710 avals
.m_known_aggs
[j
].index
= index
;
3711 avals
.m_known_aggs
[j
].by_ref
= plats
->aggs_by_ref
;
3712 avals
.m_known_aggs
[j
].killed
= false;
3714 perform_estimation_of_a_value (node
, &avals
,
3715 removable_params_cost
, 0, val
);
3717 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3719 fprintf (dump_file
, " - estimates for value ");
3720 print_ipcp_constant_value (dump_file
, val
->value
);
3721 fprintf (dump_file
, " for ");
3722 ipa_dump_param (dump_file
, info
, index
);
3723 fprintf (dump_file
, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3724 "]: time_benefit: %g, size: %i\n",
3725 plats
->aggs_by_ref
? "ref " : "",
3727 val
->local_time_benefit
.to_double (),
3728 val
->local_size_cost
);
3736 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3737 topological sort of values. */
3739 template <typename valtype
>
3741 value_topo_info
<valtype
>::add_val (ipcp_value
<valtype
> *cur_val
)
3743 ipcp_value_source
<valtype
> *src
;
3749 cur_val
->dfs
= dfs_counter
;
3750 cur_val
->low_link
= dfs_counter
;
3752 cur_val
->topo_next
= stack
;
3754 cur_val
->on_stack
= true;
3756 for (src
= cur_val
->sources
; src
; src
= src
->next
)
3759 if (src
->val
->dfs
== 0)
3762 if (src
->val
->low_link
< cur_val
->low_link
)
3763 cur_val
->low_link
= src
->val
->low_link
;
3765 else if (src
->val
->on_stack
3766 && src
->val
->dfs
< cur_val
->low_link
)
3767 cur_val
->low_link
= src
->val
->dfs
;
3770 if (cur_val
->dfs
== cur_val
->low_link
)
3772 ipcp_value
<valtype
> *v
, *scc_list
= NULL
;
3777 stack
= v
->topo_next
;
3778 v
->on_stack
= false;
3779 v
->scc_no
= cur_val
->dfs
;
3781 v
->scc_next
= scc_list
;
3784 while (v
!= cur_val
);
3786 cur_val
->topo_next
= values_topo
;
3787 values_topo
= cur_val
;
3791 /* Add all values in lattices associated with NODE to the topological sort if
3792 they are not there yet. */
3795 add_all_node_vals_to_toposort (cgraph_node
*node
, ipa_topo_info
*topo
)
3797 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3798 int i
, count
= ipa_get_param_count (info
);
3800 for (i
= 0; i
< count
; i
++)
3802 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3803 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3804 struct ipcp_agg_lattice
*aglat
;
3808 ipcp_value
<tree
> *val
;
3809 for (val
= lat
->values
; val
; val
= val
->next
)
3810 topo
->constants
.add_val (val
);
3813 if (!plats
->aggs_bottom
)
3814 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3817 ipcp_value
<tree
> *val
;
3818 for (val
= aglat
->values
; val
; val
= val
->next
)
3819 topo
->constants
.add_val (val
);
3822 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3823 if (!ctxlat
->bottom
)
3825 ipcp_value
<ipa_polymorphic_call_context
> *ctxval
;
3826 for (ctxval
= ctxlat
->values
; ctxval
; ctxval
= ctxval
->next
)
3827 topo
->contexts
.add_val (ctxval
);
3832 /* One pass of constants propagation along the call graph edges, from callers
3833 to callees (requires topological ordering in TOPO), iterate over strongly
3834 connected components. */
3837 propagate_constants_topo (class ipa_topo_info
*topo
)
3841 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
3844 struct cgraph_node
*v
, *node
= topo
->order
[i
];
3845 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (node
);
3847 /* First, iteratively propagate within the strongly connected component
3848 until all lattices stabilize. */
3849 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3850 if (v
->has_gimple_body_p ())
3852 if (opt_for_fn (v
->decl
, flag_ipa_cp
)
3853 && opt_for_fn (v
->decl
, optimize
))
3854 push_node_to_stack (topo
, v
);
3855 /* When V is not optimized, we can not push it to stack, but
3856 still we need to set all its callees lattices to bottom. */
3859 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3860 propagate_constants_across_call (cs
);
3864 v
= pop_node_from_stack (topo
);
3867 struct cgraph_edge
*cs
;
3868 class ipa_node_params
*info
= NULL
;
3869 bool self_scc
= true;
3871 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3872 if (ipa_edge_within_scc (cs
))
3874 cgraph_node
*callee
= cs
->callee
->function_symbol ();
3881 info
= ipa_node_params_sum
->get (v
);
3882 info
->node_within_scc
= true;
3885 if (propagate_constants_across_call (cs
))
3886 push_node_to_stack (topo
, callee
);
3890 info
->node_is_self_scc
= self_scc
;
3892 v
= pop_node_from_stack (topo
);
3895 /* Afterwards, propagate along edges leading out of the SCC, calculates
3896 the local effects of the discovered constants and all valid values to
3897 their topological sort. */
3898 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3899 if (v
->has_gimple_body_p ()
3900 && opt_for_fn (v
->decl
, flag_ipa_cp
)
3901 && opt_for_fn (v
->decl
, optimize
))
3903 struct cgraph_edge
*cs
;
3905 estimate_local_effects (v
);
3906 add_all_node_vals_to_toposort (v
, topo
);
3907 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3908 if (!ipa_edge_within_scc (cs
))
3909 propagate_constants_across_call (cs
);
3911 cycle_nodes
.release ();
3915 /* Propagate the estimated effects of individual values along the topological
3916 from the dependent values to those they depend on. */
3918 template <typename valtype
>
3920 value_topo_info
<valtype
>::propagate_effects ()
3922 ipcp_value
<valtype
> *base
;
3923 hash_set
<ipcp_value
<valtype
> *> processed_srcvals
;
3925 for (base
= values_topo
; base
; base
= base
->topo_next
)
3927 ipcp_value_source
<valtype
> *src
;
3928 ipcp_value
<valtype
> *val
;
3930 HOST_WIDE_INT size
= 0;
3932 for (val
= base
; val
; val
= val
->scc_next
)
3934 time
= time
+ val
->local_time_benefit
+ val
->prop_time_benefit
;
3935 size
= size
+ val
->local_size_cost
+ val
->prop_size_cost
;
3938 for (val
= base
; val
; val
= val
->scc_next
)
3940 processed_srcvals
.empty ();
3941 for (src
= val
->sources
; src
; src
= src
->next
)
3943 && src
->cs
->maybe_hot_p ())
3945 if (!processed_srcvals
.add (src
->val
))
3947 HOST_WIDE_INT prop_size
= size
+ src
->val
->prop_size_cost
;
3948 if (prop_size
< INT_MAX
)
3949 src
->val
->prop_size_cost
= prop_size
;
3954 int special_factor
= 1;
3955 if (val
->same_scc (src
->val
))
3957 = opt_for_fn(src
->cs
->caller
->decl
,
3958 param_ipa_cp_recursive_freq_factor
);
3959 else if (val
->self_recursion_generated_p ()
3960 && (src
->cs
->callee
->function_symbol ()
3961 == src
->cs
->caller
))
3963 int max_recur_gen_depth
3964 = opt_for_fn(src
->cs
->caller
->decl
,
3965 param_ipa_cp_max_recursive_depth
);
3966 special_factor
= max_recur_gen_depth
3967 - val
->self_recursion_generated_level
+ 1;
3970 src
->val
->prop_time_benefit
3971 += time
* special_factor
* src
->cs
->sreal_frequency ();
3976 val
->prop_time_benefit
= time
;
3977 val
->prop_size_cost
= size
;
3981 val
->prop_time_benefit
= 0;
3982 val
->prop_size_cost
= 0;
3988 /* Callback for qsort to sort counts of all edges. */
3991 compare_edge_profile_counts (const void *a
, const void *b
)
3993 const profile_count
*cnt1
= (const profile_count
*) a
;
3994 const profile_count
*cnt2
= (const profile_count
*) b
;
4004 /* Propagate constants, polymorphic contexts and their effects from the
4005 summaries interprocedurally. */
4008 ipcp_propagate_stage (class ipa_topo_info
*topo
)
4010 struct cgraph_node
*node
;
4013 fprintf (dump_file
, "\n Propagating constants:\n\n");
4015 base_count
= profile_count::uninitialized ();
4017 bool compute_count_base
= false;
4018 unsigned base_count_pos_percent
= 0;
4019 FOR_EACH_DEFINED_FUNCTION (node
)
4021 if (node
->has_gimple_body_p ()
4022 && opt_for_fn (node
->decl
, flag_ipa_cp
)
4023 && opt_for_fn (node
->decl
, optimize
))
4025 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4026 determine_versionability (node
, info
);
4028 unsigned nlattices
= ipa_get_param_count (info
);
4029 info
->lattices
.safe_grow_cleared (nlattices
, true);
4030 initialize_node_lattices (node
);
4032 ipa_size_summary
*s
= ipa_size_summaries
->get (node
);
4033 if (node
->definition
&& !node
->alias
&& s
!= NULL
)
4034 overall_size
+= s
->self_size
;
4035 if (node
->count
.ipa ().initialized_p ())
4037 compute_count_base
= true;
4038 unsigned pos_percent
= opt_for_fn (node
->decl
,
4039 param_ipa_cp_profile_count_base
);
4040 base_count_pos_percent
= MAX (base_count_pos_percent
, pos_percent
);
4044 if (compute_count_base
)
4046 auto_vec
<profile_count
> all_edge_counts
;
4047 all_edge_counts
.reserve_exact (symtab
->edges_count
);
4048 FOR_EACH_DEFINED_FUNCTION (node
)
4049 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4051 profile_count count
= cs
->count
.ipa ();
4052 if (!count
.nonzero_p ())
4055 enum availability avail
;
4057 = cs
->callee
->function_or_virtual_thunk_symbol (&avail
);
4058 ipa_node_params
*info
= ipa_node_params_sum
->get (tgt
);
4059 if (info
&& info
->versionable
)
4060 all_edge_counts
.quick_push (count
);
4063 if (!all_edge_counts
.is_empty ())
4065 gcc_assert (base_count_pos_percent
<= 100);
4066 all_edge_counts
.qsort (compare_edge_profile_counts
);
4068 unsigned base_count_pos
4069 = ((all_edge_counts
.length () * (base_count_pos_percent
)) / 100);
4070 base_count
= all_edge_counts
[base_count_pos
];
4074 fprintf (dump_file
, "\nSelected base_count from %u edges at "
4075 "position %u, arriving at: ", all_edge_counts
.length (),
4077 base_count
.dump (dump_file
);
4078 fprintf (dump_file
, "\n");
4082 fprintf (dump_file
, "\nNo candidates with non-zero call count found, "
4083 "continuing as if without profile feedback.\n");
4086 orig_overall_size
= overall_size
;
4089 fprintf (dump_file
, "\noverall_size: %li\n", overall_size
);
4091 propagate_constants_topo (topo
);
4093 ipcp_verify_propagated_values ();
4094 topo
->constants
.propagate_effects ();
4095 topo
->contexts
.propagate_effects ();
4099 fprintf (dump_file
, "\nIPA lattices after all propagation:\n");
4100 print_all_lattices (dump_file
, (dump_flags
& TDF_DETAILS
), true);
4104 /* Discover newly direct outgoing edges from NODE which is a new clone with
4105 known KNOWN_CSTS and make them direct. */
4108 ipcp_discover_new_direct_edges (struct cgraph_node
*node
,
4109 vec
<tree
> known_csts
,
4110 vec
<ipa_polymorphic_call_context
>
4112 vec
<ipa_argagg_value
, va_gc
> *aggvals
)
4114 struct cgraph_edge
*ie
, *next_ie
;
4117 for (ie
= node
->indirect_calls
; ie
; ie
= next_ie
)
4122 next_ie
= ie
->next_callee
;
4123 ipa_argagg_value_list
avs (aggvals
);
4124 target
= ipa_get_indirect_edge_target_1 (ie
, known_csts
, known_contexts
,
4128 bool agg_contents
= ie
->indirect_info
->agg_contents
;
4129 bool polymorphic
= ie
->indirect_info
->polymorphic
;
4130 int param_index
= ie
->indirect_info
->param_index
;
4131 struct cgraph_edge
*cs
= ipa_make_edge_direct_to_target (ie
, target
,
4135 if (cs
&& !agg_contents
&& !polymorphic
)
4137 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4138 int c
= ipa_get_controlled_uses (info
, param_index
);
4139 if (c
!= IPA_UNDESCRIBED_USE
4140 && !ipa_get_param_load_dereferenced (info
, param_index
))
4142 struct ipa_ref
*to_del
;
4145 ipa_set_controlled_uses (info
, param_index
, c
);
4146 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4147 fprintf (dump_file
, " controlled uses count of param "
4148 "%i bumped down to %i\n", param_index
, c
);
4150 && (to_del
= node
->find_reference (cs
->callee
, NULL
, 0,
4153 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4154 fprintf (dump_file
, " and even removing its "
4155 "cloning-created reference\n");
4156 to_del
->remove_reference ();
4162 /* Turning calls to direct calls will improve overall summary. */
4164 ipa_update_overall_fn_summary (node
);
4167 class edge_clone_summary
;
4168 static call_summary
<edge_clone_summary
*> *edge_clone_summaries
= NULL
;
4170 /* Edge clone summary. */
4172 class edge_clone_summary
4175 /* Default constructor. */
4176 edge_clone_summary (): prev_clone (NULL
), next_clone (NULL
) {}
4178 /* Default destructor. */
4179 ~edge_clone_summary ()
4182 edge_clone_summaries
->get (prev_clone
)->next_clone
= next_clone
;
4184 edge_clone_summaries
->get (next_clone
)->prev_clone
= prev_clone
;
4187 cgraph_edge
*prev_clone
;
4188 cgraph_edge
*next_clone
;
4191 class edge_clone_summary_t
:
4192 public call_summary
<edge_clone_summary
*>
4195 edge_clone_summary_t (symbol_table
*symtab
):
4196 call_summary
<edge_clone_summary
*> (symtab
)
4198 m_initialize_when_cloning
= true;
4201 void duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4202 edge_clone_summary
*src_data
,
4203 edge_clone_summary
*dst_data
) final override
;
4206 /* Edge duplication hook. */
4209 edge_clone_summary_t::duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4210 edge_clone_summary
*src_data
,
4211 edge_clone_summary
*dst_data
)
4213 if (src_data
->next_clone
)
4214 edge_clone_summaries
->get (src_data
->next_clone
)->prev_clone
= dst_edge
;
4215 dst_data
->prev_clone
= src_edge
;
4216 dst_data
->next_clone
= src_data
->next_clone
;
4217 src_data
->next_clone
= dst_edge
;
4220 /* Return true is CS calls DEST or its clone for all contexts. When
4221 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4222 edges from/to an all-context clone. */
4225 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge
*cs
, cgraph_node
*dest
,
4226 bool allow_recursion_to_clone
)
4228 enum availability availability
;
4229 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
4231 if (availability
<= AVAIL_INTERPOSABLE
)
4235 if (!allow_recursion_to_clone
&& cs
->caller
== callee
)
4238 ipa_node_params
*info
= ipa_node_params_sum
->get (callee
);
4239 return info
->is_all_contexts_clone
&& info
->ipcp_orig_node
== dest
;
4242 /* Return true if edge CS does bring about the value described by SRC to
4243 DEST_VAL of node DEST or its clone for all contexts. */
4246 cgraph_edge_brings_value_p (cgraph_edge
*cs
, ipcp_value_source
<tree
> *src
,
4247 cgraph_node
*dest
, ipcp_value
<tree
> *dest_val
)
4249 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4251 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, !src
->val
)
4252 || caller_info
->node_dead
)
4258 if (caller_info
->ipcp_orig_node
)
4261 if (src
->offset
== -1)
4262 t
= caller_info
->known_csts
[src
->index
];
4263 else if (ipcp_transformation
*ts
4264 = ipcp_get_transformation_summary (cs
->caller
))
4266 ipa_argagg_value_list
avl (ts
);
4267 t
= avl
.get_value (src
->index
, src
->offset
/ BITS_PER_UNIT
);
4269 return (t
!= NULL_TREE
4270 && values_equal_for_ipcp_p (src
->val
->value
, t
));
4274 if (src
->val
== dest_val
)
4277 struct ipcp_agg_lattice
*aglat
;
4278 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4280 if (src
->offset
== -1)
4281 return (plats
->itself
.is_single_const ()
4282 && values_equal_for_ipcp_p (src
->val
->value
,
4283 plats
->itself
.values
->value
));
4286 if (plats
->aggs_bottom
|| plats
->aggs_contain_variable
)
4288 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4289 if (aglat
->offset
== src
->offset
)
4290 return (aglat
->is_single_const ()
4291 && values_equal_for_ipcp_p (src
->val
->value
,
4292 aglat
->values
->value
));
4298 /* Return true if edge CS does bring about the value described by SRC to
4299 DST_VAL of node DEST or its clone for all contexts. */
4302 cgraph_edge_brings_value_p (cgraph_edge
*cs
,
4303 ipcp_value_source
<ipa_polymorphic_call_context
> *src
,
4305 ipcp_value
<ipa_polymorphic_call_context
> *)
4307 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4309 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, true)
4310 || caller_info
->node_dead
)
4315 if (caller_info
->ipcp_orig_node
)
4316 return (caller_info
->known_contexts
.length () > (unsigned) src
->index
)
4317 && values_equal_for_ipcp_p (src
->val
->value
,
4318 caller_info
->known_contexts
[src
->index
]);
4320 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4322 return plats
->ctxlat
.is_single_const ()
4323 && values_equal_for_ipcp_p (src
->val
->value
,
4324 plats
->ctxlat
.values
->value
);
4327 /* Get the next clone in the linked list of clones of an edge. */
4329 static inline struct cgraph_edge
*
4330 get_next_cgraph_edge_clone (struct cgraph_edge
*cs
)
4332 edge_clone_summary
*s
= edge_clone_summaries
->get (cs
);
4333 return s
!= NULL
? s
->next_clone
: NULL
;
4336 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4337 of them is viable and hot, return true. In that case, for those that still
4338 hold, add their edge frequency and their number and cumulative profile
4339 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4340 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4342 template <typename valtype
>
4344 get_info_about_necessary_edges (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4345 sreal
*freq_sum
, int *caller_count
,
4346 profile_count
*rec_count_sum
,
4347 profile_count
*nonrec_count_sum
)
4349 ipcp_value_source
<valtype
> *src
;
4352 profile_count rec_cnt
= profile_count::zero ();
4353 profile_count nonrec_cnt
= profile_count::zero ();
4355 bool non_self_recursive
= false;
4357 for (src
= val
->sources
; src
; src
= src
->next
)
4359 struct cgraph_edge
*cs
= src
->cs
;
4362 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4365 freq
+= cs
->sreal_frequency ();
4366 hot
|= cs
->maybe_hot_p ();
4367 if (cs
->caller
!= dest
)
4369 non_self_recursive
= true;
4370 if (cs
->count
.ipa ().initialized_p ())
4371 rec_cnt
+= cs
->count
.ipa ();
4373 else if (cs
->count
.ipa ().initialized_p ())
4374 nonrec_cnt
+= cs
->count
.ipa ();
4376 cs
= get_next_cgraph_edge_clone (cs
);
4380 /* If the only edges bringing a value are self-recursive ones, do not bother
4382 if (!non_self_recursive
)
4386 *caller_count
= count
;
4387 *rec_count_sum
= rec_cnt
;
4388 *nonrec_count_sum
= nonrec_cnt
;
4390 if (!hot
&& ipa_node_params_sum
->get (dest
)->node_within_scc
)
4392 struct cgraph_edge
*cs
;
4394 /* Cold non-SCC source edge could trigger hot recursive execution of
4395 function. Consider the case as hot and rely on following cost model
4396 computation to further select right one. */
4397 for (cs
= dest
->callers
; cs
; cs
= cs
->next_caller
)
4398 if (cs
->caller
== dest
&& cs
->maybe_hot_p ())
4405 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4406 to let a non-self-recursive caller be the first element. Thus, we can
4407 simplify intersecting operations on values that arrive from all of these
4408 callers, especially when there exists self-recursive call. Return true if
4409 this kind of adjustment is possible. */
4412 adjust_callers_for_value_intersection (vec
<cgraph_edge
*> &callers
,
4415 for (unsigned i
= 0; i
< callers
.length (); i
++)
4417 cgraph_edge
*cs
= callers
[i
];
4419 if (cs
->caller
!= node
)
4423 callers
[i
] = callers
[0];
4432 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4433 is assumed their number is known and equal to CALLER_COUNT. */
4435 template <typename valtype
>
4436 static vec
<cgraph_edge
*>
4437 gather_edges_for_value (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4440 ipcp_value_source
<valtype
> *src
;
4441 vec
<cgraph_edge
*> ret
;
4443 ret
.create (caller_count
);
4444 for (src
= val
->sources
; src
; src
= src
->next
)
4446 struct cgraph_edge
*cs
= src
->cs
;
4449 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4450 ret
.quick_push (cs
);
4451 cs
= get_next_cgraph_edge_clone (cs
);
4455 if (caller_count
> 1)
4456 adjust_callers_for_value_intersection (ret
, dest
);
4461 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4462 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4463 should be set to true when the reference created for the constant should be
4464 a load one and not an address one because the corresponding parameter p is
4467 static struct ipa_replace_map
*
4468 get_replacement_map (class ipa_node_params
*info
, tree value
, int parm_num
,
4469 bool force_load_ref
)
4471 struct ipa_replace_map
*replace_map
;
4473 replace_map
= ggc_alloc
<ipa_replace_map
> ();
4476 fprintf (dump_file
, " replacing ");
4477 ipa_dump_param (dump_file
, info
, parm_num
);
4479 fprintf (dump_file
, " with const ");
4480 print_generic_expr (dump_file
, value
);
4483 fprintf (dump_file
, " - forcing load reference\n");
4485 fprintf (dump_file
, "\n");
4487 replace_map
->parm_num
= parm_num
;
4488 replace_map
->new_tree
= value
;
4489 replace_map
->force_load_ref
= force_load_ref
;
4493 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4494 one, otherwise it will be referred to as the original node. */
4497 dump_profile_updates (cgraph_node
*node
, bool spec
)
4500 fprintf (dump_file
, " setting count of the specialized node %s to ",
4501 node
->dump_name ());
4503 fprintf (dump_file
, " setting count of the original node %s to ",
4504 node
->dump_name ());
4506 node
->count
.dump (dump_file
);
4507 fprintf (dump_file
, "\n");
4508 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4510 fprintf (dump_file
, " edge to %s has count ",
4511 cs
->callee
->dump_name ());
4512 cs
->count
.dump (dump_file
);
4513 fprintf (dump_file
, "\n");
4517 /* With partial train run we do not want to assume that original's count is
4518 zero whenever we redurect all executed edges to clone. Simply drop profile
4519 to local one in this case. In eany case, return the new value. ORIG_NODE
4520 is the original node and its count has not been updaed yet. */
4523 lenient_count_portion_handling (profile_count remainder
, cgraph_node
*orig_node
)
4525 if (remainder
.ipa_p () && !remainder
.ipa ().nonzero_p ()
4526 && orig_node
->count
.ipa_p () && orig_node
->count
.ipa ().nonzero_p ()
4527 && opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4528 remainder
= remainder
.guessed_local ();
4533 /* Structure to sum counts coming from nodes other than the original node and
4536 struct gather_other_count_struct
4539 profile_count other_count
;
4542 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4543 counts that come from non-self-recursive calls.. */
4546 gather_count_of_non_rec_edges (cgraph_node
*node
, void *data
)
4548 gather_other_count_struct
*desc
= (gather_other_count_struct
*) data
;
4549 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4550 if (cs
->caller
!= desc
->orig
&& cs
->caller
->clone_of
!= desc
->orig
)
4551 desc
->other_count
+= cs
->count
.ipa ();
4555 /* Structure to help analyze if we need to boost counts of some clones of some
4556 non-recursive edges to match the new callee count. */
4558 struct desc_incoming_count_struct
4561 hash_set
<cgraph_edge
*> *processed_edges
;
4562 profile_count count
;
4563 unsigned unproc_orig_rec_edges
;
4566 /* Go over edges calling NODE and its thunks and gather information about
4567 incoming counts so that we know if we need to make any adjustments. */
4570 analyze_clone_icoming_counts (cgraph_node
*node
,
4571 desc_incoming_count_struct
*desc
)
4573 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4574 if (cs
->caller
->thunk
)
4576 analyze_clone_icoming_counts (cs
->caller
, desc
);
4581 if (cs
->count
.initialized_p ())
4582 desc
->count
+= cs
->count
.ipa ();
4583 if (!desc
->processed_edges
->contains (cs
)
4584 && cs
->caller
->clone_of
== desc
->orig
)
4585 desc
->unproc_orig_rec_edges
++;
4589 /* If caller edge counts of a clone created for a self-recursive arithmetic
4590 jump function must be adjusted because it is coming from a the "seed" clone
4591 for the first value and so has been excessively scaled back as if it was not
4592 a recursive call, adjust it so that the incoming counts of NODE match its
4593 count. NODE is the node or its thunk. */
4596 adjust_clone_incoming_counts (cgraph_node
*node
,
4597 desc_incoming_count_struct
*desc
)
4599 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4600 if (cs
->caller
->thunk
)
4602 adjust_clone_incoming_counts (cs
->caller
, desc
);
4603 profile_count sum
= profile_count::zero ();
4604 for (cgraph_edge
*e
= cs
->caller
->callers
; e
; e
= e
->next_caller
)
4605 if (e
->count
.initialized_p ())
4606 sum
+= e
->count
.ipa ();
4607 cs
->count
= cs
->count
.combine_with_ipa_count (sum
);
4609 else if (!desc
->processed_edges
->contains (cs
)
4610 && cs
->caller
->clone_of
== desc
->orig
)
4612 cs
->count
+= desc
->count
;
4615 fprintf (dump_file
, " Adjusted count of an incoming edge of "
4616 "a clone %s -> %s to ", cs
->caller
->dump_name (),
4617 cs
->callee
->dump_name ());
4618 cs
->count
.dump (dump_file
);
4619 fprintf (dump_file
, "\n");
4624 /* When ORIG_NODE has been cloned for values which have been generated fora
4625 self-recursive call as a result of an arithmetic pass-through
4626 jump-functions, adjust its count together with counts of all such clones in
4627 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4629 The function sums the counts of the original node and all its clones that
4630 cannot be attributed to a specific clone because it comes from a
4631 non-recursive edge. This sum is then evenly divided between the clones and
4632 on top of that each one gets all the counts which can be attributed directly
4636 update_counts_for_self_gen_clones (cgraph_node
*orig_node
,
4637 const vec
<cgraph_node
*> &self_gen_clones
)
4639 profile_count redist_sum
= orig_node
->count
.ipa ();
4640 if (!(redist_sum
> profile_count::zero ()))
4644 fprintf (dump_file
, " Updating profile of self recursive clone "
4647 gather_other_count_struct gocs
;
4648 gocs
.orig
= orig_node
;
4649 gocs
.other_count
= profile_count::zero ();
4651 auto_vec
<profile_count
, 8> other_edges_count
;
4652 for (cgraph_node
*n
: self_gen_clones
)
4654 gocs
.other_count
= profile_count::zero ();
4655 n
->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges
,
4657 other_edges_count
.safe_push (gocs
.other_count
);
4658 redist_sum
-= gocs
.other_count
;
4661 hash_set
<cgraph_edge
*> processed_edges
;
4663 for (cgraph_node
*n
: self_gen_clones
)
4665 profile_count orig_count
= n
->count
;
4666 profile_count new_count
4667 = (redist_sum
/ self_gen_clones
.length () + other_edges_count
[i
]);
4668 new_count
= lenient_count_portion_handling (new_count
, orig_node
);
4669 n
->count
= new_count
;
4670 profile_count::adjust_for_ipa_scaling (&new_count
, &orig_count
);
4671 for (cgraph_edge
*cs
= n
->callees
; cs
; cs
= cs
->next_callee
)
4673 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4674 processed_edges
.add (cs
);
4676 for (cgraph_edge
*cs
= n
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4677 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4682 /* There are still going to be edges to ORIG_NODE that have one or more
4683 clones coming from another node clone in SELF_GEN_CLONES and which we
4684 scaled by the same amount, which means that the total incoming sum of
4685 counts to ORIG_NODE will be too high, scale such edges back. */
4686 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4688 if (cs
->callee
->ultimate_alias_target () == orig_node
)
4691 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4692 if (e
->callee
->ultimate_alias_target () == orig_node
4693 && processed_edges
.contains (e
))
4696 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4697 if (e
->callee
->ultimate_alias_target () == orig_node
4698 && processed_edges
.contains (e
))
4703 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4704 along self-recursive edges are likely to have fairly low count and so
4705 edges from them to nodes in the self_gen_clones do not correspond to the
4706 artificially distributed count of the nodes, the total sum of incoming
4707 edges to some clones might be too low. Detect this situation and correct
4709 for (cgraph_node
*n
: self_gen_clones
)
4711 if (!(n
->count
.ipa () > profile_count::zero ()))
4714 desc_incoming_count_struct desc
;
4715 desc
.orig
= orig_node
;
4716 desc
.processed_edges
= &processed_edges
;
4717 desc
.count
= profile_count::zero ();
4718 desc
.unproc_orig_rec_edges
= 0;
4719 analyze_clone_icoming_counts (n
, &desc
);
4721 if (n
->count
.differs_from_p (desc
.count
))
4723 if (n
->count
> desc
.count
4724 && desc
.unproc_orig_rec_edges
> 0)
4726 desc
.count
= n
->count
- desc
.count
;
4727 desc
.count
= desc
.count
/= desc
.unproc_orig_rec_edges
;
4728 adjust_clone_incoming_counts (n
, &desc
);
4732 " Unable to fix up incoming counts for %s.\n",
4738 for (cgraph_node
*n
: self_gen_clones
)
4739 dump_profile_updates (n
, n
!= orig_node
);
4743 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4744 their profile information to reflect this. This function should not be used
4745 for clones generated for arithmetic pass-through jump functions on a
4746 self-recursive call graph edge, that situation is handled by
4747 update_counts_for_self_gen_clones. */
4750 update_profiling_info (struct cgraph_node
*orig_node
,
4751 struct cgraph_node
*new_node
)
4753 struct caller_statistics stats
;
4754 profile_count new_sum
;
4755 profile_count remainder
, orig_node_count
= orig_node
->count
.ipa ();
4757 if (!(orig_node_count
> profile_count::zero ()))
4762 fprintf (dump_file
, " Updating profile from original count: ");
4763 orig_node_count
.dump (dump_file
);
4764 fprintf (dump_file
, "\n");
4767 init_caller_stats (&stats
, new_node
);
4768 new_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
4770 new_sum
= stats
.count_sum
;
4772 bool orig_edges_processed
= false;
4773 if (new_sum
> orig_node_count
)
4775 /* TODO: Profile has alreay gone astray, keep what we have but lower it
4776 to global0 category. */
4777 remainder
= orig_node
->count
.global0 ();
4779 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4780 cs
->count
= cs
->count
.global0 ();
4781 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4783 cs
= cs
->next_callee
)
4784 cs
->count
= cs
->count
.global0 ();
4785 orig_edges_processed
= true;
4787 else if (stats
.rec_count_sum
.nonzero_p ())
4789 int new_nonrec_calls
= stats
.n_nonrec_calls
;
4790 /* There are self-recursive edges which are likely to bring in the
4791 majority of calls but which we must divide in between the original and
4793 init_caller_stats (&stats
, orig_node
);
4794 orig_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
,
4796 int orig_nonrec_calls
= stats
.n_nonrec_calls
;
4797 profile_count orig_nonrec_call_count
= stats
.count_sum
;
4799 if (orig_node
->local
)
4801 if (!orig_nonrec_call_count
.nonzero_p ())
4804 fprintf (dump_file
, " The original is local and the only "
4805 "incoming edges from non-dead callers with nonzero "
4806 "counts are self-recursive, assuming it is cold.\n");
4807 /* The NEW_NODE count and counts of all its outgoing edges
4808 are still unmodified copies of ORIG_NODE's. Just clear
4809 the latter and bail out. */
4811 if (opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4812 zero
= profile_count::zero ().guessed_local ();
4814 zero
= profile_count::adjusted_zero ();
4815 orig_node
->count
= zero
;
4816 for (cgraph_edge
*cs
= orig_node
->callees
;
4818 cs
= cs
->next_callee
)
4820 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4822 cs
= cs
->next_callee
)
4829 /* Let's behave as if there was another caller that accounts for all
4830 the calls that were either indirect or from other compilation
4832 orig_nonrec_calls
++;
4833 profile_count pretend_caller_count
4834 = (orig_node_count
- new_sum
- orig_nonrec_call_count
4835 - stats
.rec_count_sum
);
4836 orig_nonrec_call_count
+= pretend_caller_count
;
4839 /* Divide all "unexplained" counts roughly proportionally to sums of
4840 counts of non-recursive calls.
4842 We put rather arbitrary limits on how many counts we claim because the
4843 number of non-self-recursive incoming count is only a rough guideline
4844 and there are cases (such as mcf) where using it blindly just takes
4845 too many. And if lattices are considered in the opposite order we
4846 could also take too few. */
4847 profile_count unexp
= orig_node_count
- new_sum
- orig_nonrec_call_count
;
4849 int limit_den
= 2 * (orig_nonrec_calls
+ new_nonrec_calls
);
4850 profile_count new_part
4851 = MAX(MIN (unexp
.apply_scale (new_sum
,
4852 new_sum
+ orig_nonrec_call_count
),
4853 unexp
.apply_scale (limit_den
- 1, limit_den
)),
4854 unexp
.apply_scale (new_nonrec_calls
, limit_den
));
4857 fprintf (dump_file
, " Claiming ");
4858 new_part
.dump (dump_file
);
4859 fprintf (dump_file
, " of unexplained ");
4860 unexp
.dump (dump_file
);
4861 fprintf (dump_file
, " counts because of self-recursive "
4864 new_sum
+= new_part
;
4865 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4869 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4872 new_sum
= orig_node_count
.combine_with_ipa_count (new_sum
);
4873 new_node
->count
= new_sum
;
4874 orig_node
->count
= remainder
;
4876 profile_count orig_new_node_count
= orig_node_count
;
4877 profile_count::adjust_for_ipa_scaling (&new_sum
, &orig_new_node_count
);
4878 for (cgraph_edge
*cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4879 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4880 for (cgraph_edge
*cs
= new_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4881 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4883 if (!orig_edges_processed
)
4885 profile_count::adjust_for_ipa_scaling (&remainder
, &orig_node_count
);
4886 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4887 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4888 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4890 cs
= cs
->next_callee
)
4891 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4896 dump_profile_updates (new_node
, true);
4897 dump_profile_updates (orig_node
, false);
4901 /* Update the respective profile of specialized NEW_NODE and the original
4902 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4903 have been redirected to the specialized version. */
4906 update_specialized_profile (struct cgraph_node
*new_node
,
4907 struct cgraph_node
*orig_node
,
4908 profile_count redirected_sum
)
4910 struct cgraph_edge
*cs
;
4911 profile_count new_node_count
, orig_node_count
= orig_node
->count
.ipa ();
4915 fprintf (dump_file
, " the sum of counts of redirected edges is ");
4916 redirected_sum
.dump (dump_file
);
4917 fprintf (dump_file
, "\n old ipa count of the original node is ");
4918 orig_node_count
.dump (dump_file
);
4919 fprintf (dump_file
, "\n");
4921 if (!(orig_node_count
> profile_count::zero ()))
4924 new_node_count
= new_node
->count
;
4925 new_node
->count
+= redirected_sum
;
4927 = lenient_count_portion_handling (orig_node
->count
- redirected_sum
,
4930 for (cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4931 cs
->count
+= cs
->count
.apply_scale (redirected_sum
, new_node_count
);
4933 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4935 profile_count dec
= cs
->count
.apply_scale (redirected_sum
,
4942 dump_profile_updates (new_node
, true);
4943 dump_profile_updates (orig_node
, false);
4947 static void adjust_references_in_caller (cgraph_edge
*cs
,
4948 symtab_node
*symbol
, int index
);
4950 /* Simple structure to pass a symbol and index (with same meaning as parameters
4951 of adjust_references_in_caller) through a void* parameter of a
4952 call_for_symbol_thunks_and_aliases callback. */
4953 struct symbol_and_index_together
4955 symtab_node
*symbol
;
4959 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4960 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4962 adjust_refs_in_act_callers (struct cgraph_node
*node
, void *data
)
4964 symbol_and_index_together
*pack
= (symbol_and_index_together
*) data
;
4965 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4966 if (!cs
->caller
->thunk
)
4967 adjust_references_in_caller (cs
, pack
->symbol
, pack
->index
);
4971 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4972 variable which is only dereferenced and which is represented by SYMBOL. See
4973 if we can remove ADDR reference in callers assosiated witht the call. */
4976 adjust_references_in_caller (cgraph_edge
*cs
, symtab_node
*symbol
, int index
)
4978 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
4979 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, index
);
4980 if (jfunc
->type
== IPA_JF_CONST
)
4982 ipa_ref
*to_del
= cs
->caller
->find_reference (symbol
, cs
->call_stmt
,
4987 to_del
->remove_reference ();
4988 ipa_zap_jf_refdesc (jfunc
);
4990 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
4991 cs
->caller
->dump_name (), symbol
->dump_name ());
4995 if (jfunc
->type
!= IPA_JF_PASS_THROUGH
4996 || ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
4997 || ipa_get_jf_pass_through_refdesc_decremented (jfunc
))
5000 int fidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5001 cgraph_node
*caller
= cs
->caller
;
5002 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (caller
);
5003 /* TODO: This consistency check may be too big and not really
5004 that useful. Consider removing it. */
5006 if (caller_info
->ipcp_orig_node
)
5007 cst
= caller_info
->known_csts
[fidx
];
5010 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (caller_info
, fidx
);
5011 gcc_assert (lat
->is_single_const ());
5012 cst
= lat
->values
->value
;
5014 gcc_assert (TREE_CODE (cst
) == ADDR_EXPR
5015 && (symtab_node::get (get_base_address (TREE_OPERAND (cst
, 0)))
5018 int cuses
= ipa_get_controlled_uses (caller_info
, fidx
);
5019 if (cuses
== IPA_UNDESCRIBED_USE
)
5021 gcc_assert (cuses
> 0);
5023 ipa_set_controlled_uses (caller_info
, fidx
, cuses
);
5024 ipa_set_jf_pass_through_refdesc_decremented (jfunc
, true);
5025 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5026 fprintf (dump_file
, " Controlled uses of parameter %i of %s dropped "
5027 "to %i.\n", fidx
, caller
->dump_name (), cuses
);
5031 if (caller_info
->ipcp_orig_node
)
5033 /* Cloning machinery has created a reference here, we need to either
5034 remove it or change it to a read one. */
5035 ipa_ref
*to_del
= caller
->find_reference (symbol
, NULL
, 0, IPA_REF_ADDR
);
5038 to_del
->remove_reference ();
5040 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5041 cs
->caller
->dump_name (), symbol
->dump_name ());
5042 if (ipa_get_param_load_dereferenced (caller_info
, fidx
))
5044 caller
->create_reference (symbol
, IPA_REF_LOAD
, NULL
);
5047 " ...and replaced it with LOAD one.\n");
5052 symbol_and_index_together pack
;
5053 pack
.symbol
= symbol
;
5055 if (caller
->can_change_signature
)
5056 caller
->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers
,
5061 /* Return true if we would like to remove a parameter from NODE when cloning it
5062 with KNOWN_CSTS scalar constants. */
5065 want_remove_some_param_p (cgraph_node
*node
, vec
<tree
> known_csts
)
5067 auto_vec
<bool, 16> surviving
;
5068 bool filled_vec
= false;
5069 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5070 int i
, count
= ipa_get_param_count (info
);
5072 for (i
= 0; i
< count
; i
++)
5074 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5079 clone_info
*info
= clone_info::get (node
);
5080 if (!info
|| !info
->param_adjustments
)
5082 info
->param_adjustments
->get_surviving_params (&surviving
);
5085 if (surviving
.length() < (unsigned) i
&& surviving
[i
])
5091 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5092 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5093 redirect all edges in CALLERS to it. */
5095 static struct cgraph_node
*
5096 create_specialized_node (struct cgraph_node
*node
,
5097 vec
<tree
> known_csts
,
5098 vec
<ipa_polymorphic_call_context
> known_contexts
,
5099 vec
<ipa_argagg_value
, va_gc
> *aggvals
,
5100 vec
<cgraph_edge
*> &callers
)
5102 ipa_node_params
*new_info
, *info
= ipa_node_params_sum
->get (node
);
5103 vec
<ipa_replace_map
*, va_gc
> *replace_trees
= NULL
;
5104 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
5105 struct cgraph_node
*new_node
;
5106 int i
, count
= ipa_get_param_count (info
);
5107 clone_info
*cinfo
= clone_info::get (node
);
5108 ipa_param_adjustments
*old_adjustments
= cinfo
5109 ? cinfo
->param_adjustments
: NULL
;
5110 ipa_param_adjustments
*new_adjustments
;
5111 gcc_assert (!info
->ipcp_orig_node
);
5112 gcc_assert (node
->can_change_signature
5113 || !old_adjustments
);
5115 if (old_adjustments
)
5117 /* At the moment all IPA optimizations should use the number of
5118 parameters of the prevailing decl as the m_always_copy_start.
5119 Handling any other value would complicate the code below, so for the
5120 time bing let's only assert it is so. */
5121 gcc_assert (old_adjustments
->m_always_copy_start
== count
5122 || old_adjustments
->m_always_copy_start
< 0);
5123 int old_adj_count
= vec_safe_length (old_adjustments
->m_adj_params
);
5124 for (i
= 0; i
< old_adj_count
; i
++)
5126 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
5127 if (!node
->can_change_signature
5128 || old_adj
->op
!= IPA_PARAM_OP_COPY
5129 || (!known_csts
[old_adj
->base_index
]
5130 && ipa_is_param_used (info
, old_adj
->base_index
)))
5132 ipa_adjusted_param new_adj
= *old_adj
;
5134 new_adj
.prev_clone_adjustment
= true;
5135 new_adj
.prev_clone_index
= i
;
5136 vec_safe_push (new_params
, new_adj
);
5139 bool skip_return
= old_adjustments
->m_skip_return
;
5140 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5141 ipa_param_adjustments (new_params
, count
,
5144 else if (node
->can_change_signature
5145 && want_remove_some_param_p (node
, known_csts
))
5147 ipa_adjusted_param adj
;
5148 memset (&adj
, 0, sizeof (adj
));
5149 adj
.op
= IPA_PARAM_OP_COPY
;
5150 for (i
= 0; i
< count
; i
++)
5151 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5154 adj
.prev_clone_index
= i
;
5155 vec_safe_push (new_params
, adj
);
5157 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5158 ipa_param_adjustments (new_params
, count
, false));
5161 new_adjustments
= NULL
;
5163 auto_vec
<cgraph_edge
*, 2> self_recursive_calls
;
5164 for (i
= callers
.length () - 1; i
>= 0; i
--)
5166 cgraph_edge
*cs
= callers
[i
];
5167 if (cs
->caller
== node
)
5169 self_recursive_calls
.safe_push (cs
);
5170 callers
.unordered_remove (i
);
5173 replace_trees
= cinfo
? vec_safe_copy (cinfo
->tree_map
) : NULL
;
5174 for (i
= 0; i
< count
; i
++)
5176 tree t
= known_csts
[i
];
5180 gcc_checking_assert (TREE_CODE (t
) != TREE_BINFO
);
5182 bool load_ref
= false;
5183 symtab_node
*ref_symbol
;
5184 if (TREE_CODE (t
) == ADDR_EXPR
)
5186 tree base
= get_base_address (TREE_OPERAND (t
, 0));
5187 if (TREE_CODE (base
) == VAR_DECL
5188 && ipa_get_controlled_uses (info
, i
) == 0
5189 && ipa_get_param_load_dereferenced (info
, i
)
5190 && (ref_symbol
= symtab_node::get (base
)))
5193 if (node
->can_change_signature
)
5194 for (cgraph_edge
*caller
: callers
)
5195 adjust_references_in_caller (caller
, ref_symbol
, i
);
5199 ipa_replace_map
*replace_map
= get_replacement_map (info
, t
, i
, load_ref
);
5201 vec_safe_push (replace_trees
, replace_map
);
5204 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
5205 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5207 new_node
= node
->create_virtual_clone (callers
, replace_trees
,
5208 new_adjustments
, "constprop",
5212 bool have_self_recursive_calls
= !self_recursive_calls
.is_empty ();
5213 for (unsigned j
= 0; j
< self_recursive_calls
.length (); j
++)
5215 cgraph_edge
*cs
= get_next_cgraph_edge_clone (self_recursive_calls
[j
]);
5216 /* Cloned edges can disappear during cloning as speculation can be
5217 resolved, check that we have one and that it comes from the last
5219 if (cs
&& cs
->caller
== new_node
)
5220 cs
->redirect_callee_duplicating_thunks (new_node
);
5221 /* Any future code that would make more than one clone of an outgoing
5222 edge would confuse this mechanism, so let's check that does not
5224 gcc_checking_assert (!cs
5225 || !get_next_cgraph_edge_clone (cs
)
5226 || get_next_cgraph_edge_clone (cs
)->caller
!= new_node
);
5228 if (have_self_recursive_calls
)
5229 new_node
->expand_all_artificial_thunks ();
5231 ipa_set_node_agg_value_chain (new_node
, aggvals
);
5232 for (const ipa_argagg_value
&av
: aggvals
)
5233 new_node
->maybe_create_reference (av
.value
, NULL
);
5235 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5237 fprintf (dump_file
, " the new node is %s.\n", new_node
->dump_name ());
5238 if (known_contexts
.exists ())
5240 for (i
= 0; i
< count
; i
++)
5241 if (!known_contexts
[i
].useless_p ())
5243 fprintf (dump_file
, " known ctx %i is ", i
);
5244 known_contexts
[i
].dump (dump_file
);
5249 fprintf (dump_file
, " Aggregate replacements:");
5250 ipa_argagg_value_list
avs (aggvals
);
5251 avs
.dump (dump_file
);
5255 new_info
= ipa_node_params_sum
->get (new_node
);
5256 new_info
->ipcp_orig_node
= node
;
5257 new_node
->ipcp_clone
= true;
5258 new_info
->known_csts
= known_csts
;
5259 new_info
->known_contexts
= known_contexts
;
5261 ipcp_discover_new_direct_edges (new_node
, known_csts
, known_contexts
,
5267 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5268 pass-through function to itself when the cgraph_node involved is not an
5269 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5270 no-operation pass-through. */
5273 self_recursive_pass_through_p (cgraph_edge
*cs
, ipa_jump_func
*jfunc
, int i
,
5276 enum availability availability
;
5277 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5278 && availability
> AVAIL_INTERPOSABLE
5279 && jfunc
->type
== IPA_JF_PASS_THROUGH
5280 && (!simple
|| ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5281 && ipa_get_jf_pass_through_formal_id (jfunc
) == i
5282 && ipa_node_params_sum
->get (cs
->caller
)
5283 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5288 /* Return true if JFUNC, which describes a part of an aggregate represented or
5289 pointed to by the i-th parameter of call CS, is a pass-through function to
5290 itself when the cgraph_node involved is not an IPA-CP clone.. When
5291 SIMPLE is true, further check if JFUNC is a simple no-operation
5295 self_recursive_agg_pass_through_p (const cgraph_edge
*cs
,
5296 const ipa_agg_jf_item
*jfunc
,
5297 int i
, bool simple
= true)
5299 enum availability availability
;
5300 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5301 && availability
> AVAIL_INTERPOSABLE
5302 && jfunc
->jftype
== IPA_JF_LOAD_AGG
5303 && jfunc
->offset
== jfunc
->value
.load_agg
.offset
5304 && (!simple
|| jfunc
->value
.pass_through
.operation
== NOP_EXPR
)
5305 && jfunc
->value
.pass_through
.formal_id
== i
5306 && useless_type_conversion_p (jfunc
->value
.load_agg
.type
, jfunc
->type
)
5307 && ipa_node_params_sum
->get (cs
->caller
)
5308 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5313 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5314 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5317 find_more_scalar_values_for_callers_subset (struct cgraph_node
*node
,
5318 vec
<tree
> &known_csts
,
5319 const vec
<cgraph_edge
*> &callers
)
5321 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5322 int i
, count
= ipa_get_param_count (info
);
5324 for (i
= 0; i
< count
; i
++)
5326 struct cgraph_edge
*cs
;
5327 tree newval
= NULL_TREE
;
5330 tree type
= ipa_get_type (info
, i
);
5332 if (ipa_get_scalar_lat (info
, i
)->bottom
|| known_csts
[i
])
5335 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5337 struct ipa_jump_func
*jump_func
;
5340 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5342 || i
>= ipa_get_cs_argument_count (args
)
5344 && call_passes_through_thunk (cs
)))
5349 jump_func
= ipa_get_ith_jump_func (args
, i
);
5351 /* Besides simple pass-through jump function, arithmetic jump
5352 function could also introduce argument-direct-pass-through for
5353 self-feeding recursive call. For example,
5360 Given that i is 0, recursive propagation via (i & 1) also gets
5362 if (self_recursive_pass_through_p (cs
, jump_func
, i
, false))
5364 gcc_assert (newval
);
5365 t
= ipa_get_jf_arith_result (
5366 ipa_get_jf_pass_through_operation (jump_func
),
5368 ipa_get_jf_pass_through_operand (jump_func
),
5372 t
= ipa_value_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5376 && !values_equal_for_ipcp_p (t
, newval
))
5377 || (!first
&& !newval
))
5389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5391 fprintf (dump_file
, " adding an extra known scalar value ");
5392 print_ipcp_constant_value (dump_file
, newval
);
5393 fprintf (dump_file
, " for ");
5394 ipa_dump_param (dump_file
, info
, i
);
5395 fprintf (dump_file
, "\n");
5398 known_csts
[i
] = newval
;
5403 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5404 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5408 find_more_contexts_for_caller_subset (cgraph_node
*node
,
5409 vec
<ipa_polymorphic_call_context
>
5411 const vec
<cgraph_edge
*> &callers
)
5413 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5414 int i
, count
= ipa_get_param_count (info
);
5416 for (i
= 0; i
< count
; i
++)
5420 if (ipa_get_poly_ctx_lat (info
, i
)->bottom
5421 || (known_contexts
->exists ()
5422 && !(*known_contexts
)[i
].useless_p ()))
5425 ipa_polymorphic_call_context newval
;
5429 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5431 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5433 || i
>= ipa_get_cs_argument_count (args
))
5435 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
5436 ipa_polymorphic_call_context ctx
;
5437 ctx
= ipa_context_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5445 newval
.meet_with (ctx
);
5446 if (newval
.useless_p ())
5450 if (!newval
.useless_p ())
5452 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5454 fprintf (dump_file
, " adding an extra known polymorphic "
5456 print_ipcp_constant_value (dump_file
, newval
);
5457 fprintf (dump_file
, " for ");
5458 ipa_dump_param (dump_file
, info
, i
);
5459 fprintf (dump_file
, "\n");
5462 if (!known_contexts
->exists ())
5463 known_contexts
->safe_grow_cleared (ipa_get_param_count (info
),
5465 (*known_contexts
)[i
] = newval
;
5471 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5472 RES. If INTERIM is non-NULL, it contains the current interim state of
5473 collected aggregate values which can be used to compute values passed over
5474 self-recursive edges.
5476 This basically one iteration of push_agg_values_from_edge over one
5477 parameter, which allows for simpler early returns. */
5480 push_agg_values_for_index_from_edge (struct cgraph_edge
*cs
, int index
,
5481 vec
<ipa_argagg_value
> *res
,
5482 const ipa_argagg_value_list
*interim
)
5484 bool agg_values_from_caller
= false;
5485 bool agg_jf_preserved
= false;
5486 unsigned unit_delta
= UINT_MAX
;
5488 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
),
5491 if (jfunc
->type
== IPA_JF_PASS_THROUGH
5492 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5494 agg_values_from_caller
= true;
5495 agg_jf_preserved
= ipa_get_jf_pass_through_agg_preserved (jfunc
);
5496 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5499 else if (jfunc
->type
== IPA_JF_ANCESTOR
5500 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
5502 agg_values_from_caller
= true;
5503 agg_jf_preserved
= true;
5504 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
5505 unit_delta
= ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
5508 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5509 if (agg_values_from_caller
)
5511 if (caller_info
->ipcp_orig_node
)
5513 struct cgraph_node
*orig_node
= caller_info
->ipcp_orig_node
;
5514 ipcp_transformation
*ts
5515 = ipcp_get_transformation_summary (cs
->caller
);
5516 ipa_node_params
*orig_info
= ipa_node_params_sum
->get (orig_node
);
5517 ipcp_param_lattices
*orig_plats
5518 = ipa_get_parm_lattices (orig_info
, src_idx
);
5521 && (agg_jf_preserved
|| !orig_plats
->aggs_by_ref
))
5523 ipa_argagg_value_list
src (ts
);
5524 src
.push_adjusted_values (src_idx
, index
, unit_delta
, res
);
5530 ipcp_param_lattices
*src_plats
5531 = ipa_get_parm_lattices (caller_info
, src_idx
);
5533 && !src_plats
->aggs_bottom
5534 && (agg_jf_preserved
|| !src_plats
->aggs_by_ref
))
5536 if (interim
&& self_recursive_pass_through_p (cs
, jfunc
, index
))
5538 interim
->push_adjusted_values (src_idx
, index
, unit_delta
,
5542 if (!src_plats
->aggs_contain_variable
)
5544 push_agg_values_from_plats (src_plats
, index
, unit_delta
,
5552 if (!jfunc
->agg
.items
)
5555 unsigned prev_unit_offset
= 0;
5556 for (const ipa_agg_jf_item
&agg_jf
: *jfunc
->agg
.items
)
5558 tree value
, srcvalue
;
5559 /* Besides simple pass-through aggregate jump function, arithmetic
5560 aggregate jump function could also bring same aggregate value as
5561 parameter passed-in for self-feeding recursive call. For example,
5569 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5571 && self_recursive_agg_pass_through_p (cs
, &agg_jf
, index
, false)
5572 && (srcvalue
= interim
->get_value(index
,
5573 agg_jf
.offset
/ BITS_PER_UNIT
)))
5574 value
= ipa_get_jf_arith_result (agg_jf
.value
.pass_through
.operation
,
5576 agg_jf
.value
.pass_through
.operand
,
5579 value
= ipa_agg_value_from_jfunc (caller_info
, cs
->caller
,
5583 struct ipa_argagg_value iav
;
5585 iav
.unit_offset
= agg_jf
.offset
/ BITS_PER_UNIT
;
5587 iav
.by_ref
= jfunc
->agg
.by_ref
;
5591 || iav
.unit_offset
> prev_unit_offset
);
5592 prev_unit_offset
= iav
.unit_offset
;
5595 res
->safe_push (iav
);
5601 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5602 description of ultimate callee of CS or the one it was cloned from (the
5603 summary where lattices are). If INTERIM is non-NULL, it contains the
5604 current interim state of collected aggregate values which can be used to
5605 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5606 is true) and to skip values which clearly will not be part of intersection
5610 push_agg_values_from_edge (struct cgraph_edge
*cs
,
5611 ipa_node_params
*dest_info
,
5612 vec
<ipa_argagg_value
> *res
,
5613 const ipa_argagg_value_list
*interim
,
5614 bool optimize_self_recursion
)
5616 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5620 int count
= MIN (ipa_get_param_count (dest_info
),
5621 ipa_get_cs_argument_count (args
));
5623 unsigned interim_index
= 0;
5624 for (int index
= 0; index
< count
; index
++)
5628 while (interim_index
< interim
->m_elts
.size ()
5629 && interim
->m_elts
[interim_index
].value
5630 && interim
->m_elts
[interim_index
].index
< index
)
5632 if (interim_index
>= interim
->m_elts
.size ()
5633 || interim
->m_elts
[interim_index
].index
> index
)
5637 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (dest_info
, index
);
5638 if (!ipa_is_param_used (dest_info
, index
)
5639 || plats
->aggs_bottom
)
5641 push_agg_values_for_index_from_edge (cs
, index
, res
,
5642 optimize_self_recursion
? interim
5648 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5649 from all of them. Return nullptr if there are none. */
5651 static struct vec
<ipa_argagg_value
, va_gc
> *
5652 find_aggregate_values_for_callers_subset (struct cgraph_node
*node
,
5653 const vec
<cgraph_edge
*> &callers
)
5655 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5656 if (dest_info
->ipcp_orig_node
)
5657 dest_info
= ipa_node_params_sum
->get (dest_info
->ipcp_orig_node
);
5659 /* gather_edges_for_value puts a non-recursive call into the first element of
5660 callers if it can. */
5661 auto_vec
<ipa_argagg_value
, 32> interim
;
5662 push_agg_values_from_edge (callers
[0], dest_info
, &interim
, NULL
, true);
5664 unsigned valid_entries
= interim
.length ();
5668 unsigned caller_count
= callers
.length();
5669 for (unsigned i
= 1; i
< caller_count
; i
++)
5671 auto_vec
<ipa_argagg_value
, 32> last
;
5672 ipa_argagg_value_list
avs (&interim
);
5673 push_agg_values_from_edge (callers
[i
], dest_info
, &last
, &avs
, true);
5675 valid_entries
= intersect_argaggs_with (interim
, last
);
5680 vec
<ipa_argagg_value
, va_gc
> *res
= NULL
;
5681 vec_safe_reserve_exact (res
, valid_entries
);
5682 for (const ipa_argagg_value
&av
: interim
)
5684 res
->quick_push(av
);
5685 gcc_checking_assert (res
->length () == valid_entries
);
5689 /* Determine whether CS also brings all scalar values that the NODE is
5693 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge
*cs
,
5694 struct cgraph_node
*node
)
5696 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5697 int count
= ipa_get_param_count (dest_info
);
5698 class ipa_node_params
*caller_info
;
5699 class ipa_edge_args
*args
;
5702 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5703 args
= ipa_edge_args_sum
->get (cs
);
5704 for (i
= 0; i
< count
; i
++)
5706 struct ipa_jump_func
*jump_func
;
5709 val
= dest_info
->known_csts
[i
];
5713 if (i
>= ipa_get_cs_argument_count (args
))
5715 jump_func
= ipa_get_ith_jump_func (args
, i
);
5716 t
= ipa_value_from_jfunc (caller_info
, jump_func
,
5717 ipa_get_type (dest_info
, i
));
5718 if (!t
|| !values_equal_for_ipcp_p (val
, t
))
5724 /* Determine whether CS also brings all aggregate values that NODE is
5728 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge
*cs
,
5729 struct cgraph_node
*node
)
5731 ipcp_transformation
*ts
= ipcp_get_transformation_summary (node
);
5732 if (!ts
|| vec_safe_is_empty (ts
->m_agg_values
))
5735 const ipa_argagg_value_list
existing (ts
->m_agg_values
);
5736 auto_vec
<ipa_argagg_value
, 32> edge_values
;
5737 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5738 gcc_checking_assert (dest_info
->ipcp_orig_node
);
5739 dest_info
= ipa_node_params_sum
->get (dest_info
->ipcp_orig_node
);
5740 push_agg_values_from_edge (cs
, dest_info
, &edge_values
, &existing
, false);
5741 const ipa_argagg_value_list
avl (&edge_values
);
5742 return avl
.superset_of_p (existing
);
5745 /* Given an original NODE and a VAL for which we have already created a
5746 specialized clone, look whether there are incoming edges that still lead
5747 into the old node but now also bring the requested value and also conform to
5748 all other criteria such that they can be redirected the special node.
5749 This function can therefore redirect the final edge in a SCC. */
5751 template <typename valtype
>
5753 perhaps_add_new_callers (cgraph_node
*node
, ipcp_value
<valtype
> *val
)
5755 ipcp_value_source
<valtype
> *src
;
5756 profile_count redirected_sum
= profile_count::zero ();
5758 for (src
= val
->sources
; src
; src
= src
->next
)
5760 struct cgraph_edge
*cs
= src
->cs
;
5763 if (cgraph_edge_brings_value_p (cs
, src
, node
, val
)
5764 && cgraph_edge_brings_all_scalars_for_node (cs
, val
->spec_node
)
5765 && cgraph_edge_brings_all_agg_vals_for_node (cs
, val
->spec_node
))
5768 fprintf (dump_file
, " - adding an extra caller %s of %s\n",
5769 cs
->caller
->dump_name (),
5770 val
->spec_node
->dump_name ());
5772 cs
->redirect_callee_duplicating_thunks (val
->spec_node
);
5773 val
->spec_node
->expand_all_artificial_thunks ();
5774 if (cs
->count
.ipa ().initialized_p ())
5775 redirected_sum
= redirected_sum
+ cs
->count
.ipa ();
5777 cs
= get_next_cgraph_edge_clone (cs
);
5781 if (redirected_sum
.nonzero_p ())
5782 update_specialized_profile (val
->spec_node
, node
, redirected_sum
);
5785 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5788 known_contexts_useful_p (vec
<ipa_polymorphic_call_context
> known_contexts
)
5790 ipa_polymorphic_call_context
*ctx
;
5793 FOR_EACH_VEC_ELT (known_contexts
, i
, ctx
)
5794 if (!ctx
->useless_p ())
5799 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5801 static vec
<ipa_polymorphic_call_context
>
5802 copy_useful_known_contexts (const vec
<ipa_polymorphic_call_context
> &known_contexts
)
5804 if (known_contexts_useful_p (known_contexts
))
5805 return known_contexts
.copy ();
5810 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5811 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5815 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5816 vec
<tree
> *known_csts
,
5817 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5818 ipcp_value
<tree
> *val
, int index
)
5820 *known_csts
= avals
->m_known_vals
.copy ();
5821 *known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
5822 (*known_csts
)[index
] = val
->value
;
5825 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5826 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5830 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5831 vec
<tree
> *known_csts
,
5832 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5833 ipcp_value
<ipa_polymorphic_call_context
> *val
,
5836 *known_csts
= avals
->m_known_vals
.copy ();
5837 *known_contexts
= avals
->m_known_contexts
.copy ();
5838 (*known_contexts
)[index
] = val
->value
;
5841 /* Return true if OFFSET indicates this was not an aggregate value or there is
5842 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5846 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *aggvals
,
5847 int index
, HOST_WIDE_INT offset
, tree value
)
5852 const ipa_argagg_value_list
avl (aggvals
);
5853 tree v
= avl
.get_value (index
, offset
/ BITS_PER_UNIT
);
5854 return v
&& values_equal_for_ipcp_p (v
, value
);
5857 /* Return true if offset is minus one because source of a polymorphic context
5858 cannot be an aggregate value. */
5861 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *,
5862 int , HOST_WIDE_INT offset
,
5863 ipa_polymorphic_call_context
)
5865 return offset
== -1;
5868 /* Decide whether to create a special version of NODE for value VAL of
5869 parameter at the given INDEX. If OFFSET is -1, the value is for the
5870 parameter itself, otherwise it is stored at the given OFFSET of the
5871 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
5872 is a vector which contains clones created for self-recursive calls with an
5873 arithmetic pass-through jump function. */
5875 template <typename valtype
>
5877 decide_about_value (struct cgraph_node
*node
, int index
, HOST_WIDE_INT offset
,
5878 ipcp_value
<valtype
> *val
, ipa_auto_call_arg_values
*avals
,
5879 vec
<cgraph_node
*> *self_gen_clones
)
5883 profile_count count_sum
, rec_count_sum
;
5884 vec
<cgraph_edge
*> callers
;
5888 perhaps_add_new_callers (node
, val
);
5891 else if (val
->local_size_cost
+ overall_size
> get_max_overall_size (node
))
5893 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5894 fprintf (dump_file
, " Ignoring candidate value because "
5895 "maximum unit size would be reached with %li.\n",
5896 val
->local_size_cost
+ overall_size
);
5899 else if (!get_info_about_necessary_edges (val
, node
, &freq_sum
, &caller_count
,
5900 &rec_count_sum
, &count_sum
))
5903 if (!dbg_cnt (ipa_cp_values
))
5906 if (val
->self_recursion_generated_p ())
5908 /* The edge counts in this case might not have been adjusted yet.
5909 Nevertleless, even if they were it would be only a guesswork which we
5910 can do now. The recursive part of the counts can be derived from the
5911 count of the original node anyway. */
5912 if (node
->count
.ipa ().nonzero_p ())
5914 unsigned dem
= self_gen_clones
->length () + 1;
5915 rec_count_sum
= node
->count
.ipa () / dem
;
5918 rec_count_sum
= profile_count::zero ();
5921 /* get_info_about_necessary_edges only sums up ipa counts. */
5922 count_sum
+= rec_count_sum
;
5924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5926 fprintf (dump_file
, " - considering value ");
5927 print_ipcp_constant_value (dump_file
, val
->value
);
5928 fprintf (dump_file
, " for ");
5929 ipa_dump_param (dump_file
, ipa_node_params_sum
->get (node
), index
);
5931 fprintf (dump_file
, ", offset: " HOST_WIDE_INT_PRINT_DEC
, offset
);
5932 fprintf (dump_file
, " (caller_count: %i)\n", caller_count
);
5935 if (!good_cloning_opportunity_p (node
, val
->local_time_benefit
,
5936 freq_sum
, count_sum
,
5937 val
->local_size_cost
)
5938 && !good_cloning_opportunity_p (node
, val
->prop_time_benefit
,
5939 freq_sum
, count_sum
, val
->prop_size_cost
))
5943 fprintf (dump_file
, " Creating a specialized node of %s.\n",
5944 node
->dump_name ());
5946 vec
<tree
> known_csts
;
5947 vec
<ipa_polymorphic_call_context
> known_contexts
;
5949 callers
= gather_edges_for_value (val
, node
, caller_count
);
5951 copy_known_vectors_add_val (avals
, &known_csts
, &known_contexts
, val
, index
);
5954 known_csts
= avals
->m_known_vals
.copy ();
5955 known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
5957 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
5958 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
5959 vec
<ipa_argagg_value
, va_gc
> *aggvals
5960 = find_aggregate_values_for_callers_subset (node
, callers
);
5961 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals
, index
,
5962 offset
, val
->value
));
5963 val
->spec_node
= create_specialized_node (node
, known_csts
, known_contexts
,
5966 if (val
->self_recursion_generated_p ())
5967 self_gen_clones
->safe_push (val
->spec_node
);
5969 update_profiling_info (node
, val
->spec_node
);
5972 overall_size
+= val
->local_size_cost
;
5973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5974 fprintf (dump_file
, " overall size reached %li\n",
5977 /* TODO: If for some lattice there is only one other known value
5978 left, make a special node for it too. */
5983 /* Like irange::contains_p(), but convert VAL to the range of R if
5987 ipa_range_contains_p (const vrange
&r
, tree val
)
5989 if (r
.undefined_p ())
5992 tree type
= r
.type ();
5993 if (!wi::fits_to_tree_p (wi::to_wide (val
), type
))
5996 val
= fold_convert (type
, val
);
5997 return r
.contains_p (val
);
6000 /* Decide whether and what specialized clones of NODE should be created. */
6003 decide_whether_version_node (struct cgraph_node
*node
)
6005 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6006 int i
, count
= ipa_get_param_count (info
);
6012 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6013 fprintf (dump_file
, "\nEvaluating opportunities for %s.\n",
6014 node
->dump_name ());
6016 auto_vec
<cgraph_node
*, 9> self_gen_clones
;
6017 ipa_auto_call_arg_values avals
;
6018 gather_context_independent_values (info
, &avals
, false, NULL
);
6020 for (i
= 0; i
< count
;i
++)
6022 if (!ipa_is_param_used (info
, i
))
6025 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6026 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
6027 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
6030 && !avals
.m_known_vals
[i
])
6032 ipcp_value
<tree
> *val
;
6033 for (val
= lat
->values
; val
; val
= val
->next
)
6035 /* If some values generated for self-recursive calls with
6036 arithmetic jump functions fall outside of the known
6037 range for the parameter, we can skip them. */
6038 if (TREE_CODE (val
->value
) == INTEGER_CST
6039 && !plats
->m_value_range
.bottom_p ()
6040 && !ipa_range_contains_p (plats
->m_value_range
.m_vr
,
6043 /* This can happen also if a constant present in the source
6044 code falls outside of the range of parameter's type, so we
6046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6048 fprintf (dump_file
, " - skipping%s value ",
6049 val
->self_recursion_generated_p ()
6050 ? " self_recursion_generated" : "");
6051 print_ipcp_constant_value (dump_file
, val
->value
);
6052 fprintf (dump_file
, " because it is outside known "
6057 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6062 if (!plats
->aggs_bottom
)
6064 struct ipcp_agg_lattice
*aglat
;
6065 ipcp_value
<tree
> *val
;
6066 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
6067 if (!aglat
->bottom
&& aglat
->values
6068 /* If the following is false, the one value has been considered
6069 for cloning for all contexts. */
6070 && (plats
->aggs_contain_variable
6071 || !aglat
->is_single_const ()))
6072 for (val
= aglat
->values
; val
; val
= val
->next
)
6073 ret
|= decide_about_value (node
, i
, aglat
->offset
, val
, &avals
,
6078 && avals
.m_known_contexts
[i
].useless_p ())
6080 ipcp_value
<ipa_polymorphic_call_context
> *val
;
6081 for (val
= ctxlat
->values
; val
; val
= val
->next
)
6082 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
6087 if (!self_gen_clones
.is_empty ())
6089 self_gen_clones
.safe_push (node
);
6090 update_counts_for_self_gen_clones (node
, self_gen_clones
);
6093 if (info
->do_clone_for_all_contexts
)
6095 if (!dbg_cnt (ipa_cp_values
))
6097 info
->do_clone_for_all_contexts
= false;
6101 struct cgraph_node
*clone
;
6102 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
6104 for (int i
= callers
.length () - 1; i
>= 0; i
--)
6106 cgraph_edge
*cs
= callers
[i
];
6107 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6109 if (caller_info
&& caller_info
->node_dead
)
6110 callers
.unordered_remove (i
);
6113 if (!adjust_callers_for_value_intersection (callers
, node
))
6115 /* If node is not called by anyone, or all its caller edges are
6116 self-recursive, the node is not really in use, no need to do
6118 info
->do_clone_for_all_contexts
= false;
6123 fprintf (dump_file
, " - Creating a specialized node of %s "
6124 "for all known contexts.\n", node
->dump_name ());
6126 vec
<tree
> known_csts
= avals
.m_known_vals
.copy ();
6127 vec
<ipa_polymorphic_call_context
> known_contexts
6128 = copy_useful_known_contexts (avals
.m_known_contexts
);
6129 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6130 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6131 vec
<ipa_argagg_value
, va_gc
> *aggvals
6132 = find_aggregate_values_for_callers_subset (node
, callers
);
6134 if (!known_contexts_useful_p (known_contexts
))
6136 known_contexts
.release ();
6137 known_contexts
= vNULL
;
6139 clone
= create_specialized_node (node
, known_csts
, known_contexts
,
6141 info
->do_clone_for_all_contexts
= false;
6142 ipa_node_params_sum
->get (clone
)->is_all_contexts_clone
= true;
6149 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6152 spread_undeadness (struct cgraph_node
*node
)
6154 struct cgraph_edge
*cs
;
6156 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
6157 if (ipa_edge_within_scc (cs
))
6159 struct cgraph_node
*callee
;
6160 class ipa_node_params
*info
;
6162 callee
= cs
->callee
->function_symbol (NULL
);
6163 info
= ipa_node_params_sum
->get (callee
);
6165 if (info
&& info
->node_dead
)
6167 info
->node_dead
= 0;
6168 spread_undeadness (callee
);
6173 /* Return true if NODE has a caller from outside of its SCC that is not
6174 dead. Worker callback for cgraph_for_node_and_aliases. */
6177 has_undead_caller_from_outside_scc_p (struct cgraph_node
*node
,
6178 void *data ATTRIBUTE_UNUSED
)
6180 struct cgraph_edge
*cs
;
6182 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
6183 if (cs
->caller
->thunk
6184 && cs
->caller
->call_for_symbol_thunks_and_aliases
6185 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6187 else if (!ipa_edge_within_scc (cs
))
6189 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6190 if (!caller_info
/* Unoptimized caller are like dead ones. */
6191 || !caller_info
->node_dead
)
6198 /* Identify nodes within the same SCC as NODE which are no longer needed
6199 because of new clones and will be removed as unreachable. */
6202 identify_dead_nodes (struct cgraph_node
*node
)
6204 struct cgraph_node
*v
;
6205 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6208 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6210 && !v
->call_for_symbol_thunks_and_aliases
6211 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6212 info
->node_dead
= 1;
6215 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6217 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6218 if (info
&& !info
->node_dead
)
6219 spread_undeadness (v
);
6222 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6224 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6225 if (ipa_node_params_sum
->get (v
)
6226 && ipa_node_params_sum
->get (v
)->node_dead
)
6227 fprintf (dump_file
, " Marking node as dead: %s.\n",
6232 /* The decision stage. Iterate over the topological order of call graph nodes
6233 TOPO and make specialized clones if deemed beneficial. */
6236 ipcp_decision_stage (class ipa_topo_info
*topo
)
6241 fprintf (dump_file
, "\nIPA decision stage:\n\n");
6243 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
6245 struct cgraph_node
*node
= topo
->order
[i
];
6246 bool change
= false, iterate
= true;
6250 struct cgraph_node
*v
;
6252 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6253 if (v
->has_gimple_body_p ()
6254 && ipcp_versionable_function_p (v
))
6255 iterate
|= decide_whether_version_node (v
);
6260 identify_dead_nodes (node
);
6264 /* Look up all VR and bits information that we have discovered and copy it
6265 over to the transformation summary. */
6268 ipcp_store_vr_results (void)
6272 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6274 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6275 bool dumped_sth
= false;
6276 bool found_useful_result
= false;
6278 bool do_bits
= true;
6280 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_vrp
))
6283 fprintf (dump_file
, "Not considering %s for VR discovery "
6284 "and propagate; -fipa-ipa-vrp: disabled.\n",
6285 node
->dump_name ());
6288 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_bit_cp
))
6291 fprintf (dump_file
, "Not considering %s for ipa bitwise "
6292 "propagation ; -fipa-bit-cp: disabled.\n",
6293 node
->dump_name ());
6296 if (!do_bits
&& !do_vr
)
6299 if (info
->ipcp_orig_node
)
6300 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6301 if (info
->lattices
.is_empty ())
6302 /* Newly expanded artificial thunks do not have lattices. */
6305 unsigned count
= ipa_get_param_count (info
);
6306 for (unsigned i
= 0; i
< count
; i
++)
6308 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6310 && !plats
->m_value_range
.bottom_p ()
6311 && !plats
->m_value_range
.top_p ())
6313 found_useful_result
= true;
6316 if (do_bits
&& plats
->bits_lattice
.constant_p ())
6318 found_useful_result
= true;
6322 if (!found_useful_result
)
6325 ipcp_transformation_initialize ();
6326 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6327 vec_safe_reserve_exact (ts
->m_vr
, count
);
6329 for (unsigned i
= 0; i
< count
; i
++)
6331 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6332 ipcp_bits_lattice
*bits
= NULL
;
6335 && plats
->bits_lattice
.constant_p ()
6336 && dbg_cnt (ipa_cp_bits
))
6337 bits
= &plats
->bits_lattice
;
6340 && !plats
->m_value_range
.bottom_p ()
6341 && !plats
->m_value_range
.top_p ()
6342 && dbg_cnt (ipa_cp_vr
))
6346 Value_Range tmp
= plats
->m_value_range
.m_vr
;
6347 tree type
= ipa_get_type (info
, i
);
6348 irange_bitmask
bm (wide_int::from (bits
->get_value (),
6349 TYPE_PRECISION (type
),
6351 wide_int::from (bits
->get_mask (),
6352 TYPE_PRECISION (type
),
6354 tmp
.update_bitmask (bm
);
6356 ts
->m_vr
->quick_push (vr
);
6360 ipa_vr
vr (plats
->m_value_range
.m_vr
);
6361 ts
->m_vr
->quick_push (vr
);
6366 tree type
= ipa_get_type (info
, i
);
6368 tmp
.set_varying (type
);
6369 irange_bitmask
bm (wide_int::from (bits
->get_value (),
6370 TYPE_PRECISION (type
),
6372 wide_int::from (bits
->get_mask (),
6373 TYPE_PRECISION (type
),
6375 tmp
.update_bitmask (bm
);
6377 ts
->m_vr
->quick_push (vr
);
6382 ts
->m_vr
->quick_push (vr
);
6385 if (!dump_file
|| !bits
)
6390 fprintf (dump_file
, "Propagated bits info for function %s:\n",
6391 node
->dump_name ());
6394 fprintf (dump_file
, " param %i: value = ", i
);
6395 print_hex (bits
->get_value (), dump_file
);
6396 fprintf (dump_file
, ", mask = ");
6397 print_hex (bits
->get_mask (), dump_file
);
6398 fprintf (dump_file
, "\n");
6403 /* The IPCP driver. */
6408 class ipa_topo_info topo
;
6410 if (edge_clone_summaries
== NULL
)
6411 edge_clone_summaries
= new edge_clone_summary_t (symtab
);
6413 ipa_check_create_node_params ();
6414 ipa_check_create_edge_args ();
6415 clone_num_suffixes
= new hash_map
<const char *, unsigned>;
6419 fprintf (dump_file
, "\nIPA structures before propagation:\n");
6420 if (dump_flags
& TDF_DETAILS
)
6421 ipa_print_all_params (dump_file
);
6422 ipa_print_all_jump_functions (dump_file
);
6425 /* Topological sort. */
6426 build_toporder_info (&topo
);
6427 /* Do the interprocedural propagation. */
6428 ipcp_propagate_stage (&topo
);
6429 /* Decide what constant propagation and cloning should be performed. */
6430 ipcp_decision_stage (&topo
);
6431 /* Store results of value range and bits propagation. */
6432 ipcp_store_vr_results ();
6434 /* Free all IPCP structures. */
6435 delete clone_num_suffixes
;
6436 free_toporder_info (&topo
);
6437 delete edge_clone_summaries
;
6438 edge_clone_summaries
= NULL
;
6439 ipa_free_all_structures_after_ipa_cp ();
6441 fprintf (dump_file
, "\nIPA constant propagation end\n");
6445 /* Initialization and computation of IPCP data structures. This is the initial
6446 intraprocedural analysis of functions, which gathers information to be
6447 propagated later on. */
6450 ipcp_generate_summary (void)
6452 struct cgraph_node
*node
;
6455 fprintf (dump_file
, "\nIPA constant propagation start:\n");
6456 ipa_register_cgraph_hooks ();
6458 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6459 ipa_analyze_node (node
);
6464 const pass_data pass_data_ipa_cp
=
6466 IPA_PASS
, /* type */
6468 OPTGROUP_NONE
, /* optinfo_flags */
6469 TV_IPA_CONSTANT_PROP
, /* tv_id */
6470 0, /* properties_required */
6471 0, /* properties_provided */
6472 0, /* properties_destroyed */
6473 0, /* todo_flags_start */
6474 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
6477 class pass_ipa_cp
: public ipa_opt_pass_d
6480 pass_ipa_cp (gcc::context
*ctxt
)
6481 : ipa_opt_pass_d (pass_data_ipa_cp
, ctxt
,
6482 ipcp_generate_summary
, /* generate_summary */
6483 NULL
, /* write_summary */
6484 NULL
, /* read_summary */
6485 ipcp_write_transformation_summaries
, /*
6486 write_optimization_summary */
6487 ipcp_read_transformation_summaries
, /*
6488 read_optimization_summary */
6489 NULL
, /* stmt_fixup */
6490 0, /* function_transform_todo_flags_start */
6491 ipcp_transform_function
, /* function_transform */
6492 NULL
) /* variable_transform */
6495 /* opt_pass methods: */
6496 bool gate (function
*) final override
6498 /* FIXME: We should remove the optimize check after we ensure we never run
6499 IPA passes when not optimizing. */
6500 return (flag_ipa_cp
&& optimize
) || in_lto_p
;
6503 unsigned int execute (function
*) final override
{ return ipcp_driver (); }
6505 }; // class pass_ipa_cp
6510 make_pass_ipa_cp (gcc::context
*ctxt
)
6512 return new pass_ipa_cp (ctxt
);
6515 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6516 within the same process. For use by toplev::finalize. */
6519 ipa_cp_cc_finalize (void)
6521 base_count
= profile_count::uninitialized ();
6523 orig_overall_size
= 0;
6524 ipcp_free_transformation_sum ();
6527 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6528 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6529 DECL_UID-sorted vector if available (which is pre-computed only if there are
6530 many parameters). Can return -1 if param is static chain not represented
6531 among DECL_ARGUMENTS. */
6534 ipcp_transformation::get_param_index (const_tree fndecl
, const_tree param
) const
6536 gcc_assert (TREE_CODE (param
) == PARM_DECL
);
6539 unsigned puid
= DECL_UID (param
);
6540 const ipa_uid_to_idx_map_elt
*res
6541 = std::lower_bound (m_uid_to_idx
->begin(), m_uid_to_idx
->end (), puid
,
6542 [] (const ipa_uid_to_idx_map_elt
&elt
, unsigned uid
)
6544 return elt
.uid
< uid
;
6546 if (res
== m_uid_to_idx
->end ()
6547 || res
->uid
!= puid
)
6549 gcc_assert (DECL_STATIC_CHAIN (fndecl
));
6556 for (tree p
= DECL_ARGUMENTS (fndecl
); p
; p
= DECL_CHAIN (p
), index
++)
6560 gcc_assert (DECL_STATIC_CHAIN (fndecl
));
6564 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6565 according to the uid. */
6568 compare_uids (const void *a
, const void *b
)
6570 const ipa_uid_to_idx_map_elt
*e1
= (const ipa_uid_to_idx_map_elt
*) a
;
6571 const ipa_uid_to_idx_map_elt
*e2
= (const ipa_uid_to_idx_map_elt
*) b
;
6572 if (e1
->uid
< e2
->uid
)
6574 if (e1
->uid
> e2
->uid
)
6579 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6580 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6581 from parameters to their indices in DECL_ARGUMENTS chain. */
6584 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl
)
6586 int c
= count_formal_params (fndecl
);
6590 m_uid_to_idx
= NULL
;
6591 vec_safe_reserve (m_uid_to_idx
, c
, true);
6593 for (tree p
= DECL_ARGUMENTS (fndecl
); p
; p
= DECL_CHAIN (p
), index
++)
6595 ipa_uid_to_idx_map_elt elt
;
6596 elt
.uid
= DECL_UID (p
);
6598 m_uid_to_idx
->quick_push (elt
);
6600 m_uid_to_idx
->qsort (compare_uids
);