1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
88 function_summary
<ipa_fn_summary
*> *ipa_fn_summaries
;
89 call_summary
<ipa_call_summary
*> *ipa_call_summaries
;
91 /* Edge predicates goes here. */
92 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
97 ipa_dump_hints (FILE *f
, ipa_hints hints
)
101 fprintf (f
, "IPA hints:");
102 if (hints
& INLINE_HINT_indirect_call
)
104 hints
&= ~INLINE_HINT_indirect_call
;
105 fprintf (f
, " indirect_call");
107 if (hints
& INLINE_HINT_loop_iterations
)
109 hints
&= ~INLINE_HINT_loop_iterations
;
110 fprintf (f
, " loop_iterations");
112 if (hints
& INLINE_HINT_loop_stride
)
114 hints
&= ~INLINE_HINT_loop_stride
;
115 fprintf (f
, " loop_stride");
117 if (hints
& INLINE_HINT_same_scc
)
119 hints
&= ~INLINE_HINT_same_scc
;
120 fprintf (f
, " same_scc");
122 if (hints
& INLINE_HINT_in_scc
)
124 hints
&= ~INLINE_HINT_in_scc
;
125 fprintf (f
, " in_scc");
127 if (hints
& INLINE_HINT_cross_module
)
129 hints
&= ~INLINE_HINT_cross_module
;
130 fprintf (f
, " cross_module");
132 if (hints
& INLINE_HINT_declared_inline
)
134 hints
&= ~INLINE_HINT_declared_inline
;
135 fprintf (f
, " declared_inline");
137 if (hints
& INLINE_HINT_array_index
)
139 hints
&= ~INLINE_HINT_array_index
;
140 fprintf (f
, " array_index");
142 if (hints
& INLINE_HINT_known_hot
)
144 hints
&= ~INLINE_HINT_known_hot
;
145 fprintf (f
, " known_hot");
151 /* Record SIZE and TIME to SUMMARY.
152 The accounted code will be executed when EXEC_PRED is true.
153 When NONCONST_PRED is false the code will evaulate to constant and
154 will get optimized out in specialized clones of the function. */
157 ipa_fn_summary::account_size_time (int size
, sreal time
,
158 const predicate
&exec_pred
,
159 const predicate
&nonconst_pred_in
)
164 predicate nonconst_pred
;
166 if (exec_pred
== false)
169 nonconst_pred
= nonconst_pred_in
& exec_pred
;
171 if (nonconst_pred
== false)
174 /* We need to create initial empty unconitional clause, but otherwie
175 we don't need to account empty times and sizes. */
176 if (!size
&& time
== 0 && size_time_table
)
179 gcc_assert (time
>= 0);
181 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
182 if (e
->exec_predicate
== exec_pred
183 && e
->nonconst_predicate
== nonconst_pred
)
192 e
= &(*size_time_table
)[0];
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
198 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size
) / ipa_fn_summary::size_scale
,
203 (time
.to_double ()), found
? "" : "new ");
204 exec_pred
.dump (dump_file
, conds
, 0);
205 if (exec_pred
!= nonconst_pred
)
207 fprintf (dump_file
, " nonconst:");
208 nonconst_pred
.dump (dump_file
, conds
);
211 fprintf (dump_file
, "\n");
215 struct size_time_entry new_entry
;
216 new_entry
.size
= size
;
217 new_entry
.time
= time
;
218 new_entry
.exec_predicate
= exec_pred
;
219 new_entry
.nonconst_predicate
= nonconst_pred
;
220 vec_safe_push (size_time_table
, new_entry
);
229 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
231 static struct cgraph_edge
*
232 redirect_to_unreachable (struct cgraph_edge
*e
)
234 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
235 struct cgraph_node
*target
= cgraph_node::get_create
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
239 e
= e
->resolve_speculation (target
->decl
);
241 e
->make_direct (target
);
243 e
->redirect_callee (target
);
244 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
245 e
->inline_failed
= CIF_UNREACHABLE
;
246 e
->count
= profile_count::zero ();
247 es
->call_stmt_size
= 0;
248 es
->call_stmt_time
= 0;
250 callee
->remove_symbol_and_inline_clones ();
254 /* Set predicate for edge E. */
257 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
262 if (predicate
&& *predicate
== false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e
->speculative
|| e
->callee
))
267 e
= redirect_to_unreachable (e
);
269 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
270 if (predicate
&& *predicate
!= true)
273 es
->predicate
= edge_predicate_pool
.allocate ();
274 *es
->predicate
= *predicate
;
279 edge_predicate_pool
.remove (es
->predicate
);
280 es
->predicate
= NULL
;
284 /* Set predicate for hint *P. */
287 set_hint_predicate (predicate
**p
, predicate new_predicate
)
289 if (new_predicate
== false || new_predicate
== true)
292 edge_predicate_pool
.remove (*p
);
298 *p
= edge_predicate_pool
.allocate ();
304 /* Compute what conditions may or may not hold given invormation about
305 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
306 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
307 copy when called in a given context. It is a bitmask of conditions. Bit
308 0 means that condition is known to be false, while bit 1 means that condition
309 may or may not be true. These differs - for example NOT_INLINED condition
310 is always false in the second and also builtin_constant_p tests cannot use
311 the fact that parameter is indeed a constant.
313 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
314 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
315 Return clause of possible truths. When INLINE_P is true, assume that we are
318 ERROR_MARK means compile time invariant. */
321 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
323 vec
<tree
> known_vals
,
324 vec
<ipa_agg_jump_function_p
>
326 clause_t
*ret_clause
,
327 clause_t
*ret_nonspec_clause
)
329 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
330 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
331 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
335 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
340 /* We allow call stmt to have fewer arguments than the callee function
341 (especially for K&R style programs). So bound check here (we assume
342 known_aggs vector, if non-NULL, has the same length as
344 gcc_checking_assert (!known_aggs
.exists ()
345 || (known_vals
.length () == known_aggs
.length ()));
346 if (c
->operand_num
>= (int) known_vals
.length ())
348 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
349 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
355 struct ipa_agg_jump_function
*agg
;
357 if (c
->code
== predicate::changed
359 && (known_vals
[c
->operand_num
] == error_mark_node
))
362 if (known_aggs
.exists ())
364 agg
= known_aggs
[c
->operand_num
];
365 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
366 c
->offset
, c
->by_ref
);
373 val
= known_vals
[c
->operand_num
];
374 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
380 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
381 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
384 if (c
->code
== predicate::changed
)
386 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val
))) != c
->size
)
392 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
393 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
396 if (c
->code
== predicate::is_not_constant
)
398 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
402 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (c
->val
), val
);
404 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
407 if (res
&& integer_zerop (res
))
410 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
411 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
413 *ret_clause
= clause
;
414 if (ret_nonspec_clause
)
415 *ret_nonspec_clause
= nonspec_clause
;
419 /* Work out what conditions might be true at invocation of E. */
422 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
423 clause_t
*clause_ptr
,
424 clause_t
*nonspec_clause_ptr
,
425 vec
<tree
> *known_vals_ptr
,
426 vec
<ipa_polymorphic_call_context
>
428 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
430 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
431 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
432 vec
<tree
> known_vals
= vNULL
;
433 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
436 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
438 known_vals_ptr
->create (0);
439 if (known_contexts_ptr
)
440 known_contexts_ptr
->create (0);
442 if (ipa_node_params_sum
443 && !e
->call_stmt_cannot_inline_p
444 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
))
446 struct ipa_node_params
*caller_parms_info
, *callee_pi
;
447 struct ipa_edge_args
*args
= IPA_EDGE_REF (e
);
448 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
449 int i
, count
= ipa_get_cs_argument_count (args
);
451 if (e
->caller
->global
.inlined_to
)
452 caller_parms_info
= IPA_NODE_REF (e
->caller
->global
.inlined_to
);
454 caller_parms_info
= IPA_NODE_REF (e
->caller
);
455 callee_pi
= IPA_NODE_REF (e
->callee
);
457 if (count
&& (info
->conds
|| known_vals_ptr
))
458 known_vals
.safe_grow_cleared (count
);
459 if (count
&& (info
->conds
|| known_aggs_ptr
))
460 known_aggs
.safe_grow_cleared (count
);
461 if (count
&& known_contexts_ptr
)
462 known_contexts_ptr
->safe_grow_cleared (count
);
464 for (i
= 0; i
< count
; i
++)
466 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
467 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
468 ipa_get_type (callee_pi
, i
));
470 if (!cst
&& e
->call_stmt
471 && i
< (int)gimple_call_num_args (e
->call_stmt
))
473 cst
= gimple_call_arg (e
->call_stmt
, i
);
474 if (!is_gimple_min_invariant (cst
))
479 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
480 if (known_vals
.exists ())
483 else if (inline_p
&& !es
->param
[i
].change_prob
)
484 known_vals
[i
] = error_mark_node
;
486 if (known_contexts_ptr
)
487 (*known_contexts_ptr
)[i
]
488 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs
[i
] = &jf
->agg
;
495 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
496 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
498 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
500 if (count
&& (info
->conds
|| known_vals_ptr
))
501 known_vals
.safe_grow_cleared (count
);
502 for (i
= 0; i
< count
; i
++)
504 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
505 if (!is_gimple_min_invariant (cst
))
512 evaluate_conditions_for_known_args (callee
, inline_p
,
513 known_vals
, known_aggs
, clause_ptr
,
517 *known_vals_ptr
= known_vals
;
519 known_vals
.release ();
522 *known_aggs_ptr
= known_aggs
;
524 known_aggs
.release ();
528 /* Allocate the function summary. */
531 ipa_fn_summary_alloc (void)
533 gcc_checking_assert (!ipa_fn_summaries
);
534 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
535 ipa_call_summaries
= new ipa_call_summary_t (symtab
, false);
538 ipa_call_summary::~ipa_call_summary ()
541 edge_predicate_pool
.remove (predicate
);
546 ipa_fn_summary::~ipa_fn_summary ()
549 edge_predicate_pool
.remove (loop_iterations
);
551 edge_predicate_pool
.remove (loop_stride
);
553 edge_predicate_pool
.remove (array_index
);
555 vec_free (size_time_table
);
559 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
562 for (e
= node
->callees
; e
; e
= e
->next_callee
)
563 ipa_call_summaries
->remove (e
);
564 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
565 ipa_call_summaries
->remove (e
);
568 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
569 Additionally care about allocating new memory slot for updated predicate
570 and set it to NULL when it becomes true or false (and thus uninteresting).
574 remap_hint_predicate_after_duplication (predicate
**p
,
575 clause_t possible_truths
)
577 predicate new_predicate
;
582 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
583 /* We do not want to free previous predicate; it is used by node origin. */
585 set_hint_predicate (p
, new_predicate
);
589 /* Hook that is called by cgraph.c when a node is duplicated. */
591 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
594 ipa_fn_summary
*info
)
596 new (info
) ipa_fn_summary (*ipa_fn_summaries
->get (src
));
597 /* TODO: as an optimization, we may avoid copying conditions
598 that are known to be false or true. */
599 info
->conds
= vec_safe_copy (info
->conds
);
601 /* When there are any replacements in the function body, see if we can figure
602 out that something was optimized out. */
603 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
605 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
606 /* Use SRC parm info since it may not be copied yet. */
607 struct ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
608 vec
<tree
> known_vals
= vNULL
;
609 int count
= ipa_get_param_count (parms_info
);
611 clause_t possible_truths
;
612 predicate true_pred
= true;
614 int optimized_out_size
= 0;
615 bool inlined_to_p
= false;
616 struct cgraph_edge
*edge
, *next
;
618 info
->size_time_table
= 0;
619 known_vals
.safe_grow_cleared (count
);
620 for (i
= 0; i
< count
; i
++)
622 struct ipa_replace_map
*r
;
624 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
626 if (((!r
->old_tree
&& r
->parm_num
== i
)
627 || (r
->old_tree
&& r
->old_tree
== ipa_get_param (parms_info
, i
)))
628 && r
->replace_p
&& !r
->ref_p
)
630 known_vals
[i
] = r
->new_tree
;
635 evaluate_conditions_for_known_args (dst
, false,
639 /* We are going to specialize,
640 so ignore nonspec truths. */
642 known_vals
.release ();
644 info
->account_size_time (0, 0, true_pred
, true_pred
);
646 /* Remap size_time vectors.
647 Simplify the predicate by prunning out alternatives that are known
649 TODO: as on optimization, we can also eliminate conditions known
651 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
653 predicate new_exec_pred
;
654 predicate new_nonconst_pred
;
655 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
657 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
659 if (new_exec_pred
== false || new_nonconst_pred
== false)
660 optimized_out_size
+= e
->size
;
662 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
666 /* Remap edge predicates with the same simplification as above.
667 Also copy constantness arrays. */
668 for (edge
= dst
->callees
; edge
; edge
= next
)
670 predicate new_predicate
;
671 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
672 next
= edge
->next_callee
;
674 if (!edge
->inline_failed
)
678 new_predicate
= es
->predicate
->remap_after_duplication
680 if (new_predicate
== false && *es
->predicate
!= false)
681 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
682 edge_set_predicate (edge
, &new_predicate
);
685 /* Remap indirect edge predicates with the same simplificaiton as above.
686 Also copy constantness arrays. */
687 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
689 predicate new_predicate
;
690 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
691 next
= edge
->next_callee
;
693 gcc_checking_assert (edge
->inline_failed
);
696 new_predicate
= es
->predicate
->remap_after_duplication
698 if (new_predicate
== false && *es
->predicate
!= false)
699 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
700 edge_set_predicate (edge
, &new_predicate
);
702 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
704 remap_hint_predicate_after_duplication (&info
->loop_stride
,
706 remap_hint_predicate_after_duplication (&info
->array_index
,
709 /* If inliner or someone after inliner will ever start producing
710 non-trivial clones, we will get trouble with lack of information
711 about updating self sizes, because size vectors already contains
712 sizes of the calees. */
713 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
717 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
718 if (info
->loop_iterations
)
720 predicate p
= *info
->loop_iterations
;
721 info
->loop_iterations
= NULL
;
722 set_hint_predicate (&info
->loop_iterations
, p
);
724 if (info
->loop_stride
)
726 predicate p
= *info
->loop_stride
;
727 info
->loop_stride
= NULL
;
728 set_hint_predicate (&info
->loop_stride
, p
);
730 if (info
->array_index
)
732 predicate p
= *info
->array_index
;
733 info
->array_index
= NULL
;
734 set_hint_predicate (&info
->array_index
, p
);
737 if (!dst
->global
.inlined_to
)
738 ipa_update_overall_fn_summary (dst
);
742 /* Hook that is called by cgraph.c when a node is duplicated. */
745 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
746 struct cgraph_edge
*dst
,
747 struct ipa_call_summary
*srcinfo
,
748 struct ipa_call_summary
*info
)
750 new (info
) ipa_call_summary (*srcinfo
);
751 info
->predicate
= NULL
;
752 edge_set_predicate (dst
, srcinfo
->predicate
);
753 info
->param
= srcinfo
->param
.copy ();
754 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
756 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
757 - eni_size_weights
.call_cost
);
758 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
759 - eni_time_weights
.call_cost
);
763 /* Dump edge summaries associated to NODE and recursively to all clones.
767 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
768 struct ipa_fn_summary
*info
)
770 struct cgraph_edge
*edge
;
771 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
773 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
774 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
778 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
779 indent
, "", callee
->name (), callee
->order
,
781 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
782 indent
, "", es
->loop_depth
, edge
->sreal_frequency ().to_double (),
783 es
->call_stmt_size
, es
->call_stmt_time
);
785 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
787 fprintf (f
, "callee size:%2i stack:%2i",
788 (int) (s
->size
/ ipa_fn_summary::size_scale
),
789 (int) s
->estimated_stack_size
);
793 fprintf (f
, " predicate: ");
794 es
->predicate
->dump (f
, info
->conds
);
798 if (es
->param
.exists ())
799 for (i
= 0; i
< (int) es
->param
.length (); i
++)
801 int prob
= es
->param
[i
].change_prob
;
804 fprintf (f
, "%*s op%i is compile time invariant\n",
806 else if (prob
!= REG_BR_PROB_BASE
)
807 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
808 prob
* 100.0 / REG_BR_PROB_BASE
);
810 if (!edge
->inline_failed
)
812 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
813 fprintf (f
, "%*sStack frame offset %i, callee self size %i,"
816 (int) s
->stack_frame_offset
,
817 (int) s
->estimated_self_stack_size
,
818 (int) s
->estimated_stack_size
);
819 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
822 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
824 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
825 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
829 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
833 fprintf (f
, "predicate: ");
834 es
->predicate
->dump (f
, info
->conds
);
843 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
845 if (node
->definition
)
847 struct ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
852 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
853 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
854 fprintf (f
, " always_inline");
856 fprintf (f
, " inlinable");
857 if (s
->fp_expressions
)
858 fprintf (f
, " fp_expression");
859 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
860 fprintf (f
, " self size: %i\n", s
->self_size
);
861 fprintf (f
, " global size: %i\n", s
->size
);
862 fprintf (f
, " min size: %i\n", s
->min_size
);
863 fprintf (f
, " self stack: %i\n",
864 (int) s
->estimated_self_stack_size
);
865 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
867 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
869 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
870 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
872 fprintf (f
, " size:%f, time:%f",
873 (double) e
->size
/ ipa_fn_summary::size_scale
,
874 e
->time
.to_double ());
875 if (e
->exec_predicate
!= true)
877 fprintf (f
, ", executed if:");
878 e
->exec_predicate
.dump (f
, s
->conds
, 0);
880 if (e
->exec_predicate
!= e
->nonconst_predicate
)
882 fprintf (f
, ", nonconst if:");
883 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
887 if (s
->loop_iterations
)
889 fprintf (f
, " loop iterations:");
890 s
->loop_iterations
->dump (f
, s
->conds
);
894 fprintf (f
, " loop stride:");
895 s
->loop_stride
->dump (f
, s
->conds
);
899 fprintf (f
, " array index:");
900 s
->array_index
->dump (f
, s
->conds
);
902 fprintf (f
, " calls:\n");
903 dump_ipa_call_summary (f
, 4, node
, s
);
907 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
912 ipa_debug_fn_summary (struct cgraph_node
*node
)
914 ipa_dump_fn_summary (stderr
, node
);
918 ipa_dump_fn_summaries (FILE *f
)
920 struct cgraph_node
*node
;
922 FOR_EACH_DEFINED_FUNCTION (node
)
923 if (!node
->global
.inlined_to
)
924 ipa_dump_fn_summary (f
, node
);
927 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
928 boolean variable pointed to by DATA. */
931 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
934 bool *b
= (bool *) data
;
939 /* If OP refers to value of function parameter, return the corresponding
940 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
941 PARM_DECL) will be stored to *SIZE_P in that case too. */
944 unmodified_parm_1 (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
946 /* SSA_NAME referring to parm default def? */
947 if (TREE_CODE (op
) == SSA_NAME
948 && SSA_NAME_IS_DEFAULT_DEF (op
)
949 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
952 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
953 return SSA_NAME_VAR (op
);
955 /* Non-SSA parm reference? */
956 if (TREE_CODE (op
) == PARM_DECL
)
958 bool modified
= false;
961 ao_ref_init (&refd
, op
);
962 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), mark_modified
, &modified
,
967 *size_p
= tree_to_shwi (TYPE_SIZE (TREE_TYPE (op
)));
974 /* If OP refers to value of function parameter, return the corresponding
975 parameter. Also traverse chains of SSA register assignments. If non-NULL,
976 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
977 stored to *SIZE_P in that case too. */
980 unmodified_parm (gimple
*stmt
, tree op
, HOST_WIDE_INT
*size_p
)
982 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
986 if (TREE_CODE (op
) == SSA_NAME
987 && !SSA_NAME_IS_DEFAULT_DEF (op
)
988 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
989 return unmodified_parm (SSA_NAME_DEF_STMT (op
),
990 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
995 /* If OP refers to a value of a function parameter or value loaded from an
996 aggregate passed to a parameter (either by value or reference), return TRUE
997 and store the number of the parameter to *INDEX_P, the access size into
998 *SIZE_P, and information whether and how it has been loaded from an
999 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1000 statement in which OP is used or loaded. */
1003 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1004 gimple
*stmt
, tree op
, int *index_p
,
1005 HOST_WIDE_INT
*size_p
,
1006 struct agg_position_info
*aggpos
)
1008 tree res
= unmodified_parm_1 (stmt
, op
, size_p
);
1010 gcc_checking_assert (aggpos
);
1013 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1016 aggpos
->agg_contents
= false;
1017 aggpos
->by_ref
= false;
1021 if (TREE_CODE (op
) == SSA_NAME
)
1023 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1024 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1026 stmt
= SSA_NAME_DEF_STMT (op
);
1027 op
= gimple_assign_rhs1 (stmt
);
1028 if (!REFERENCE_CLASS_P (op
))
1029 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1033 aggpos
->agg_contents
= true;
1034 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1035 stmt
, op
, index_p
, &aggpos
->offset
,
1036 size_p
, &aggpos
->by_ref
);
1039 /* See if statement might disappear after inlining.
1040 0 - means not eliminated
1041 1 - half of statements goes away
1042 2 - for sure it is eliminated.
1043 We are not terribly sophisticated, basically looking for simple abstraction
1044 penalty wrappers. */
1047 eliminated_by_inlining_prob (gimple
*stmt
)
1049 enum gimple_code code
= gimple_code (stmt
);
1050 enum tree_code rhs_code
;
1060 if (gimple_num_ops (stmt
) != 2)
1063 rhs_code
= gimple_assign_rhs_code (stmt
);
1065 /* Casts of parameters, loads from parameters passed by reference
1066 and stores to return value or parameters are often free after
1067 inlining dua to SRA and further combining.
1068 Assume that half of statements goes away. */
1069 if (CONVERT_EXPR_CODE_P (rhs_code
)
1070 || rhs_code
== VIEW_CONVERT_EXPR
1071 || rhs_code
== ADDR_EXPR
1072 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1074 tree rhs
= gimple_assign_rhs1 (stmt
);
1075 tree lhs
= gimple_assign_lhs (stmt
);
1076 tree inner_rhs
= get_base_address (rhs
);
1077 tree inner_lhs
= get_base_address (lhs
);
1078 bool rhs_free
= false;
1079 bool lhs_free
= false;
1086 /* Reads of parameter are expected to be free. */
1087 if (unmodified_parm (stmt
, inner_rhs
, NULL
))
1089 /* Match expressions of form &this->field. Those will most likely
1090 combine with something upstream after inlining. */
1091 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1093 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1094 if (TREE_CODE (op
) == PARM_DECL
)
1096 else if (TREE_CODE (op
) == MEM_REF
1097 && unmodified_parm (stmt
, TREE_OPERAND (op
, 0), NULL
))
1101 /* When parameter is not SSA register because its address is taken
1102 and it is just copied into one, the statement will be completely
1103 free after inlining (we will copy propagate backward). */
1104 if (rhs_free
&& is_gimple_reg (lhs
))
1107 /* Reads of parameters passed by reference
1108 expected to be free (i.e. optimized out after inlining). */
1109 if (TREE_CODE (inner_rhs
) == MEM_REF
1110 && unmodified_parm (stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1113 /* Copying parameter passed by reference into gimple register is
1114 probably also going to copy propagate, but we can't be quite
1116 if (rhs_free
&& is_gimple_reg (lhs
))
1119 /* Writes to parameters, parameters passed by value and return value
1120 (either dirrectly or passed via invisible reference) are free.
1122 TODO: We ought to handle testcase like
1123 struct a {int a,b;};
1125 retrurnsturct (void)
1131 This translate into:
1146 For that we either need to copy ipa-split logic detecting writes
1148 if (TREE_CODE (inner_lhs
) == PARM_DECL
1149 || TREE_CODE (inner_lhs
) == RESULT_DECL
1150 || (TREE_CODE (inner_lhs
) == MEM_REF
1151 && (unmodified_parm (stmt
, TREE_OPERAND (inner_lhs
, 0), NULL
)
1152 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1153 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1154 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1156 0))) == RESULT_DECL
))))
1159 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1161 if (lhs_free
&& rhs_free
)
1171 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1172 predicates to the CFG edges. */
1175 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1176 struct ipa_fn_summary
*summary
,
1183 struct agg_position_info aggpos
;
1184 enum tree_code code
, inverted_code
;
1190 last
= last_stmt (bb
);
1191 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1193 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1195 op
= gimple_cond_lhs (last
);
1196 /* TODO: handle conditionals like
1199 if (unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1201 code
= gimple_cond_code (last
);
1202 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1204 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1206 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1207 ? code
: inverted_code
);
1208 /* invert_tree_comparison will return ERROR_MARK on FP
1209 comparsions that are not EQ/NE instead of returning proper
1210 unordered one. Be sure it is not confused with NON_CONSTANT. */
1211 if (this_code
!= ERROR_MARK
)
1214 = add_condition (summary
, index
, size
, &aggpos
, this_code
,
1215 unshare_expr_without_location
1216 (gimple_cond_rhs (last
)));
1217 e
->aux
= edge_predicate_pool
.allocate ();
1218 *(predicate
*) e
->aux
= p
;
1223 if (TREE_CODE (op
) != SSA_NAME
)
1226 if (builtin_constant_p (op))
1230 Here we can predicate nonconstant_code. We can't
1231 really handle constant_code since we have no predicate
1232 for this and also the constant code is not known to be
1233 optimized away when inliner doen't see operand is constant.
1234 Other optimizers might think otherwise. */
1235 if (gimple_cond_code (last
) != NE_EXPR
1236 || !integer_zerop (gimple_cond_rhs (last
)))
1238 set_stmt
= SSA_NAME_DEF_STMT (op
);
1239 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1240 || gimple_call_num_args (set_stmt
) != 1)
1242 op2
= gimple_call_arg (set_stmt
, 0);
1243 if (!unmodified_parm_or_parm_agg_item (fbi
, set_stmt
, op2
, &index
, &size
,
1246 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1248 predicate p
= add_condition (summary
, index
, size
, &aggpos
,
1249 predicate::is_not_constant
, NULL_TREE
);
1250 e
->aux
= edge_predicate_pool
.allocate ();
1251 *(predicate
*) e
->aux
= p
;
1256 /* If BB ends by a switch we can turn into predicates, attach corresponding
1257 predicates to the CFG edges. */
1260 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1261 struct ipa_fn_summary
*summary
,
1268 struct agg_position_info aggpos
;
1274 lastg
= last_stmt (bb
);
1275 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1277 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1278 op
= gimple_switch_index (last
);
1279 if (!unmodified_parm_or_parm_agg_item (fbi
, last
, op
, &index
, &size
, &aggpos
))
1282 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1284 e
->aux
= edge_predicate_pool
.allocate ();
1285 *(predicate
*) e
->aux
= false;
1287 n
= gimple_switch_num_labels (last
);
1288 for (case_idx
= 0; case_idx
< n
; ++case_idx
)
1290 tree cl
= gimple_switch_label (last
, case_idx
);
1294 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1295 min
= CASE_LOW (cl
);
1296 max
= CASE_HIGH (cl
);
1298 /* For default we might want to construct predicate that none
1299 of cases is met, but it is bit hard to do not having negations
1300 of conditionals handy. */
1304 p
= add_condition (summary
, index
, size
, &aggpos
, EQ_EXPR
,
1305 unshare_expr_without_location (min
));
1309 p1
= add_condition (summary
, index
, size
, &aggpos
, GE_EXPR
,
1310 unshare_expr_without_location (min
));
1311 p2
= add_condition (summary
, index
, size
, &aggpos
, LE_EXPR
,
1312 unshare_expr_without_location (max
));
1315 *(struct predicate
*) e
->aux
1316 = p
.or_with (summary
->conds
, *(struct predicate
*) e
->aux
);
1321 /* For each BB in NODE attach to its AUX pointer predicate under
1322 which it is executable. */
1325 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1326 struct cgraph_node
*node
,
1327 struct ipa_fn_summary
*summary
)
1329 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1333 FOR_EACH_BB_FN (bb
, my_function
)
1335 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1336 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1339 /* Entry block is always executable. */
1340 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1341 = edge_predicate_pool
.allocate ();
1342 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1344 /* A simple dataflow propagation of predicates forward in the CFG.
1345 TODO: work in reverse postorder. */
1349 FOR_EACH_BB_FN (bb
, my_function
)
1351 predicate p
= false;
1354 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1358 predicate this_bb_predicate
1359 = *(predicate
*) e
->src
->aux
;
1361 this_bb_predicate
&= (*(struct predicate
*) e
->aux
);
1362 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1368 gcc_checking_assert (!bb
->aux
);
1374 bb
->aux
= edge_predicate_pool
.allocate ();
1375 *((predicate
*) bb
->aux
) = p
;
1377 else if (p
!= *(predicate
*) bb
->aux
)
1379 /* This OR operation is needed to ensure monotonous data flow
1380 in the case we hit the limit on number of clauses and the
1381 and/or operations above give approximate answers. */
1382 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1383 if (p
!= *(predicate
*) bb
->aux
)
1386 *((predicate
*) bb
->aux
) = p
;
1395 /* Return predicate specifying when the STMT might have result that is not
1396 a compile time constant. */
1399 will_be_nonconstant_expr_predicate (struct ipa_node_params
*info
,
1400 struct ipa_fn_summary
*summary
,
1402 vec
<predicate
> nonconstant_names
)
1408 while (UNARY_CLASS_P (expr
))
1409 expr
= TREE_OPERAND (expr
, 0);
1411 parm
= unmodified_parm (NULL
, expr
, &size
);
1412 if (parm
&& (index
= ipa_get_param_decl_index (info
, parm
)) >= 0)
1413 return add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1415 if (is_gimple_min_invariant (expr
))
1417 if (TREE_CODE (expr
) == SSA_NAME
)
1418 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1419 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1421 predicate p1
= will_be_nonconstant_expr_predicate
1422 (info
, summary
, TREE_OPERAND (expr
, 0),
1428 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1429 TREE_OPERAND (expr
, 1),
1431 return p1
.or_with (summary
->conds
, p2
);
1433 else if (TREE_CODE (expr
) == COND_EXPR
)
1435 predicate p1
= will_be_nonconstant_expr_predicate
1436 (info
, summary
, TREE_OPERAND (expr
, 0),
1442 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1443 TREE_OPERAND (expr
, 1),
1447 p1
= p1
.or_with (summary
->conds
, p2
);
1448 p2
= will_be_nonconstant_expr_predicate (info
, summary
,
1449 TREE_OPERAND (expr
, 2),
1451 return p2
.or_with (summary
->conds
, p1
);
1453 else if (TREE_CODE (expr
) == CALL_EXPR
)
1464 /* Return predicate specifying when the STMT might have result that is not
1465 a compile time constant. */
1468 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1469 struct ipa_fn_summary
*summary
,
1471 vec
<predicate
> nonconstant_names
)
1476 predicate op_non_const
;
1480 struct agg_position_info aggpos
;
1482 /* What statments might be optimized away
1483 when their arguments are constant. */
1484 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1485 && gimple_code (stmt
) != GIMPLE_COND
1486 && gimple_code (stmt
) != GIMPLE_SWITCH
1487 && (gimple_code (stmt
) != GIMPLE_CALL
1488 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1491 /* Stores will stay anyway. */
1492 if (gimple_store_p (stmt
))
1495 is_load
= gimple_assign_load_p (stmt
);
1497 /* Loads can be optimized when the value is known. */
1501 gcc_assert (gimple_assign_single_p (stmt
));
1502 op
= gimple_assign_rhs1 (stmt
);
1503 if (!unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, &base_index
, &size
,
1510 /* See if we understand all operands before we start
1511 adding conditionals. */
1512 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1514 tree parm
= unmodified_parm (stmt
, use
, NULL
);
1515 /* For arguments we can build a condition. */
1516 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1518 if (TREE_CODE (use
) != SSA_NAME
)
1520 /* If we know when operand is constant,
1521 we still can say something useful. */
1522 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1529 add_condition (summary
, base_index
, size
, &aggpos
, predicate::changed
,
1532 op_non_const
= false;
1533 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1536 tree parm
= unmodified_parm (stmt
, use
, &size
);
1539 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1541 if (index
!= base_index
)
1542 p
= add_condition (summary
, index
, size
, NULL
, predicate::changed
,
1548 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1549 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1551 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1552 && gimple_op (stmt
, 0)
1553 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1554 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1556 return op_non_const
;
1559 struct record_modified_bb_info
1566 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1567 probability how often it changes between USE_BB.
1568 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1569 is in different loop nest, we can do better.
1570 This is all just estimate. In theory we look for minimal cut separating
1571 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1575 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1577 struct loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1578 if (l
&& l
->header
->count
< init_bb
->count
)
1583 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1584 set except for info->stmt. */
1587 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1589 struct record_modified_bb_info
*info
=
1590 (struct record_modified_bb_info
*) data
;
1591 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1593 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
1595 bitmap_set_bit (info
->bb_set
,
1596 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1597 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1599 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1600 gimple_bb (info
->stmt
))->index
);
1603 fprintf (dump_file
, " Param ");
1604 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1605 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1606 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1608 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1609 gimple_bb (info
->stmt
))->index
);
1610 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
1615 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1616 will change since last invocation of STMT.
1618 Value 0 is reserved for compile time invariants.
1619 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1620 ought to be REG_BR_PROB_BASE / estimated_iters. */
1623 param_change_prob (gimple
*stmt
, int i
)
1625 tree op
= gimple_call_arg (stmt
, i
);
1626 basic_block bb
= gimple_bb (stmt
);
1628 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1629 op
= TREE_OPERAND (op
, 0);
1631 tree base
= get_base_address (op
);
1633 /* Global invariants never change. */
1634 if (is_gimple_min_invariant (base
))
1637 /* We would have to do non-trivial analysis to really work out what
1638 is the probability of value to change (i.e. when init statement
1639 is in a sibling loop of the call).
1641 We do an conservative estimate: when call is executed N times more often
1642 than the statement defining value, we take the frequency 1/N. */
1643 if (TREE_CODE (base
) == SSA_NAME
)
1645 profile_count init_count
;
1647 if (!bb
->count
.nonzero_p ())
1648 return REG_BR_PROB_BASE
;
1650 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1651 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1653 init_count
= get_minimal_bb
1654 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1655 gimple_bb (stmt
))->count
;
1657 if (init_count
< bb
->count
)
1658 return MAX ((init_count
.to_sreal_scale (bb
->count
)
1659 * REG_BR_PROB_BASE
).to_int (), 1);
1660 return REG_BR_PROB_BASE
;
1665 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1666 struct record_modified_bb_info info
;
1667 tree init
= ctor_for_folding (base
);
1669 if (init
!= error_mark_node
)
1671 if (!bb
->count
.nonzero_p ())
1672 return REG_BR_PROB_BASE
;
1675 fprintf (dump_file
, " Analyzing param change probablity of ");
1676 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1677 fprintf (dump_file
, "\n");
1679 ao_ref_init (&refd
, op
);
1682 info
.bb_set
= BITMAP_ALLOC (NULL
);
1683 walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1685 if (bitmap_bit_p (info
.bb_set
, bb
->index
))
1688 fprintf (dump_file
, " Set in same BB as used.\n");
1689 BITMAP_FREE (info
.bb_set
);
1690 return REG_BR_PROB_BASE
;
1695 /* Lookup the most frequent update of the value and believe that
1696 it dominates all the other; precise analysis here is difficult. */
1697 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
1698 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
1701 fprintf (dump_file
, " Set with count ");
1702 max
.dump (dump_file
);
1703 fprintf (dump_file
, " and used with count ");
1704 bb
->count
.dump (dump_file
);
1705 fprintf (dump_file
, " freq %f\n",
1706 max
.to_sreal_scale (bb
->count
).to_double ());
1709 BITMAP_FREE (info
.bb_set
);
1710 if (max
< bb
->count
)
1711 return MAX ((max
.to_sreal_scale (bb
->count
)
1712 * REG_BR_PROB_BASE
).to_int (), 1);
1713 return REG_BR_PROB_BASE
;
1717 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1718 sub-graph and if the predicate the condition depends on is known. If so,
1719 return true and store the pointer the predicate in *P. */
1722 phi_result_unknown_predicate (struct ipa_node_params
*info
,
1723 ipa_fn_summary
*summary
, basic_block bb
,
1725 vec
<predicate
> nonconstant_names
)
1729 basic_block first_bb
= NULL
;
1732 if (single_pred_p (bb
))
1738 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1740 if (single_succ_p (e
->src
))
1742 if (!single_pred_p (e
->src
))
1745 first_bb
= single_pred (e
->src
);
1746 else if (single_pred (e
->src
) != first_bb
)
1753 else if (e
->src
!= first_bb
)
1761 stmt
= last_stmt (first_bb
);
1763 || gimple_code (stmt
) != GIMPLE_COND
1764 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
1767 *p
= will_be_nonconstant_expr_predicate (info
, summary
,
1768 gimple_cond_lhs (stmt
),
1776 /* Given a PHI statement in a function described by inline properties SUMMARY
1777 and *P being the predicate describing whether the selected PHI argument is
1778 known, store a predicate for the result of the PHI statement into
1779 NONCONSTANT_NAMES, if possible. */
1782 predicate_for_phi_result (struct ipa_fn_summary
*summary
, gphi
*phi
,
1784 vec
<predicate
> nonconstant_names
)
1788 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
1790 tree arg
= gimple_phi_arg (phi
, i
)->def
;
1791 if (!is_gimple_min_invariant (arg
))
1793 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
1794 *p
= p
->or_with (summary
->conds
,
1795 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
1801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1803 fprintf (dump_file
, "\t\tphi predicate: ");
1804 p
->dump (dump_file
, summary
->conds
);
1806 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
1809 /* Return predicate specifying when array index in access OP becomes non-constant. */
1812 array_index_predicate (ipa_fn_summary
*info
,
1813 vec
< predicate
> nonconstant_names
, tree op
)
1815 predicate p
= false;
1816 while (handled_component_p (op
))
1818 if (TREE_CODE (op
) == ARRAY_REF
|| TREE_CODE (op
) == ARRAY_RANGE_REF
)
1820 if (TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
1821 p
= p
.or_with (info
->conds
,
1822 nonconstant_names
[SSA_NAME_VERSION
1823 (TREE_OPERAND (op
, 1))]);
1825 op
= TREE_OPERAND (op
, 0);
1830 /* For a typical usage of __builtin_expect (a<b, 1), we
1831 may introduce an extra relation stmt:
1832 With the builtin, we have
1835 t3 = __builtin_expect (t2, 1);
1838 Without the builtin, we have
1841 This affects the size/time estimation and may have
1842 an impact on the earlier inlining.
1843 Here find this pattern and fix it up later. */
1846 find_foldable_builtin_expect (basic_block bb
)
1848 gimple_stmt_iterator bsi
;
1850 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1852 gimple
*stmt
= gsi_stmt (bsi
);
1853 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
1854 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
1855 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
1857 tree var
= gimple_call_lhs (stmt
);
1858 tree arg
= gimple_call_arg (stmt
, 0);
1859 use_operand_p use_p
;
1866 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
1868 while (TREE_CODE (arg
) == SSA_NAME
)
1870 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
1871 if (!is_gimple_assign (stmt_tmp
))
1873 switch (gimple_assign_rhs_code (stmt_tmp
))
1892 arg
= gimple_assign_rhs1 (stmt_tmp
);
1895 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
1896 && gimple_code (use_stmt
) == GIMPLE_COND
)
1903 /* Return true when the basic blocks contains only clobbers followed by RESX.
1904 Such BBs are kept around to make removal of dead stores possible with
1905 presence of EH and will be optimized out by optimize_clobbers later in the
1908 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1909 that can be clobber only, too.. When it is false, the RESX is not necessary
1910 on the end of basic block. */
1913 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
1915 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
1921 if (gsi_end_p (gsi
))
1923 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
1927 else if (!single_succ_p (bb
))
1930 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
1932 gimple
*stmt
= gsi_stmt (gsi
);
1933 if (is_gimple_debug (stmt
))
1935 if (gimple_clobber_p (stmt
))
1937 if (gimple_code (stmt
) == GIMPLE_LABEL
)
1942 /* See if all predecestors are either throws or clobber only BBs. */
1943 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1944 if (!(e
->flags
& EDGE_EH
)
1945 && !clobber_only_eh_bb_p (e
->src
, false))
1951 /* Return true if STMT compute a floating point expression that may be affected
1952 by -ffast-math and similar flags. */
1955 fp_expression_p (gimple
*stmt
)
1960 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
1961 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
1966 /* Analyze function body for NODE.
1967 EARLY indicates run from early optimization pipeline. */
1970 analyze_function_body (struct cgraph_node
*node
, bool early
)
1972 sreal time
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
);
1973 /* Estimate static overhead for function prologue/epilogue and alignment. */
1974 int size
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
);
1975 /* Benefits are scaled by probability of elimination that is in range
1978 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1980 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
1981 predicate bb_predicate
;
1982 struct ipa_func_body_info fbi
;
1983 vec
<predicate
> nonconstant_names
= vNULL
;
1986 predicate array_index
= true;
1987 gimple
*fix_builtin_expect_stmt
;
1989 gcc_assert (my_function
&& my_function
->cfg
);
1990 gcc_assert (cfun
== my_function
);
1992 memset(&fbi
, 0, sizeof(fbi
));
1993 vec_free (info
->conds
);
1995 vec_free (info
->size_time_table
);
1996 info
->size_time_table
= NULL
;
1998 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
1999 so we can produce proper inline hints.
2001 When optimizing and analyzing for early inliner, initialize node params
2002 so we can produce correct BB predicates. */
2004 if (opt_for_fn (node
->decl
, optimize
))
2006 calculate_dominance_info (CDI_DOMINATORS
);
2008 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2011 ipa_check_create_node_params ();
2012 ipa_initialize_node_params (node
);
2015 if (ipa_node_params_sum
)
2018 fbi
.info
= IPA_NODE_REF (node
);
2019 fbi
.bb_infos
= vNULL
;
2020 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2021 fbi
.param_count
= count_formal_params(node
->decl
);
2022 nonconstant_names
.safe_grow_cleared
2023 (SSANAMES (my_function
)->length ());
2028 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2031 /* When we run into maximal number of entries, we assign everything to the
2032 constant truth case. Be sure to have it in list. */
2033 bb_predicate
= true;
2034 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2036 bb_predicate
= predicate::not_inlined ();
2037 info
->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
)
2038 * ipa_fn_summary::size_scale
,
2039 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
),
2044 compute_bb_predicates (&fbi
, node
, info
);
2045 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2046 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2047 for (n
= 0; n
< nblocks
; n
++)
2049 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2050 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2051 if (clobber_only_eh_bb_p (bb
))
2053 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2054 fprintf (dump_file
, "\n Ignoring BB %i;"
2055 " it will be optimized away by cleanup_clobbers\n",
2060 /* TODO: Obviously predicates can be propagated down across CFG. */
2064 bb_predicate
= *(predicate
*) bb
->aux
;
2066 bb_predicate
= false;
2069 bb_predicate
= true;
2071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2073 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2074 bb_predicate
.dump (dump_file
, info
->conds
);
2077 if (fbi
.info
&& nonconstant_names
.exists ())
2079 predicate phi_predicate
;
2080 bool first_phi
= true;
2082 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2086 && !phi_result_unknown_predicate (fbi
.info
, info
, bb
,
2091 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2093 fprintf (dump_file
, " ");
2094 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2096 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2101 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2103 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
2106 gimple
*stmt
= gsi_stmt (bsi
);
2107 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2108 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2110 predicate will_be_nonconstant
;
2112 /* This relation stmt should be folded after we remove
2113 buildin_expect call. Adjust the cost here. */
2114 if (stmt
== fix_builtin_expect_stmt
)
2120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2122 fprintf (dump_file
, " ");
2123 print_gimple_stmt (dump_file
, stmt
, 0);
2124 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2125 freq
.to_double (), this_size
,
2129 if (gimple_assign_load_p (stmt
) && nonconstant_names
.exists ())
2131 predicate this_array_index
;
2133 array_index_predicate (info
, nonconstant_names
,
2134 gimple_assign_rhs1 (stmt
));
2135 if (this_array_index
!= false)
2136 array_index
&= this_array_index
;
2138 if (gimple_store_p (stmt
) && nonconstant_names
.exists ())
2140 predicate this_array_index
;
2142 array_index_predicate (info
, nonconstant_names
,
2143 gimple_get_lhs (stmt
));
2144 if (this_array_index
!= false)
2145 array_index
&= this_array_index
;
2149 if (is_gimple_call (stmt
)
2150 && !gimple_call_internal_p (stmt
))
2152 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2153 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2155 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2156 resolved as constant. We however don't want to optimize
2157 out the cgraph edges. */
2158 if (nonconstant_names
.exists ()
2159 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2160 && gimple_call_lhs (stmt
)
2161 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2163 predicate false_p
= false;
2164 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2167 if (ipa_node_params_sum
)
2169 int count
= gimple_call_num_args (stmt
);
2173 es
->param
.safe_grow_cleared (count
);
2174 for (i
= 0; i
< count
; i
++)
2176 int prob
= param_change_prob (stmt
, i
);
2177 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2178 es
->param
[i
].change_prob
= prob
;
2182 es
->call_stmt_size
= this_size
;
2183 es
->call_stmt_time
= this_time
;
2184 es
->loop_depth
= bb_loop_depth (bb
);
2185 edge_set_predicate (edge
, &bb_predicate
);
2186 if (edge
->speculative
)
2188 cgraph_edge
*direct
, *indirect
;
2190 edge
->speculative_call_info (direct
, indirect
, ref
);
2191 gcc_assert (direct
== edge
);
2192 ipa_call_summary
*es2
2193 = ipa_call_summaries
->get_create (indirect
);
2194 ipa_call_summaries
->duplicate (edge
, indirect
,
2199 /* TODO: When conditional jump or swithc is known to be constant, but
2200 we did not translate it into the predicates, we really can account
2201 just maximum of the possible paths. */
2204 = will_be_nonconstant_predicate (&fbi
, info
,
2205 stmt
, nonconstant_names
);
2207 will_be_nonconstant
= true;
2208 if (this_time
|| this_size
)
2210 sreal final_time
= (sreal
)this_time
* freq
;
2212 prob
= eliminated_by_inlining_prob (stmt
);
2213 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2215 "\t\t50%% will be eliminated by inlining\n");
2216 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2217 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2219 struct predicate p
= bb_predicate
& will_be_nonconstant
;
2221 /* We can ignore statement when we proved it is never going
2222 to happen, but we cannot do that for call statements
2223 because edges are accounted specially. */
2225 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2231 /* We account everything but the calls. Calls have their own
2232 size/time info attached to cgraph edges. This is necessary
2233 in order to make the cost disappear after inlining. */
2234 if (!is_gimple_call (stmt
))
2238 predicate ip
= bb_predicate
& predicate::not_inlined ();
2239 info
->account_size_time (this_size
* prob
,
2240 (final_time
* prob
) / 2, ip
,
2244 info
->account_size_time (this_size
* (2 - prob
),
2245 (final_time
* (2 - prob
) / 2),
2250 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2252 info
->fp_expressions
= true;
2254 fprintf (dump_file
, " fp_expression set\n");
2257 gcc_assert (time
>= 0);
2258 gcc_assert (size
>= 0);
2262 set_hint_predicate (&ipa_fn_summaries
->get_create (node
)->array_index
,
2266 if (nonconstant_names
.exists () && !early
)
2269 predicate loop_iterations
= true;
2270 predicate loop_stride
= true;
2272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2273 flow_loops_dump (dump_file
, NULL
, 0);
2275 FOR_EACH_LOOP (loop
, 0)
2280 struct tree_niter_desc niter_desc
;
2281 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2283 exits
= get_loop_exit_edges (loop
);
2284 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2285 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2286 && !is_gimple_min_invariant (niter_desc
.niter
))
2288 predicate will_be_nonconstant
2289 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2292 if (will_be_nonconstant
!= true)
2293 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2294 if (will_be_nonconstant
!= true
2295 && will_be_nonconstant
!= false)
2296 /* This is slightly inprecise. We may want to represent each
2297 loop with independent predicate. */
2298 loop_iterations
&= will_be_nonconstant
;
2303 /* To avoid quadratic behavior we analyze stride predicates only
2304 with respect to the containing loop. Thus we simply iterate
2305 over all defs in the outermost loop body. */
2306 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2307 loop
!= NULL
; loop
= loop
->next
)
2309 basic_block
*body
= get_loop_body (loop
);
2310 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2312 gimple_stmt_iterator gsi
;
2313 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2314 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2317 gimple
*stmt
= gsi_stmt (gsi
);
2319 if (!is_gimple_assign (stmt
))
2322 tree def
= gimple_assign_lhs (stmt
);
2323 if (TREE_CODE (def
) != SSA_NAME
)
2327 if (!simple_iv (loop_containing_stmt (stmt
),
2328 loop_containing_stmt (stmt
),
2330 || is_gimple_min_invariant (iv
.step
))
2333 predicate will_be_nonconstant
2334 = will_be_nonconstant_expr_predicate (fbi
.info
, info
,
2337 if (will_be_nonconstant
!= true)
2338 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2339 if (will_be_nonconstant
!= true
2340 && will_be_nonconstant
!= false)
2341 /* This is slightly inprecise. We may want to represent
2342 each loop with independent predicate. */
2343 loop_stride
= loop_stride
& will_be_nonconstant
;
2348 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2349 set_hint_predicate (&s
->loop_iterations
, loop_iterations
);
2350 set_hint_predicate (&s
->loop_stride
, loop_stride
);
2353 FOR_ALL_BB_FN (bb
, my_function
)
2359 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2361 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2364 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2368 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2370 s
->self_size
= size
;
2371 nonconstant_names
.release ();
2372 ipa_release_body_info (&fbi
);
2373 if (opt_for_fn (node
->decl
, optimize
))
2376 loop_optimizer_finalize ();
2377 else if (!ipa_edge_args_sum
)
2378 ipa_free_all_node_params ();
2379 free_dominance_info (CDI_DOMINATORS
);
2383 fprintf (dump_file
, "\n");
2384 ipa_dump_fn_summary (dump_file
, node
);
2389 /* Compute function summary.
2390 EARLY is true when we compute parameters during early opts. */
2393 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2395 HOST_WIDE_INT self_stack_size
;
2396 struct cgraph_edge
*e
;
2397 struct ipa_fn_summary
*info
;
2399 gcc_assert (!node
->global
.inlined_to
);
2401 if (!ipa_fn_summaries
)
2402 ipa_fn_summary_alloc ();
2404 /* Create a new ipa_fn_summary. */
2405 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
2406 ipa_fn_summaries
->remove (node
);
2407 info
= ipa_fn_summaries
->get_create (node
);
2409 /* Estimate the stack size for the function if we're optimizing. */
2410 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2411 ? estimated_stack_frame_size (node
) : 0;
2412 info
->estimated_self_stack_size
= self_stack_size
;
2413 info
->estimated_stack_size
= self_stack_size
;
2414 info
->stack_frame_offset
= 0;
2416 if (node
->thunk
.thunk_p
)
2418 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
2421 node
->local
.can_change_signature
= false;
2422 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2423 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2424 info
->account_size_time (ipa_fn_summary::size_scale
2426 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS
),
2428 (PARAM_UNINLINED_FUNCTION_THUNK_TIME
), t
, t
);
2429 t
= predicate::not_inlined ();
2430 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2431 ipa_update_overall_fn_summary (node
);
2432 info
->self_size
= info
->size
;
2433 /* We cannot inline instrumentation clones. */
2434 if (node
->thunk
.add_pointer_bounds_args
)
2436 info
->inlinable
= false;
2437 node
->callees
->inline_failed
= CIF_CHKP
;
2439 else if (stdarg_p (TREE_TYPE (node
->decl
)))
2441 info
->inlinable
= false;
2442 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2445 info
->inlinable
= true;
2449 /* Even is_gimple_min_invariant rely on current_function_decl. */
2450 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2452 /* Can this function be inlined at all? */
2453 if (!opt_for_fn (node
->decl
, optimize
)
2454 && !lookup_attribute ("always_inline",
2455 DECL_ATTRIBUTES (node
->decl
)))
2456 info
->inlinable
= false;
2458 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2460 /* Type attributes can use parameter indices to describe them. */
2461 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2462 /* Likewise for #pragma omp declare simd functions or functions
2463 with simd attribute. */
2464 || lookup_attribute ("omp declare simd",
2465 DECL_ATTRIBUTES (node
->decl
)))
2466 node
->local
.can_change_signature
= false;
2469 /* Otherwise, inlinable functions always can change signature. */
2470 if (info
->inlinable
)
2471 node
->local
.can_change_signature
= true;
2474 /* Functions calling builtin_apply cannot change signature. */
2475 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2477 tree
cdecl = e
->callee
->decl
;
2478 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
2479 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
2482 node
->local
.can_change_signature
= !e
;
2485 /* Functions called by instrumentation thunk can't change signature
2486 because instrumentation thunk modification is not supported. */
2487 if (node
->local
.can_change_signature
)
2488 for (e
= node
->callers
; e
; e
= e
->next_caller
)
2489 if (e
->caller
->thunk
.thunk_p
2490 && e
->caller
->thunk
.add_pointer_bounds_args
)
2492 node
->local
.can_change_signature
= false;
2495 analyze_function_body (node
, early
);
2498 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2499 if (e
->callee
->comdat_local_p ())
2501 node
->calls_comdat_local
= (e
!= NULL
);
2503 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2504 info
->size
= info
->self_size
;
2505 info
->stack_frame_offset
= 0;
2506 info
->estimated_stack_size
= info
->estimated_self_stack_size
;
2508 /* Code above should compute exactly the same result as
2509 ipa_update_overall_fn_summary but because computation happens in
2510 different order the roundoff errors result in slight changes. */
2511 ipa_update_overall_fn_summary (node
);
2512 /* In LTO mode we may have speculative edges set. */
2513 gcc_assert (in_lto_p
|| info
->size
== info
->self_size
);
2517 /* Compute parameters of functions used by inliner using
2518 current_function_decl. */
2521 compute_fn_summary_for_current (void)
2523 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2527 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2528 KNOWN_CONTEXTS and KNOWN_AGGS. */
2531 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2532 int *size
, int *time
,
2533 vec
<tree
> known_vals
,
2534 vec
<ipa_polymorphic_call_context
> known_contexts
,
2535 vec
<ipa_agg_jump_function_p
> known_aggs
)
2538 struct cgraph_node
*callee
;
2539 struct ipa_fn_summary
*isummary
;
2540 enum availability avail
;
2543 if (!known_vals
.exists () && !known_contexts
.exists ())
2545 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2548 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2549 known_aggs
, &speculative
);
2550 if (!target
|| speculative
)
2553 /* Account for difference in cost between indirect and direct calls. */
2554 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2555 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2556 gcc_checking_assert (*time
>= 0);
2557 gcc_checking_assert (*size
>= 0);
2559 callee
= cgraph_node::get (target
);
2560 if (!callee
|| !callee
->definition
)
2562 callee
= callee
->function_symbol (&avail
);
2563 if (avail
< AVAIL_AVAILABLE
)
2565 isummary
= ipa_fn_summaries
->get (callee
);
2566 return isummary
->inlinable
;
2569 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2570 handle edge E with probability PROB.
2571 Set HINTS if edge may be devirtualized.
2572 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2576 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2579 vec
<tree
> known_vals
,
2580 vec
<ipa_polymorphic_call_context
> known_contexts
,
2581 vec
<ipa_agg_jump_function_p
> known_aggs
,
2584 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2585 int call_size
= es
->call_stmt_size
;
2586 int call_time
= es
->call_stmt_time
;
2589 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2590 known_vals
, known_contexts
, known_aggs
)
2591 && hints
&& e
->maybe_hot_p ())
2592 *hints
|= INLINE_HINT_indirect_call
;
2593 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2596 *min_size
+= cur_size
;
2597 if (prob
== REG_BR_PROB_BASE
)
2598 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2600 *time
+= ((sreal
)call_time
* prob
) * e
->sreal_frequency ();
2605 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2606 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2607 describe context of the call site. */
2610 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2611 int *min_size
, sreal
*time
,
2613 clause_t possible_truths
,
2614 vec
<tree
> known_vals
,
2615 vec
<ipa_polymorphic_call_context
> known_contexts
,
2616 vec
<ipa_agg_jump_function_p
> known_aggs
)
2618 struct cgraph_edge
*e
;
2619 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2621 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2623 /* Do not care about zero sized builtins. */
2624 if (e
->inline_failed
&& !es
->call_stmt_size
)
2626 gcc_checking_assert (!es
->call_stmt_time
);
2630 || es
->predicate
->evaluate (possible_truths
))
2632 if (e
->inline_failed
)
2634 /* Predicates of calls shall not use NOT_CHANGED codes,
2635 sowe do not need to compute probabilities. */
2636 estimate_edge_size_and_time (e
, size
,
2637 es
->predicate
? NULL
: min_size
,
2638 time
, REG_BR_PROB_BASE
,
2639 known_vals
, known_contexts
,
2643 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2646 known_vals
, known_contexts
,
2650 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2652 struct ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2654 || es
->predicate
->evaluate (possible_truths
))
2655 estimate_edge_size_and_time (e
, size
,
2656 es
->predicate
? NULL
: min_size
,
2657 time
, REG_BR_PROB_BASE
,
2658 known_vals
, known_contexts
, known_aggs
,
2664 /* Estimate size and time needed to execute NODE assuming
2665 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2666 information about NODE's arguments. If non-NULL use also probability
2667 information present in INLINE_PARAM_SUMMARY vector.
2668 Additionally detemine hints determined by the context. Finally compute
2669 minimal size needed for the call that is independent on the call context and
2670 can be used for fast estimates. Return the values in RET_SIZE,
2671 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2674 estimate_node_size_and_time (struct cgraph_node
*node
,
2675 clause_t possible_truths
,
2676 clause_t nonspec_possible_truths
,
2677 vec
<tree
> known_vals
,
2678 vec
<ipa_polymorphic_call_context
> known_contexts
,
2679 vec
<ipa_agg_jump_function_p
> known_aggs
,
2680 int *ret_size
, int *ret_min_size
,
2682 sreal
*ret_nonspecialized_time
,
2683 ipa_hints
*ret_hints
,
2684 vec
<inline_param_summary
>
2685 inline_param_summary
)
2687 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2692 ipa_hints hints
= 0;
2695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2698 fprintf (dump_file
, " Estimating body: %s/%i\n"
2699 " Known to be false: ", node
->name (),
2702 for (i
= predicate::not_inlined_condition
;
2703 i
< (predicate::first_dynamic_condition
2704 + (int) vec_safe_length (info
->conds
)); i
++)
2705 if (!(possible_truths
& (1 << i
)))
2708 fprintf (dump_file
, ", ");
2710 dump_condition (dump_file
, info
->conds
, i
);
2714 estimate_calls_size_and_time (node
, &size
, &min_size
, &time
, &hints
, possible_truths
,
2715 known_vals
, known_contexts
, known_aggs
);
2716 sreal nonspecialized_time
= time
;
2718 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
2720 bool exec
= e
->exec_predicate
.evaluate (nonspec_possible_truths
);
2722 /* Because predicates are conservative, it can happen that nonconst is 1
2726 bool nonconst
= e
->nonconst_predicate
.evaluate (possible_truths
);
2728 gcc_checking_assert (e
->time
>= 0);
2729 gcc_checking_assert (time
>= 0);
2731 /* We compute specialized size only because size of nonspecialized
2732 copy is context independent.
2734 The difference between nonspecialized execution and specialized is
2735 that nonspecialized is not going to have optimized out computations
2736 known to be constant in a specialized setting. */
2739 nonspecialized_time
+= e
->time
;
2742 else if (!inline_param_summary
.exists ())
2749 int prob
= e
->nonconst_predicate
.probability
2750 (info
->conds
, possible_truths
,
2751 inline_param_summary
);
2752 gcc_checking_assert (prob
>= 0);
2753 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
2754 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
2756 gcc_checking_assert (time
>= 0);
2759 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
2760 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
2761 min_size
= (*info
->size_time_table
)[0].size
;
2762 gcc_checking_assert (size
>= 0);
2763 gcc_checking_assert (time
>= 0);
2764 /* nonspecialized_time should be always bigger than specialized time.
2765 Roundoff issues however may get into the way. */
2766 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
2768 /* Roundoff issues may make specialized time bigger than nonspecialized
2769 time. We do not really want that to happen because some heurstics
2770 may get confused by seeing negative speedups. */
2771 if (time
> nonspecialized_time
)
2772 time
= nonspecialized_time
;
2774 if (info
->loop_iterations
2775 && !info
->loop_iterations
->evaluate (possible_truths
))
2776 hints
|= INLINE_HINT_loop_iterations
;
2777 if (info
->loop_stride
2778 && !info
->loop_stride
->evaluate (possible_truths
))
2779 hints
|= INLINE_HINT_loop_stride
;
2780 if (info
->array_index
2781 && !info
->array_index
->evaluate (possible_truths
))
2782 hints
|= INLINE_HINT_array_index
;
2784 hints
|= INLINE_HINT_in_scc
;
2785 if (DECL_DECLARED_INLINE_P (node
->decl
))
2786 hints
|= INLINE_HINT_declared_inline
;
2788 size
= RDIV (size
, ipa_fn_summary::size_scale
);
2789 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
2791 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2792 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
2793 time
.to_double (), nonspecialized_time
.to_double ());
2796 if (ret_nonspecialized_time
)
2797 *ret_nonspecialized_time
= nonspecialized_time
;
2801 *ret_min_size
= min_size
;
2808 /* Estimate size and time needed to execute callee of EDGE assuming that
2809 parameters known to be constant at caller of EDGE are propagated.
2810 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2811 and types for parameters. */
2814 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
2815 vec
<tree
> known_vals
,
2816 vec
<ipa_polymorphic_call_context
>
2818 vec
<ipa_agg_jump_function_p
> known_aggs
,
2819 int *ret_size
, sreal
*ret_time
,
2820 sreal
*ret_nonspec_time
,
2823 clause_t clause
, nonspec_clause
;
2825 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
2826 &clause
, &nonspec_clause
);
2827 estimate_node_size_and_time (node
, clause
, nonspec_clause
,
2828 known_vals
, known_contexts
,
2829 known_aggs
, ret_size
, NULL
, ret_time
,
2830 ret_nonspec_time
, hints
, vNULL
);
2834 /* Update summary information of inline clones after inlining.
2835 Compute peak stack usage. */
2838 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
2840 struct cgraph_edge
*e
;
2841 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (node
);
2842 ipa_fn_summary
*caller_info
= ipa_fn_summaries
->get (node
->callers
->caller
);
2845 callee_info
->stack_frame_offset
2846 = caller_info
->stack_frame_offset
2847 + caller_info
->estimated_self_stack_size
;
2848 peak
= callee_info
->stack_frame_offset
2849 + callee_info
->estimated_self_stack_size
;
2851 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
->global
.inlined_to
);
2852 if (s
->estimated_stack_size
< peak
)
2853 s
->estimated_stack_size
= peak
;
2854 ipa_propagate_frequency (node
);
2855 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2857 if (!e
->inline_failed
)
2858 inline_update_callee_summaries (e
->callee
, depth
);
2859 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2861 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2862 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
2865 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2866 When functoin A is inlined in B and A calls C with parameter that
2867 changes with probability PROB1 and C is known to be passthroug
2868 of argument if B that change with probability PROB2, the probability
2869 of change is now PROB1*PROB2. */
2872 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
2873 struct cgraph_edge
*edge
)
2875 if (ipa_node_params_sum
)
2878 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
2879 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
2880 struct ipa_call_summary
*inlined_es
2881 = ipa_call_summaries
->get (inlined_edge
);
2883 if (es
->param
.length () == 0)
2886 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
2888 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
2889 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2890 || jfunc
->type
== IPA_JF_ANCESTOR
)
2892 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
2893 ? ipa_get_jf_pass_through_formal_id (jfunc
)
2894 : ipa_get_jf_ancestor_formal_id (jfunc
);
2895 if (id
< (int) inlined_es
->param
.length ())
2897 int prob1
= es
->param
[i
].change_prob
;
2898 int prob2
= inlined_es
->param
[id
].change_prob
;
2899 int prob
= combine_probabilities (prob1
, prob2
);
2901 if (prob1
&& prob2
&& !prob
)
2904 es
->param
[i
].change_prob
= prob
;
2911 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2913 Remap predicates of callees of NODE. Rest of arguments match
2916 Also update change probabilities. */
2919 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
2920 struct cgraph_node
*node
,
2921 struct ipa_fn_summary
*info
,
2922 struct ipa_fn_summary
*callee_info
,
2923 vec
<int> operand_map
,
2924 vec
<int> offset_map
,
2925 clause_t possible_truths
,
2926 predicate
*toplev_predicate
)
2928 struct cgraph_edge
*e
, *next
;
2929 for (e
= node
->callees
; e
; e
= next
)
2931 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2933 next
= e
->next_callee
;
2935 if (e
->inline_failed
)
2937 remap_edge_change_prob (inlined_edge
, e
);
2941 p
= es
->predicate
->remap_after_inlining
2942 (info
, callee_info
, operand_map
,
2943 offset_map
, possible_truths
,
2945 edge_set_predicate (e
, &p
);
2948 edge_set_predicate (e
, toplev_predicate
);
2951 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
2952 operand_map
, offset_map
, possible_truths
,
2955 for (e
= node
->indirect_calls
; e
; e
= next
)
2957 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2959 next
= e
->next_callee
;
2961 remap_edge_change_prob (inlined_edge
, e
);
2964 p
= es
->predicate
->remap_after_inlining
2965 (info
, callee_info
, operand_map
, offset_map
,
2966 possible_truths
, *toplev_predicate
);
2967 edge_set_predicate (e
, &p
);
2970 edge_set_predicate (e
, toplev_predicate
);
2974 /* Same as remap_predicate, but set result into hint *HINT. */
2977 remap_hint_predicate (struct ipa_fn_summary
*info
,
2978 struct ipa_fn_summary
*callee_info
,
2980 vec
<int> operand_map
,
2981 vec
<int> offset_map
,
2982 clause_t possible_truths
,
2983 predicate
*toplev_predicate
)
2989 p
= (*hint
)->remap_after_inlining
2991 operand_map
, offset_map
,
2992 possible_truths
, *toplev_predicate
);
2993 if (p
!= false && p
!= true)
2996 set_hint_predicate (hint
, p
);
3002 /* We inlined EDGE. Update summary of the function we inlined into. */
3005 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
3007 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
3008 struct cgraph_node
*to
= (edge
->caller
->global
.inlined_to
3009 ? edge
->caller
->global
.inlined_to
: edge
->caller
);
3010 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
3011 clause_t clause
= 0; /* not_inline is known to be false. */
3013 vec
<int> operand_map
= vNULL
;
3014 vec
<int> offset_map
= vNULL
;
3016 predicate toplev_predicate
;
3017 predicate true_p
= true;
3018 struct ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3021 toplev_predicate
= *es
->predicate
;
3023 toplev_predicate
= true;
3025 info
->fp_expressions
|= callee_info
->fp_expressions
;
3027 if (callee_info
->conds
)
3028 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
3029 if (ipa_node_params_sum
&& callee_info
->conds
)
3031 struct ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3032 int count
= ipa_get_cs_argument_count (args
);
3037 operand_map
.safe_grow_cleared (count
);
3038 offset_map
.safe_grow_cleared (count
);
3040 for (i
= 0; i
< count
; i
++)
3042 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3045 /* TODO: handle non-NOPs when merging. */
3046 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3048 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3049 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3050 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3053 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3055 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3056 if (offset
>= 0 && offset
< INT_MAX
)
3058 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3059 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3061 offset_map
[i
] = offset
;
3064 operand_map
[i
] = map
;
3065 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
3068 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3071 p
= e
->exec_predicate
.remap_after_inlining
3072 (info
, callee_info
, operand_map
,
3075 predicate nonconstp
;
3076 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3077 (info
, callee_info
, operand_map
,
3080 if (p
!= false && nonconstp
!= false)
3082 sreal add_time
= ((sreal
)e
->time
* edge
->sreal_frequency ());
3083 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3085 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3086 if (prob
!= REG_BR_PROB_BASE
3087 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3089 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3090 (double) prob
/ REG_BR_PROB_BASE
);
3092 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3095 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3096 offset_map
, clause
, &toplev_predicate
);
3097 remap_hint_predicate (info
, callee_info
,
3098 &callee_info
->loop_iterations
,
3099 operand_map
, offset_map
, clause
, &toplev_predicate
);
3100 remap_hint_predicate (info
, callee_info
,
3101 &callee_info
->loop_stride
,
3102 operand_map
, offset_map
, clause
, &toplev_predicate
);
3103 remap_hint_predicate (info
, callee_info
,
3104 &callee_info
->array_index
,
3105 operand_map
, offset_map
, clause
, &toplev_predicate
);
3107 ipa_call_summary
*s
= ipa_call_summaries
->get (edge
);
3108 inline_update_callee_summaries (edge
->callee
, s
->loop_depth
);
3110 /* We do not maintain predicates of inlined edges, free it. */
3111 edge_set_predicate (edge
, &true_p
);
3112 /* Similarly remove param summaries. */
3113 es
->param
.release ();
3114 operand_map
.release ();
3115 offset_map
.release ();
3118 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3119 and time. Recompute it. */
3122 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3124 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3130 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3132 info
->size
+= e
->size
;
3133 info
->time
+= e
->time
;
3135 estimate_calls_size_and_time (node
, &info
->size
, &info
->min_size
,
3137 ~(clause_t
) (1 << predicate::false_condition
),
3138 vNULL
, vNULL
, vNULL
);
3139 info
->size
= (info
->size
+ ipa_fn_summary::size_scale
/ 2) / ipa_fn_summary::size_scale
;
3143 /* This function performs intraprocedural analysis in NODE that is required to
3144 inline indirect calls. */
3147 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3149 ipa_analyze_node (node
);
3150 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3152 ipa_print_node_params (dump_file
, node
);
3153 ipa_print_node_jump_functions (dump_file
, node
);
3158 /* Note function body size. */
3161 inline_analyze_function (struct cgraph_node
*node
)
3163 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3166 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3167 node
->name (), node
->order
);
3168 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3169 inline_indirect_intraprocedural_analysis (node
);
3170 compute_fn_summary (node
, false);
3173 struct cgraph_edge
*e
;
3174 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3175 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3176 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3177 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3184 /* Called when new function is inserted to callgraph late. */
3187 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3189 inline_analyze_function (node
);
3192 /* Note function body size. */
3195 ipa_fn_summary_generate (void)
3197 struct cgraph_node
*node
;
3199 FOR_EACH_DEFINED_FUNCTION (node
)
3200 if (DECL_STRUCT_FUNCTION (node
->decl
))
3201 node
->local
.versionable
= tree_versionable_function_p (node
->decl
);
3203 ipa_fn_summary_alloc ();
3205 ipa_fn_summaries
->enable_insertion_hook ();
3207 ipa_register_cgraph_hooks ();
3209 FOR_EACH_DEFINED_FUNCTION (node
)
3211 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3212 || opt_for_fn (node
->decl
, optimize
)))
3213 inline_analyze_function (node
);
3217 /* Write inline summary for edge E to OB. */
3220 read_ipa_call_summary (struct lto_input_block
*ib
, struct cgraph_edge
*e
,
3223 struct ipa_call_summary
*es
= prevails
3224 ? ipa_call_summaries
->get_create (e
) : NULL
;
3228 int size
= streamer_read_uhwi (ib
);
3229 int time
= streamer_read_uhwi (ib
);
3230 int depth
= streamer_read_uhwi (ib
);
3234 es
->call_stmt_size
= size
;
3235 es
->call_stmt_time
= time
;
3236 es
->loop_depth
= depth
;
3239 bitpack_d bp
= streamer_read_bitpack (ib
);
3241 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3243 bp_unpack_value (&bp
, 1);
3247 edge_set_predicate (e
, &p
);
3248 length
= streamer_read_uhwi (ib
);
3249 if (length
&& es
&& e
->possibly_call_in_translation_unit_p ())
3251 es
->param
.safe_grow_cleared (length
);
3252 for (i
= 0; i
< length
; i
++)
3253 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3257 for (i
= 0; i
< length
; i
++)
3258 streamer_read_uhwi (ib
);
3263 /* Stream in inline summaries from the section. */
3266 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3269 const struct lto_function_header
*header
=
3270 (const struct lto_function_header
*) data
;
3271 const int cfg_offset
= sizeof (struct lto_function_header
);
3272 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3273 const int string_offset
= main_offset
+ header
->main_size
;
3274 struct data_in
*data_in
;
3275 unsigned int i
, count2
, j
;
3276 unsigned int f_count
;
3278 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3279 file_data
->mode_table
);
3282 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3283 header
->string_size
, vNULL
);
3284 f_count
= streamer_read_uhwi (&ib
);
3285 for (i
= 0; i
< f_count
; i
++)
3288 struct cgraph_node
*node
;
3289 struct ipa_fn_summary
*info
;
3290 lto_symtab_encoder_t encoder
;
3291 struct bitpack_d bp
;
3292 struct cgraph_edge
*e
;
3295 index
= streamer_read_uhwi (&ib
);
3296 encoder
= file_data
->symtab_node_encoder
;
3297 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3299 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
3301 int stack_size
= streamer_read_uhwi (&ib
);
3302 int size
= streamer_read_uhwi (&ib
);
3303 sreal time
= sreal::stream_in (&ib
);
3307 info
->estimated_stack_size
3308 = info
->estimated_self_stack_size
= stack_size
;
3309 info
->size
= info
->self_size
= size
;
3313 bp
= streamer_read_bitpack (&ib
);
3316 info
->inlinable
= bp_unpack_value (&bp
, 1);
3317 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3321 bp_unpack_value (&bp
, 1);
3322 bp_unpack_value (&bp
, 1);
3325 count2
= streamer_read_uhwi (&ib
);
3326 gcc_assert (!info
|| !info
->conds
);
3327 for (j
= 0; j
< count2
; j
++)
3330 c
.operand_num
= streamer_read_uhwi (&ib
);
3331 c
.size
= streamer_read_uhwi (&ib
);
3332 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3333 c
.val
= stream_read_tree (&ib
, data_in
);
3334 bp
= streamer_read_bitpack (&ib
);
3335 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3336 c
.by_ref
= bp_unpack_value (&bp
, 1);
3338 c
.offset
= streamer_read_uhwi (&ib
);
3340 vec_safe_push (info
->conds
, c
);
3342 count2
= streamer_read_uhwi (&ib
);
3343 gcc_assert (!info
|| !info
->size_time_table
);
3344 for (j
= 0; j
< count2
; j
++)
3346 struct size_time_entry e
;
3348 e
.size
= streamer_read_uhwi (&ib
);
3349 e
.time
= sreal::stream_in (&ib
);
3350 e
.exec_predicate
.stream_in (&ib
);
3351 e
.nonconst_predicate
.stream_in (&ib
);
3354 vec_safe_push (info
->size_time_table
, e
);
3359 set_hint_predicate (&info
->loop_iterations
, p
);
3362 set_hint_predicate (&info
->loop_stride
, p
);
3365 set_hint_predicate (&info
->array_index
, p
);
3366 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3367 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3368 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3369 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3372 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3374 lto_data_in_delete (data_in
);
3378 /* Read inline summary. Jump functions are shared among ipa-cp
3379 and inliner, so when ipa-cp is active, we don't need to write them
3383 ipa_fn_summary_read (void)
3385 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3386 struct lto_file_decl_data
*file_data
;
3389 ipa_fn_summary_alloc ();
3391 while ((file_data
= file_data_vec
[j
++]))
3394 const char *data
= lto_get_section_data (file_data
,
3395 LTO_section_ipa_fn_summary
,
3398 inline_read_section (file_data
, data
, len
);
3400 /* Fatal error here. We do not want to support compiling ltrans units
3401 with different version of compiler or different flags than the WPA
3402 unit, so this should never happen. */
3403 fatal_error (input_location
,
3404 "ipa inline summary is missing in input file");
3406 ipa_register_cgraph_hooks ();
3408 ipa_prop_read_jump_functions ();
3410 gcc_assert (ipa_fn_summaries
);
3411 ipa_fn_summaries
->enable_insertion_hook ();
3415 /* Write inline summary for edge E to OB. */
3418 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3420 struct ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3423 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3424 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3425 streamer_write_uhwi (ob
, es
->loop_depth
);
3427 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3428 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3429 streamer_write_bitpack (&bp
);
3432 es
->predicate
->stream_out (ob
);
3434 streamer_write_uhwi (ob
, 0);
3435 streamer_write_uhwi (ob
, es
->param
.length ());
3436 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3437 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3441 /* Write inline summary for node in SET.
3442 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3443 active, we don't need to write them twice. */
3446 ipa_fn_summary_write (void)
3448 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3449 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3450 unsigned int count
= 0;
3453 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3455 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3456 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3457 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3460 streamer_write_uhwi (ob
, count
);
3462 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3464 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3465 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3466 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3468 struct ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3469 struct bitpack_d bp
;
3470 struct cgraph_edge
*edge
;
3473 struct condition
*c
;
3475 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3476 streamer_write_hwi (ob
, info
->estimated_self_stack_size
);
3477 streamer_write_hwi (ob
, info
->self_size
);
3478 info
->time
.stream_out (ob
);
3479 bp
= bitpack_create (ob
->main_stream
);
3480 bp_pack_value (&bp
, info
->inlinable
, 1);
3481 bp_pack_value (&bp
, false, 1);
3482 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3483 streamer_write_bitpack (&bp
);
3484 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3485 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3487 streamer_write_uhwi (ob
, c
->operand_num
);
3488 streamer_write_uhwi (ob
, c
->size
);
3489 streamer_write_uhwi (ob
, c
->code
);
3490 stream_write_tree (ob
, c
->val
, true);
3491 bp
= bitpack_create (ob
->main_stream
);
3492 bp_pack_value (&bp
, c
->agg_contents
, 1);
3493 bp_pack_value (&bp
, c
->by_ref
, 1);
3494 streamer_write_bitpack (&bp
);
3495 if (c
->agg_contents
)
3496 streamer_write_uhwi (ob
, c
->offset
);
3498 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3499 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3501 streamer_write_uhwi (ob
, e
->size
);
3502 e
->time
.stream_out (ob
);
3503 e
->exec_predicate
.stream_out (ob
);
3504 e
->nonconst_predicate
.stream_out (ob
);
3506 if (info
->loop_iterations
)
3507 info
->loop_iterations
->stream_out (ob
);
3509 streamer_write_uhwi (ob
, 0);
3510 if (info
->loop_stride
)
3511 info
->loop_stride
->stream_out (ob
);
3513 streamer_write_uhwi (ob
, 0);
3514 if (info
->array_index
)
3515 info
->array_index
->stream_out (ob
);
3517 streamer_write_uhwi (ob
, 0);
3518 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3519 write_ipa_call_summary (ob
, edge
);
3520 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3521 write_ipa_call_summary (ob
, edge
);
3524 streamer_write_char_stream (ob
->main_stream
, 0);
3525 produce_asm (ob
, NULL
);
3526 destroy_output_block (ob
);
3529 ipa_prop_write_jump_functions ();
3533 /* Release inline summary. */
3536 ipa_free_fn_summary (void)
3538 struct cgraph_node
*node
;
3539 if (!ipa_call_summaries
)
3541 FOR_EACH_DEFINED_FUNCTION (node
)
3543 ipa_fn_summaries
->remove (node
);
3544 ipa_fn_summaries
->release ();
3545 ipa_fn_summaries
= NULL
;
3546 ipa_call_summaries
->release ();
3547 delete ipa_call_summaries
;
3548 ipa_call_summaries
= NULL
;
3549 edge_predicate_pool
.release ();
3554 const pass_data pass_data_local_fn_summary
=
3556 GIMPLE_PASS
, /* type */
3557 "local-fnsummary", /* name */
3558 OPTGROUP_INLINE
, /* optinfo_flags */
3559 TV_INLINE_PARAMETERS
, /* tv_id */
3560 0, /* properties_required */
3561 0, /* properties_provided */
3562 0, /* properties_destroyed */
3563 0, /* todo_flags_start */
3564 0, /* todo_flags_finish */
3567 class pass_local_fn_summary
: public gimple_opt_pass
3570 pass_local_fn_summary (gcc::context
*ctxt
)
3571 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3574 /* opt_pass methods: */
3575 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3576 virtual unsigned int execute (function
*)
3578 return compute_fn_summary_for_current ();
3581 }; // class pass_local_fn_summary
3586 make_pass_local_fn_summary (gcc::context
*ctxt
)
3588 return new pass_local_fn_summary (ctxt
);
3592 /* Free inline summary. */
3596 const pass_data pass_data_ipa_free_fn_summary
=
3598 SIMPLE_IPA_PASS
, /* type */
3599 "free-fnsummary", /* name */
3600 OPTGROUP_NONE
, /* optinfo_flags */
3601 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3602 0, /* properties_required */
3603 0, /* properties_provided */
3604 0, /* properties_destroyed */
3605 0, /* todo_flags_start */
3606 0, /* todo_flags_finish */
3609 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3612 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3613 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
3617 /* opt_pass methods: */
3618 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
3619 void set_pass_param (unsigned int n
, bool param
)
3621 gcc_assert (n
== 0);
3624 virtual bool gate (function
*) { return small_p
|| !flag_wpa
; }
3625 virtual unsigned int execute (function
*)
3627 ipa_free_fn_summary ();
3633 }; // class pass_ipa_free_fn_summary
3637 simple_ipa_opt_pass
*
3638 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3640 return new pass_ipa_free_fn_summary (ctxt
);
3645 const pass_data pass_data_ipa_fn_summary
=
3647 IPA_PASS
, /* type */
3648 "fnsummary", /* name */
3649 OPTGROUP_INLINE
, /* optinfo_flags */
3650 TV_IPA_FNSUMMARY
, /* tv_id */
3651 0, /* properties_required */
3652 0, /* properties_provided */
3653 0, /* properties_destroyed */
3654 0, /* todo_flags_start */
3655 ( TODO_dump_symtab
), /* todo_flags_finish */
3658 class pass_ipa_fn_summary
: public ipa_opt_pass_d
3661 pass_ipa_fn_summary (gcc::context
*ctxt
)
3662 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
3663 ipa_fn_summary_generate
, /* generate_summary */
3664 ipa_fn_summary_write
, /* write_summary */
3665 ipa_fn_summary_read
, /* read_summary */
3666 NULL
, /* write_optimization_summary */
3667 NULL
, /* read_optimization_summary */
3668 NULL
, /* stmt_fixup */
3669 0, /* function_transform_todo_flags_start */
3670 NULL
, /* function_transform */
3671 NULL
) /* variable_transform */
3674 /* opt_pass methods: */
3675 virtual unsigned int execute (function
*) { return 0; }
3677 }; // class pass_ipa_fn_summary
3682 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
3684 return new pass_ipa_fn_summary (ctxt
);
3687 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3688 within the same process. For use by toplev::finalize. */
3691 ipa_fnsummary_c_finalize (void)
3693 ipa_free_fn_summary ();