1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
72 #include "gimple-iterator.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
78 #include "ipa-fnsummary.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
84 #include "stringpool.h"
88 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
89 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
90 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
92 /* Edge predicates goes here. */
93 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
98 ipa_dump_hints (FILE *f
, ipa_hints hints
)
102 fprintf (f
, "IPA hints:");
103 if (hints
& INLINE_HINT_indirect_call
)
105 hints
&= ~INLINE_HINT_indirect_call
;
106 fprintf (f
, " indirect_call");
108 if (hints
& INLINE_HINT_loop_iterations
)
110 hints
&= ~INLINE_HINT_loop_iterations
;
111 fprintf (f
, " loop_iterations");
113 if (hints
& INLINE_HINT_loop_stride
)
115 hints
&= ~INLINE_HINT_loop_stride
;
116 fprintf (f
, " loop_stride");
118 if (hints
& INLINE_HINT_same_scc
)
120 hints
&= ~INLINE_HINT_same_scc
;
121 fprintf (f
, " same_scc");
123 if (hints
& INLINE_HINT_in_scc
)
125 hints
&= ~INLINE_HINT_in_scc
;
126 fprintf (f
, " in_scc");
128 if (hints
& INLINE_HINT_cross_module
)
130 hints
&= ~INLINE_HINT_cross_module
;
131 fprintf (f
, " cross_module");
133 if (hints
& INLINE_HINT_declared_inline
)
135 hints
&= ~INLINE_HINT_declared_inline
;
136 fprintf (f
, " declared_inline");
138 if (hints
& INLINE_HINT_known_hot
)
140 hints
&= ~INLINE_HINT_known_hot
;
141 fprintf (f
, " known_hot");
147 /* Record SIZE and TIME to SUMMARY.
148 The accounted code will be executed when EXEC_PRED is true.
149 When NONCONST_PRED is false the code will evaulate to constant and
150 will get optimized out in specialized clones of the function. */
153 ipa_fn_summary::account_size_time (int size
, sreal time
,
154 const predicate
&exec_pred
,
155 const predicate
&nonconst_pred_in
)
160 predicate nonconst_pred
;
162 if (exec_pred
== false)
165 nonconst_pred
= nonconst_pred_in
& exec_pred
;
167 if (nonconst_pred
== false)
170 /* We need to create initial empty unconitional clause, but otherwie
171 we don't need to account empty times and sizes. */
172 if (!size
&& time
== 0 && size_time_table
)
175 gcc_assert (time
>= 0);
177 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
178 if (e
->exec_predicate
== exec_pred
179 && e
->nonconst_predicate
== nonconst_pred
)
188 e
= &(*size_time_table
)[0];
189 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
191 "\t\tReached limit on number of entries, "
192 "ignoring the predicate.");
194 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
197 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
198 ((double) size
) / ipa_fn_summary::size_scale
,
199 (time
.to_double ()), found
? "" : "new ");
200 exec_pred
.dump (dump_file
, conds
, 0);
201 if (exec_pred
!= nonconst_pred
)
203 fprintf (dump_file
, " nonconst:");
204 nonconst_pred
.dump (dump_file
, conds
);
207 fprintf (dump_file
, "\n");
211 class size_time_entry new_entry
;
212 new_entry
.size
= size
;
213 new_entry
.time
= time
;
214 new_entry
.exec_predicate
= exec_pred
;
215 new_entry
.nonconst_predicate
= nonconst_pred
;
216 vec_safe_push (size_time_table
, new_entry
);
225 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
227 static struct cgraph_edge
*
228 redirect_to_unreachable (struct cgraph_edge
*e
)
230 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
231 struct cgraph_node
*target
= cgraph_node::get_create
232 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
235 e
= e
->resolve_speculation (target
->decl
);
237 e
->make_direct (target
);
239 e
->redirect_callee (target
);
240 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
241 e
->inline_failed
= CIF_UNREACHABLE
;
242 e
->count
= profile_count::zero ();
243 es
->call_stmt_size
= 0;
244 es
->call_stmt_time
= 0;
246 callee
->remove_symbol_and_inline_clones ();
250 /* Set predicate for edge E. */
253 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
255 /* If the edge is determined to be never executed, redirect it
256 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
258 if (predicate
&& *predicate
== false
259 /* When handling speculative edges, we need to do the redirection
260 just once. Do it always on the direct edge, so we do not
261 attempt to resolve speculation while duplicating the edge. */
262 && (!e
->speculative
|| e
->callee
))
263 e
= redirect_to_unreachable (e
);
265 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
266 if (predicate
&& *predicate
!= true)
269 es
->predicate
= edge_predicate_pool
.allocate ();
270 *es
->predicate
= *predicate
;
275 edge_predicate_pool
.remove (es
->predicate
);
276 es
->predicate
= NULL
;
280 /* Set predicate for hint *P. */
283 set_hint_predicate (predicate
**p
, predicate new_predicate
)
285 if (new_predicate
== false || new_predicate
== true)
288 edge_predicate_pool
.remove (*p
);
294 *p
= edge_predicate_pool
.allocate ();
300 /* Compute what conditions may or may not hold given invormation about
301 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
302 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
303 copy when called in a given context. It is a bitmask of conditions. Bit
304 0 means that condition is known to be false, while bit 1 means that condition
305 may or may not be true. These differs - for example NOT_INLINED condition
306 is always false in the second and also builtin_constant_p tests cannot use
307 the fact that parameter is indeed a constant.
309 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
310 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
311 Return clause of possible truths. When INLINE_P is true, assume that we are
314 ERROR_MARK means compile time invariant. */
317 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
319 vec
<tree
> known_vals
,
320 vec
<ipa_agg_jump_function_p
>
322 clause_t
*ret_clause
,
323 clause_t
*ret_nonspec_clause
)
325 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
326 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
327 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
331 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
336 struct expr_eval_op
*op
;
338 /* We allow call stmt to have fewer arguments than the callee function
339 (especially for K&R style programs). So bound check here (we assume
340 known_aggs vector, if non-NULL, has the same length as
342 gcc_checking_assert (!known_aggs
.exists ()
343 || (known_vals
.length () == known_aggs
.length ()));
344 if (c
->operand_num
>= (int) known_vals
.length ())
346 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
347 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
353 struct ipa_agg_jump_function
*agg
;
355 if (c
->code
== predicate::changed
357 && (known_vals
[c
->operand_num
] == error_mark_node
))
360 if (known_aggs
.exists ())
362 agg
= known_aggs
[c
->operand_num
];
363 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
364 c
->offset
, c
->by_ref
);
371 val
= known_vals
[c
->operand_num
];
372 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
378 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
379 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
382 if (c
->code
== predicate::changed
)
384 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
388 if (TYPE_SIZE (c
->type
) != TYPE_SIZE (TREE_TYPE (val
)))
390 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
391 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
394 if (c
->code
== predicate::is_not_constant
)
396 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
400 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
401 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
406 val
= fold_unary (op
->code
, op
->type
, val
);
407 else if (!op
->val
[1])
408 val
= fold_binary (op
->code
, op
->type
,
409 op
->index
? op
->val
[0] : val
,
410 op
->index
? val
: op
->val
[0]);
411 else if (op
->index
== 0)
412 val
= fold_ternary (op
->code
, op
->type
,
413 val
, op
->val
[0], op
->val
[1]);
414 else if (op
->index
== 1)
415 val
= fold_ternary (op
->code
, op
->type
,
416 op
->val
[0], val
, op
->val
[1]);
417 else if (op
->index
== 2)
418 val
= fold_ternary (op
->code
, op
->type
,
419 op
->val
[0], op
->val
[1], val
);
425 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
428 if (res
&& integer_zerop (res
))
431 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
432 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
434 *ret_clause
= clause
;
435 if (ret_nonspec_clause
)
436 *ret_nonspec_clause
= nonspec_clause
;
440 /* Work out what conditions might be true at invocation of E. */
443 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
444 clause_t
*clause_ptr
,
445 clause_t
*nonspec_clause_ptr
,
446 vec
<tree
> *known_vals_ptr
,
447 vec
<ipa_polymorphic_call_context
>
449 vec
<ipa_agg_jump_function_p
> *known_aggs_ptr
)
451 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
452 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
453 vec
<tree
> known_vals
= vNULL
;
454 vec
<ipa_agg_jump_function_p
> known_aggs
= vNULL
;
455 class ipa_edge_args
*args
;
458 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
460 known_vals_ptr
->create (0);
461 if (known_contexts_ptr
)
462 known_contexts_ptr
->create (0);
464 if (ipa_node_params_sum
465 && !e
->call_stmt_cannot_inline_p
466 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
)
467 && (args
= IPA_EDGE_REF (e
)) != NULL
)
469 class ipa_node_params
*caller_parms_info
, *callee_pi
;
470 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
471 int i
, count
= ipa_get_cs_argument_count (args
);
473 if (e
->caller
->inlined_to
)
474 caller_parms_info
= IPA_NODE_REF (e
->caller
->inlined_to
);
476 caller_parms_info
= IPA_NODE_REF (e
->caller
);
477 callee_pi
= IPA_NODE_REF (e
->callee
);
479 if (count
&& (info
->conds
|| known_vals_ptr
))
480 known_vals
.safe_grow_cleared (count
);
481 if (count
&& (info
->conds
|| known_aggs_ptr
))
482 known_aggs
.safe_grow_cleared (count
);
483 if (count
&& known_contexts_ptr
)
484 known_contexts_ptr
->safe_grow_cleared (count
);
486 for (i
= 0; i
< count
; i
++)
488 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
489 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
490 ipa_get_type (callee_pi
, i
));
492 if (!cst
&& e
->call_stmt
493 && i
< (int)gimple_call_num_args (e
->call_stmt
))
495 cst
= gimple_call_arg (e
->call_stmt
, i
);
496 if (!is_gimple_min_invariant (cst
))
501 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
502 if (known_vals
.exists ())
505 else if (inline_p
&& !es
->param
[i
].change_prob
)
506 known_vals
[i
] = error_mark_node
;
508 if (known_contexts_ptr
)
509 (*known_contexts_ptr
)[i
]
510 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
511 /* TODO: When IPA-CP starts propagating and merging aggregate jump
512 functions, use its knowledge of the caller too, just like the
513 scalar case above. */
514 known_aggs
[i
] = &jf
->agg
;
517 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
518 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
520 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
522 if (count
&& (info
->conds
|| known_vals_ptr
))
523 known_vals
.safe_grow_cleared (count
);
524 for (i
= 0; i
< count
; i
++)
526 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
527 if (!is_gimple_min_invariant (cst
))
534 evaluate_conditions_for_known_args (callee
, inline_p
,
535 known_vals
, known_aggs
, clause_ptr
,
539 *known_vals_ptr
= known_vals
;
541 known_vals
.release ();
544 *known_aggs_ptr
= known_aggs
;
546 known_aggs
.release ();
550 /* Allocate the function summary. */
553 ipa_fn_summary_alloc (void)
555 gcc_checking_assert (!ipa_fn_summaries
);
556 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
557 ipa_size_summaries
= new fast_function_summary
<ipa_size_summary
*, va_heap
>
559 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
562 ipa_call_summary::~ipa_call_summary ()
565 edge_predicate_pool
.remove (predicate
);
570 ipa_fn_summary::~ipa_fn_summary ()
573 edge_predicate_pool
.remove (loop_iterations
);
575 edge_predicate_pool
.remove (loop_stride
);
577 vec_free (size_time_table
);
581 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
584 for (e
= node
->callees
; e
; e
= e
->next_callee
)
585 ipa_call_summaries
->remove (e
);
586 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
587 ipa_call_summaries
->remove (e
);
590 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
591 Additionally care about allocating new memory slot for updated predicate
592 and set it to NULL when it becomes true or false (and thus uninteresting).
596 remap_hint_predicate_after_duplication (predicate
**p
,
597 clause_t possible_truths
)
599 predicate new_predicate
;
604 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
605 /* We do not want to free previous predicate; it is used by node origin. */
607 set_hint_predicate (p
, new_predicate
);
611 /* Hook that is called by cgraph.c when a node is duplicated. */
613 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
616 ipa_fn_summary
*info
)
618 new (info
) ipa_fn_summary (*ipa_fn_summaries
->get (src
));
619 /* TODO: as an optimization, we may avoid copying conditions
620 that are known to be false or true. */
621 info
->conds
= vec_safe_copy (info
->conds
);
623 /* When there are any replacements in the function body, see if we can figure
624 out that something was optimized out. */
625 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
627 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
628 /* Use SRC parm info since it may not be copied yet. */
629 class ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
630 vec
<tree
> known_vals
= vNULL
;
631 int count
= ipa_get_param_count (parms_info
);
633 clause_t possible_truths
;
634 predicate true_pred
= true;
636 int optimized_out_size
= 0;
637 bool inlined_to_p
= false;
638 struct cgraph_edge
*edge
, *next
;
640 info
->size_time_table
= 0;
641 known_vals
.safe_grow_cleared (count
);
642 for (i
= 0; i
< count
; i
++)
644 struct ipa_replace_map
*r
;
646 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
648 if (r
->parm_num
== i
)
650 known_vals
[i
] = r
->new_tree
;
655 evaluate_conditions_for_known_args (dst
, false,
659 /* We are going to specialize,
660 so ignore nonspec truths. */
662 known_vals
.release ();
664 info
->account_size_time (0, 0, true_pred
, true_pred
);
666 /* Remap size_time vectors.
667 Simplify the predicate by prunning out alternatives that are known
669 TODO: as on optimization, we can also eliminate conditions known
671 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
673 predicate new_exec_pred
;
674 predicate new_nonconst_pred
;
675 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
677 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
679 if (new_exec_pred
== false || new_nonconst_pred
== false)
680 optimized_out_size
+= e
->size
;
682 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
686 /* Remap edge predicates with the same simplification as above.
687 Also copy constantness arrays. */
688 for (edge
= dst
->callees
; edge
; edge
= next
)
690 predicate new_predicate
;
691 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
692 next
= edge
->next_callee
;
694 if (!edge
->inline_failed
)
698 new_predicate
= es
->predicate
->remap_after_duplication
700 if (new_predicate
== false && *es
->predicate
!= false)
701 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
702 edge_set_predicate (edge
, &new_predicate
);
705 /* Remap indirect edge predicates with the same simplificaiton as above.
706 Also copy constantness arrays. */
707 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
709 predicate new_predicate
;
710 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
711 next
= edge
->next_callee
;
713 gcc_checking_assert (edge
->inline_failed
);
716 new_predicate
= es
->predicate
->remap_after_duplication
718 if (new_predicate
== false && *es
->predicate
!= false)
719 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
720 edge_set_predicate (edge
, &new_predicate
);
722 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
724 remap_hint_predicate_after_duplication (&info
->loop_stride
,
727 /* If inliner or someone after inliner will ever start producing
728 non-trivial clones, we will get trouble with lack of information
729 about updating self sizes, because size vectors already contains
730 sizes of the calees. */
731 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
735 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
736 if (info
->loop_iterations
)
738 predicate p
= *info
->loop_iterations
;
739 info
->loop_iterations
= NULL
;
740 set_hint_predicate (&info
->loop_iterations
, p
);
742 if (info
->loop_stride
)
744 predicate p
= *info
->loop_stride
;
745 info
->loop_stride
= NULL
;
746 set_hint_predicate (&info
->loop_stride
, p
);
749 if (!dst
->inlined_to
)
750 ipa_update_overall_fn_summary (dst
);
754 /* Hook that is called by cgraph.c when a node is duplicated. */
757 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
758 struct cgraph_edge
*dst
,
759 class ipa_call_summary
*srcinfo
,
760 class ipa_call_summary
*info
)
762 new (info
) ipa_call_summary (*srcinfo
);
763 info
->predicate
= NULL
;
764 edge_set_predicate (dst
, srcinfo
->predicate
);
765 info
->param
= srcinfo
->param
.copy ();
766 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
768 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
769 - eni_size_weights
.call_cost
);
770 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
771 - eni_time_weights
.call_cost
);
775 /* Dump edge summaries associated to NODE and recursively to all clones.
779 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
780 class ipa_fn_summary
*info
)
782 struct cgraph_edge
*edge
;
783 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
785 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
786 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
790 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
791 indent
, "", callee
->name (), callee
->order
,
793 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
794 indent
, "", es
->loop_depth
, edge
->sreal_frequency ().to_double (),
795 es
->call_stmt_size
, es
->call_stmt_time
);
797 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
798 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
800 fprintf (f
, " callee size:%2i stack:%2i",
801 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
802 (int) s
->estimated_stack_size
);
806 fprintf (f
, " predicate: ");
807 es
->predicate
->dump (f
, info
->conds
);
811 if (es
->param
.exists ())
812 for (i
= 0; i
< (int) es
->param
.length (); i
++)
814 int prob
= es
->param
[i
].change_prob
;
817 fprintf (f
, "%*s op%i is compile time invariant\n",
819 else if (prob
!= REG_BR_PROB_BASE
)
820 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
821 prob
* 100.0 / REG_BR_PROB_BASE
);
823 if (!edge
->inline_failed
)
825 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
826 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
828 (int) ipa_get_stack_frame_offset (callee
),
829 (int) ss
->estimated_self_stack_size
);
830 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
833 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
835 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
836 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
840 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
844 fprintf (f
, "predicate: ");
845 es
->predicate
->dump (f
, info
->conds
);
854 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
856 if (node
->definition
)
858 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
859 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
864 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
865 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
866 fprintf (f
, " always_inline");
868 fprintf (f
, " inlinable");
869 if (s
->fp_expressions
)
870 fprintf (f
, " fp_expression");
871 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
872 fprintf (f
, " self size: %i\n", ss
->self_size
);
873 fprintf (f
, " global size: %i\n", ss
->size
);
874 fprintf (f
, " min size: %i\n", s
->min_size
);
875 fprintf (f
, " self stack: %i\n",
876 (int) ss
->estimated_self_stack_size
);
877 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
879 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
881 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
882 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
884 fprintf (f
, " size:%f, time:%f",
885 (double) e
->size
/ ipa_fn_summary::size_scale
,
886 e
->time
.to_double ());
887 if (e
->exec_predicate
!= true)
889 fprintf (f
, ", executed if:");
890 e
->exec_predicate
.dump (f
, s
->conds
, 0);
892 if (e
->exec_predicate
!= e
->nonconst_predicate
)
894 fprintf (f
, ", nonconst if:");
895 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
899 if (s
->loop_iterations
)
901 fprintf (f
, " loop iterations:");
902 s
->loop_iterations
->dump (f
, s
->conds
);
906 fprintf (f
, " loop stride:");
907 s
->loop_stride
->dump (f
, s
->conds
);
909 fprintf (f
, " calls:\n");
910 dump_ipa_call_summary (f
, 4, node
, s
);
914 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
919 ipa_debug_fn_summary (struct cgraph_node
*node
)
921 ipa_dump_fn_summary (stderr
, node
);
925 ipa_dump_fn_summaries (FILE *f
)
927 struct cgraph_node
*node
;
929 FOR_EACH_DEFINED_FUNCTION (node
)
930 if (!node
->inlined_to
)
931 ipa_dump_fn_summary (f
, node
);
934 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
935 boolean variable pointed to by DATA. */
938 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
941 bool *b
= (bool *) data
;
946 /* If OP refers to value of function parameter, return the corresponding
947 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
948 PARM_DECL) will be stored to *SIZE_P in that case too. */
951 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
954 /* SSA_NAME referring to parm default def? */
955 if (TREE_CODE (op
) == SSA_NAME
956 && SSA_NAME_IS_DEFAULT_DEF (op
)
957 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
960 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
961 return SSA_NAME_VAR (op
);
963 /* Non-SSA parm reference? */
964 if (TREE_CODE (op
) == PARM_DECL
)
966 bool modified
= false;
969 ao_ref_init (&refd
, op
);
970 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
971 mark_modified
, &modified
, NULL
, NULL
,
972 fbi
->aa_walk_budget
+ 1);
975 fbi
->aa_walk_budget
= 0;
981 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
988 /* If OP refers to value of function parameter, return the corresponding
989 parameter. Also traverse chains of SSA register assignments. If non-NULL,
990 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
991 stored to *SIZE_P in that case too. */
994 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
997 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1001 if (TREE_CODE (op
) == SSA_NAME
1002 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1003 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1004 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1005 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1010 /* If OP refers to a value of a function parameter or value loaded from an
1011 aggregate passed to a parameter (either by value or reference), return TRUE
1012 and store the number of the parameter to *INDEX_P, the access size into
1013 *SIZE_P, and information whether and how it has been loaded from an
1014 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1015 statement in which OP is used or loaded. */
1018 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1019 gimple
*stmt
, tree op
, int *index_p
,
1021 struct agg_position_info
*aggpos
)
1023 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1025 gcc_checking_assert (aggpos
);
1028 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1031 aggpos
->agg_contents
= false;
1032 aggpos
->by_ref
= false;
1036 if (TREE_CODE (op
) == SSA_NAME
)
1038 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1039 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1041 stmt
= SSA_NAME_DEF_STMT (op
);
1042 op
= gimple_assign_rhs1 (stmt
);
1043 if (!REFERENCE_CLASS_P (op
))
1044 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1048 aggpos
->agg_contents
= true;
1049 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1050 stmt
, op
, index_p
, &aggpos
->offset
,
1051 size_p
, &aggpos
->by_ref
);
1054 /* See if statement might disappear after inlining.
1055 0 - means not eliminated
1056 1 - half of statements goes away
1057 2 - for sure it is eliminated.
1058 We are not terribly sophisticated, basically looking for simple abstraction
1059 penalty wrappers. */
1062 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1064 enum gimple_code code
= gimple_code (stmt
);
1065 enum tree_code rhs_code
;
1075 if (gimple_num_ops (stmt
) != 2)
1078 rhs_code
= gimple_assign_rhs_code (stmt
);
1080 /* Casts of parameters, loads from parameters passed by reference
1081 and stores to return value or parameters are often free after
1082 inlining dua to SRA and further combining.
1083 Assume that half of statements goes away. */
1084 if (CONVERT_EXPR_CODE_P (rhs_code
)
1085 || rhs_code
== VIEW_CONVERT_EXPR
1086 || rhs_code
== ADDR_EXPR
1087 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1089 tree rhs
= gimple_assign_rhs1 (stmt
);
1090 tree lhs
= gimple_assign_lhs (stmt
);
1091 tree inner_rhs
= get_base_address (rhs
);
1092 tree inner_lhs
= get_base_address (lhs
);
1093 bool rhs_free
= false;
1094 bool lhs_free
= false;
1101 /* Reads of parameter are expected to be free. */
1102 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1104 /* Match expressions of form &this->field. Those will most likely
1105 combine with something upstream after inlining. */
1106 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1108 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1109 if (TREE_CODE (op
) == PARM_DECL
)
1111 else if (TREE_CODE (op
) == MEM_REF
1112 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1117 /* When parameter is not SSA register because its address is taken
1118 and it is just copied into one, the statement will be completely
1119 free after inlining (we will copy propagate backward). */
1120 if (rhs_free
&& is_gimple_reg (lhs
))
1123 /* Reads of parameters passed by reference
1124 expected to be free (i.e. optimized out after inlining). */
1125 if (TREE_CODE (inner_rhs
) == MEM_REF
1126 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1129 /* Copying parameter passed by reference into gimple register is
1130 probably also going to copy propagate, but we can't be quite
1132 if (rhs_free
&& is_gimple_reg (lhs
))
1135 /* Writes to parameters, parameters passed by value and return value
1136 (either dirrectly or passed via invisible reference) are free.
1138 TODO: We ought to handle testcase like
1139 struct a {int a,b;};
1141 retrurnsturct (void)
1147 This translate into:
1162 For that we either need to copy ipa-split logic detecting writes
1164 if (TREE_CODE (inner_lhs
) == PARM_DECL
1165 || TREE_CODE (inner_lhs
) == RESULT_DECL
1166 || (TREE_CODE (inner_lhs
) == MEM_REF
1167 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1169 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1170 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1171 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1173 0))) == RESULT_DECL
))))
1176 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1178 if (lhs_free
&& rhs_free
)
1187 /* Analyze EXPR if it represents a series of simple operations performed on
1188 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1189 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1190 Type of the parameter or load from an aggregate via the parameter is
1191 stored in *TYPE_P. Operations on the parameter are recorded to
1192 PARAM_OPS_P if it is not NULL. */
1195 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1196 gimple
*stmt
, tree expr
,
1197 int *index_p
, tree
*type_p
,
1198 struct agg_position_info
*aggpos
,
1199 expr_eval_ops
*param_ops_p
= NULL
)
1201 int op_limit
= PARAM_VALUE (PARAM_IPA_MAX_PARAM_EXPR_OPS
);
1205 *param_ops_p
= NULL
;
1209 expr_eval_op eval_op
;
1211 unsigned cst_count
= 0;
1213 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1216 tree type
= TREE_TYPE (expr
);
1218 if (aggpos
->agg_contents
)
1220 /* Stop if containing bit-field. */
1221 if (TREE_CODE (expr
) == BIT_FIELD_REF
1222 || contains_bitfld_component_ref_p (expr
))
1230 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1233 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1236 switch (gimple_assign_rhs_class (stmt
))
1238 case GIMPLE_SINGLE_RHS
:
1239 expr
= gimple_assign_rhs1 (stmt
);
1242 case GIMPLE_UNARY_RHS
:
1246 case GIMPLE_BINARY_RHS
:
1250 case GIMPLE_TERNARY_RHS
:
1258 /* Stop if expression is too complex. */
1259 if (op_count
++ == op_limit
)
1264 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1265 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1266 eval_op
.val
[0] = NULL_TREE
;
1267 eval_op
.val
[1] = NULL_TREE
;
1271 for (unsigned i
= 0; i
< rhs_count
; i
++)
1273 tree op
= gimple_op (stmt
, i
+ 1);
1275 gcc_assert (op
&& !TYPE_P (op
));
1276 if (is_gimple_ip_invariant (op
))
1278 if (++cst_count
== rhs_count
)
1281 eval_op
.val
[cst_count
- 1] = op
;
1285 /* Found a non-constant operand, and record its index in rhs
1292 /* Found more than one non-constant operands. */
1298 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1301 /* Failed to decompose, free resource and return. */
1304 vec_free (*param_ops_p
);
1309 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1310 predicates to the CFG edges. */
1313 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1314 class ipa_fn_summary
*summary
,
1320 struct agg_position_info aggpos
;
1321 enum tree_code code
, inverted_code
;
1326 expr_eval_ops param_ops
;
1328 last
= last_stmt (bb
);
1329 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1331 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1333 op
= gimple_cond_lhs (last
);
1335 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1338 code
= gimple_cond_code (last
);
1339 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1341 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1343 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1344 ? code
: inverted_code
);
1345 /* invert_tree_comparison will return ERROR_MARK on FP
1346 comparsions that are not EQ/NE instead of returning proper
1347 unordered one. Be sure it is not confused with NON_CONSTANT.
1349 And if the edge's target is the final block of diamond CFG graph
1350 of this conditional statement, we do not need to compute
1351 predicate for the edge because the final block's predicate must
1352 be at least as that of the first block of the statement. */
1353 if (this_code
!= ERROR_MARK
1354 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1357 = add_condition (summary
, index
, param_type
, &aggpos
,
1358 this_code
, gimple_cond_rhs (last
), param_ops
);
1359 e
->aux
= edge_predicate_pool
.allocate ();
1360 *(predicate
*) e
->aux
= p
;
1363 vec_free (param_ops
);
1366 if (TREE_CODE (op
) != SSA_NAME
)
1369 if (builtin_constant_p (op))
1373 Here we can predicate nonconstant_code. We can't
1374 really handle constant_code since we have no predicate
1375 for this and also the constant code is not known to be
1376 optimized away when inliner doen't see operand is constant.
1377 Other optimizers might think otherwise. */
1378 if (gimple_cond_code (last
) != NE_EXPR
1379 || !integer_zerop (gimple_cond_rhs (last
)))
1381 set_stmt
= SSA_NAME_DEF_STMT (op
);
1382 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1383 || gimple_call_num_args (set_stmt
) != 1)
1385 op2
= gimple_call_arg (set_stmt
, 0);
1386 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1388 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1390 predicate p
= add_condition (summary
, index
, param_type
, &aggpos
,
1391 predicate::is_not_constant
, NULL_TREE
);
1392 e
->aux
= edge_predicate_pool
.allocate ();
1393 *(predicate
*) e
->aux
= p
;
1398 /* If BB ends by a switch we can turn into predicates, attach corresponding
1399 predicates to the CFG edges. */
1402 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1403 class ipa_fn_summary
*summary
,
1409 struct agg_position_info aggpos
;
1415 expr_eval_ops param_ops
;
1417 lastg
= last_stmt (bb
);
1418 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1420 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1421 op
= gimple_switch_index (last
);
1422 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1426 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1427 tree type
= TREE_TYPE (op
);
1428 int bound_limit
= PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS
);
1429 int bound_count
= 0;
1430 wide_int vr_wmin
, vr_wmax
;
1431 value_range_kind vr_type
= get_range_info (op
, &vr_wmin
, &vr_wmax
);
1433 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1435 e
->aux
= edge_predicate_pool
.allocate ();
1436 *(predicate
*) e
->aux
= false;
1439 e
= gimple_switch_edge (cfun
, last
, 0);
1440 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1441 default case if its target basic block is in convergence point of all
1442 switch cases, which can be determined by checking whether it
1443 post-dominates the switch statement. */
1444 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1445 bound_count
= INT_MAX
;
1447 n
= gimple_switch_num_labels (last
);
1448 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1450 tree cl
= gimple_switch_label (last
, case_idx
);
1451 tree min
= CASE_LOW (cl
);
1452 tree max
= CASE_HIGH (cl
);
1455 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1457 /* The case value might not have same type as switch expression,
1458 extend the value based on the expression type. */
1459 if (TREE_TYPE (min
) != type
)
1460 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1464 else if (TREE_TYPE (max
) != type
)
1465 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1467 /* The case's target basic block is in convergence point of all switch
1468 cases, its predicate should be at least as that of the switch
1470 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1472 else if (min
== max
)
1473 p
= add_condition (summary
, index
, param_type
, &aggpos
, EQ_EXPR
,
1478 p1
= add_condition (summary
, index
, param_type
, &aggpos
, GE_EXPR
,
1480 p2
= add_condition (summary
, index
, param_type
, &aggpos
, LE_EXPR
,
1484 *(class predicate
*) e
->aux
1485 = p
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1487 /* If there are too many disjoint case ranges, predicate for default
1488 case might become too complicated. So add a limit here. */
1489 if (bound_count
> bound_limit
)
1492 bool new_range
= true;
1494 if (!ranges
.is_empty ())
1496 wide_int curr_wmin
= wi::to_wide (min
);
1497 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1499 /* Merge case ranges if they are continuous. */
1500 if (curr_wmin
== last_wmax
+ 1)
1502 else if (vr_type
== VR_ANTI_RANGE
)
1504 /* If two disjoint case ranges can be connected by anti-range
1505 of switch index, combine them to one range. */
1506 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1507 vr_type
= VR_UNDEFINED
;
1508 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1513 /* Create/extend a case range. And we count endpoints of range set,
1514 this number nearly equals to number of conditions that we will create
1515 for predicate of default case. */
1518 bound_count
+= (min
== max
) ? 1 : 2;
1519 ranges
.safe_push (std::make_pair (min
, max
));
1523 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1524 ranges
.last ().second
= max
;
1528 e
= gimple_switch_edge (cfun
, last
, 0);
1529 if (bound_count
> bound_limit
)
1531 *(class predicate
*) e
->aux
= true;
1532 vec_free (param_ops
);
1536 predicate p_seg
= true;
1537 predicate p_all
= false;
1539 if (vr_type
!= VR_RANGE
)
1541 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1542 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1545 /* Construct predicate to represent default range set that is negation of
1546 all case ranges. Case range is classified as containing single/non-single
1547 values. Suppose a piece of case ranges in the following.
1549 [D1...D2] [S1] ... [Sn] [D3...D4]
1551 To represent default case's range sets between two non-single value
1552 case ranges (From D2 to D3), we construct predicate as:
1554 D2 < x < D3 && x != S1 && ... && x != Sn
1556 for (size_t i
= 0; i
< ranges
.length (); i
++)
1558 tree min
= ranges
[i
].first
;
1559 tree max
= ranges
[i
].second
;
1562 p_seg
&= add_condition (summary
, index
, param_type
, &aggpos
, NE_EXPR
,
1566 /* Do not create sub-predicate for range that is beyond low bound
1568 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1570 p_seg
&= add_condition (summary
, index
, param_type
, &aggpos
,
1571 LT_EXPR
, min
, param_ops
);
1572 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1575 /* Do not create sub-predicate for range that is beyond up bound
1577 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1583 p_seg
= add_condition (summary
, index
, param_type
, &aggpos
, GT_EXPR
,
1588 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1589 *(class predicate
*) e
->aux
1590 = p_all
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1592 vec_free (param_ops
);
1596 /* For each BB in NODE attach to its AUX pointer predicate under
1597 which it is executable. */
1600 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1601 struct cgraph_node
*node
,
1602 class ipa_fn_summary
*summary
)
1604 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
1608 FOR_EACH_BB_FN (bb
, my_function
)
1610 set_cond_stmt_execution_predicate (fbi
, summary
, bb
);
1611 set_switch_stmt_execution_predicate (fbi
, summary
, bb
);
1614 /* Entry block is always executable. */
1615 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1616 = edge_predicate_pool
.allocate ();
1617 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1619 /* A simple dataflow propagation of predicates forward in the CFG.
1620 TODO: work in reverse postorder. */
1624 FOR_EACH_BB_FN (bb
, my_function
)
1626 predicate p
= false;
1629 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1633 predicate this_bb_predicate
1634 = *(predicate
*) e
->src
->aux
;
1636 this_bb_predicate
&= (*(class predicate
*) e
->aux
);
1637 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1644 basic_block pdom_bb
;
1649 bb
->aux
= edge_predicate_pool
.allocate ();
1650 *((predicate
*) bb
->aux
) = p
;
1652 else if (p
!= *(predicate
*) bb
->aux
)
1654 /* This OR operation is needed to ensure monotonous data flow
1655 in the case we hit the limit on number of clauses and the
1656 and/or operations above give approximate answers. */
1657 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1658 if (p
!= *(predicate
*) bb
->aux
)
1661 *((predicate
*) bb
->aux
) = p
;
1665 /* For switch/if statement, we can OR-combine predicates of all
1666 its cases/branches to get predicate for basic block in their
1667 convergence point, but sometimes this will generate very
1668 complicated predicate. Actually, we can get simplified
1669 predicate in another way by using the fact that predicate
1670 for a basic block must also hold true for its post dominators.
1671 To be specific, basic block in convergence point of
1672 conditional statement should include predicate of the
1674 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1675 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1677 else if (!pdom_bb
->aux
)
1680 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1681 *((predicate
*) pdom_bb
->aux
) = p
;
1683 else if (p
!= *(predicate
*) pdom_bb
->aux
)
1685 p
= p
.or_with (summary
->conds
, *(predicate
*)pdom_bb
->aux
);
1686 if (p
!= *(predicate
*) pdom_bb
->aux
)
1689 *((predicate
*) pdom_bb
->aux
) = p
;
1698 /* Return predicate specifying when the STMT might have result that is not
1699 a compile time constant. */
1702 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1703 class ipa_fn_summary
*summary
,
1705 vec
<predicate
> nonconstant_names
)
1710 while (UNARY_CLASS_P (expr
))
1711 expr
= TREE_OPERAND (expr
, 0);
1713 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1714 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1715 return add_condition (summary
, index
, TREE_TYPE (parm
), NULL
,
1716 predicate::changed
, NULL_TREE
);
1717 if (is_gimple_min_invariant (expr
))
1719 if (TREE_CODE (expr
) == SSA_NAME
)
1720 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
1721 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1724 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1725 TREE_OPERAND (expr
, 0),
1731 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1732 TREE_OPERAND (expr
, 1),
1734 return p1
.or_with (summary
->conds
, p2
);
1736 else if (TREE_CODE (expr
) == COND_EXPR
)
1739 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1740 TREE_OPERAND (expr
, 0),
1746 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1747 TREE_OPERAND (expr
, 1),
1751 p1
= p1
.or_with (summary
->conds
, p2
);
1752 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1753 TREE_OPERAND (expr
, 2),
1755 return p2
.or_with (summary
->conds
, p1
);
1757 else if (TREE_CODE (expr
) == CALL_EXPR
)
1768 /* Return predicate specifying when the STMT might have result that is not
1769 a compile time constant. */
1772 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1773 class ipa_fn_summary
*summary
,
1775 vec
<predicate
> nonconstant_names
)
1780 tree param_type
= NULL_TREE
;
1781 predicate op_non_const
;
1784 struct agg_position_info aggpos
;
1786 /* What statments might be optimized away
1787 when their arguments are constant. */
1788 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1789 && gimple_code (stmt
) != GIMPLE_COND
1790 && gimple_code (stmt
) != GIMPLE_SWITCH
1791 && (gimple_code (stmt
) != GIMPLE_CALL
1792 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1795 /* Stores will stay anyway. */
1796 if (gimple_store_p (stmt
))
1799 is_load
= gimple_assign_load_p (stmt
);
1801 /* Loads can be optimized when the value is known. */
1804 tree op
= gimple_assign_rhs1 (stmt
);
1805 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
1812 /* See if we understand all operands before we start
1813 adding conditionals. */
1814 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1816 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1817 /* For arguments we can build a condition. */
1818 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1820 if (TREE_CODE (use
) != SSA_NAME
)
1822 /* If we know when operand is constant,
1823 we still can say something useful. */
1824 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1831 add_condition (summary
, base_index
, param_type
, &aggpos
,
1832 predicate::changed
, NULL_TREE
);
1834 op_non_const
= false;
1835 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1837 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1840 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1842 if (index
!= base_index
)
1843 p
= add_condition (summary
, index
, TREE_TYPE (parm
), NULL
,
1844 predicate::changed
, NULL_TREE
);
1849 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1850 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
1852 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1853 && gimple_op (stmt
, 0)
1854 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1855 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1857 return op_non_const
;
1860 struct record_modified_bb_info
1867 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1868 probability how often it changes between USE_BB.
1869 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1870 is in different loop nest, we can do better.
1871 This is all just estimate. In theory we look for minimal cut separating
1872 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1876 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1878 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
);
1879 if (l
&& l
->header
->count
< init_bb
->count
)
1884 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1885 set except for info->stmt. */
1888 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1890 struct record_modified_bb_info
*info
=
1891 (struct record_modified_bb_info
*) data
;
1892 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1894 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
1896 bitmap_set_bit (info
->bb_set
,
1897 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1898 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1900 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1901 gimple_bb (info
->stmt
))->index
);
1904 fprintf (dump_file
, " Param ");
1905 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1906 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1907 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1909 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1910 gimple_bb (info
->stmt
))->index
);
1911 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
1916 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1917 will change since last invocation of STMT.
1919 Value 0 is reserved for compile time invariants.
1920 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1921 ought to be REG_BR_PROB_BASE / estimated_iters. */
1924 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
1926 tree op
= gimple_call_arg (stmt
, i
);
1927 basic_block bb
= gimple_bb (stmt
);
1929 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
1930 op
= TREE_OPERAND (op
, 0);
1932 tree base
= get_base_address (op
);
1934 /* Global invariants never change. */
1935 if (is_gimple_min_invariant (base
))
1938 /* We would have to do non-trivial analysis to really work out what
1939 is the probability of value to change (i.e. when init statement
1940 is in a sibling loop of the call).
1942 We do an conservative estimate: when call is executed N times more often
1943 than the statement defining value, we take the frequency 1/N. */
1944 if (TREE_CODE (base
) == SSA_NAME
)
1946 profile_count init_count
;
1948 if (!bb
->count
.nonzero_p ())
1949 return REG_BR_PROB_BASE
;
1951 if (SSA_NAME_IS_DEFAULT_DEF (base
))
1952 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1954 init_count
= get_minimal_bb
1955 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
1956 gimple_bb (stmt
))->count
;
1958 if (init_count
< bb
->count
)
1959 return MAX ((init_count
.to_sreal_scale (bb
->count
)
1960 * REG_BR_PROB_BASE
).to_int (), 1);
1961 return REG_BR_PROB_BASE
;
1966 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
1967 struct record_modified_bb_info info
;
1968 tree init
= ctor_for_folding (base
);
1970 if (init
!= error_mark_node
)
1972 if (!bb
->count
.nonzero_p ())
1973 return REG_BR_PROB_BASE
;
1976 fprintf (dump_file
, " Analyzing param change probability of ");
1977 print_generic_expr (dump_file
, op
, TDF_SLIM
);
1978 fprintf (dump_file
, "\n");
1980 ao_ref_init (&refd
, op
);
1983 info
.bb_set
= BITMAP_ALLOC (NULL
);
1985 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
1986 NULL
, NULL
, fbi
->aa_walk_budget
);
1987 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
1992 fprintf (dump_file
, " Ran out of AA walking budget.\n");
1994 fprintf (dump_file
, " Set in same BB as used.\n");
1996 BITMAP_FREE (info
.bb_set
);
1997 return REG_BR_PROB_BASE
;
2002 /* Lookup the most frequent update of the value and believe that
2003 it dominates all the other; precise analysis here is difficult. */
2004 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2005 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2008 fprintf (dump_file
, " Set with count ");
2009 max
.dump (dump_file
);
2010 fprintf (dump_file
, " and used with count ");
2011 bb
->count
.dump (dump_file
);
2012 fprintf (dump_file
, " freq %f\n",
2013 max
.to_sreal_scale (bb
->count
).to_double ());
2016 BITMAP_FREE (info
.bb_set
);
2017 if (max
< bb
->count
)
2018 return MAX ((max
.to_sreal_scale (bb
->count
)
2019 * REG_BR_PROB_BASE
).to_int (), 1);
2020 return REG_BR_PROB_BASE
;
2024 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2025 sub-graph and if the predicate the condition depends on is known. If so,
2026 return true and store the pointer the predicate in *P. */
2029 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2030 ipa_fn_summary
*summary
, basic_block bb
,
2032 vec
<predicate
> nonconstant_names
)
2036 basic_block first_bb
= NULL
;
2039 if (single_pred_p (bb
))
2045 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2047 if (single_succ_p (e
->src
))
2049 if (!single_pred_p (e
->src
))
2052 first_bb
= single_pred (e
->src
);
2053 else if (single_pred (e
->src
) != first_bb
)
2060 else if (e
->src
!= first_bb
)
2068 stmt
= last_stmt (first_bb
);
2070 || gimple_code (stmt
) != GIMPLE_COND
2071 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2074 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
,
2075 gimple_cond_lhs (stmt
),
2083 /* Given a PHI statement in a function described by inline properties SUMMARY
2084 and *P being the predicate describing whether the selected PHI argument is
2085 known, store a predicate for the result of the PHI statement into
2086 NONCONSTANT_NAMES, if possible. */
2089 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2091 vec
<predicate
> nonconstant_names
)
2095 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2097 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2098 if (!is_gimple_min_invariant (arg
))
2100 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2101 *p
= p
->or_with (summary
->conds
,
2102 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2110 fprintf (dump_file
, "\t\tphi predicate: ");
2111 p
->dump (dump_file
, summary
->conds
);
2113 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2116 /* For a typical usage of __builtin_expect (a<b, 1), we
2117 may introduce an extra relation stmt:
2118 With the builtin, we have
2121 t3 = __builtin_expect (t2, 1);
2124 Without the builtin, we have
2127 This affects the size/time estimation and may have
2128 an impact on the earlier inlining.
2129 Here find this pattern and fix it up later. */
2132 find_foldable_builtin_expect (basic_block bb
)
2134 gimple_stmt_iterator bsi
;
2136 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2138 gimple
*stmt
= gsi_stmt (bsi
);
2139 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2140 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2141 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2143 tree var
= gimple_call_lhs (stmt
);
2144 tree arg
= gimple_call_arg (stmt
, 0);
2145 use_operand_p use_p
;
2152 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
2154 while (TREE_CODE (arg
) == SSA_NAME
)
2156 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2157 if (!is_gimple_assign (stmt_tmp
))
2159 switch (gimple_assign_rhs_code (stmt_tmp
))
2178 arg
= gimple_assign_rhs1 (stmt_tmp
);
2181 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2182 && gimple_code (use_stmt
) == GIMPLE_COND
)
2189 /* Return true when the basic blocks contains only clobbers followed by RESX.
2190 Such BBs are kept around to make removal of dead stores possible with
2191 presence of EH and will be optimized out by optimize_clobbers later in the
2194 NEED_EH is used to recurse in case the clobber has non-EH predecestors
2195 that can be clobber only, too.. When it is false, the RESX is not necessary
2196 on the end of basic block. */
2199 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
2201 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2207 if (gsi_end_p (gsi
))
2209 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2213 else if (!single_succ_p (bb
))
2216 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2218 gimple
*stmt
= gsi_stmt (gsi
);
2219 if (is_gimple_debug (stmt
))
2221 if (gimple_clobber_p (stmt
))
2223 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2228 /* See if all predecestors are either throws or clobber only BBs. */
2229 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2230 if (!(e
->flags
& EDGE_EH
)
2231 && !clobber_only_eh_bb_p (e
->src
, false))
2237 /* Return true if STMT compute a floating point expression that may be affected
2238 by -ffast-math and similar flags. */
2241 fp_expression_p (gimple
*stmt
)
2246 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2247 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2252 /* Analyze function body for NODE.
2253 EARLY indicates run from early optimization pipeline. */
2256 analyze_function_body (struct cgraph_node
*node
, bool early
)
2258 sreal time
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
);
2259 /* Estimate static overhead for function prologue/epilogue and alignment. */
2260 int size
= PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
);
2261 /* Benefits are scaled by probability of elimination that is in range
2264 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2266 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2267 predicate bb_predicate
;
2268 struct ipa_func_body_info fbi
;
2269 vec
<predicate
> nonconstant_names
= vNULL
;
2272 gimple
*fix_builtin_expect_stmt
;
2274 gcc_assert (my_function
&& my_function
->cfg
);
2275 gcc_assert (cfun
== my_function
);
2277 memset(&fbi
, 0, sizeof(fbi
));
2278 vec_free (info
->conds
);
2280 vec_free (info
->size_time_table
);
2281 info
->size_time_table
= NULL
;
2283 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2284 so we can produce proper inline hints.
2286 When optimizing and analyzing for early inliner, initialize node params
2287 so we can produce correct BB predicates. */
2289 if (opt_for_fn (node
->decl
, optimize
))
2291 calculate_dominance_info (CDI_DOMINATORS
);
2292 calculate_dominance_info (CDI_POST_DOMINATORS
);
2294 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2297 ipa_check_create_node_params ();
2298 ipa_initialize_node_params (node
);
2301 if (ipa_node_params_sum
)
2304 fbi
.info
= IPA_NODE_REF (node
);
2305 fbi
.bb_infos
= vNULL
;
2306 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2307 fbi
.param_count
= count_formal_params (node
->decl
);
2308 fbi
.aa_walk_budget
= PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS
);
2310 nonconstant_names
.safe_grow_cleared
2311 (SSANAMES (my_function
)->length ());
2316 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2319 /* When we run into maximal number of entries, we assign everything to the
2320 constant truth case. Be sure to have it in list. */
2321 bb_predicate
= true;
2322 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2324 bb_predicate
= predicate::not_inlined ();
2325 info
->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS
)
2326 * ipa_fn_summary::size_scale
,
2327 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME
),
2332 compute_bb_predicates (&fbi
, node
, info
);
2333 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2334 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2335 for (n
= 0; n
< nblocks
; n
++)
2337 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2338 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2339 if (clobber_only_eh_bb_p (bb
))
2341 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2342 fprintf (dump_file
, "\n Ignoring BB %i;"
2343 " it will be optimized away by cleanup_clobbers\n",
2348 /* TODO: Obviously predicates can be propagated down across CFG. */
2352 bb_predicate
= *(predicate
*) bb
->aux
;
2354 bb_predicate
= false;
2357 bb_predicate
= true;
2359 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2361 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2362 bb_predicate
.dump (dump_file
, info
->conds
);
2365 if (fbi
.info
&& nonconstant_names
.exists ())
2367 predicate phi_predicate
;
2368 bool first_phi
= true;
2370 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2374 && !phi_result_unknown_predicate (&fbi
, info
, bb
,
2379 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2381 fprintf (dump_file
, " ");
2382 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2384 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2389 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2391 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2392 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2394 gimple
*stmt
= gsi_stmt (bsi
);
2395 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2396 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2398 predicate will_be_nonconstant
;
2400 /* This relation stmt should be folded after we remove
2401 buildin_expect call. Adjust the cost here. */
2402 if (stmt
== fix_builtin_expect_stmt
)
2408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2410 fprintf (dump_file
, " ");
2411 print_gimple_stmt (dump_file
, stmt
, 0);
2412 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2413 freq
.to_double (), this_size
,
2417 if (is_gimple_call (stmt
)
2418 && !gimple_call_internal_p (stmt
))
2420 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2421 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2423 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2424 resolved as constant. We however don't want to optimize
2425 out the cgraph edges. */
2426 if (nonconstant_names
.exists ()
2427 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2428 && gimple_call_lhs (stmt
)
2429 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2431 predicate false_p
= false;
2432 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2435 if (ipa_node_params_sum
)
2437 int count
= gimple_call_num_args (stmt
);
2441 es
->param
.safe_grow_cleared (count
);
2442 for (i
= 0; i
< count
; i
++)
2444 int prob
= param_change_prob (&fbi
, stmt
, i
);
2445 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2446 es
->param
[i
].change_prob
= prob
;
2450 es
->call_stmt_size
= this_size
;
2451 es
->call_stmt_time
= this_time
;
2452 es
->loop_depth
= bb_loop_depth (bb
);
2453 edge_set_predicate (edge
, &bb_predicate
);
2454 if (edge
->speculative
)
2456 cgraph_edge
*direct
, *indirect
;
2458 edge
->speculative_call_info (direct
, indirect
, ref
);
2459 gcc_assert (direct
== edge
);
2460 ipa_call_summary
*es2
2461 = ipa_call_summaries
->get_create (indirect
);
2462 ipa_call_summaries
->duplicate (edge
, indirect
,
2467 /* TODO: When conditional jump or swithc is known to be constant, but
2468 we did not translate it into the predicates, we really can account
2469 just maximum of the possible paths. */
2472 = will_be_nonconstant_predicate (&fbi
, info
,
2473 stmt
, nonconstant_names
);
2475 will_be_nonconstant
= true;
2476 if (this_time
|| this_size
)
2478 sreal final_time
= (sreal
)this_time
* freq
;
2480 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2481 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2483 "\t\t50%% will be eliminated by inlining\n");
2484 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2485 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2487 class predicate p
= bb_predicate
& will_be_nonconstant
;
2489 /* We can ignore statement when we proved it is never going
2490 to happen, but we cannot do that for call statements
2491 because edges are accounted specially. */
2493 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2499 /* We account everything but the calls. Calls have their own
2500 size/time info attached to cgraph edges. This is necessary
2501 in order to make the cost disappear after inlining. */
2502 if (!is_gimple_call (stmt
))
2506 predicate ip
= bb_predicate
& predicate::not_inlined ();
2507 info
->account_size_time (this_size
* prob
,
2508 (final_time
* prob
) / 2, ip
,
2512 info
->account_size_time (this_size
* (2 - prob
),
2513 (final_time
* (2 - prob
) / 2),
2518 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2520 info
->fp_expressions
= true;
2522 fprintf (dump_file
, " fp_expression set\n");
2526 /* Account cost of address calculations in the statements. */
2527 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2529 for (tree op
= gimple_op (stmt
, i
);
2530 op
&& handled_component_p (op
);
2531 op
= TREE_OPERAND (op
, 0))
2532 if ((TREE_CODE (op
) == ARRAY_REF
2533 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2534 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2536 predicate p
= bb_predicate
;
2538 p
= p
& will_be_nonconstant_expr_predicate
2539 (&fbi
, info
, TREE_OPERAND (op
, 1),
2547 "\t\tAccounting address calculation.\n");
2548 info
->account_size_time (ipa_fn_summary::size_scale
,
2560 if (nonconstant_names
.exists () && !early
)
2563 predicate loop_iterations
= true;
2564 predicate loop_stride
= true;
2566 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2567 flow_loops_dump (dump_file
, NULL
, 0);
2569 FOR_EACH_LOOP (loop
, 0)
2574 class tree_niter_desc niter_desc
;
2575 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2577 exits
= get_loop_exit_edges (loop
);
2578 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2579 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2580 && !is_gimple_min_invariant (niter_desc
.niter
))
2582 predicate will_be_nonconstant
2583 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2586 if (will_be_nonconstant
!= true)
2587 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2588 if (will_be_nonconstant
!= true
2589 && will_be_nonconstant
!= false)
2590 /* This is slightly inprecise. We may want to represent each
2591 loop with independent predicate. */
2592 loop_iterations
&= will_be_nonconstant
;
2597 /* To avoid quadratic behavior we analyze stride predicates only
2598 with respect to the containing loop. Thus we simply iterate
2599 over all defs in the outermost loop body. */
2600 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2601 loop
!= NULL
; loop
= loop
->next
)
2603 basic_block
*body
= get_loop_body (loop
);
2604 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2606 gimple_stmt_iterator gsi
;
2607 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2608 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2611 gimple
*stmt
= gsi_stmt (gsi
);
2613 if (!is_gimple_assign (stmt
))
2616 tree def
= gimple_assign_lhs (stmt
);
2617 if (TREE_CODE (def
) != SSA_NAME
)
2621 if (!simple_iv (loop_containing_stmt (stmt
),
2622 loop_containing_stmt (stmt
),
2624 || is_gimple_min_invariant (iv
.step
))
2627 predicate will_be_nonconstant
2628 = will_be_nonconstant_expr_predicate (&fbi
, info
, iv
.step
,
2630 if (will_be_nonconstant
!= true)
2631 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2632 if (will_be_nonconstant
!= true
2633 && will_be_nonconstant
!= false)
2634 /* This is slightly inprecise. We may want to represent
2635 each loop with independent predicate. */
2636 loop_stride
= loop_stride
& will_be_nonconstant
;
2641 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2642 set_hint_predicate (&s
->loop_iterations
, loop_iterations
);
2643 set_hint_predicate (&s
->loop_stride
, loop_stride
);
2646 FOR_ALL_BB_FN (bb
, my_function
)
2652 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2654 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2657 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2661 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2662 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
2664 ss
->self_size
= size
;
2665 nonconstant_names
.release ();
2666 ipa_release_body_info (&fbi
);
2667 if (opt_for_fn (node
->decl
, optimize
))
2670 loop_optimizer_finalize ();
2671 else if (!ipa_edge_args_sum
)
2672 ipa_free_all_node_params ();
2673 free_dominance_info (CDI_DOMINATORS
);
2674 free_dominance_info (CDI_POST_DOMINATORS
);
2678 fprintf (dump_file
, "\n");
2679 ipa_dump_fn_summary (dump_file
, node
);
2684 /* Compute function summary.
2685 EARLY is true when we compute parameters during early opts. */
2688 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2690 HOST_WIDE_INT self_stack_size
;
2691 struct cgraph_edge
*e
;
2693 gcc_assert (!node
->inlined_to
);
2695 if (!ipa_fn_summaries
)
2696 ipa_fn_summary_alloc ();
2698 /* Create a new ipa_fn_summary. */
2699 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
2700 ipa_fn_summaries
->remove (node
);
2701 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2702 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
2704 /* Estimate the stack size for the function if we're optimizing. */
2705 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2706 ? estimated_stack_frame_size (node
) : 0;
2707 size_info
->estimated_self_stack_size
= self_stack_size
;
2708 info
->estimated_stack_size
= self_stack_size
;
2710 if (node
->thunk
.thunk_p
)
2712 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
2715 node
->can_change_signature
= false;
2716 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2717 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2718 info
->account_size_time (ipa_fn_summary::size_scale
2720 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS
),
2722 (PARAM_UNINLINED_FUNCTION_THUNK_TIME
), t
, t
);
2723 t
= predicate::not_inlined ();
2724 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2725 ipa_update_overall_fn_summary (node
);
2726 size_info
->self_size
= size_info
->size
;
2727 if (stdarg_p (TREE_TYPE (node
->decl
)))
2729 info
->inlinable
= false;
2730 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2733 info
->inlinable
= true;
2737 /* Even is_gimple_min_invariant rely on current_function_decl. */
2738 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2740 /* Can this function be inlined at all? */
2741 if (!opt_for_fn (node
->decl
, optimize
)
2742 && !lookup_attribute ("always_inline",
2743 DECL_ATTRIBUTES (node
->decl
)))
2744 info
->inlinable
= false;
2746 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2748 /* Type attributes can use parameter indices to describe them. */
2749 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2750 /* Likewise for #pragma omp declare simd functions or functions
2751 with simd attribute. */
2752 || lookup_attribute ("omp declare simd",
2753 DECL_ATTRIBUTES (node
->decl
)))
2754 node
->can_change_signature
= false;
2757 /* Otherwise, inlinable functions always can change signature. */
2758 if (info
->inlinable
)
2759 node
->can_change_signature
= true;
2762 /* Functions calling builtin_apply cannot change signature. */
2763 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2765 tree
cdecl = e
->callee
->decl
;
2766 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
2767 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
2770 node
->can_change_signature
= !e
;
2773 analyze_function_body (node
, early
);
2776 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2777 if (e
->callee
->comdat_local_p ())
2779 node
->calls_comdat_local
= (e
!= NULL
);
2781 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2782 size_info
->size
= size_info
->self_size
;
2783 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
2785 /* Code above should compute exactly the same result as
2786 ipa_update_overall_fn_summary but because computation happens in
2787 different order the roundoff errors result in slight changes. */
2788 ipa_update_overall_fn_summary (node
);
2789 /* In LTO mode we may have speculative edges set. */
2790 gcc_assert (in_lto_p
|| size_info
->size
== size_info
->self_size
);
2794 /* Compute parameters of functions used by inliner using
2795 current_function_decl. */
2798 compute_fn_summary_for_current (void)
2800 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2804 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2805 KNOWN_CONTEXTS and KNOWN_AGGS. */
2808 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2809 int *size
, int *time
,
2810 vec
<tree
> known_vals
,
2811 vec
<ipa_polymorphic_call_context
> known_contexts
,
2812 vec
<ipa_agg_jump_function_p
> known_aggs
)
2815 struct cgraph_node
*callee
;
2816 class ipa_fn_summary
*isummary
;
2817 enum availability avail
;
2820 if (!known_vals
.exists () && !known_contexts
.exists ())
2822 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2825 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2826 known_aggs
, &speculative
);
2827 if (!target
|| speculative
)
2830 /* Account for difference in cost between indirect and direct calls. */
2831 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2832 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2833 gcc_checking_assert (*time
>= 0);
2834 gcc_checking_assert (*size
>= 0);
2836 callee
= cgraph_node::get (target
);
2837 if (!callee
|| !callee
->definition
)
2839 callee
= callee
->function_symbol (&avail
);
2840 if (avail
< AVAIL_AVAILABLE
)
2842 isummary
= ipa_fn_summaries
->get (callee
);
2843 if (isummary
== NULL
)
2846 return isummary
->inlinable
;
2849 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2850 handle edge E with probability PROB.
2851 Set HINTS if edge may be devirtualized.
2852 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2856 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2859 vec
<tree
> known_vals
,
2860 vec
<ipa_polymorphic_call_context
> known_contexts
,
2861 vec
<ipa_agg_jump_function_p
> known_aggs
,
2864 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2865 int call_size
= es
->call_stmt_size
;
2866 int call_time
= es
->call_stmt_time
;
2869 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2870 known_vals
, known_contexts
, known_aggs
)
2871 && hints
&& e
->maybe_hot_p ())
2872 *hints
|= INLINE_HINT_indirect_call
;
2873 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2876 *min_size
+= cur_size
;
2877 if (prob
== REG_BR_PROB_BASE
)
2878 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2880 *time
+= ((sreal
)call_time
* prob
) * e
->sreal_frequency ();
2885 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2886 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2887 describe context of the call site. */
2890 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2891 int *min_size
, sreal
*time
,
2893 clause_t possible_truths
,
2894 vec
<tree
> known_vals
,
2895 vec
<ipa_polymorphic_call_context
> known_contexts
,
2896 vec
<ipa_agg_jump_function_p
> known_aggs
)
2898 struct cgraph_edge
*e
;
2899 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2901 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2903 /* Do not care about zero sized builtins. */
2904 if (e
->inline_failed
&& !es
->call_stmt_size
)
2906 gcc_checking_assert (!es
->call_stmt_time
);
2910 || es
->predicate
->evaluate (possible_truths
))
2912 if (e
->inline_failed
)
2914 /* Predicates of calls shall not use NOT_CHANGED codes,
2915 sowe do not need to compute probabilities. */
2916 estimate_edge_size_and_time (e
, size
,
2917 es
->predicate
? NULL
: min_size
,
2918 time
, REG_BR_PROB_BASE
,
2919 known_vals
, known_contexts
,
2923 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2926 known_vals
, known_contexts
,
2930 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
2932 class ipa_call_summary
*es
= ipa_call_summaries
->get_create (e
);
2934 || es
->predicate
->evaluate (possible_truths
))
2935 estimate_edge_size_and_time (e
, size
,
2936 es
->predicate
? NULL
: min_size
,
2937 time
, REG_BR_PROB_BASE
,
2938 known_vals
, known_contexts
, known_aggs
,
2943 /* Default constructor for ipa call context.
2944 Memory alloction of known_vals, known_contexts
2945 and known_aggs vectors is owned by the caller, but can
2946 be release by ipa_call_context::release.
2948 inline_param_summary is owned by the caller. */
2949 ipa_call_context::ipa_call_context (cgraph_node
*node
,
2950 clause_t possible_truths
,
2951 clause_t nonspec_possible_truths
,
2952 vec
<tree
> known_vals
,
2953 vec
<ipa_polymorphic_call_context
>
2955 vec
<ipa_agg_jump_function_p
> known_aggs
,
2956 vec
<inline_param_summary
>
2957 inline_param_summary
)
2958 : m_node (node
), m_possible_truths (possible_truths
),
2959 m_nonspec_possible_truths (nonspec_possible_truths
),
2960 m_inline_param_summary (inline_param_summary
),
2961 m_known_vals (known_vals
),
2962 m_known_contexts (known_contexts
),
2963 m_known_aggs (known_aggs
)
2967 /* Release memory used by known_vals/contexts/aggs vectors. */
2970 ipa_call_context::release ()
2972 m_known_vals
.release ();
2973 m_known_contexts
.release ();
2974 m_known_aggs
.release ();
2977 /* Estimate size and time needed to execute call in the given context.
2978 Additionally detemine hints determined by the context. Finally compute
2979 minimal size needed for the call that is independent on the call context and
2980 can be used for fast estimates. Return the values in RET_SIZE,
2981 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2984 ipa_call_context::estimate_size_and_time (int *ret_size
,
2987 sreal
*ret_nonspecialized_time
,
2988 ipa_hints
*ret_hints
)
2990 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (m_node
);
2995 ipa_hints hints
= 0;
2998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3001 fprintf (dump_file
, " Estimating body: %s/%i\n"
3002 " Known to be false: ", m_node
->name (),
3005 for (i
= predicate::not_inlined_condition
;
3006 i
< (predicate::first_dynamic_condition
3007 + (int) vec_safe_length (info
->conds
)); i
++)
3008 if (!(m_possible_truths
& (1 << i
)))
3011 fprintf (dump_file
, ", ");
3013 dump_condition (dump_file
, info
->conds
, i
);
3017 estimate_calls_size_and_time (m_node
, &size
, &min_size
, &time
, &hints
, m_possible_truths
,
3018 m_known_vals
, m_known_contexts
, m_known_aggs
);
3019 sreal nonspecialized_time
= time
;
3021 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3023 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3025 /* Because predicates are conservative, it can happen that nonconst is 1
3029 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3031 gcc_checking_assert (e
->time
>= 0);
3032 gcc_checking_assert (time
>= 0);
3034 /* We compute specialized size only because size of nonspecialized
3035 copy is context independent.
3037 The difference between nonspecialized execution and specialized is
3038 that nonspecialized is not going to have optimized out computations
3039 known to be constant in a specialized setting. */
3042 nonspecialized_time
+= e
->time
;
3045 else if (!m_inline_param_summary
.exists ())
3052 int prob
= e
->nonconst_predicate
.probability
3053 (info
->conds
, m_possible_truths
,
3054 m_inline_param_summary
);
3055 gcc_checking_assert (prob
>= 0);
3056 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3057 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3059 gcc_checking_assert (time
>= 0);
3062 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
3063 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
3064 min_size
= (*info
->size_time_table
)[0].size
;
3065 gcc_checking_assert (size
>= 0);
3066 gcc_checking_assert (time
>= 0);
3067 /* nonspecialized_time should be always bigger than specialized time.
3068 Roundoff issues however may get into the way. */
3069 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3071 /* Roundoff issues may make specialized time bigger than nonspecialized
3072 time. We do not really want that to happen because some heurstics
3073 may get confused by seeing negative speedups. */
3074 if (time
> nonspecialized_time
)
3075 time
= nonspecialized_time
;
3077 if (info
->loop_iterations
3078 && !info
->loop_iterations
->evaluate (m_possible_truths
))
3079 hints
|= INLINE_HINT_loop_iterations
;
3080 if (info
->loop_stride
3081 && !info
->loop_stride
->evaluate (m_possible_truths
))
3082 hints
|= INLINE_HINT_loop_stride
;
3084 hints
|= INLINE_HINT_in_scc
;
3085 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3086 hints
|= INLINE_HINT_declared_inline
;
3088 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3089 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3091 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3092 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
3093 time
.to_double (), nonspecialized_time
.to_double ());
3096 if (ret_nonspecialized_time
)
3097 *ret_nonspecialized_time
= nonspecialized_time
;
3101 *ret_min_size
= min_size
;
3108 /* Estimate size and time needed to execute callee of EDGE assuming that
3109 parameters known to be constant at caller of EDGE are propagated.
3110 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3111 and types for parameters. */
3114 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3115 vec
<tree
> known_vals
,
3116 vec
<ipa_polymorphic_call_context
>
3118 vec
<ipa_agg_jump_function_p
> known_aggs
,
3119 int *ret_size
, sreal
*ret_time
,
3120 sreal
*ret_nonspec_time
,
3123 clause_t clause
, nonspec_clause
;
3125 evaluate_conditions_for_known_args (node
, false, known_vals
, known_aggs
,
3126 &clause
, &nonspec_clause
);
3127 ipa_call_context
ctx (node
, clause
, nonspec_clause
,
3128 known_vals
, known_contexts
,
3130 ctx
.estimate_size_and_time (ret_size
, NULL
, ret_time
,
3131 ret_nonspec_time
, hints
);
3134 /* Return stack frame offset where frame of NODE is supposed to start inside
3135 of the function it is inlined to.
3136 Return 0 for functions that are not inlined. */
3139 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3141 HOST_WIDE_INT offset
= 0;
3142 if (!node
->inlined_to
)
3144 node
= node
->callers
->caller
;
3147 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3148 if (!node
->inlined_to
)
3150 node
= node
->callers
->caller
;
3155 /* Update summary information of inline clones after inlining.
3156 Compute peak stack usage. */
3159 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3161 struct cgraph_edge
*e
;
3163 ipa_propagate_frequency (node
);
3164 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3166 if (!e
->inline_failed
)
3167 inline_update_callee_summaries (e
->callee
, depth
);
3168 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3170 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3171 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3174 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3175 When functoin A is inlined in B and A calls C with parameter that
3176 changes with probability PROB1 and C is known to be passthroug
3177 of argument if B that change with probability PROB2, the probability
3178 of change is now PROB1*PROB2. */
3181 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
3182 struct cgraph_edge
*edge
)
3184 if (ipa_node_params_sum
)
3187 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3190 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3191 class ipa_call_summary
*inlined_es
3192 = ipa_call_summaries
->get (inlined_edge
);
3194 if (es
->param
.length () == 0)
3197 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3199 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3200 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3201 || jfunc
->type
== IPA_JF_ANCESTOR
)
3203 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3204 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3205 : ipa_get_jf_ancestor_formal_id (jfunc
);
3206 if (id
< (int) inlined_es
->param
.length ())
3208 int prob1
= es
->param
[i
].change_prob
;
3209 int prob2
= inlined_es
->param
[id
].change_prob
;
3210 int prob
= combine_probabilities (prob1
, prob2
);
3212 if (prob1
&& prob2
&& !prob
)
3215 es
->param
[i
].change_prob
= prob
;
3222 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3224 Remap predicates of callees of NODE. Rest of arguments match
3227 Also update change probabilities. */
3230 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
3231 struct cgraph_node
*node
,
3232 class ipa_fn_summary
*info
,
3233 class ipa_fn_summary
*callee_info
,
3234 vec
<int> operand_map
,
3235 vec
<int> offset_map
,
3236 clause_t possible_truths
,
3237 predicate
*toplev_predicate
)
3239 struct cgraph_edge
*e
, *next
;
3240 for (e
= node
->callees
; e
; e
= next
)
3242 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3244 next
= e
->next_callee
;
3246 if (e
->inline_failed
)
3248 remap_edge_change_prob (inlined_edge
, e
);
3252 p
= es
->predicate
->remap_after_inlining
3253 (info
, callee_info
, operand_map
,
3254 offset_map
, possible_truths
,
3256 edge_set_predicate (e
, &p
);
3259 edge_set_predicate (e
, toplev_predicate
);
3262 remap_edge_summaries (inlined_edge
, e
->callee
, info
, callee_info
,
3263 operand_map
, offset_map
, possible_truths
,
3266 for (e
= node
->indirect_calls
; e
; e
= next
)
3268 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3270 next
= e
->next_callee
;
3272 remap_edge_change_prob (inlined_edge
, e
);
3275 p
= es
->predicate
->remap_after_inlining
3276 (info
, callee_info
, operand_map
, offset_map
,
3277 possible_truths
, *toplev_predicate
);
3278 edge_set_predicate (e
, &p
);
3281 edge_set_predicate (e
, toplev_predicate
);
3285 /* Same as remap_predicate, but set result into hint *HINT. */
3288 remap_hint_predicate (class ipa_fn_summary
*info
,
3289 class ipa_fn_summary
*callee_info
,
3291 vec
<int> operand_map
,
3292 vec
<int> offset_map
,
3293 clause_t possible_truths
,
3294 predicate
*toplev_predicate
)
3300 p
= (*hint
)->remap_after_inlining
3302 operand_map
, offset_map
,
3303 possible_truths
, *toplev_predicate
);
3304 if (p
!= false && p
!= true)
3307 set_hint_predicate (hint
, p
);
3313 /* We inlined EDGE. Update summary of the function we inlined into. */
3316 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
3318 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
3319 struct cgraph_node
*to
= (edge
->caller
->inlined_to
3320 ? edge
->caller
->inlined_to
: edge
->caller
);
3321 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
3322 clause_t clause
= 0; /* not_inline is known to be false. */
3324 auto_vec
<int, 8> operand_map
;
3325 auto_vec
<int, 8> offset_map
;
3327 predicate toplev_predicate
;
3328 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3331 toplev_predicate
= *es
->predicate
;
3333 toplev_predicate
= true;
3335 info
->fp_expressions
|= callee_info
->fp_expressions
;
3337 if (callee_info
->conds
)
3338 evaluate_properties_for_edge (edge
, true, &clause
, NULL
, NULL
, NULL
, NULL
);
3339 if (ipa_node_params_sum
&& callee_info
->conds
)
3341 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3342 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
3347 operand_map
.safe_grow_cleared (count
);
3348 offset_map
.safe_grow_cleared (count
);
3350 for (i
= 0; i
< count
; i
++)
3352 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3355 /* TODO: handle non-NOPs when merging. */
3356 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3358 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3359 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3360 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3363 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3365 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3366 if (offset
>= 0 && offset
< INT_MAX
)
3368 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3369 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3371 offset_map
[i
] = offset
;
3374 operand_map
[i
] = map
;
3375 gcc_assert (map
< ipa_get_param_count (IPA_NODE_REF (to
)));
3378 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3381 p
= e
->exec_predicate
.remap_after_inlining
3382 (info
, callee_info
, operand_map
,
3385 predicate nonconstp
;
3386 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3387 (info
, callee_info
, operand_map
,
3390 if (p
!= false && nonconstp
!= false)
3392 sreal add_time
= ((sreal
)e
->time
* edge
->sreal_frequency ());
3393 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3395 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3396 if (prob
!= REG_BR_PROB_BASE
3397 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3399 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3400 (double) prob
/ REG_BR_PROB_BASE
);
3402 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3405 remap_edge_summaries (edge
, edge
->callee
, info
, callee_info
, operand_map
,
3406 offset_map
, clause
, &toplev_predicate
);
3407 remap_hint_predicate (info
, callee_info
,
3408 &callee_info
->loop_iterations
,
3409 operand_map
, offset_map
, clause
, &toplev_predicate
);
3410 remap_hint_predicate (info
, callee_info
,
3411 &callee_info
->loop_stride
,
3412 operand_map
, offset_map
, clause
, &toplev_predicate
);
3414 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
3415 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
3417 if (info
->estimated_stack_size
< peak
)
3418 info
->estimated_stack_size
= peak
;
3420 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
3422 /* Free summaries that are not maintained for inline clones/edges. */
3423 ipa_call_summaries
->remove (edge
);
3424 ipa_fn_summaries
->remove (edge
->callee
);
3427 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
3428 overall size and time. Recompute it. */
3431 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3433 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
3434 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
3438 size_info
->size
= 0;
3440 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3442 size_info
->size
+= e
->size
;
3443 info
->time
+= e
->time
;
3445 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
3447 ~(clause_t
) (1 << predicate::false_condition
),
3448 vNULL
, vNULL
, vNULL
);
3449 size_info
->size
= (size_info
->size
+ ipa_fn_summary::size_scale
/ 2)
3450 / ipa_fn_summary::size_scale
;
3454 /* This function performs intraprocedural analysis in NODE that is required to
3455 inline indirect calls. */
3458 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3460 ipa_analyze_node (node
);
3461 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3463 ipa_print_node_params (dump_file
, node
);
3464 ipa_print_node_jump_functions (dump_file
, node
);
3469 /* Note function body size. */
3472 inline_analyze_function (struct cgraph_node
*node
)
3474 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3477 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3478 node
->name (), node
->order
);
3479 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3480 inline_indirect_intraprocedural_analysis (node
);
3481 compute_fn_summary (node
, false);
3484 struct cgraph_edge
*e
;
3485 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3486 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3487 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3488 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3495 /* Called when new function is inserted to callgraph late. */
3498 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3500 inline_analyze_function (node
);
3503 /* Note function body size. */
3506 ipa_fn_summary_generate (void)
3508 struct cgraph_node
*node
;
3510 FOR_EACH_DEFINED_FUNCTION (node
)
3511 if (DECL_STRUCT_FUNCTION (node
->decl
))
3512 node
->versionable
= tree_versionable_function_p (node
->decl
);
3514 ipa_fn_summary_alloc ();
3516 ipa_fn_summaries
->enable_insertion_hook ();
3518 ipa_register_cgraph_hooks ();
3520 FOR_EACH_DEFINED_FUNCTION (node
)
3522 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3523 || opt_for_fn (node
->decl
, optimize
)))
3524 inline_analyze_function (node
);
3528 /* Write inline summary for edge E to OB. */
3531 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
3534 class ipa_call_summary
*es
= prevails
3535 ? ipa_call_summaries
->get_create (e
) : NULL
;
3539 int size
= streamer_read_uhwi (ib
);
3540 int time
= streamer_read_uhwi (ib
);
3541 int depth
= streamer_read_uhwi (ib
);
3545 es
->call_stmt_size
= size
;
3546 es
->call_stmt_time
= time
;
3547 es
->loop_depth
= depth
;
3550 bitpack_d bp
= streamer_read_bitpack (ib
);
3552 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3554 bp_unpack_value (&bp
, 1);
3558 edge_set_predicate (e
, &p
);
3559 length
= streamer_read_uhwi (ib
);
3560 if (length
&& es
&& e
->possibly_call_in_translation_unit_p ())
3562 es
->param
.safe_grow_cleared (length
);
3563 for (i
= 0; i
< length
; i
++)
3564 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3568 for (i
= 0; i
< length
; i
++)
3569 streamer_read_uhwi (ib
);
3574 /* Stream in inline summaries from the section. */
3577 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3580 const struct lto_function_header
*header
=
3581 (const struct lto_function_header
*) data
;
3582 const int cfg_offset
= sizeof (struct lto_function_header
);
3583 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3584 const int string_offset
= main_offset
+ header
->main_size
;
3585 class data_in
*data_in
;
3586 unsigned int i
, count2
, j
;
3587 unsigned int f_count
;
3589 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3590 file_data
->mode_table
);
3593 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3594 header
->string_size
, vNULL
);
3595 f_count
= streamer_read_uhwi (&ib
);
3596 for (i
= 0; i
< f_count
; i
++)
3599 struct cgraph_node
*node
;
3600 class ipa_fn_summary
*info
;
3601 class ipa_size_summary
*size_info
;
3602 lto_symtab_encoder_t encoder
;
3603 struct bitpack_d bp
;
3604 struct cgraph_edge
*e
;
3607 index
= streamer_read_uhwi (&ib
);
3608 encoder
= file_data
->symtab_node_encoder
;
3609 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3611 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
3612 size_info
= node
->prevailing_p ()
3613 ? ipa_size_summaries
->get_create (node
) : NULL
;
3615 int stack_size
= streamer_read_uhwi (&ib
);
3616 int size
= streamer_read_uhwi (&ib
);
3617 sreal time
= sreal::stream_in (&ib
);
3621 info
->estimated_stack_size
3622 = size_info
->estimated_self_stack_size
= stack_size
;
3623 size_info
->size
= size_info
->self_size
= size
;
3627 bp
= streamer_read_bitpack (&ib
);
3630 info
->inlinable
= bp_unpack_value (&bp
, 1);
3631 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3635 bp_unpack_value (&bp
, 1);
3636 bp_unpack_value (&bp
, 1);
3639 count2
= streamer_read_uhwi (&ib
);
3640 gcc_assert (!info
|| !info
->conds
);
3641 for (j
= 0; j
< count2
; j
++)
3644 unsigned int k
, count3
;
3645 c
.operand_num
= streamer_read_uhwi (&ib
);
3646 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3647 c
.type
= stream_read_tree (&ib
, data_in
);
3648 c
.val
= stream_read_tree (&ib
, data_in
);
3649 bp
= streamer_read_bitpack (&ib
);
3650 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3651 c
.by_ref
= bp_unpack_value (&bp
, 1);
3653 c
.offset
= streamer_read_uhwi (&ib
);
3655 count3
= streamer_read_uhwi (&ib
);
3656 for (k
= 0; k
< count3
; k
++)
3658 struct expr_eval_op op
;
3659 enum gimple_rhs_class rhs_class
;
3660 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3661 op
.type
= stream_read_tree (&ib
, data_in
);
3662 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
3664 case GIMPLE_UNARY_RHS
:
3666 op
.val
[0] = NULL_TREE
;
3667 op
.val
[1] = NULL_TREE
;
3670 case GIMPLE_BINARY_RHS
:
3671 case GIMPLE_TERNARY_RHS
:
3672 bp
= streamer_read_bitpack (&ib
);
3673 op
.index
= bp_unpack_value (&bp
, 2);
3674 op
.val
[0] = stream_read_tree (&ib
, data_in
);
3675 if (rhs_class
== GIMPLE_BINARY_RHS
)
3676 op
.val
[1] = NULL_TREE
;
3678 op
.val
[1] = stream_read_tree (&ib
, data_in
);
3682 fatal_error (UNKNOWN_LOCATION
,
3683 "invalid fnsummary in LTO stream");
3685 vec_safe_push (c
.param_ops
, op
);
3688 vec_safe_push (info
->conds
, c
);
3690 count2
= streamer_read_uhwi (&ib
);
3691 gcc_assert (!info
|| !info
->size_time_table
);
3692 for (j
= 0; j
< count2
; j
++)
3694 class size_time_entry e
;
3696 e
.size
= streamer_read_uhwi (&ib
);
3697 e
.time
= sreal::stream_in (&ib
);
3698 e
.exec_predicate
.stream_in (&ib
);
3699 e
.nonconst_predicate
.stream_in (&ib
);
3702 vec_safe_push (info
->size_time_table
, e
);
3707 set_hint_predicate (&info
->loop_iterations
, p
);
3710 set_hint_predicate (&info
->loop_stride
, p
);
3711 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3712 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3713 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3714 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
3717 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
3719 lto_data_in_delete (data_in
);
3723 /* Read inline summary. Jump functions are shared among ipa-cp
3724 and inliner, so when ipa-cp is active, we don't need to write them
/* Read ipa_fn_summary sections from every LTO input file and rebuild the
   in-memory summaries.  A missing section is a hard error (see the comment
   at the fatal_error below).  Afterwards cgraph hooks are registered, jump
   functions are read, and the summary insertion hook is re-enabled.  */
3728 ipa_fn_summary_read (void)
3730 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
3731 struct lto_file_decl_data
*file_data
;
/* Make sure the summary holders exist before streaming into them.  */
3734 ipa_fn_summary_alloc ();
/* Walk every LTO input file; the loop condition shows the vector is
   NULL-terminated.  */
3736 while ((file_data
= file_data_vec
[j
++]))
/* Fetch this file's function-summary section...  */
3740 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
/* ...and decode it.  (The data/len variables come from lines not visible
   in this chunk; presumably the section buffer and its length.)  */
3743 inline_read_section (file_data
, data
, len
);
3745 /* Fatal error here. We do not want to support compiling ltrans units
3746 with different version of compiler or different flags than the WPA
3747 unit, so this should never happen. */
3748 fatal_error (input_location
,
3749 "ipa inline summary is missing in input file");
3751 ipa_register_cgraph_hooks ();
/* Jump functions are shared between ipa-cp and the inliner; read them
   here as well.  */
3753 ipa_prop_read_jump_functions ();
3755 gcc_assert (ipa_fn_summaries
);
/* Re-enable the insertion hook.  NOTE(review): presumably so that
   functions created later get summaries computed -- confirm against the
   function_summary class documentation.  */
3756 ipa_fn_summaries
->enable_insertion_hook ();
3760 /* Write inline summary for edge E to OB. */
/* Stream the ipa_call_summary of call-graph edge E into output block OB.
   The emission order here must stay in sync with read_ipa_call_summary:
   stmt size, stmt time, loop depth, a 1-bit bitpack, the edge predicate
   (or a 0 marker -- the if/else lines are not visible in this chunk),
   then the per-parameter change probabilities.  */
3763 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
3765 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
/* Scalar fields first.  */
3768 streamer_write_uhwi (ob
, es
->call_stmt_size
);
3769 streamer_write_uhwi (ob
, es
->call_stmt_time
);
3770 streamer_write_uhwi (ob
, es
->loop_depth
);
/* One-bit flag packed into the main stream.  */
3772 bitpack_d bp
= bitpack_create (ob
->main_stream
);
3773 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
3774 streamer_write_bitpack (&bp
);
/* Edge predicate, when present.  */
3777 es
->predicate
->stream_out (ob
);
/* 0 is written as the "no predicate" marker on the other branch.  */
3779 streamer_write_uhwi (ob
, 0);
/* Per-parameter change probabilities, prefixed by their count.  */
3780 streamer_write_uhwi (ob
, es
->param
.length ());
3781 for (i
= 0; i
< (int) es
->param
.length (); i
++)
3782 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
3786 /* Write inline summary for node in SET.
3787 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3788 active, we don't need to write them twice. */
/* Write the ipa_fn_summary LTO section.  First pass over the symtab
   encoder counts eligible nodes (cgraph nodes with a definition that are
   not aliases); second pass emits each node's summary.  The field order
   must stay in sync with inline_read_section.  */
3791 ipa_fn_summary_write (void)
3793 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
3794 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
3795 unsigned int count
= 0;
/* Pass 1: count the nodes whose summaries will be streamed.  */
3798 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3800 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3801 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3802 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
/* Emit the node count so the reader knows how many records follow.  */
3805 streamer_write_uhwi (ob
, count
);
/* Pass 2: emit one record per eligible node.  */
3807 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
3809 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
3810 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
3811 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
3813 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
3814 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
3815 struct bitpack_d bp
;
3816 struct cgraph_edge
*edge
;
3819 struct condition
*c
;
/* Node reference, then the scalar summary fields.  */
3821 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
3822 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
3823 streamer_write_hwi (ob
, size_info
->self_size
);
3824 info
->time
.stream_out (ob
);
/* Flag bitpack: inlinable, an always-false placeholder bit (kept for
   stream-format compatibility), fp_expressions.  */
3825 bp
= bitpack_create (ob
->main_stream
);
3826 bp_pack_value (&bp
, info
->inlinable
, 1);
3827 bp_pack_value (&bp
, false, 1);
3828 bp_pack_value (&bp
, info
->fp_expressions
, 1);
3829 streamer_write_bitpack (&bp
);
/* The conds vector: count, then one record per condition.  */
3830 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
3831 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
3834 struct expr_eval_op
*op
;
3836 streamer_write_uhwi (ob
, c
->operand_num
);
3837 streamer_write_uhwi (ob
, c
->code
);
3838 stream_write_tree (ob
, c
->type
, true);
3839 stream_write_tree (ob
, c
->val
, true);
/* Condition flags: agg_contents and by_ref, one bit each.  */
3840 bp
= bitpack_create (ob
->main_stream
);
3841 bp_pack_value (&bp
, c
->agg_contents
, 1);
3842 bp_pack_value (&bp
, c
->by_ref
, 1);
3843 streamer_write_bitpack (&bp
);
/* Offset is only meaningful (and only streamed) for aggregates.  */
3844 if (c
->agg_contents
)
3845 streamer_write_uhwi (ob
, c
->offset
);
/* Nested param_ops vector for this condition.  */
3846 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
3847 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
3849 streamer_write_uhwi (ob
, op
->code
);
3850 stream_write_tree (ob
, op
->type
, true);
/* op->index uses a 2-bit field.  */
3853 bp
= bitpack_create (ob
->main_stream
);
3854 bp_pack_value (&bp
, op
->index
, 2);
3855 streamer_write_bitpack (&bp
);
3856 stream_write_tree (ob
, op
->val
[0], true);
3858 stream_write_tree (ob
, op
->val
[1], true);
/* The size/time table: count, then size, time and both predicates per
   entry.  */
3862 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
3863 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3865 streamer_write_uhwi (ob
, e
->size
);
3866 e
->time
.stream_out (ob
);
3867 e
->exec_predicate
.stream_out (ob
);
3868 e
->nonconst_predicate
.stream_out (ob
);
/* Loop hint predicates; 0 is streamed as the "absent" marker.  */
3870 if (info
->loop_iterations
)
3871 info
->loop_iterations
->stream_out (ob
);
3873 streamer_write_uhwi (ob
, 0);
3874 if (info
->loop_stride
)
3875 info
->loop_stride
->stream_out (ob
);
3877 streamer_write_uhwi (ob
, 0);
/* Finally the call summaries of all outgoing edges, direct then
   indirect.  */
3878 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
3879 write_ipa_call_summary (ob
, edge
);
3880 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
3881 write_ipa_call_summary (ob
, edge
);
/* Terminate the section and flush the output block.  */
3884 streamer_write_char_stream (ob
->main_stream
, 0);
3885 produce_asm (ob
, NULL
);
3886 destroy_output_block (ob
);
/* Jump functions are shared with ipa-cp; write them alongside.  */
3889 ipa_prop_write_jump_functions ();
3893 /* Release function summary. */
/* Release the function and call summaries.  Bails out when
   ipa_call_summaries is already NULL (the guard's body lines are not
   visible in this chunk).  ipa_fn_summaries lives in GC memory, so its
   destructor is run explicitly and the storage handed to ggc_free;
   ipa_call_summaries is heap-allocated and simply deleted.  */
3896 ipa_free_fn_summary (void)
3898 if (!ipa_call_summaries
)
/* Explicit destructor call: the object is ggc-allocated, so `delete'
   cannot be used on it.  */
3900 ipa_fn_summaries
->~fast_function_summary
<ipa_fn_summary
*, va_gc
> ();
3901 ggc_free (ipa_fn_summaries
);
3902 ipa_fn_summaries
= NULL
;
3903 delete ipa_call_summaries
;
3904 ipa_call_summaries
= NULL
;
/* Drop the pool backing edge predicates as well.  */
3905 edge_predicate_pool
.release ();
3906 /* During IPA this is one of largest datastructures to release. */
3911 /* Release function summary. */
/* Release the size summaries; a no-op when they were never allocated
   (the early-return body lines are not visible in this chunk).  */
3914 ipa_free_size_summary (void)
3916 if (!ipa_size_summaries
)
3918 delete ipa_size_summaries
;
3919 ipa_size_summaries
= NULL
;
/* Pass metadata for the GIMPLE-level local function summary pass
   ("local-fnsummary"); consumed by pass_local_fn_summary below.  */
3924 const pass_data pass_data_local_fn_summary
=
3926 GIMPLE_PASS
, /* type */
3927 "local-fnsummary", /* name */
3928 OPTGROUP_INLINE
, /* optinfo_flags */
3929 TV_INLINE_PARAMETERS
, /* tv_id */
3930 0, /* properties_required */
3931 0, /* properties_provided */
3932 0, /* properties_destroyed */
3933 0, /* todo_flags_start */
3934 0, /* todo_flags_finish */
/* GIMPLE pass wrapper that computes the function summary for the
   current function via compute_fn_summary_for_current.  */
3937 class pass_local_fn_summary
: public gimple_opt_pass
3940 pass_local_fn_summary (gcc::context
*ctxt
)
3941 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
3944 /* opt_pass methods: */
/* Cloneable so the pass can be scheduled multiple times.  */
3945 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
3946 virtual unsigned int execute (function
*)
3948 return compute_fn_summary_for_current ();
3951 }; // class pass_local_fn_summary
/* Factory used by the pass manager to instantiate the pass.  */
3956 make_pass_local_fn_summary (gcc::context
*ctxt
)
3958 return new pass_local_fn_summary (ctxt
);
3962 /* Free inline summary. */
/* Pass metadata for the simple IPA pass that frees the function
   summaries ("free-fnsummary"); consumed by pass_ipa_free_fn_summary.  */
3966 const pass_data pass_data_ipa_free_fn_summary
=
3968 SIMPLE_IPA_PASS
, /* type */
3969 "free-fnsummary", /* name */
3970 OPTGROUP_NONE
, /* optinfo_flags */
3971 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
3972 0, /* properties_required */
3973 0, /* properties_provided */
3974 0, /* properties_destroyed */
3975 0, /* todo_flags_start */
3976 0, /* todo_flags_finish */
/* Simple IPA pass that releases function summaries (and, on one of its
   scheduled instances, the size summaries) once they are no longer
   needed.  Always gated on.  */
3979 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
3982 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
3983 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
3987 /* opt_pass methods: */
3988 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
/* The single pass parameter distinguishes the two scheduled instances;
   only parameter 0 is valid.  (The assignment that stores PARAM is on
   a line not visible in this chunk.)  */
3989 void set_pass_param (unsigned int n
, bool param
)
3991 gcc_assert (n
== 0);
3994 virtual bool gate (function
*) { return true; }
3995 virtual unsigned int execute (function
*)
3997 ipa_free_fn_summary ();
/* Size summaries are freed conditionally -- the guard is on a line not
   visible in this chunk.  */
3999 ipa_free_size_summary ();
4005 }; // class pass_ipa_free_fn_summary
/* Factory used by the pass manager to instantiate the pass.  */
4009 simple_ipa_opt_pass
*
4010 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4012 return new pass_ipa_free_fn_summary (ctxt
);
/* Pass metadata for the full IPA function-summary pass ("fnsummary");
   dumps the symtab when it finishes.  Consumed by pass_ipa_fn_summary.  */
4017 const pass_data pass_data_ipa_fn_summary
=
4019 IPA_PASS
, /* type */
4020 "fnsummary", /* name */
4021 OPTGROUP_INLINE
, /* optinfo_flags */
4022 TV_IPA_FNSUMMARY
, /* tv_id */
4023 0, /* properties_required */
4024 0, /* properties_provided */
4025 0, /* properties_destroyed */
4026 0, /* todo_flags_start */
4027 ( TODO_dump_symtab
), /* todo_flags_finish */
/* IPA pass that generates function summaries and streams them through
   LTO.  All the work happens in the summary callbacks wired up in the
   constructor (generate/write/read); execute itself is a no-op.  */
4030 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4033 pass_ipa_fn_summary (gcc::context
*ctxt
)
4034 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4035 ipa_fn_summary_generate
, /* generate_summary */
4036 ipa_fn_summary_write
, /* write_summary */
4037 ipa_fn_summary_read
, /* read_summary */
4038 NULL
, /* write_optimization_summary */
4039 NULL
, /* read_optimization_summary */
4040 NULL
, /* stmt_fixup */
4041 0, /* function_transform_todo_flags_start */
4042 NULL
, /* function_transform */
4043 NULL
) /* variable_transform */
4046 /* opt_pass methods: */
/* Everything is done by the summary hooks above.  */
4047 virtual unsigned int execute (function
*) { return 0; }
4049 }; // class pass_ipa_fn_summary
/* Factory used by the pass manager to instantiate the pass.  */
4054 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4056 return new pass_ipa_fn_summary (ctxt
);
4059 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4060 within the same process. For use by toplev::finalize. */
4063 ipa_fnsummary_c_finalize (void)
4065 ipa_free_fn_summary ();