1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inliner performs analysis via its analyze_function method. */
56 #include "coretypes.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
63 #include "tree-streamer.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
71 #include "gimple-iterator.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
77 #include "ipa-fnsummary.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
83 #include "stringpool.h"
87 fast_function_summary
<ipa_fn_summary
*, va_gc
> *ipa_fn_summaries
;
88 fast_function_summary
<ipa_size_summary
*, va_heap
> *ipa_size_summaries
;
89 fast_call_summary
<ipa_call_summary
*, va_heap
> *ipa_call_summaries
;
91 /* Edge predicates goes here. */
92 static object_allocator
<predicate
> edge_predicate_pool ("edge predicates");
97 ipa_dump_hints (FILE *f
, ipa_hints hints
)
101 fprintf (f
, "IPA hints:");
102 if (hints
& INLINE_HINT_indirect_call
)
104 hints
&= ~INLINE_HINT_indirect_call
;
105 fprintf (f
, " indirect_call");
107 if (hints
& INLINE_HINT_loop_iterations
)
109 hints
&= ~INLINE_HINT_loop_iterations
;
110 fprintf (f
, " loop_iterations");
112 if (hints
& INLINE_HINT_loop_stride
)
114 hints
&= ~INLINE_HINT_loop_stride
;
115 fprintf (f
, " loop_stride");
117 if (hints
& INLINE_HINT_same_scc
)
119 hints
&= ~INLINE_HINT_same_scc
;
120 fprintf (f
, " same_scc");
122 if (hints
& INLINE_HINT_in_scc
)
124 hints
&= ~INLINE_HINT_in_scc
;
125 fprintf (f
, " in_scc");
127 if (hints
& INLINE_HINT_cross_module
)
129 hints
&= ~INLINE_HINT_cross_module
;
130 fprintf (f
, " cross_module");
132 if (hints
& INLINE_HINT_declared_inline
)
134 hints
&= ~INLINE_HINT_declared_inline
;
135 fprintf (f
, " declared_inline");
137 if (hints
& INLINE_HINT_known_hot
)
139 hints
&= ~INLINE_HINT_known_hot
;
140 fprintf (f
, " known_hot");
146 /* Record SIZE and TIME to SUMMARY.
147 The accounted code will be executed when EXEC_PRED is true.
148 When NONCONST_PRED is false the code will evaluate to constant and
149 will get optimized out in specialized clones of the function. */
152 ipa_fn_summary::account_size_time (int size
, sreal time
,
153 const predicate
&exec_pred
,
154 const predicate
&nonconst_pred_in
)
159 predicate nonconst_pred
;
161 if (exec_pred
== false)
164 nonconst_pred
= nonconst_pred_in
& exec_pred
;
166 if (nonconst_pred
== false)
169 /* We need to create initial empty unconitional clause, but otherwie
170 we don't need to account empty times and sizes. */
171 if (!size
&& time
== 0 && size_time_table
)
174 gcc_assert (time
>= 0);
176 for (i
= 0; vec_safe_iterate (size_time_table
, i
, &e
); i
++)
177 if (e
->exec_predicate
== exec_pred
178 && e
->nonconst_predicate
== nonconst_pred
)
187 e
= &(*size_time_table
)[0];
188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
190 "\t\tReached limit on number of entries, "
191 "ignoring the predicate.");
193 if (dump_file
&& (dump_flags
& TDF_DETAILS
) && (time
!= 0 || size
))
196 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
197 ((double) size
) / ipa_fn_summary::size_scale
,
198 (time
.to_double ()), found
? "" : "new ");
199 exec_pred
.dump (dump_file
, conds
, 0);
200 if (exec_pred
!= nonconst_pred
)
202 fprintf (dump_file
, " nonconst:");
203 nonconst_pred
.dump (dump_file
, conds
);
206 fprintf (dump_file
, "\n");
210 class size_time_entry new_entry
;
211 new_entry
.size
= size
;
212 new_entry
.time
= time
;
213 new_entry
.exec_predicate
= exec_pred
;
214 new_entry
.nonconst_predicate
= nonconst_pred
;
215 vec_safe_push (size_time_table
, new_entry
);
224 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
226 static struct cgraph_edge
*
227 redirect_to_unreachable (struct cgraph_edge
*e
)
229 struct cgraph_node
*callee
= !e
->inline_failed
? e
->callee
: NULL
;
230 struct cgraph_node
*target
= cgraph_node::get_create
231 (builtin_decl_implicit (BUILT_IN_UNREACHABLE
));
234 e
= e
->resolve_speculation (target
->decl
);
236 e
->make_direct (target
);
238 e
->redirect_callee (target
);
239 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
240 e
->inline_failed
= CIF_UNREACHABLE
;
241 e
->count
= profile_count::zero ();
242 es
->call_stmt_size
= 0;
243 es
->call_stmt_time
= 0;
245 callee
->remove_symbol_and_inline_clones ();
249 /* Set predicate for edge E. */
252 edge_set_predicate (struct cgraph_edge
*e
, predicate
*predicate
)
254 /* If the edge is determined to be never executed, redirect it
255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
257 if (predicate
&& *predicate
== false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e
->speculative
|| e
->callee
))
262 e
= redirect_to_unreachable (e
);
264 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
265 if (predicate
&& *predicate
!= true)
268 es
->predicate
= edge_predicate_pool
.allocate ();
269 *es
->predicate
= *predicate
;
274 edge_predicate_pool
.remove (es
->predicate
);
275 es
->predicate
= NULL
;
279 /* Set predicate for hint *P. */
282 set_hint_predicate (predicate
**p
, predicate new_predicate
)
284 if (new_predicate
== false || new_predicate
== true)
287 edge_predicate_pool
.remove (*p
);
293 *p
= edge_predicate_pool
.allocate ();
299 /* Compute what conditions may or may not hold given information about
300 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
301 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
302 copy when called in a given context. It is a bitmask of conditions. Bit
303 0 means that condition is known to be false, while bit 1 means that condition
304 may or may not be true. These differ - for example NOT_INLINED condition
305 is always false in the second and also builtin_constant_p tests cannot use
306 the fact that parameter is indeed a constant.
308 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
309 KNOWN_AGGS is a vector of aggregate known offset/value set for each
310 parameter. Return clause of possible truths. When INLINE_P is true, assume
311 that we are inlining.
313 ERROR_MARK means compile time invariant. */
316 evaluate_conditions_for_known_args (struct cgraph_node
*node
,
318 vec
<tree
> known_vals
,
319 vec
<value_range
> known_value_ranges
,
320 vec
<ipa_agg_value_set
> known_aggs
,
321 clause_t
*ret_clause
,
322 clause_t
*ret_nonspec_clause
)
324 clause_t clause
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
325 clause_t nonspec_clause
= 1 << predicate::not_inlined_condition
;
326 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
330 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
335 struct expr_eval_op
*op
;
337 /* We allow call stmt to have fewer arguments than the callee function
338 (especially for K&R style programs). So bound check here (we assume
339 known_aggs vector, if non-NULL, has the same length as
341 gcc_checking_assert (!known_aggs
.exists ()
342 || (known_vals
.length () == known_aggs
.length ()));
343 if (c
->operand_num
>= (int) known_vals
.length ())
345 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
346 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
352 struct ipa_agg_value_set
*agg
;
354 if (c
->code
== predicate::changed
356 && (known_vals
[c
->operand_num
] == error_mark_node
))
359 if (known_aggs
.exists ())
361 agg
= &known_aggs
[c
->operand_num
];
362 val
= ipa_find_agg_cst_for_param (agg
, known_vals
[c
->operand_num
],
363 c
->offset
, c
->by_ref
);
370 val
= known_vals
[c
->operand_num
];
371 if (val
== error_mark_node
&& c
->code
!= predicate::changed
)
376 && (c
->code
== predicate::changed
377 || c
->code
== predicate::is_not_constant
))
379 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
380 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
383 if (c
->code
== predicate::changed
)
385 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
389 if (c
->code
== predicate::is_not_constant
)
391 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
395 if (val
&& TYPE_SIZE (c
->type
) == TYPE_SIZE (TREE_TYPE (val
)))
397 if (c
->type
!= TREE_TYPE (val
))
398 val
= fold_unary (VIEW_CONVERT_EXPR
, c
->type
, val
);
399 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
404 val
= fold_unary (op
->code
, op
->type
, val
);
405 else if (!op
->val
[1])
406 val
= fold_binary (op
->code
, op
->type
,
407 op
->index
? op
->val
[0] : val
,
408 op
->index
? val
: op
->val
[0]);
409 else if (op
->index
== 0)
410 val
= fold_ternary (op
->code
, op
->type
,
411 val
, op
->val
[0], op
->val
[1]);
412 else if (op
->index
== 1)
413 val
= fold_ternary (op
->code
, op
->type
,
414 op
->val
[0], val
, op
->val
[1]);
415 else if (op
->index
== 2)
416 val
= fold_ternary (op
->code
, op
->type
,
417 op
->val
[0], op
->val
[1], val
);
423 ? fold_binary_to_constant (c
->code
, boolean_type_node
, val
, c
->val
)
426 if (res
&& integer_zerop (res
))
428 if (res
&& integer_onep (res
))
430 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
431 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
435 if (c
->operand_num
< (int) known_value_ranges
.length ()
437 && !known_value_ranges
[c
->operand_num
].undefined_p ()
438 && !known_value_ranges
[c
->operand_num
].varying_p ()
439 && TYPE_SIZE (c
->type
)
440 == TYPE_SIZE (known_value_ranges
[c
->operand_num
].type ())
441 && (!val
|| TREE_CODE (val
) != INTEGER_CST
))
443 value_range vr
= known_value_ranges
[c
->operand_num
];
444 if (!useless_type_conversion_p (c
->type
, vr
.type ()))
447 range_fold_unary_expr (&res
, NOP_EXPR
,
448 c
->type
, &vr
, vr
.type ());
453 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
455 if (vr
.varying_p () || vr
.undefined_p ())
460 range_fold_unary_expr (&res
, op
->code
, op
->type
, &vr
, type
);
461 else if (!op
->val
[1])
463 value_range
op0 (op
->val
[0], op
->val
[0]);
464 range_fold_binary_expr (&res
, op
->code
, op
->type
,
465 op
->index
? &op0
: &vr
,
466 op
->index
? &vr
: &op0
);
473 if (!vr
.varying_p () && !vr
.undefined_p ())
476 value_range
val_vr (c
->val
, c
->val
);
477 range_fold_binary_expr (&res
, c
->code
, boolean_type_node
,
485 clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
486 nonspec_clause
|= 1 << (i
+ predicate::first_dynamic_condition
);
488 *ret_clause
= clause
;
489 if (ret_nonspec_clause
)
490 *ret_nonspec_clause
= nonspec_clause
;
494 /* Work out what conditions might be true at invocation of E. */
497 evaluate_properties_for_edge (struct cgraph_edge
*e
, bool inline_p
,
498 clause_t
*clause_ptr
,
499 clause_t
*nonspec_clause_ptr
,
500 vec
<tree
> *known_vals_ptr
,
501 vec
<ipa_polymorphic_call_context
>
503 vec
<ipa_agg_value_set
> *known_aggs_ptr
)
505 struct cgraph_node
*callee
= e
->callee
->ultimate_alias_target ();
506 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (callee
);
507 vec
<tree
> known_vals
= vNULL
;
508 auto_vec
<value_range
, 32> known_value_ranges
;
509 vec
<ipa_agg_value_set
> known_aggs
= vNULL
;
510 class ipa_edge_args
*args
;
513 *clause_ptr
= inline_p
? 0 : 1 << predicate::not_inlined_condition
;
515 known_vals_ptr
->create (0);
516 if (known_contexts_ptr
)
517 known_contexts_ptr
->create (0);
519 if (ipa_node_params_sum
520 && !e
->call_stmt_cannot_inline_p
521 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
|| known_contexts_ptr
)
522 && (args
= IPA_EDGE_REF (e
)) != NULL
)
524 struct cgraph_node
*caller
;
525 class ipa_node_params
*caller_parms_info
, *callee_pi
;
526 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
527 int i
, count
= ipa_get_cs_argument_count (args
);
529 if (e
->caller
->inlined_to
)
530 caller
= e
->caller
->inlined_to
;
533 caller_parms_info
= IPA_NODE_REF (caller
);
534 callee_pi
= IPA_NODE_REF (callee
);
536 if (count
&& (info
->conds
|| known_vals_ptr
))
537 known_vals
.safe_grow_cleared (count
);
538 if (count
&& info
->conds
)
539 known_value_ranges
.safe_grow_cleared (count
);
540 if (count
&& (info
->conds
|| known_aggs_ptr
))
541 known_aggs
.safe_grow_cleared (count
);
542 if (count
&& known_contexts_ptr
)
543 known_contexts_ptr
->safe_grow_cleared (count
);
546 for (i
= 0; i
< count
; i
++)
548 struct ipa_jump_func
*jf
= ipa_get_ith_jump_func (args
, i
);
549 tree cst
= ipa_value_from_jfunc (caller_parms_info
, jf
,
550 ipa_get_type (callee_pi
, i
));
552 if (!cst
&& e
->call_stmt
553 && i
< (int)gimple_call_num_args (e
->call_stmt
))
555 cst
= gimple_call_arg (e
->call_stmt
, i
);
556 if (!is_gimple_min_invariant (cst
))
561 gcc_checking_assert (TREE_CODE (cst
) != TREE_BINFO
);
562 if (known_vals
.exists ())
565 else if (inline_p
&& !es
->param
[i
].change_prob
)
566 known_vals
[i
] = error_mark_node
;
568 if (known_contexts_ptr
)
569 (*known_contexts_ptr
)[i
]
570 = ipa_context_from_jfunc (caller_parms_info
, e
, i
, jf
);
572 known_aggs
[i
] = ipa_agg_value_set_from_jfunc (caller_parms_info
,
575 known_value_ranges
[i
]
576 = ipa_value_range_from_jfunc (caller_parms_info
, e
, jf
,
577 ipa_get_type (callee_pi
, i
));
580 gcc_assert (callee
->thunk
.thunk_p
);
582 else if (e
->call_stmt
&& !e
->call_stmt_cannot_inline_p
583 && ((clause_ptr
&& info
->conds
) || known_vals_ptr
))
585 int i
, count
= (int)gimple_call_num_args (e
->call_stmt
);
587 if (count
&& (info
->conds
|| known_vals_ptr
))
588 known_vals
.safe_grow_cleared (count
);
589 for (i
= 0; i
< count
; i
++)
591 tree cst
= gimple_call_arg (e
->call_stmt
, i
);
592 if (!is_gimple_min_invariant (cst
))
599 evaluate_conditions_for_known_args (callee
, inline_p
,
602 known_aggs
, clause_ptr
,
606 *known_vals_ptr
= known_vals
;
608 known_vals
.release ();
611 *known_aggs_ptr
= known_aggs
;
613 ipa_release_agg_values (known_aggs
);
617 /* Allocate the function summary. */
620 ipa_fn_summary_alloc (void)
622 gcc_checking_assert (!ipa_fn_summaries
);
623 ipa_size_summaries
= new fast_function_summary
<ipa_size_summary
*, va_heap
>
625 ipa_fn_summaries
= ipa_fn_summary_t::create_ggc (symtab
);
626 ipa_call_summaries
= new ipa_call_summary_t (symtab
);
629 ipa_call_summary::~ipa_call_summary ()
632 edge_predicate_pool
.remove (predicate
);
637 ipa_fn_summary::~ipa_fn_summary ()
640 edge_predicate_pool
.remove (loop_iterations
);
642 edge_predicate_pool
.remove (loop_stride
);
644 vec_free (size_time_table
);
648 ipa_fn_summary_t::remove_callees (cgraph_node
*node
)
651 for (e
= node
->callees
; e
; e
= e
->next_callee
)
652 ipa_call_summaries
->remove (e
);
653 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
654 ipa_call_summaries
->remove (e
);
657 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
658 Additionally care about allocating new memory slot for updated predicate
659 and set it to NULL when it becomes true or false (and thus uninteresting).
663 remap_hint_predicate_after_duplication (predicate
**p
,
664 clause_t possible_truths
)
666 predicate new_predicate
;
671 new_predicate
= (*p
)->remap_after_duplication (possible_truths
);
672 /* We do not want to free previous predicate; it is used by node origin. */
674 set_hint_predicate (p
, new_predicate
);
678 /* Hook that is called by cgraph.c when a node is duplicated. */
680 ipa_fn_summary_t::duplicate (cgraph_node
*src
,
683 ipa_fn_summary
*info
)
685 new (info
) ipa_fn_summary (*ipa_fn_summaries
->get (src
));
686 /* TODO: as an optimization, we may avoid copying conditions
687 that are known to be false or true. */
688 info
->conds
= vec_safe_copy (info
->conds
);
690 /* When there are any replacements in the function body, see if we can figure
691 out that something was optimized out. */
692 if (ipa_node_params_sum
&& dst
->clone
.tree_map
)
694 vec
<size_time_entry
, va_gc
> *entry
= info
->size_time_table
;
695 /* Use SRC parm info since it may not be copied yet. */
696 class ipa_node_params
*parms_info
= IPA_NODE_REF (src
);
697 vec
<tree
> known_vals
= vNULL
;
698 int count
= ipa_get_param_count (parms_info
);
700 clause_t possible_truths
;
701 predicate true_pred
= true;
703 int optimized_out_size
= 0;
704 bool inlined_to_p
= false;
705 struct cgraph_edge
*edge
, *next
;
707 info
->size_time_table
= 0;
708 known_vals
.safe_grow_cleared (count
);
709 for (i
= 0; i
< count
; i
++)
711 struct ipa_replace_map
*r
;
713 for (j
= 0; vec_safe_iterate (dst
->clone
.tree_map
, j
, &r
); j
++)
715 if (r
->parm_num
== i
)
717 known_vals
[i
] = r
->new_tree
;
722 evaluate_conditions_for_known_args (dst
, false,
727 /* We are going to specialize,
728 so ignore nonspec truths. */
730 known_vals
.release ();
732 info
->account_size_time (0, 0, true_pred
, true_pred
);
734 /* Remap size_time vectors.
735 Simplify the predicate by prunning out alternatives that are known
737 TODO: as on optimization, we can also eliminate conditions known
739 for (i
= 0; vec_safe_iterate (entry
, i
, &e
); i
++)
741 predicate new_exec_pred
;
742 predicate new_nonconst_pred
;
743 new_exec_pred
= e
->exec_predicate
.remap_after_duplication
745 new_nonconst_pred
= e
->nonconst_predicate
.remap_after_duplication
747 if (new_exec_pred
== false || new_nonconst_pred
== false)
748 optimized_out_size
+= e
->size
;
750 info
->account_size_time (e
->size
, e
->time
, new_exec_pred
,
754 /* Remap edge predicates with the same simplification as above.
755 Also copy constantness arrays. */
756 for (edge
= dst
->callees
; edge
; edge
= next
)
758 predicate new_predicate
;
759 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
760 next
= edge
->next_callee
;
762 if (!edge
->inline_failed
)
766 new_predicate
= es
->predicate
->remap_after_duplication
768 if (new_predicate
== false && *es
->predicate
!= false)
769 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
770 edge_set_predicate (edge
, &new_predicate
);
773 /* Remap indirect edge predicates with the same simplificaiton as above.
774 Also copy constantness arrays. */
775 for (edge
= dst
->indirect_calls
; edge
; edge
= next
)
777 predicate new_predicate
;
778 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
779 next
= edge
->next_callee
;
781 gcc_checking_assert (edge
->inline_failed
);
784 new_predicate
= es
->predicate
->remap_after_duplication
786 if (new_predicate
== false && *es
->predicate
!= false)
787 optimized_out_size
+= es
->call_stmt_size
* ipa_fn_summary::size_scale
;
788 edge_set_predicate (edge
, &new_predicate
);
790 remap_hint_predicate_after_duplication (&info
->loop_iterations
,
792 remap_hint_predicate_after_duplication (&info
->loop_stride
,
795 /* If inliner or someone after inliner will ever start producing
796 non-trivial clones, we will get trouble with lack of information
797 about updating self sizes, because size vectors already contains
798 sizes of the calees. */
799 gcc_assert (!inlined_to_p
|| !optimized_out_size
);
803 info
->size_time_table
= vec_safe_copy (info
->size_time_table
);
804 if (info
->loop_iterations
)
806 predicate p
= *info
->loop_iterations
;
807 info
->loop_iterations
= NULL
;
808 set_hint_predicate (&info
->loop_iterations
, p
);
810 if (info
->loop_stride
)
812 predicate p
= *info
->loop_stride
;
813 info
->loop_stride
= NULL
;
814 set_hint_predicate (&info
->loop_stride
, p
);
817 if (!dst
->inlined_to
)
818 ipa_update_overall_fn_summary (dst
);
822 /* Hook that is called by cgraph.c when a node is duplicated. */
825 ipa_call_summary_t::duplicate (struct cgraph_edge
*src
,
826 struct cgraph_edge
*dst
,
827 class ipa_call_summary
*srcinfo
,
828 class ipa_call_summary
*info
)
830 new (info
) ipa_call_summary (*srcinfo
);
831 info
->predicate
= NULL
;
832 edge_set_predicate (dst
, srcinfo
->predicate
);
833 info
->param
= srcinfo
->param
.copy ();
834 if (!dst
->indirect_unknown_callee
&& src
->indirect_unknown_callee
)
836 info
->call_stmt_size
-= (eni_size_weights
.indirect_call_cost
837 - eni_size_weights
.call_cost
);
838 info
->call_stmt_time
-= (eni_time_weights
.indirect_call_cost
839 - eni_time_weights
.call_cost
);
843 /* Dump edge summaries associated to NODE and recursively to all clones.
847 dump_ipa_call_summary (FILE *f
, int indent
, struct cgraph_node
*node
,
848 class ipa_fn_summary
*info
)
850 struct cgraph_edge
*edge
;
851 for (edge
= node
->callees
; edge
; edge
= edge
->next_callee
)
853 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
854 struct cgraph_node
*callee
= edge
->callee
->ultimate_alias_target ();
858 "%*s%s/%i %s\n%*s freq:%4.2f",
859 indent
, "", callee
->name (), callee
->order
,
861 ? "inlined" : cgraph_inline_failed_string (edge
-> inline_failed
),
862 indent
, "", edge
->sreal_frequency ().to_double ());
865 fprintf (f
, " loop depth:%2i size:%2i time: %2i",
866 es
->loop_depth
, es
->call_stmt_size
, es
->call_stmt_time
);
868 ipa_fn_summary
*s
= ipa_fn_summaries
->get (callee
);
869 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
871 fprintf (f
, " callee size:%2i stack:%2i",
872 (int) (ss
->size
/ ipa_fn_summary::size_scale
),
873 (int) s
->estimated_stack_size
);
875 if (es
&& es
->predicate
)
877 fprintf (f
, " predicate: ");
878 es
->predicate
->dump (f
, info
->conds
);
882 if (es
&& es
->param
.exists ())
883 for (i
= 0; i
< (int) es
->param
.length (); i
++)
885 int prob
= es
->param
[i
].change_prob
;
888 fprintf (f
, "%*s op%i is compile time invariant\n",
890 else if (prob
!= REG_BR_PROB_BASE
)
891 fprintf (f
, "%*s op%i change %f%% of time\n", indent
+ 2, "", i
,
892 prob
* 100.0 / REG_BR_PROB_BASE
);
894 if (!edge
->inline_failed
)
896 ipa_size_summary
*ss
= ipa_size_summaries
->get (callee
);
897 fprintf (f
, "%*sStack frame offset %i, callee self size %i\n",
899 (int) ipa_get_stack_frame_offset (callee
),
900 (int) ss
->estimated_self_stack_size
);
901 dump_ipa_call_summary (f
, indent
+ 2, callee
, info
);
904 for (edge
= node
->indirect_calls
; edge
; edge
= edge
->next_callee
)
906 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
907 fprintf (f
, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
911 edge
->sreal_frequency ().to_double (), es
->call_stmt_size
,
915 fprintf (f
, "predicate: ");
916 es
->predicate
->dump (f
, info
->conds
);
925 ipa_dump_fn_summary (FILE *f
, struct cgraph_node
*node
)
927 if (node
->definition
)
929 class ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
930 class ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
935 fprintf (f
, "IPA function summary for %s", node
->dump_name ());
936 if (DECL_DISREGARD_INLINE_LIMITS (node
->decl
))
937 fprintf (f
, " always_inline");
939 fprintf (f
, " inlinable");
940 if (s
->fp_expressions
)
941 fprintf (f
, " fp_expression");
942 fprintf (f
, "\n global time: %f\n", s
->time
.to_double ());
943 fprintf (f
, " self size: %i\n", ss
->self_size
);
944 fprintf (f
, " global size: %i\n", ss
->size
);
945 fprintf (f
, " min size: %i\n", s
->min_size
);
946 fprintf (f
, " self stack: %i\n",
947 (int) ss
->estimated_self_stack_size
);
948 fprintf (f
, " global stack: %i\n", (int) s
->estimated_stack_size
);
950 fprintf (f
, " estimated growth:%i\n", (int) s
->growth
);
952 fprintf (f
, " In SCC: %i\n", (int) s
->scc_no
);
953 for (i
= 0; vec_safe_iterate (s
->size_time_table
, i
, &e
); i
++)
955 fprintf (f
, " size:%f, time:%f",
956 (double) e
->size
/ ipa_fn_summary::size_scale
,
957 e
->time
.to_double ());
958 if (e
->exec_predicate
!= true)
960 fprintf (f
, ", executed if:");
961 e
->exec_predicate
.dump (f
, s
->conds
, 0);
963 if (e
->exec_predicate
!= e
->nonconst_predicate
)
965 fprintf (f
, ", nonconst if:");
966 e
->nonconst_predicate
.dump (f
, s
->conds
, 0);
970 if (s
->loop_iterations
)
972 fprintf (f
, " loop iterations:");
973 s
->loop_iterations
->dump (f
, s
->conds
);
977 fprintf (f
, " loop stride:");
978 s
->loop_stride
->dump (f
, s
->conds
);
980 fprintf (f
, " calls:\n");
981 dump_ipa_call_summary (f
, 4, node
, s
);
985 fprintf (f
, "IPA summary for %s is missing.\n", node
->dump_name ());
990 ipa_debug_fn_summary (struct cgraph_node
*node
)
992 ipa_dump_fn_summary (stderr
, node
);
996 ipa_dump_fn_summaries (FILE *f
)
998 struct cgraph_node
*node
;
1000 FOR_EACH_DEFINED_FUNCTION (node
)
1001 if (!node
->inlined_to
)
1002 ipa_dump_fn_summary (f
, node
);
1005 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1006 boolean variable pointed to by DATA. */
1009 mark_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef ATTRIBUTE_UNUSED
,
1012 bool *b
= (bool *) data
;
1017 /* If OP refers to value of function parameter, return the corresponding
1018 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1019 PARM_DECL) will be stored to *SIZE_P in that case too. */
1022 unmodified_parm_1 (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1025 /* SSA_NAME referring to parm default def? */
1026 if (TREE_CODE (op
) == SSA_NAME
1027 && SSA_NAME_IS_DEFAULT_DEF (op
)
1028 && TREE_CODE (SSA_NAME_VAR (op
)) == PARM_DECL
)
1031 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1032 return SSA_NAME_VAR (op
);
1034 /* Non-SSA parm reference? */
1035 if (TREE_CODE (op
) == PARM_DECL
)
1037 bool modified
= false;
1040 ao_ref_init (&refd
, op
);
1041 int walked
= walk_aliased_vdefs (&refd
, gimple_vuse (stmt
),
1042 mark_modified
, &modified
, NULL
, NULL
,
1043 fbi
->aa_walk_budget
+ 1);
1046 fbi
->aa_walk_budget
= 0;
1052 *size_p
= tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op
)));
1059 /* If OP refers to value of function parameter, return the corresponding
1060 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1061 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1062 stored to *SIZE_P in that case too. */
1065 unmodified_parm (ipa_func_body_info
*fbi
, gimple
*stmt
, tree op
,
1068 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1072 if (TREE_CODE (op
) == SSA_NAME
1073 && !SSA_NAME_IS_DEFAULT_DEF (op
)
1074 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1075 return unmodified_parm (fbi
, SSA_NAME_DEF_STMT (op
),
1076 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op
)),
1081 /* If OP refers to a value of a function parameter or value loaded from an
1082 aggregate passed to a parameter (either by value or reference), return TRUE
1083 and store the number of the parameter to *INDEX_P, the access size into
1084 *SIZE_P, and information whether and how it has been loaded from an
1085 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1086 statement in which OP is used or loaded. */
1089 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info
*fbi
,
1090 gimple
*stmt
, tree op
, int *index_p
,
1092 struct agg_position_info
*aggpos
)
1094 tree res
= unmodified_parm_1 (fbi
, stmt
, op
, size_p
);
1096 gcc_checking_assert (aggpos
);
1099 *index_p
= ipa_get_param_decl_index (fbi
->info
, res
);
1102 aggpos
->agg_contents
= false;
1103 aggpos
->by_ref
= false;
1107 if (TREE_CODE (op
) == SSA_NAME
)
1109 if (SSA_NAME_IS_DEFAULT_DEF (op
)
1110 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op
)))
1112 stmt
= SSA_NAME_DEF_STMT (op
);
1113 op
= gimple_assign_rhs1 (stmt
);
1114 if (!REFERENCE_CLASS_P (op
))
1115 return unmodified_parm_or_parm_agg_item (fbi
, stmt
, op
, index_p
, size_p
,
1119 aggpos
->agg_contents
= true;
1120 return ipa_load_from_parm_agg (fbi
, fbi
->info
->descriptors
,
1121 stmt
, op
, index_p
, &aggpos
->offset
,
1122 size_p
, &aggpos
->by_ref
);
1125 /* See if statement might disappear after inlining.
1126 0 - means not eliminated
1127 1 - half of statements goes away
1128 2 - for sure it is eliminated.
1129 We are not terribly sophisticated, basically looking for simple abstraction
1130 penalty wrappers. */
1133 eliminated_by_inlining_prob (ipa_func_body_info
*fbi
, gimple
*stmt
)
1135 enum gimple_code code
= gimple_code (stmt
);
1136 enum tree_code rhs_code
;
1146 if (gimple_num_ops (stmt
) != 2)
1149 rhs_code
= gimple_assign_rhs_code (stmt
);
1151 /* Casts of parameters, loads from parameters passed by reference
1152 and stores to return value or parameters are often free after
1153 inlining dua to SRA and further combining.
1154 Assume that half of statements goes away. */
1155 if (CONVERT_EXPR_CODE_P (rhs_code
)
1156 || rhs_code
== VIEW_CONVERT_EXPR
1157 || rhs_code
== ADDR_EXPR
1158 || gimple_assign_rhs_class (stmt
) == GIMPLE_SINGLE_RHS
)
1160 tree rhs
= gimple_assign_rhs1 (stmt
);
1161 tree lhs
= gimple_assign_lhs (stmt
);
1162 tree inner_rhs
= get_base_address (rhs
);
1163 tree inner_lhs
= get_base_address (lhs
);
1164 bool rhs_free
= false;
1165 bool lhs_free
= false;
1172 /* Reads of parameter are expected to be free. */
1173 if (unmodified_parm (fbi
, stmt
, inner_rhs
, NULL
))
1175 /* Match expressions of form &this->field. Those will most likely
1176 combine with something upstream after inlining. */
1177 else if (TREE_CODE (inner_rhs
) == ADDR_EXPR
)
1179 tree op
= get_base_address (TREE_OPERAND (inner_rhs
, 0));
1180 if (TREE_CODE (op
) == PARM_DECL
)
1182 else if (TREE_CODE (op
) == MEM_REF
1183 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (op
, 0),
1188 /* When parameter is not SSA register because its address is taken
1189 and it is just copied into one, the statement will be completely
1190 free after inlining (we will copy propagate backward). */
1191 if (rhs_free
&& is_gimple_reg (lhs
))
1194 /* Reads of parameters passed by reference
1195 expected to be free (i.e. optimized out after inlining). */
1196 if (TREE_CODE (inner_rhs
) == MEM_REF
1197 && unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_rhs
, 0), NULL
))
1200 /* Copying parameter passed by reference into gimple register is
1201 probably also going to copy propagate, but we can't be quite
1203 if (rhs_free
&& is_gimple_reg (lhs
))
1206 /* Writes to parameters, parameters passed by value and return value
1207 (either directly or passed via invisible reference) are free.
1209 TODO: We ought to handle testcase like
1210 struct a {int a,b;};
1212 retrurnsturct (void)
1218 This translate into:
1233 For that we either need to copy ipa-split logic detecting writes
1235 if (TREE_CODE (inner_lhs
) == PARM_DECL
1236 || TREE_CODE (inner_lhs
) == RESULT_DECL
1237 || (TREE_CODE (inner_lhs
) == MEM_REF
1238 && (unmodified_parm (fbi
, stmt
, TREE_OPERAND (inner_lhs
, 0),
1240 || (TREE_CODE (TREE_OPERAND (inner_lhs
, 0)) == SSA_NAME
1241 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs
, 0))
1242 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1244 0))) == RESULT_DECL
))))
1247 && (is_gimple_reg (rhs
) || is_gimple_min_invariant (rhs
)))
1249 if (lhs_free
&& rhs_free
)
1258 /* Analyze EXPR if it represents a series of simple operations performed on
1259 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1260 AGGPOS have the same meaning as in unmodified_parm_or_parm_agg_item.
1261 Type of the parameter or load from an aggregate via the parameter is
1262 stored in *TYPE_P. Operations on the parameter are recorded to
1263 PARAM_OPS_P if it is not NULL. */
/* NOTE(review): this extract is garbled — statements are split across
   physical lines, the leading numbers are original line numbers, and some
   original lines (braces, returns, loop setup) are missing from view.
   "¶m_type" seen in callers is a mis-encoding of "&param_type".  */
1266 decompose_param_expr (struct ipa_func_body_info
*fbi
,
1267 gimple
*stmt
, tree expr
,
1268 int *index_p
, tree
*type_p
,
1269 struct agg_position_info
*aggpos
,
1270 expr_eval_ops
*param_ops_p
= NULL
)
/* Limit on how many SSA-def steps we are willing to record before giving
   up; guards against walking arbitrarily long expression chains.  */
1272 int op_limit
= param_ipa_max_param_expr_ops
;
1276 *param_ops_p
= NULL
;
1280 expr_eval_op eval_op
;
1282 unsigned cst_count
= 0;
/* The walk terminates successfully once EXPR is recognized as an
   unmodified parameter, or an aggregate item passed via one.  */
1284 if (unmodified_parm_or_parm_agg_item (fbi
, stmt
, expr
, index_p
, NULL
,
1287 tree type
= TREE_TYPE (expr
);
1289 if (aggpos
->agg_contents
)
1291 /* Stop if containing bit-field. */
1292 if (TREE_CODE (expr
) == BIT_FIELD_REF
1293 || contains_bitfld_component_ref_p (expr
))
/* Only non-default-def SSA names have a defining statement to follow.  */
1301 if (TREE_CODE (expr
) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (expr
))
1304 if (!is_gimple_assign (stmt
= SSA_NAME_DEF_STMT (expr
)))
1307 switch (gimple_assign_rhs_class (stmt
))
1309 case GIMPLE_SINGLE_RHS
:
1310 expr
= gimple_assign_rhs1 (stmt
);
1313 case GIMPLE_UNARY_RHS
:
1317 case GIMPLE_BINARY_RHS
:
1321 case GIMPLE_TERNARY_RHS
:
1329 /* Stop if expression is too complex. */
1330 if (op_count
++ == op_limit
)
/* Record this assignment as one evaluation step: operation code, result
   type, and up to two constant operand slots.  */
1335 eval_op
.code
= gimple_assign_rhs_code (stmt
);
1336 eval_op
.type
= TREE_TYPE (gimple_assign_lhs (stmt
));
1337 eval_op
.val
[0] = NULL_TREE
;
1338 eval_op
.val
[1] = NULL_TREE
;
1342 for (unsigned i
= 0; i
< rhs_count
; i
++)
1344 tree op
= gimple_op (stmt
, i
+ 1);
1346 gcc_assert (op
&& !TYPE_P (op
));
1347 if (is_gimple_ip_invariant (op
))
/* All-constant rhs would have been folded already; presumably this
   bails out — the failure branch is missing from this extract.  */
1349 if (++cst_count
== rhs_count
)
1352 eval_op
.val
[cst_count
- 1] = op
;
1356 /* Found a non-constant operand, and record its index in rhs
1363 /* Found more than one non-constant operands. */
/* Prepend so recorded operations end up ordered from the parameter
   outwards — confirm against expr_eval_ops consumers.  */
1369 vec_safe_insert (*param_ops_p
, 0, eval_op
);
1372 /* Failed to decompose, free resource and return. */
1375 vec_free (*param_ops_p
);
1380 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1381 predicates to the CFG edges. */
/* NOTE(review): garbled extract — statements span multiple physical lines
   and some original lines are missing.  "¶m_type" below is a
   mis-encoding of "&param_type".  */
1384 set_cond_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1385 class ipa_fn_summary
*summary
,
1386 class ipa_node_params
*params_summary
,
1392 struct agg_position_info aggpos
;
1393 enum tree_code code
, inverted_code
;
1398 expr_eval_ops param_ops
;
/* Only GIMPLE_COND terminators with an interprocedural-invariant rhs are
   worth turning into predicates.  */
1400 last
= last_stmt (bb
);
1401 if (!last
|| gimple_code (last
) != GIMPLE_COND
)
1403 if (!is_gimple_ip_invariant (gimple_cond_rhs (last
)))
1405 op
= gimple_cond_lhs (last
);
1407 if (decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1410 code
= gimple_cond_code (last
);
1411 inverted_code
= invert_tree_comparison (code
, HONOR_NANS (op
));
1413 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1415 enum tree_code this_code
= (e
->flags
& EDGE_TRUE_VALUE
1416 ? code
: inverted_code
);
1417 /* invert_tree_comparison will return ERROR_MARK on FP
1418 comparisons that are not EQ/NE instead of returning proper
1419 unordered one. Be sure it is not confused with NON_CONSTANT.
1421 And if the edge's target is the final block of diamond CFG graph
1422 of this conditional statement, we do not need to compute
1423 predicate for the edge because the final block's predicate must
1424 be at least as that of the first block of the statement. */
1425 if (this_code
!= ERROR_MARK
1426 && !dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1429 = add_condition (summary
, params_summary
, index
,
1430 param_type
, &aggpos
,
1431 this_code
, gimple_cond_rhs (last
), param_ops
)
1432 e
->aux
= edge_predicate_pool
.allocate ();
1433 *(predicate
*) e
->aux
= p
;
1436 vec_free (param_ops
);
/* Fallback: recognize the "if (__builtin_constant_p (op) != 0)" pattern
   and predicate the non-constant (false) edge.  */
1439 if (TREE_CODE (op
) != SSA_NAME
)
1442 if (builtin_constant_p (op))
1446 Here we can predicate nonconstant_code. We can't
1447 really handle constant_code since we have no predicate
1448 for this and also the constant code is not known to be
1449 optimized away when inliner doesn't see operand is constant.
1450 Other optimizers might think otherwise. */
1451 if (gimple_cond_code (last
) != NE_EXPR
1452 || !integer_zerop (gimple_cond_rhs (last
)))
1454 set_stmt
= SSA_NAME_DEF_STMT (op
);
1455 if (!gimple_call_builtin_p (set_stmt
, BUILT_IN_CONSTANT_P
)
1456 || gimple_call_num_args (set_stmt
) != 1)
1458 op2
= gimple_call_arg (set_stmt
, 0);
1459 if (!decompose_param_expr (fbi
, set_stmt
, op2
, &index
, ¶m_type
, &aggpos
))
1461 FOR_EACH_EDGE (e
, ei
, bb
->succs
) if (e
->flags
& EDGE_FALSE_VALUE
)
1463 predicate p
= add_condition (summary
, params_summary
, index
,
1464 param_type
, &aggpos
,
1465 predicate::is_not_constant
, NULL_TREE
);
1466 e
->aux
= edge_predicate_pool
.allocate ();
1467 *(predicate
*) e
->aux
= p
;
1472 /* If BB ends by a switch we can turn into predicates, attach corresponding
1473 predicates to the CFG edges. */
/* NOTE(review): garbled extract — statements span multiple physical lines
   and some original lines are missing.  "¶m_type" below is a
   mis-encoding of "&param_type".  */
1476 set_switch_stmt_execution_predicate (struct ipa_func_body_info
*fbi
,
1477 class ipa_fn_summary
*summary
,
1478 class ipa_node_params
*params_summary
,
1484 struct agg_position_info aggpos
;
1490 expr_eval_ops param_ops
;
1492 lastg
= last_stmt (bb
);
1493 if (!lastg
|| gimple_code (lastg
) != GIMPLE_SWITCH
)
1495 gswitch
*last
= as_a
<gswitch
*> (lastg
);
1496 op
= gimple_switch_index (last
);
/* Only switches whose index decomposes to operations on a parameter can
   be predicated.  */
1497 if (!decompose_param_expr (fbi
, last
, op
, &index
, ¶m_type
, &aggpos
,
1501 auto_vec
<std::pair
<tree
, tree
> > ranges
;
1502 tree type
= TREE_TYPE (op
);
1503 int bound_limit
= param_ipa_max_switch_predicate_bounds
;
1504 int bound_count
= 0;
1505 wide_int vr_wmin
, vr_wmax
;
1506 value_range_kind vr_type
= get_range_info (op
, &vr_wmin
, &vr_wmax
);
/* Start every successor edge with the "false" predicate; case edges are
   OR-ed in below.  */
1508 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1510 e
->aux
= edge_predicate_pool
.allocate ();
1511 *(predicate
*) e
->aux
= false;
1514 e
= gimple_switch_edge (cfun
, last
, 0);
1515 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1516 default case if its target basic block is in convergence point of all
1517 switch cases, which can be determined by checking whether it
1518 post-dominates the switch statement. */
1519 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
1520 bound_count
= INT_MAX
;
1522 n
= gimple_switch_num_labels (last
);
1523 for (case_idx
= 1; case_idx
< n
; ++case_idx
)
1525 tree cl
= gimple_switch_label (last
, case_idx
);
1526 tree min
= CASE_LOW (cl
);
1527 tree max
= CASE_HIGH (cl
);
1530 e
= gimple_switch_edge (cfun
, last
, case_idx
);
1532 /* The case value might not have same type as switch expression,
1533 extend the value based on the expression type. */
1534 if (TREE_TYPE (min
) != type
)
1535 min
= wide_int_to_tree (type
, wi::to_wide (min
));
1539 else if (TREE_TYPE (max
) != type
)
1540 max
= wide_int_to_tree (type
, wi::to_wide (max
));
1542 /* The case's target basic block is in convergence point of all switch
1543 cases, its predicate should be at least as that of the switch
1545 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, e
->dest
))
/* A single-value case yields one EQ condition; a range yields the
   conjunction of GE on the low bound and LE on the high bound.  */
1547 else if (min
== max
)
1548 p
= add_condition (summary
, params_summary
, index
, param_type
,
1549 &aggpos
, EQ_EXPR
, min
, param_ops
);
1553 p1
= add_condition (summary
, params_summary
, index
, param_type
,
1554 &aggpos
, GE_EXPR
, min
, param_ops
);
1555 p2
= add_condition (summary
, params_summary
,index
, param_type
,
1556 &aggpos
, LE_EXPR
, max
, param_ops
);
1559 *(class predicate
*) e
->aux
1560 = p
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1562 /* If there are too many disjoint case ranges, predicate for default
1563 case might become too complicated. So add a limit here. */
1564 if (bound_count
> bound_limit
)
1567 bool new_range
= true;
1569 if (!ranges
.is_empty ())
1571 wide_int curr_wmin
= wi::to_wide (min
);
1572 wide_int last_wmax
= wi::to_wide (ranges
.last ().second
);
1574 /* Merge case ranges if they are continuous. */
1575 if (curr_wmin
== last_wmax
+ 1)
1577 else if (vr_type
== VR_ANTI_RANGE
)
1579 /* If two disjoint case ranges can be connected by anti-range
1580 of switch index, combine them to one range. */
1581 if (wi::lt_p (vr_wmax
, curr_wmin
- 1, TYPE_SIGN (type
)))
1582 vr_type
= VR_UNDEFINED
;
1583 else if (wi::le_p (vr_wmin
, last_wmax
+ 1, TYPE_SIGN (type
)))
1588 /* Create/extend a case range. And we count endpoints of range set,
1589 this number nearly equals to number of conditions that we will create
1590 for predicate of default case. */
1593 bound_count
+= (min
== max
) ? 1 : 2;
1594 ranges
.safe_push (std::make_pair (min
, max
));
1598 bound_count
+= (ranges
.last ().first
== ranges
.last ().second
);
1599 ranges
.last ().second
= max
;
/* Edge 0 is the default case.  If the bound limit was exceeded, give the
   default edge the trivially-true predicate and stop.  */
1603 e
= gimple_switch_edge (cfun
, last
, 0);
1604 if (bound_count
> bound_limit
)
1606 *(class predicate
*) e
->aux
= true;
1607 vec_free (param_ops
);
1611 predicate p_seg
= true;
1612 predicate p_all
= false;
1614 if (vr_type
!= VR_RANGE
)
1616 vr_wmin
= wi::to_wide (TYPE_MIN_VALUE (type
));
1617 vr_wmax
= wi::to_wide (TYPE_MAX_VALUE (type
));
1620 /* Construct predicate to represent default range set that is negation of
1621 all case ranges. Case range is classified as containing single/non-single
1622 values. Suppose a piece of case ranges in the following.
1624 [D1...D2] [S1] ... [Sn] [D3...D4]
1626 To represent default case's range sets between two non-single value
1627 case ranges (From D2 to D3), we construct predicate as:
1629 D2 < x < D3 && x != S1 && ... && x != Sn
1631 for (size_t i
= 0; i
< ranges
.length (); i
++)
1633 tree min
= ranges
[i
].first
;
1634 tree max
= ranges
[i
].second
;
1637 p_seg
&= add_condition (summary
, params_summary
, index
,
1638 param_type
, &aggpos
, NE_EXPR
,
1642 /* Do not create sub-predicate for range that is beyond low bound
1644 if (wi::lt_p (vr_wmin
, wi::to_wide (min
), TYPE_SIGN (type
)))
1646 p_seg
&= add_condition (summary
, params_summary
, index
,
1647 param_type
, &aggpos
,
1648 LT_EXPR
, min
, param_ops
);
1649 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1652 /* Do not create sub-predicate for range that is beyond up bound
1654 if (wi::le_p (vr_wmax
, wi::to_wide (max
), TYPE_SIGN (type
)))
1660 p_seg
= add_condition (summary
, params_summary
, index
,
1661 param_type
, &aggpos
, GT_EXPR
,
1666 p_all
= p_all
.or_with (summary
->conds
, p_seg
);
1667 *(class predicate
*) e
->aux
1668 = p_all
.or_with (summary
->conds
, *(class predicate
*) e
->aux
);
1670 vec_free (param_ops
);
1674 /* For each BB in NODE attach to its AUX pointer predicate under
1675 which it is executable. */
/* NOTE(review): garbled extract — statements span multiple physical lines
   and some original lines (braces, loop headers) are missing.  */
1678 compute_bb_predicates (struct ipa_func_body_info
*fbi
,
1679 struct cgraph_node
*node
,
1680 class ipa_fn_summary
*summary
,
1681 class ipa_node_params
*params_summary
)
1683 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
/* First pass: attach per-edge predicates derived from conditional and
   switch terminators.  */
1687 FOR_EACH_BB_FN (bb
, my_function
)
1689 set_cond_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1690 set_switch_stmt_execution_predicate (fbi
, summary
, params_summary
, bb
);
1693 /* Entry block is always executable. */
1694 ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
1695 = edge_predicate_pool
.allocate ();
1696 *(predicate
*) ENTRY_BLOCK_PTR_FOR_FN (my_function
)->aux
= true;
1698 /* A simple dataflow propagation of predicates forward in the CFG.
1699 TODO: work in reverse postorder. */
1703 FOR_EACH_BB_FN (bb
, my_function
)
1705 predicate p
= false;
/* A block executes if any predecessor executes AND the connecting edge's
   predicate holds; OR over all predecessors.  */
1708 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1712 predicate this_bb_predicate
1713 = *(predicate
*) e
->src
->aux
;
1715 this_bb_predicate
&= (*(class predicate
*) e
->aux
);
1716 p
= p
.or_with (summary
->conds
, this_bb_predicate
);
1723 basic_block pdom_bb
;
1728 bb
->aux
= edge_predicate_pool
.allocate ();
1729 *((predicate
*) bb
->aux
) = p
;
1731 else if (p
!= *(predicate
*) bb
->aux
)
1733 /* This OR operation is needed to ensure monotonous data flow
1734 in the case we hit the limit on number of clauses and the
1735 and/or operations above give approximate answers. */
1736 p
= p
.or_with (summary
->conds
, *(predicate
*)bb
->aux
);
1737 if (p
!= *(predicate
*) bb
->aux
)
1740 *((predicate
*) bb
->aux
) = p
;
1744 /* For switch/if statement, we can OR-combine predicates of all
1745 its cases/branches to get predicate for basic block in their
1746 convergence point, but sometimes this will generate very
1747 complicated predicate. Actually, we can get simplified
1748 predicate in another way by using the fact that predicate
1749 for a basic block must also hold true for its post dominators.
1750 To be specific, basic block in convergence point of
1751 conditional statement should include predicate of the
1753 pdom_bb
= get_immediate_dominator (CDI_POST_DOMINATORS
, bb
);
1754 if (pdom_bb
== EXIT_BLOCK_PTR_FOR_FN (my_function
) || !pdom_bb
)
1756 else if (!pdom_bb
->aux
)
1759 pdom_bb
->aux
= edge_predicate_pool
.allocate ();
1760 *((predicate
*) pdom_bb
->aux
) = p
;
1762 else if (p
!= *(predicate
*) pdom_bb
->aux
)
1764 p
= p
.or_with (summary
->conds
, *(predicate
*)pdom_bb
->aux
);
1765 if (p
!= *(predicate
*) pdom_bb
->aux
)
1768 *((predicate
*) pdom_bb
->aux
) = p
;
1777 /* Return predicate specifying when the STMT might have result that is not
1778 a compile time constant. */
/* NOTE(review): garbled extract — recursive calls below are missing their
   params_summary / nonconstant_names argument lines in this view.  */
1781 will_be_nonconstant_expr_predicate (ipa_func_body_info
*fbi
,
1782 class ipa_fn_summary
*summary
,
1783 class ipa_node_params
*params_summary
,
1785 vec
<predicate
> nonconstant_names
)
/* Unary wrappers (casts etc.) do not affect constantness; strip them.  */
1790 while (UNARY_CLASS_P (expr
))
1791 expr
= TREE_OPERAND (expr
, 0);
1793 parm
= unmodified_parm (fbi
, NULL
, expr
, NULL
);
1794 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1795 return add_condition (summary
, params_summary
, index
, TREE_TYPE (parm
), NULL
,
1796 predicate::changed
, NULL_TREE
);
1797 if (is_gimple_min_invariant (expr
))
1799 if (TREE_CODE (expr
) == SSA_NAME
)
1800 return nonconstant_names
[SSA_NAME_VERSION (expr
)];
/* Binary/comparison: nonconstant if either operand is nonconstant.  */
1801 if (BINARY_CLASS_P (expr
) || COMPARISON_CLASS_P (expr
))
1804 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1806 TREE_OPERAND (expr
, 0),
1812 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1814 TREE_OPERAND (expr
, 1),
1816 return p1
.or_with (summary
->conds
, p2
);
/* COND_EXPR: nonconstant if the condition or either arm is.  */
1818 else if (TREE_CODE (expr
) == COND_EXPR
)
1821 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1823 TREE_OPERAND (expr
, 0),
1829 = will_be_nonconstant_expr_predicate (fbi
, summary
,
1831 TREE_OPERAND (expr
, 1),
1835 p1
= p1
.or_with (summary
->conds
, p2
);
1836 p2
= will_be_nonconstant_expr_predicate (fbi
, summary
,
1838 TREE_OPERAND (expr
, 2),
1840 return p2
.or_with (summary
->conds
, p1
);
1842 else if (TREE_CODE (expr
) == CALL_EXPR
)
1853 /* Return predicate specifying when the STMT might have result that is not
1854 a compile time constant. */
/* NOTE(review): garbled extract — "¶m_type" below is a mis-encoding
   of "&param_type"; some original lines are missing.  */
1857 will_be_nonconstant_predicate (struct ipa_func_body_info
*fbi
,
1858 class ipa_fn_summary
*summary
,
1859 class ipa_node_params
*params_summary
,
1861 vec
<predicate
> nonconstant_names
)
1866 tree param_type
= NULL_TREE
;
1867 predicate op_non_const
;
1870 struct agg_position_info aggpos
;
1872 /* What statements might be optimized away
1873 when their arguments are constant. */
1874 if (gimple_code (stmt
) != GIMPLE_ASSIGN
1875 && gimple_code (stmt
) != GIMPLE_COND
1876 && gimple_code (stmt
) != GIMPLE_SWITCH
1877 && (gimple_code (stmt
) != GIMPLE_CALL
1878 || !(gimple_call_flags (stmt
) & ECF_CONST
)))
1881 /* Stores will stay anyway. */
1882 if (gimple_store_p (stmt
))
1885 is_load
= gimple_assign_load_p (stmt
);
1887 /* Loads can be optimized when the value is known. */
1890 tree op
= gimple_assign_rhs1 (stmt
);
1891 if (!decompose_param_expr (fbi
, stmt
, op
, &base_index
, ¶m_type
,
1898 /* See if we understand all operands before we start
1899 adding conditionals. */
1900 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1902 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1903 /* For arguments we can build a condition. */
1904 if (parm
&& ipa_get_param_decl_index (fbi
->info
, parm
) >= 0)
1906 if (TREE_CODE (use
) != SSA_NAME
)
1908 /* If we know when operand is constant,
1909 we still can say something useful. */
1910 if (nonconstant_names
[SSA_NAME_VERSION (use
)] != true)
1917 add_condition (summary
, params_summary
,
1918 base_index
, param_type
, &aggpos
,
1919 predicate::changed
, NULL_TREE
);
/* Second pass: OR together the predicates of all operands.  */
1921 op_non_const
= false;
1922 FOR_EACH_SSA_TREE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
1924 tree parm
= unmodified_parm (fbi
, stmt
, use
, NULL
);
1927 if (parm
&& (index
= ipa_get_param_decl_index (fbi
->info
, parm
)) >= 0)
1929 if (index
!= base_index
)
1930 p
= add_condition (summary
, params_summary
, index
,
1931 TREE_TYPE (parm
), NULL
,
1932 predicate::changed
, NULL_TREE
);
1937 p
= nonconstant_names
[SSA_NAME_VERSION (use
)];
1938 op_non_const
= p
.or_with (summary
->conds
, op_non_const
);
/* Propagate the computed predicate to the lhs SSA name so later uses of
   the result inherit it.  */
1940 if ((gimple_code (stmt
) == GIMPLE_ASSIGN
|| gimple_code (stmt
) == GIMPLE_CALL
)
1941 && gimple_op (stmt
, 0)
1942 && TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1943 nonconstant_names
[SSA_NAME_VERSION (gimple_op (stmt
, 0))]
1945 return op_non_const
;
/* Shared state for the record_modified walk below; member declarations
   (stmt, op, bb_set per the uses elsewhere) are missing from this
   extract.  */
1948 struct record_modified_bb_info
1955 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
1956 probability how often it changes between USE_BB.
1957 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1958 is in different loop nest, we can do better.
1959 This is all just estimate. In theory we look for minimal cut separating
1960 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1964 get_minimal_bb (basic_block init_bb
, basic_block use_bb
)
1966 class loop
*l
= find_common_loop (init_bb
->loop_father
, use_bb
->loop_father
)
/* Prefer the common loop's header when it is executed less often than
   INIT_BB; the returns are missing from this extract.  */
1967 if (l
&& l
->header
->count
< init_bb
->count
)
1972 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1973 set except for info->stmt. */
/* NOTE(review): garbled extract — the dump-guard condition lines are
   missing before the fprintf calls below.  */
1976 record_modified (ao_ref
*ao ATTRIBUTE_UNUSED
, tree vdef
, void *data
)
1978 struct record_modified_bb_info
*info
=
1979 (struct record_modified_bb_info
*) data
;
/* Ignore the statement we are analyzing itself, and clobbers.  */
1980 if (SSA_NAME_DEF_STMT (vdef
) == info
->stmt
)
1982 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef
)))
/* Default defs count as modified at function entry; otherwise record the
   minimal BB between the def and the use.  */
1984 bitmap_set_bit (info
->bb_set
,
1985 SSA_NAME_IS_DEFAULT_DEF (vdef
)
1986 ? ENTRY_BLOCK_PTR_FOR_FN (cfun
)->index
1988 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1989 gimple_bb (info
->stmt
))->index
);
1992 fprintf (dump_file
, " Param ");
1993 print_generic_expr (dump_file
, info
->op
, TDF_SLIM
);
1994 fprintf (dump_file
, " changed at bb %i, minimal: %i stmt: ",
1995 gimple_bb (SSA_NAME_DEF_STMT (vdef
))->index
,
1997 (gimple_bb (SSA_NAME_DEF_STMT (vdef
)),
1998 gimple_bb (info
->stmt
))->index
);
1999 print_gimple_stmt (dump_file
, SSA_NAME_DEF_STMT (vdef
), 0);
2004 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2005 will change since last invocation of STMT.
2007 Value 0 is reserved for compile time invariants.
2008 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2009 ought to be REG_BR_PROB_BASE / estimated_iters. */
/* NOTE(review): garbled extract — several original lines (declarations of
   refd/walked, dump guards, early returns) are missing.  */
2012 param_change_prob (ipa_func_body_info
*fbi
, gimple
*stmt
, int i
)
2014 tree op
= gimple_call_arg (stmt
, i
);
2015 basic_block bb
= gimple_bb (stmt
);
2017 if (TREE_CODE (op
) == WITH_SIZE_EXPR
)
2018 op
= TREE_OPERAND (op
, 0);
2020 tree base
= get_base_address (op
);
2022 /* Global invariants never change. */
2023 if (is_gimple_min_invariant (base
))
2026 /* We would have to do non-trivial analysis to really work out what
2027 is the probability of value to change (i.e. when init statement
2028 is in a sibling loop of the call).
2030 We do a conservative estimate: when call is executed N times more often
2031 than the statement defining value, we take the frequency 1/N. */
2032 if (TREE_CODE (base
) == SSA_NAME
)
2034 profile_count init_count
;
2036 if (!bb
->count
.nonzero_p ())
2037 return REG_BR_PROB_BASE
;
2039 if (SSA_NAME_IS_DEFAULT_DEF (base
))
2040 init_count
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2042 init_count
= get_minimal_bb
2043 (gimple_bb (SSA_NAME_DEF_STMT (base
)),
2044 gimple_bb (stmt
))->count
;
/* Scale: probability ~ init_count / use_count, floored at 1.  */
2046 if (init_count
< bb
->count
)
2047 return MAX ((init_count
.to_sreal_scale (bb
->count
)
2048 * REG_BR_PROB_BASE
).to_int (), 1);
2049 return REG_BR_PROB_BASE
;
/* Memory operand: walk aliased vdefs to find where it may be set.  */
2054 profile_count max
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
;
2055 struct record_modified_bb_info info
;
2056 tree init
= ctor_for_folding (base
);
2058 if (init
!= error_mark_node
)
2060 if (!bb
->count
.nonzero_p ())
2061 return REG_BR_PROB_BASE
;
2064 fprintf (dump_file
, " Analyzing param change probability of ");
2065 print_generic_expr (dump_file
, op
, TDF_SLIM
);
2066 fprintf (dump_file
, "\n");
2068 ao_ref_init (&refd
, op
);
2071 info
.bb_set
= BITMAP_ALLOC (NULL
);
2073 = walk_aliased_vdefs (&refd
, gimple_vuse (stmt
), record_modified
, &info
,
2074 NULL
, NULL
, fbi
->aa_walk_budget
);
/* Give up when the alias walk was truncated or the value may be set in
   the same BB in which it is used.  */
2075 if (walked
< 0 || bitmap_bit_p (info
.bb_set
, bb
->index
))
2080 fprintf (dump_file
, " Ran out of AA walking budget.\n");
2082 fprintf (dump_file
, " Set in same BB as used.\n");
2084 BITMAP_FREE (info
.bb_set
);
2085 return REG_BR_PROB_BASE
;
2090 /* Lookup the most frequent update of the value and believe that
2091 it dominates all the other; precise analysis here is difficult. */
2092 EXECUTE_IF_SET_IN_BITMAP (info
.bb_set
, 0, index
, bi
)
2093 max
= max
.max (BASIC_BLOCK_FOR_FN (cfun
, index
)->count
);
2096 fprintf (dump_file
, " Set with count ");
2097 max
.dump (dump_file
);
2098 fprintf (dump_file
, " and used with count ");
2099 bb
->count
.dump (dump_file
);
2100 fprintf (dump_file
, " freq %f\n",
2101 max
.to_sreal_scale (bb
->count
).to_double ());
2104 BITMAP_FREE (info
.bb_set
);
2105 if (max
< bb
->count
)
2106 return MAX ((max
.to_sreal_scale (bb
->count
)
2107 * REG_BR_PROB_BASE
).to_int (), 1);
2108 return REG_BR_PROB_BASE
;
2112 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2113 sub-graph and if the predicate the condition depends on is known. If so,
2114 return true and store the pointer the predicate in *P. */
/* NOTE(review): garbled extract — several branch bodies and returns are
   missing from this view.  */
2117 phi_result_unknown_predicate (ipa_func_body_info
*fbi
,
2118 ipa_fn_summary
*summary
,
2119 class ipa_node_params
*params_summary
,
2122 vec
<predicate
> nonconstant_names
)
2126 basic_block first_bb
= NULL
;
2129 if (single_pred_p (bb
))
/* Every predecessor must either be the (single) branching block itself
   or a forwarder whose single predecessor is that block.  */
2135 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2137 if (single_succ_p (e
->src
))
2139 if (!single_pred_p (e
->src
))
2142 first_bb
= single_pred (e
->src
);
2143 else if (single_pred (e
->src
) != first_bb
)
2150 else if (e
->src
!= first_bb
)
/* The diamond head must end in a GIMPLE_COND with an invariant rhs.  */
2158 stmt
= last_stmt (first_bb
);
2160 || gimple_code (stmt
) != GIMPLE_COND
2161 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt
)))
2164 *p
= will_be_nonconstant_expr_predicate (fbi
, summary
, params_summary
,
2165 gimple_cond_lhs (stmt
),
2173 /* Given a PHI statement in a function described by inline properties SUMMARY
2174 and *P being the predicate describing whether the selected PHI argument is
2175 known, store a predicate for the result of the PHI statement into
2176 NONCONSTANT_NAMES, if possible. */
/* NOTE(review): garbled extract — some lines (dump guard brace, early
   bail-outs) are missing from this view.  */
2179 predicate_for_phi_result (class ipa_fn_summary
*summary
, gphi
*phi
,
2181 vec
<predicate
> nonconstant_names
)
/* OR into *P the nonconstantness of every non-invariant PHI argument.  */
2185 for (i
= 0; i
< gimple_phi_num_args (phi
); i
++)
2187 tree arg
= gimple_phi_arg (phi
, i
)->def
;
2188 if (!is_gimple_min_invariant (arg
))
2190 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
2191 *p
= p
->or_with (summary
->conds
,
2192 nonconstant_names
[SSA_NAME_VERSION (arg
)]);
2198 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2200 fprintf (dump_file
, "\t\tphi predicate: ");
2201 p
->dump (dump_file
, summary
->conds
);
2203 nonconstant_names
[SSA_NAME_VERSION (gimple_phi_result (phi
))] = *p
;
2206 /* For a typical usage of __builtin_expect (a<b, 1), we
2207 may introduce an extra relation stmt:
2208 With the builtin, we have
2211 t3 = __builtin_expect (t2, 1);
2214 Without the builtin, we have
2217 This affects the size/time estimation and may have
2218 an impact on the earlier inlining.
2219 Here find this pattern and fix it up later. */
/* NOTE(review): garbled extract — the switch cases setting "match" and the
   return statements are missing from this view.  */
2222 find_foldable_builtin_expect (basic_block bb
)
2224 gimple_stmt_iterator bsi
;
2226 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2228 gimple
*stmt
= gsi_stmt (bsi
);
2229 if (gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT
)
2230 || gimple_call_builtin_p (stmt
, BUILT_IN_EXPECT_WITH_PROBABILITY
)
2231 || gimple_call_internal_p (stmt
, IFN_BUILTIN_EXPECT
))
2233 tree var
= gimple_call_lhs (stmt
);
2234 tree arg
= gimple_call_arg (stmt
, 0);
2235 use_operand_p use_p
;
2242 gcc_assert (TREE_CODE (var
) == SSA_NAME
);
/* Follow the SSA chain of the first argument through simple assignments
   looking for the comparison that feeds the builtin.  */
2244 while (TREE_CODE (arg
) == SSA_NAME
)
2246 gimple
*stmt_tmp
= SSA_NAME_DEF_STMT (arg
);
2247 if (!is_gimple_assign (stmt_tmp
))
2249 switch (gimple_assign_rhs_code (stmt_tmp
))
2268 arg
= gimple_assign_rhs1 (stmt_tmp
);
/* The pattern only matters when the builtin's sole use is a branch.  */
2271 if (match
&& single_imm_use (var
, &use_p
, &use_stmt
)
2272 && gimple_code (use_stmt
) == GIMPLE_COND
)
2279 /* Return true when the basic blocks contains only clobbers followed by RESX.
2280 Such BBs are kept around to make removal of dead stores possible with
2281 presence of EH and will be optimized out by optimize_clobbers later in the
2284 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2285 that can be clobber only, too. When it is false, the RESX is not necessary
2286 on the end of basic block. */
/* NOTE(review): garbled extract — several return statements are missing
   from this view.  */
2289 clobber_only_eh_bb_p (basic_block bb
, bool need_eh
= true)
/* Scan the block backwards: it must end in RESX (when NEED_EH) and
   otherwise contain only debug stmts, clobbers and labels.  */
2291 gimple_stmt_iterator gsi
= gsi_last_bb (bb
);
2297 if (gsi_end_p (gsi
))
2299 if (gimple_code (gsi_stmt (gsi
)) != GIMPLE_RESX
)
2303 else if (!single_succ_p (bb
))
2306 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2308 gimple
*stmt
= gsi_stmt (gsi
);
2309 if (is_gimple_debug (stmt
))
2311 if (gimple_clobber_p (stmt
))
2313 if (gimple_code (stmt
) == GIMPLE_LABEL
)
2318 /* See if all predecessors are either throws or clobber only BBs. */
2319 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2320 if (!(e
->flags
& EDGE_EH
)
2321 && !clobber_only_eh_bb_p (e
->src
, false))
2327 /* Return true if STMT computes a floating point expression that may be affected
2328 by -ffast-math and similar flags. */
/* NOTE(review): the return statements are missing from this extract;
   presumably true on the first FP-typed def/use operand, else false.  */
2331 fp_expression_p (gimple
*stmt
)
2336 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, i
, SSA_OP_DEF
|SSA_OP_USE
)
2337 if (FLOAT_TYPE_P (TREE_TYPE (op
)))
2342 /* Analyze function body for NODE.
2343 EARLY indicates run from early optimization pipeline. */
2346 analyze_function_body (struct cgraph_node
*node
, bool early
)
2348 sreal time
= param_uninlined_function_time
;
2349 /* Estimate static overhead for function prologue/epilogue and alignment. */
2350 int size
= param_uninlined_function_insns
;
2351 /* Benefits are scaled by probability of elimination that is in range
2354 struct function
*my_function
= DECL_STRUCT_FUNCTION (node
->decl
);
2356 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2357 class ipa_node_params
*params_summary
= early
? NULL
: IPA_NODE_REF (node
);
2358 predicate bb_predicate
;
2359 struct ipa_func_body_info fbi
;
2360 vec
<predicate
> nonconstant_names
= vNULL
;
2363 gimple
*fix_builtin_expect_stmt
;
2365 gcc_assert (my_function
&& my_function
->cfg
);
2366 gcc_assert (cfun
== my_function
);
2368 memset(&fbi
, 0, sizeof(fbi
));
2369 vec_free (info
->conds
);
2371 vec_free (info
->size_time_table
);
2372 info
->size_time_table
= NULL
;
2374 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2375 so we can produce proper inline hints.
2377 When optimizing and analyzing for early inliner, initialize node params
2378 so we can produce correct BB predicates. */
2380 if (opt_for_fn (node
->decl
, optimize
))
2382 calculate_dominance_info (CDI_DOMINATORS
);
2383 calculate_dominance_info (CDI_POST_DOMINATORS
);
2385 loop_optimizer_init (LOOPS_NORMAL
| LOOPS_HAVE_RECORDED_EXITS
);
2388 ipa_check_create_node_params ();
2389 ipa_initialize_node_params (node
);
2392 if (ipa_node_params_sum
)
2395 fbi
.info
= IPA_NODE_REF (node
);
2396 fbi
.bb_infos
= vNULL
;
2397 fbi
.bb_infos
.safe_grow_cleared (last_basic_block_for_fn (cfun
));
2398 fbi
.param_count
= count_formal_params (node
->decl
);
2399 fbi
.aa_walk_budget
= param_ipa_max_aa_steps
;
2401 nonconstant_names
.safe_grow_cleared
2402 (SSANAMES (my_function
)->length ());
2407 fprintf (dump_file
, "\nAnalyzing function body size: %s\n",
2410 /* When we run into maximal number of entries, we assign everything to the
2411 constant truth case. Be sure to have it in list. */
2412 bb_predicate
= true;
2413 info
->account_size_time (0, 0, bb_predicate
, bb_predicate
);
2415 bb_predicate
= predicate::not_inlined ();
2416 info
->account_size_time (param_uninlined_function_insns
2417 * ipa_fn_summary::size_scale
,
2418 param_uninlined_function_time
,
2423 compute_bb_predicates (&fbi
, node
, info
, params_summary
);
2424 order
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2425 nblocks
= pre_and_rev_post_order_compute (NULL
, order
, false);
2426 for (n
= 0; n
< nblocks
; n
++)
2428 bb
= BASIC_BLOCK_FOR_FN (cfun
, order
[n
]);
2429 freq
= bb
->count
.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun
)->count
);
2430 if (clobber_only_eh_bb_p (bb
))
2432 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2433 fprintf (dump_file
, "\n Ignoring BB %i;"
2434 " it will be optimized away by cleanup_clobbers\n",
2439 /* TODO: Obviously predicates can be propagated down across CFG. */
2443 bb_predicate
= *(predicate
*) bb
->aux
;
2445 bb_predicate
= false;
2448 bb_predicate
= true;
2450 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2452 fprintf (dump_file
, "\n BB %i predicate:", bb
->index
);
2453 bb_predicate
.dump (dump_file
, info
->conds
);
2456 if (fbi
.info
&& nonconstant_names
.exists ())
2458 predicate phi_predicate
;
2459 bool first_phi
= true;
2461 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
2465 && !phi_result_unknown_predicate (&fbi
, info
,
2472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2474 fprintf (dump_file
, " ");
2475 print_gimple_stmt (dump_file
, gsi_stmt (bsi
), 0);
2477 predicate_for_phi_result (info
, bsi
.phi (), &phi_predicate
,
2482 fix_builtin_expect_stmt
= find_foldable_builtin_expect (bb
);
2484 for (gimple_stmt_iterator bsi
= gsi_start_nondebug_bb (bb
);
2485 !gsi_end_p (bsi
); gsi_next_nondebug (&bsi
))
2487 gimple
*stmt
= gsi_stmt (bsi
);
2488 int this_size
= estimate_num_insns (stmt
, &eni_size_weights
);
2489 int this_time
= estimate_num_insns (stmt
, &eni_time_weights
);
2491 predicate will_be_nonconstant
;
2493 /* This relation stmt should be folded after we remove
2494 buildin_expect call. Adjust the cost here. */
2495 if (stmt
== fix_builtin_expect_stmt
)
2501 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2503 fprintf (dump_file
, " ");
2504 print_gimple_stmt (dump_file
, stmt
, 0);
2505 fprintf (dump_file
, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2506 freq
.to_double (), this_size
,
2510 if (is_gimple_call (stmt
)
2511 && !gimple_call_internal_p (stmt
))
2513 struct cgraph_edge
*edge
= node
->get_edge (stmt
);
2514 ipa_call_summary
*es
= ipa_call_summaries
->get_create (edge
);
2516 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2517 resolved as constant. We however don't want to optimize
2518 out the cgraph edges. */
2519 if (nonconstant_names
.exists ()
2520 && gimple_call_builtin_p (stmt
, BUILT_IN_CONSTANT_P
)
2521 && gimple_call_lhs (stmt
)
2522 && TREE_CODE (gimple_call_lhs (stmt
)) == SSA_NAME
)
2524 predicate false_p
= false;
2525 nonconstant_names
[SSA_NAME_VERSION (gimple_call_lhs (stmt
))]
2528 if (ipa_node_params_sum
)
2530 int count
= gimple_call_num_args (stmt
);
2534 es
->param
.safe_grow_cleared (count
);
2535 for (i
= 0; i
< count
; i
++)
2537 int prob
= param_change_prob (&fbi
, stmt
, i
);
2538 gcc_assert (prob
>= 0 && prob
<= REG_BR_PROB_BASE
);
2539 es
->param
[i
].change_prob
= prob
;
2543 es
->call_stmt_size
= this_size
;
2544 es
->call_stmt_time
= this_time
;
2545 es
->loop_depth
= bb_loop_depth (bb
);
2546 edge_set_predicate (edge
, &bb_predicate
);
2547 if (edge
->speculative
)
2549 cgraph_edge
*direct
, *indirect
;
2551 edge
->speculative_call_info (direct
, indirect
, ref
);
2552 gcc_assert (direct
== edge
);
2553 ipa_call_summary
*es2
2554 = ipa_call_summaries
->get_create (indirect
);
2555 ipa_call_summaries
->duplicate (edge
, indirect
,
2560 /* TODO: When conditional jump or swithc is known to be constant, but
2561 we did not translate it into the predicates, we really can account
2562 just maximum of the possible paths. */
2565 = will_be_nonconstant_predicate (&fbi
, info
, params_summary
,
2566 stmt
, nonconstant_names
);
2568 will_be_nonconstant
= true;
2569 if (this_time
|| this_size
)
2571 sreal final_time
= (sreal
)this_time
* freq
;
2573 prob
= eliminated_by_inlining_prob (&fbi
, stmt
);
2574 if (prob
== 1 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2576 "\t\t50%% will be eliminated by inlining\n");
2577 if (prob
== 2 && dump_file
&& (dump_flags
& TDF_DETAILS
))
2578 fprintf (dump_file
, "\t\tWill be eliminated by inlining\n");
2580 class predicate p
= bb_predicate
& will_be_nonconstant
;
2582 /* We can ignore statement when we proved it is never going
2583 to happen, but we cannot do that for call statements
2584 because edges are accounted specially. */
2586 if (*(is_gimple_call (stmt
) ? &bb_predicate
: &p
) != false)
2592 /* We account everything but the calls. Calls have their own
2593 size/time info attached to cgraph edges. This is necessary
2594 in order to make the cost disappear after inlining. */
2595 if (!is_gimple_call (stmt
))
2599 predicate ip
= bb_predicate
& predicate::not_inlined ();
2600 info
->account_size_time (this_size
* prob
,
2601 (final_time
* prob
) / 2, ip
,
2605 info
->account_size_time (this_size
* (2 - prob
),
2606 (final_time
* (2 - prob
) / 2),
2611 if (!info
->fp_expressions
&& fp_expression_p (stmt
))
2613 info
->fp_expressions
= true;
2615 fprintf (dump_file
, " fp_expression set\n");
2619 /* Account cost of address calculations in the statements. */
2620 for (unsigned int i
= 0; i
< gimple_num_ops (stmt
); i
++)
2622 for (tree op
= gimple_op (stmt
, i
);
2623 op
&& handled_component_p (op
);
2624 op
= TREE_OPERAND (op
, 0))
2625 if ((TREE_CODE (op
) == ARRAY_REF
2626 || TREE_CODE (op
) == ARRAY_RANGE_REF
)
2627 && TREE_CODE (TREE_OPERAND (op
, 1)) == SSA_NAME
)
2629 predicate p
= bb_predicate
;
2631 p
= p
& will_be_nonconstant_expr_predicate
2632 (&fbi
, info
, params_summary
,
2633 TREE_OPERAND (op
, 1),
2641 "\t\tAccounting address calculation.\n");
2642 info
->account_size_time (ipa_fn_summary::size_scale
,
2654 if (nonconstant_names
.exists () && !early
)
2657 predicate loop_iterations
= true;
2658 predicate loop_stride
= true;
2660 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2661 flow_loops_dump (dump_file
, NULL
, 0);
2663 FOR_EACH_LOOP (loop
, 0)
2668 class tree_niter_desc niter_desc
;
2669 bb_predicate
= *(predicate
*) loop
->header
->aux
;
2671 exits
= get_loop_exit_edges (loop
);
2672 FOR_EACH_VEC_ELT (exits
, j
, ex
)
2673 if (number_of_iterations_exit (loop
, ex
, &niter_desc
, false)
2674 && !is_gimple_min_invariant (niter_desc
.niter
))
2676 predicate will_be_nonconstant
2677 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2681 if (will_be_nonconstant
!= true)
2682 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2683 if (will_be_nonconstant
!= true
2684 && will_be_nonconstant
!= false)
2685 /* This is slightly inprecise. We may want to represent each
2686 loop with independent predicate. */
2687 loop_iterations
&= will_be_nonconstant
;
2692 /* To avoid quadratic behavior we analyze stride predicates only
2693 with respect to the containing loop. Thus we simply iterate
2694 over all defs in the outermost loop body. */
2695 for (loop
= loops_for_fn (cfun
)->tree_root
->inner
;
2696 loop
!= NULL
; loop
= loop
->next
)
2698 basic_block
*body
= get_loop_body (loop
);
2699 for (unsigned i
= 0; i
< loop
->num_nodes
; i
++)
2701 gimple_stmt_iterator gsi
;
2702 bb_predicate
= *(predicate
*) body
[i
]->aux
;
2703 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
);
2706 gimple
*stmt
= gsi_stmt (gsi
);
2708 if (!is_gimple_assign (stmt
))
2711 tree def
= gimple_assign_lhs (stmt
);
2712 if (TREE_CODE (def
) != SSA_NAME
)
2716 if (!simple_iv (loop_containing_stmt (stmt
),
2717 loop_containing_stmt (stmt
),
2719 || is_gimple_min_invariant (iv
.step
))
2722 predicate will_be_nonconstant
2723 = will_be_nonconstant_expr_predicate (&fbi
, info
,
2727 if (will_be_nonconstant
!= true)
2728 will_be_nonconstant
= bb_predicate
& will_be_nonconstant
;
2729 if (will_be_nonconstant
!= true
2730 && will_be_nonconstant
!= false)
2731 /* This is slightly inprecise. We may want to represent
2732 each loop with independent predicate. */
2733 loop_stride
= loop_stride
& will_be_nonconstant
;
2738 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2739 set_hint_predicate (&s
->loop_iterations
, loop_iterations
);
2740 set_hint_predicate (&s
->loop_stride
, loop_stride
);
2743 FOR_ALL_BB_FN (bb
, my_function
)
2749 edge_predicate_pool
.remove ((predicate
*)bb
->aux
);
2751 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2754 edge_predicate_pool
.remove ((predicate
*) e
->aux
);
2758 ipa_fn_summary
*s
= ipa_fn_summaries
->get (node
);
2759 ipa_size_summary
*ss
= ipa_size_summaries
->get (node
);
2761 ss
->self_size
= size
;
2762 nonconstant_names
.release ();
2763 ipa_release_body_info (&fbi
);
2764 if (opt_for_fn (node
->decl
, optimize
))
2767 loop_optimizer_finalize ();
2768 else if (!ipa_edge_args_sum
)
2769 ipa_free_all_node_params ();
2770 free_dominance_info (CDI_DOMINATORS
);
2771 free_dominance_info (CDI_POST_DOMINATORS
);
2775 fprintf (dump_file
, "\n");
2776 ipa_dump_fn_summary (dump_file
, node
);
2781 /* Compute function summary.
2782 EARLY is true when we compute parameters during early opts. */
2785 compute_fn_summary (struct cgraph_node
*node
, bool early
)
2787 HOST_WIDE_INT self_stack_size
;
2788 struct cgraph_edge
*e
;
2790 gcc_assert (!node
->inlined_to
);
2792 if (!ipa_fn_summaries
)
2793 ipa_fn_summary_alloc ();
2795 /* Create a new ipa_fn_summary. */
2796 ((ipa_fn_summary_t
*)ipa_fn_summaries
)->remove_callees (node
);
2797 ipa_fn_summaries
->remove (node
);
2798 class ipa_fn_summary
*info
= ipa_fn_summaries
->get_create (node
);
2799 class ipa_size_summary
*size_info
= ipa_size_summaries
->get_create (node
);
2801 /* Estimate the stack size for the function if we're optimizing. */
2802 self_stack_size
= optimize
&& !node
->thunk
.thunk_p
2803 ? estimated_stack_frame_size (node
) : 0;
2804 size_info
->estimated_self_stack_size
= self_stack_size
;
2805 info
->estimated_stack_size
= self_stack_size
;
2807 if (node
->thunk
.thunk_p
)
2809 ipa_call_summary
*es
= ipa_call_summaries
->get_create (node
->callees
);
2812 node
->can_change_signature
= false;
2813 es
->call_stmt_size
= eni_size_weights
.call_cost
;
2814 es
->call_stmt_time
= eni_time_weights
.call_cost
;
2815 info
->account_size_time (ipa_fn_summary::size_scale
2816 * param_uninlined_function_thunk_insns
,
2817 param_uninlined_function_thunk_time
, t
, t
);
2818 t
= predicate::not_inlined ();
2819 info
->account_size_time (2 * ipa_fn_summary::size_scale
, 0, t
, t
);
2820 ipa_update_overall_fn_summary (node
);
2821 size_info
->self_size
= size_info
->size
;
2822 if (stdarg_p (TREE_TYPE (node
->decl
)))
2824 info
->inlinable
= false;
2825 node
->callees
->inline_failed
= CIF_VARIADIC_THUNK
;
2828 info
->inlinable
= true;
2832 /* Even is_gimple_min_invariant rely on current_function_decl. */
2833 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
2835 /* Can this function be inlined at all? */
2836 if (!opt_for_fn (node
->decl
, optimize
)
2837 && !lookup_attribute ("always_inline",
2838 DECL_ATTRIBUTES (node
->decl
)))
2839 info
->inlinable
= false;
2841 info
->inlinable
= tree_inlinable_function_p (node
->decl
);
2843 /* Type attributes can use parameter indices to describe them. */
2844 if (TYPE_ATTRIBUTES (TREE_TYPE (node
->decl
))
2845 /* Likewise for #pragma omp declare simd functions or functions
2846 with simd attribute. */
2847 || lookup_attribute ("omp declare simd",
2848 DECL_ATTRIBUTES (node
->decl
)))
2849 node
->can_change_signature
= false;
2852 /* Otherwise, inlinable functions always can change signature. */
2853 if (info
->inlinable
)
2854 node
->can_change_signature
= true;
2857 /* Functions calling builtin_apply cannot change signature. */
2858 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2860 tree
cdecl = e
->callee
->decl
;
2861 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS
)
2862 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START
))
2865 node
->can_change_signature
= !e
;
2868 analyze_function_body (node
, early
);
2871 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2872 if (e
->callee
->comdat_local_p ())
2874 node
->calls_comdat_local
= (e
!= NULL
);
2876 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2877 size_info
->size
= size_info
->self_size
;
2878 info
->estimated_stack_size
= size_info
->estimated_self_stack_size
;
2880 /* Code above should compute exactly the same result as
2881 ipa_update_overall_fn_summary but because computation happens in
2882 different order the roundoff errors result in slight changes. */
2883 ipa_update_overall_fn_summary (node
);
2884 /* In LTO mode we may have speculative edges set. */
2885 gcc_assert (in_lto_p
|| size_info
->size
== size_info
->self_size
);
2889 /* Compute parameters of functions used by inliner using
2890 current_function_decl. */
2893 compute_fn_summary_for_current (void)
2895 compute_fn_summary (cgraph_node::get (current_function_decl
), true);
2899 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2900 KNOWN_CONTEXTS and KNOWN_AGGS. */
2903 estimate_edge_devirt_benefit (struct cgraph_edge
*ie
,
2904 int *size
, int *time
,
2905 vec
<tree
> known_vals
,
2906 vec
<ipa_polymorphic_call_context
> known_contexts
,
2907 vec
<ipa_agg_value_set
> known_aggs
)
2910 struct cgraph_node
*callee
;
2911 class ipa_fn_summary
*isummary
;
2912 enum availability avail
;
2915 if (!known_vals
.exists () && !known_contexts
.exists ())
2917 if (!opt_for_fn (ie
->caller
->decl
, flag_indirect_inlining
))
2920 target
= ipa_get_indirect_edge_target (ie
, known_vals
, known_contexts
,
2921 known_aggs
, &speculative
);
2922 if (!target
|| speculative
)
2925 /* Account for difference in cost between indirect and direct calls. */
2926 *size
-= (eni_size_weights
.indirect_call_cost
- eni_size_weights
.call_cost
);
2927 *time
-= (eni_time_weights
.indirect_call_cost
- eni_time_weights
.call_cost
);
2928 gcc_checking_assert (*time
>= 0);
2929 gcc_checking_assert (*size
>= 0);
2931 callee
= cgraph_node::get (target
);
2932 if (!callee
|| !callee
->definition
)
2934 callee
= callee
->function_symbol (&avail
);
2935 if (avail
< AVAIL_AVAILABLE
)
2937 isummary
= ipa_fn_summaries
->get (callee
);
2938 if (isummary
== NULL
)
2941 return isummary
->inlinable
;
2944 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2945 handle edge E with probability PROB.
2946 Set HINTS if edge may be devirtualized.
2947 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2951 estimate_edge_size_and_time (struct cgraph_edge
*e
, int *size
, int *min_size
,
2953 vec
<tree
> known_vals
,
2954 vec
<ipa_polymorphic_call_context
> known_contexts
,
2955 vec
<ipa_agg_value_set
> known_aggs
,
2958 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
2959 int call_size
= es
->call_stmt_size
;
2960 int call_time
= es
->call_stmt_time
;
2963 if (!e
->callee
&& hints
&& e
->maybe_hot_p ()
2964 && estimate_edge_devirt_benefit (e
, &call_size
, &call_time
,
2965 known_vals
, known_contexts
, known_aggs
))
2966 *hints
|= INLINE_HINT_indirect_call
;
2967 cur_size
= call_size
* ipa_fn_summary::size_scale
;
2970 *min_size
+= cur_size
;
2972 *time
+= ((sreal
)call_time
) * e
->sreal_frequency ();
2977 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2978 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2979 describe context of the call site. */
2982 estimate_calls_size_and_time (struct cgraph_node
*node
, int *size
,
2983 int *min_size
, sreal
*time
,
2985 clause_t possible_truths
,
2986 vec
<tree
> known_vals
,
2987 vec
<ipa_polymorphic_call_context
> known_contexts
,
2988 vec
<ipa_agg_value_set
> known_aggs
)
2990 struct cgraph_edge
*e
;
2991 for (e
= node
->callees
; e
; e
= e
->next_callee
)
2993 if (!e
->inline_failed
)
2995 gcc_checking_assert (!ipa_call_summaries
->get (e
));
2996 estimate_calls_size_and_time (e
->callee
, size
, min_size
, time
,
2999 known_vals
, known_contexts
,
3003 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3005 /* Do not care about zero sized builtins. */
3006 if (!es
->call_stmt_size
)
3008 gcc_checking_assert (!es
->call_stmt_time
);
3012 || es
->predicate
->evaluate (possible_truths
))
3014 /* Predicates of calls shall not use NOT_CHANGED codes,
3015 sowe do not need to compute probabilities. */
3016 estimate_edge_size_and_time (e
, size
,
3017 es
->predicate
? NULL
: min_size
,
3019 known_vals
, known_contexts
,
3023 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3025 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3027 || es
->predicate
->evaluate (possible_truths
))
3028 estimate_edge_size_and_time (e
, size
,
3029 es
->predicate
? NULL
: min_size
,
3031 known_vals
, known_contexts
, known_aggs
,
3036 /* Default constructor for ipa call context.
3037 Memory alloction of known_vals, known_contexts
3038 and known_aggs vectors is owned by the caller, but can
3039 be release by ipa_call_context::release.
3041 inline_param_summary is owned by the caller. */
3042 ipa_call_context::ipa_call_context (cgraph_node
*node
,
3043 clause_t possible_truths
,
3044 clause_t nonspec_possible_truths
,
3045 vec
<tree
> known_vals
,
3046 vec
<ipa_polymorphic_call_context
>
3048 vec
<ipa_agg_value_set
> known_aggs
,
3049 vec
<inline_param_summary
>
3050 inline_param_summary
)
3051 : m_node (node
), m_possible_truths (possible_truths
),
3052 m_nonspec_possible_truths (nonspec_possible_truths
),
3053 m_inline_param_summary (inline_param_summary
),
3054 m_known_vals (known_vals
),
3055 m_known_contexts (known_contexts
),
3056 m_known_aggs (known_aggs
)
3060 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3063 ipa_call_context::duplicate_from (const ipa_call_context
&ctx
)
3065 m_node
= ctx
.m_node
;
3066 m_possible_truths
= ctx
.m_possible_truths
;
3067 m_nonspec_possible_truths
= ctx
.m_nonspec_possible_truths
;
3068 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3069 unsigned int nargs
= params_summary
3070 ? ipa_get_param_count (params_summary
) : 0;
3072 m_inline_param_summary
= vNULL
;
3073 /* Copy the info only if there is at least one useful entry. */
3074 if (ctx
.m_inline_param_summary
.exists ())
3076 unsigned int n
= MIN (ctx
.m_inline_param_summary
.length (), nargs
);
3078 for (unsigned int i
= 0; i
< n
; i
++)
3079 if (ipa_is_param_used_by_ipa_predicates (params_summary
, i
)
3080 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3082 m_inline_param_summary
3083 = ctx
.m_inline_param_summary
.copy ();
3087 m_known_vals
= vNULL
;
3088 if (ctx
.m_known_vals
.exists ())
3090 unsigned int n
= MIN (ctx
.m_known_vals
.length (), nargs
);
3092 for (unsigned int i
= 0; i
< n
; i
++)
3093 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3094 && ctx
.m_known_vals
[i
])
3096 m_known_vals
= ctx
.m_known_vals
.copy ();
3101 m_known_contexts
= vNULL
;
3102 if (ctx
.m_known_contexts
.exists ())
3104 unsigned int n
= MIN (ctx
.m_known_contexts
.length (), nargs
);
3106 for (unsigned int i
= 0; i
< n
; i
++)
3107 if (ipa_is_param_used_by_polymorphic_call (params_summary
, i
)
3108 && !ctx
.m_known_contexts
[i
].useless_p ())
3110 m_known_contexts
= ctx
.m_known_contexts
.copy ();
3115 m_known_aggs
= vNULL
;
3116 if (ctx
.m_known_aggs
.exists ())
3118 unsigned int n
= MIN (ctx
.m_known_aggs
.length (), nargs
);
3120 for (unsigned int i
= 0; i
< n
; i
++)
3121 if (ipa_is_param_used_by_indirect_call (params_summary
, i
)
3122 && !ctx
.m_known_aggs
[i
].is_empty ())
3124 m_known_aggs
= ipa_copy_agg_values (ctx
.m_known_aggs
);
3130 /* Release memory used by known_vals/contexts/aggs vectors.
3131 If ALL is true release also inline_param_summary.
3132 This happens when context was previously duplciated to be stored
3136 ipa_call_context::release (bool all
)
3138 /* See if context is initialized at first place. */
3141 m_known_vals
.release ();
3142 m_known_contexts
.release ();
3143 ipa_release_agg_values (m_known_aggs
);
3145 m_inline_param_summary
.release ();
3148 /* Return true if CTX describes the same call context as THIS. */
3151 ipa_call_context::equal_to (const ipa_call_context
&ctx
)
3153 if (m_node
!= ctx
.m_node
3154 || m_possible_truths
!= ctx
.m_possible_truths
3155 || m_nonspec_possible_truths
!= ctx
.m_nonspec_possible_truths
)
3158 class ipa_node_params
*params_summary
= IPA_NODE_REF (m_node
);
3159 unsigned int nargs
= params_summary
3160 ? ipa_get_param_count (params_summary
) : 0;
3162 if (m_inline_param_summary
.exists () || ctx
.m_inline_param_summary
.exists ())
3164 for (unsigned int i
= 0; i
< nargs
; i
++)
3166 if (!ipa_is_param_used_by_ipa_predicates (params_summary
, i
))
3168 if (i
>= m_inline_param_summary
.length ()
3169 || m_inline_param_summary
[i
].useless_p ())
3171 if (i
< ctx
.m_inline_param_summary
.length ()
3172 && !ctx
.m_inline_param_summary
[i
].useless_p ())
3176 if (i
>= ctx
.m_inline_param_summary
.length ()
3177 || ctx
.m_inline_param_summary
[i
].useless_p ())
3179 if (i
< m_inline_param_summary
.length ()
3180 && !m_inline_param_summary
[i
].useless_p ())
3184 if (!m_inline_param_summary
[i
].equal_to
3185 (ctx
.m_inline_param_summary
[i
]))
3189 if (m_known_vals
.exists () || ctx
.m_known_vals
.exists ())
3191 for (unsigned int i
= 0; i
< nargs
; i
++)
3193 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3195 if (i
>= m_known_vals
.length () || !m_known_vals
[i
])
3197 if (i
< ctx
.m_known_vals
.length () && ctx
.m_known_vals
[i
])
3201 if (i
>= ctx
.m_known_vals
.length () || !ctx
.m_known_vals
[i
])
3203 if (i
< m_known_vals
.length () && m_known_vals
[i
])
3207 if (m_known_vals
[i
] != ctx
.m_known_vals
[i
])
3211 if (m_known_contexts
.exists () || ctx
.m_known_contexts
.exists ())
3213 for (unsigned int i
= 0; i
< nargs
; i
++)
3215 if (!ipa_is_param_used_by_polymorphic_call (params_summary
, i
))
3217 if (i
>= m_known_contexts
.length ()
3218 || m_known_contexts
[i
].useless_p ())
3220 if (i
< ctx
.m_known_contexts
.length ()
3221 && !ctx
.m_known_contexts
[i
].useless_p ())
3225 if (i
>= ctx
.m_known_contexts
.length ()
3226 || ctx
.m_known_contexts
[i
].useless_p ())
3228 if (i
< m_known_contexts
.length ()
3229 && !m_known_contexts
[i
].useless_p ())
3233 if (!m_known_contexts
[i
].equal_to
3234 (ctx
.m_known_contexts
[i
]))
3238 if (m_known_aggs
.exists () || ctx
.m_known_aggs
.exists ())
3240 for (unsigned int i
= 0; i
< nargs
; i
++)
3242 if (!ipa_is_param_used_by_indirect_call (params_summary
, i
))
3244 if (i
>= m_known_aggs
.length () || m_known_aggs
[i
].is_empty ())
3246 if (i
< ctx
.m_known_aggs
.length ()
3247 && !ctx
.m_known_aggs
[i
].is_empty ())
3251 if (i
>= ctx
.m_known_aggs
.length ()
3252 || ctx
.m_known_aggs
[i
].is_empty ())
3254 if (i
< m_known_aggs
.length ()
3255 && !m_known_aggs
[i
].is_empty ())
3259 if (!m_known_aggs
[i
].equal_to (ctx
.m_known_aggs
[i
]))
3266 /* Estimate size and time needed to execute call in the given context.
3267 Additionally detemine hints determined by the context. Finally compute
3268 minimal size needed for the call that is independent on the call context and
3269 can be used for fast estimates. Return the values in RET_SIZE,
3270 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3273 ipa_call_context::estimate_size_and_time (int *ret_size
,
3276 sreal
*ret_nonspecialized_time
,
3277 ipa_hints
*ret_hints
)
3279 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (m_node
);
3284 ipa_hints hints
= 0;
3287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3290 fprintf (dump_file
, " Estimating body: %s/%i\n"
3291 " Known to be false: ", m_node
->name (),
3294 for (i
= predicate::not_inlined_condition
;
3295 i
< (predicate::first_dynamic_condition
3296 + (int) vec_safe_length (info
->conds
)); i
++)
3297 if (!(m_possible_truths
& (1 << i
)))
3300 fprintf (dump_file
, ", ");
3302 dump_condition (dump_file
, info
->conds
, i
);
3306 estimate_calls_size_and_time (m_node
, &size
, &min_size
,
3307 ret_time
? &time
: NULL
,
3308 ret_hints
? &hints
: NULL
, m_possible_truths
,
3309 m_known_vals
, m_known_contexts
, m_known_aggs
);
3311 sreal nonspecialized_time
= time
;
3313 min_size
+= (*info
->size_time_table
)[0].size
;
3314 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3316 bool exec
= e
->exec_predicate
.evaluate (m_nonspec_possible_truths
);
3318 /* Because predicates are conservative, it can happen that nonconst is 1
3322 bool nonconst
= e
->nonconst_predicate
.evaluate (m_possible_truths
);
3324 gcc_checking_assert (e
->time
>= 0);
3325 gcc_checking_assert (time
>= 0);
3327 /* We compute specialized size only because size of nonspecialized
3328 copy is context independent.
3330 The difference between nonspecialized execution and specialized is
3331 that nonspecialized is not going to have optimized out computations
3332 known to be constant in a specialized setting. */
3337 nonspecialized_time
+= e
->time
;
3340 else if (!m_inline_param_summary
.exists ())
3347 int prob
= e
->nonconst_predicate
.probability
3348 (info
->conds
, m_possible_truths
,
3349 m_inline_param_summary
);
3350 gcc_checking_assert (prob
>= 0);
3351 gcc_checking_assert (prob
<= REG_BR_PROB_BASE
);
3352 if (prob
== REG_BR_PROB_BASE
)
3355 time
+= e
->time
* prob
/ REG_BR_PROB_BASE
;
3357 gcc_checking_assert (time
>= 0);
3360 gcc_checking_assert ((*info
->size_time_table
)[0].exec_predicate
== true);
3361 gcc_checking_assert ((*info
->size_time_table
)[0].nonconst_predicate
== true);
3362 gcc_checking_assert (min_size
>= 0);
3363 gcc_checking_assert (size
>= 0);
3364 gcc_checking_assert (time
>= 0);
3365 /* nonspecialized_time should be always bigger than specialized time.
3366 Roundoff issues however may get into the way. */
3367 gcc_checking_assert ((nonspecialized_time
- time
* 99 / 100) >= -1);
3369 /* Roundoff issues may make specialized time bigger than nonspecialized
3370 time. We do not really want that to happen because some heurstics
3371 may get confused by seeing negative speedups. */
3372 if (time
> nonspecialized_time
)
3373 time
= nonspecialized_time
;
3377 if (info
->loop_iterations
3378 && !info
->loop_iterations
->evaluate (m_possible_truths
))
3379 hints
|= INLINE_HINT_loop_iterations
;
3380 if (info
->loop_stride
3381 && !info
->loop_stride
->evaluate (m_possible_truths
))
3382 hints
|= INLINE_HINT_loop_stride
;
3384 hints
|= INLINE_HINT_in_scc
;
3385 if (DECL_DECLARED_INLINE_P (m_node
->decl
))
3386 hints
|= INLINE_HINT_declared_inline
;
3389 size
= RDIV (size
, ipa_fn_summary::size_scale
);
3390 min_size
= RDIV (min_size
, ipa_fn_summary::size_scale
);
3392 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3393 fprintf (dump_file
, "\n size:%i time:%f nonspec time:%f\n", (int) size
,
3394 time
.to_double (), nonspecialized_time
.to_double ());
3397 if (ret_nonspecialized_time
)
3398 *ret_nonspecialized_time
= nonspecialized_time
;
3402 *ret_min_size
= min_size
;
3409 /* Estimate size and time needed to execute callee of EDGE assuming that
3410 parameters known to be constant at caller of EDGE are propagated.
3411 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3412 and types for parameters. */
3415 estimate_ipcp_clone_size_and_time (struct cgraph_node
*node
,
3416 vec
<tree
> known_vals
,
3417 vec
<ipa_polymorphic_call_context
>
3419 vec
<ipa_agg_value_set
> known_aggs
,
3420 int *ret_size
, sreal
*ret_time
,
3421 sreal
*ret_nonspec_time
,
3424 clause_t clause
, nonspec_clause
;
3426 /* TODO: Also pass known value ranges. */
3427 evaluate_conditions_for_known_args (node
, false, known_vals
, vNULL
,
3428 known_aggs
, &clause
, &nonspec_clause
);
3429 ipa_call_context
ctx (node
, clause
, nonspec_clause
,
3430 known_vals
, known_contexts
,
3432 ctx
.estimate_size_and_time (ret_size
, NULL
, ret_time
,
3433 ret_nonspec_time
, hints
);
3436 /* Return stack frame offset where frame of NODE is supposed to start inside
3437 of the function it is inlined to.
3438 Return 0 for functions that are not inlined. */
3441 ipa_get_stack_frame_offset (struct cgraph_node
*node
)
3443 HOST_WIDE_INT offset
= 0;
3444 if (!node
->inlined_to
)
3446 node
= node
->callers
->caller
;
3449 offset
+= ipa_size_summaries
->get (node
)->estimated_self_stack_size
;
3450 if (!node
->inlined_to
)
3452 node
= node
->callers
->caller
;
3457 /* Update summary information of inline clones after inlining.
3458 Compute peak stack usage. */
3461 inline_update_callee_summaries (struct cgraph_node
*node
, int depth
)
3463 struct cgraph_edge
*e
;
3465 ipa_propagate_frequency (node
);
3466 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3468 if (!e
->inline_failed
)
3469 inline_update_callee_summaries (e
->callee
, depth
);
3471 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3473 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3474 ipa_call_summaries
->get (e
)->loop_depth
+= depth
;
3477 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3478 When function A is inlined in B and A calls C with parameter that
3479 changes with probability PROB1 and C is known to be passthroug
3480 of argument if B that change with probability PROB2, the probability
3481 of change is now PROB1*PROB2. */
3484 remap_edge_change_prob (struct cgraph_edge
*inlined_edge
,
3485 struct cgraph_edge
*edge
)
3487 if (ipa_node_params_sum
)
3490 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3493 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3494 class ipa_call_summary
*inlined_es
3495 = ipa_call_summaries
->get (inlined_edge
);
3497 if (es
->param
.length () == 0)
3500 for (i
= 0; i
< ipa_get_cs_argument_count (args
); i
++)
3502 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3503 if (jfunc
->type
== IPA_JF_PASS_THROUGH
3504 || jfunc
->type
== IPA_JF_ANCESTOR
)
3506 int id
= jfunc
->type
== IPA_JF_PASS_THROUGH
3507 ? ipa_get_jf_pass_through_formal_id (jfunc
)
3508 : ipa_get_jf_ancestor_formal_id (jfunc
);
3509 if (id
< (int) inlined_es
->param
.length ())
3511 int prob1
= es
->param
[i
].change_prob
;
3512 int prob2
= inlined_es
->param
[id
].change_prob
;
3513 int prob
= combine_probabilities (prob1
, prob2
);
3515 if (prob1
&& prob2
&& !prob
)
3518 es
->param
[i
].change_prob
= prob
;
3525 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3527 Remap predicates of callees of NODE. Rest of arguments match
3530 Also update change probabilities. */
3533 remap_edge_summaries (struct cgraph_edge
*inlined_edge
,
3534 struct cgraph_node
*node
,
3535 class ipa_fn_summary
*info
,
3536 class ipa_node_params
*params_summary
,
3537 class ipa_fn_summary
*callee_info
,
3538 vec
<int> operand_map
,
3539 vec
<int> offset_map
,
3540 clause_t possible_truths
,
3541 predicate
*toplev_predicate
)
3543 struct cgraph_edge
*e
, *next
;
3544 for (e
= node
->callees
; e
; e
= next
)
3547 next
= e
->next_callee
;
3549 if (e
->inline_failed
)
3551 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3552 remap_edge_change_prob (inlined_edge
, e
);
3556 p
= es
->predicate
->remap_after_inlining
3557 (info
, params_summary
,
3558 callee_info
, operand_map
,
3559 offset_map
, possible_truths
,
3561 edge_set_predicate (e
, &p
);
3564 edge_set_predicate (e
, toplev_predicate
);
3567 remap_edge_summaries (inlined_edge
, e
->callee
, info
,
3568 params_summary
, callee_info
,
3569 operand_map
, offset_map
, possible_truths
,
3572 for (e
= node
->indirect_calls
; e
; e
= next
)
3574 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
3576 next
= e
->next_callee
;
3578 remap_edge_change_prob (inlined_edge
, e
);
3581 p
= es
->predicate
->remap_after_inlining
3582 (info
, params_summary
,
3583 callee_info
, operand_map
, offset_map
,
3584 possible_truths
, *toplev_predicate
);
3585 edge_set_predicate (e
, &p
);
3588 edge_set_predicate (e
, toplev_predicate
);
3592 /* Same as remap_predicate, but set result into hint *HINT. */
3595 remap_hint_predicate (class ipa_fn_summary
*info
,
3596 class ipa_node_params
*params_summary
,
3597 class ipa_fn_summary
*callee_info
,
3599 vec
<int> operand_map
,
3600 vec
<int> offset_map
,
3601 clause_t possible_truths
,
3602 predicate
*toplev_predicate
)
3608 p
= (*hint
)->remap_after_inlining
3609 (info
, params_summary
, callee_info
,
3610 operand_map
, offset_map
,
3611 possible_truths
, *toplev_predicate
);
3612 if (p
!= false && p
!= true)
3615 set_hint_predicate (hint
, p
);
3621 /* We inlined EDGE. Update summary of the function we inlined into. */
3624 ipa_merge_fn_summary_after_inlining (struct cgraph_edge
*edge
)
3626 ipa_fn_summary
*callee_info
= ipa_fn_summaries
->get (edge
->callee
);
3627 struct cgraph_node
*to
= (edge
->caller
->inlined_to
3628 ? edge
->caller
->inlined_to
: edge
->caller
);
3629 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (to
);
3630 clause_t clause
= 0; /* not_inline is known to be false. */
3632 auto_vec
<int, 8> operand_map
;
3633 auto_vec
<int, 8> offset_map
;
3635 predicate toplev_predicate
;
3636 class ipa_call_summary
*es
= ipa_call_summaries
->get (edge
);
3637 class ipa_node_params
*params_summary
= (ipa_node_params_sum
3638 ? IPA_NODE_REF (to
) : NULL
);
3641 toplev_predicate
= *es
->predicate
;
3643 toplev_predicate
= true;
3645 info
->fp_expressions
|= callee_info
->fp_expressions
;
3647 if (callee_info
->conds
)
3648 evaluate_properties_for_edge (edge
, true, &clause
,
3649 NULL
, NULL
, NULL
, NULL
);
3650 if (ipa_node_params_sum
&& callee_info
->conds
)
3652 class ipa_edge_args
*args
= IPA_EDGE_REF (edge
);
3653 int count
= args
? ipa_get_cs_argument_count (args
) : 0;
3658 operand_map
.safe_grow_cleared (count
);
3659 offset_map
.safe_grow_cleared (count
);
3661 for (i
= 0; i
< count
; i
++)
3663 struct ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
3666 /* TODO: handle non-NOPs when merging. */
3667 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
3669 if (ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
3670 map
= ipa_get_jf_pass_through_formal_id (jfunc
);
3671 if (!ipa_get_jf_pass_through_agg_preserved (jfunc
))
3674 else if (jfunc
->type
== IPA_JF_ANCESTOR
)
3676 HOST_WIDE_INT offset
= ipa_get_jf_ancestor_offset (jfunc
);
3677 if (offset
>= 0 && offset
< INT_MAX
)
3679 map
= ipa_get_jf_ancestor_formal_id (jfunc
);
3680 if (!ipa_get_jf_ancestor_agg_preserved (jfunc
))
3682 offset_map
[i
] = offset
;
3685 operand_map
[i
] = map
;
3686 gcc_assert (map
< ipa_get_param_count (params_summary
));
3689 sreal freq
= edge
->sreal_frequency ();
3690 for (i
= 0; vec_safe_iterate (callee_info
->size_time_table
, i
, &e
); i
++)
3693 p
= e
->exec_predicate
.remap_after_inlining
3694 (info
, params_summary
,
3695 callee_info
, operand_map
,
3698 predicate nonconstp
;
3699 nonconstp
= e
->nonconst_predicate
.remap_after_inlining
3700 (info
, params_summary
,
3701 callee_info
, operand_map
,
3704 if (p
!= false && nonconstp
!= false)
3706 sreal add_time
= ((sreal
)e
->time
* freq
);
3707 int prob
= e
->nonconst_predicate
.probability (callee_info
->conds
,
3709 if (prob
!= REG_BR_PROB_BASE
)
3710 add_time
= add_time
* prob
/ REG_BR_PROB_BASE
;
3711 if (prob
!= REG_BR_PROB_BASE
3712 && dump_file
&& (dump_flags
& TDF_DETAILS
))
3714 fprintf (dump_file
, "\t\tScaling time by probability:%f\n",
3715 (double) prob
/ REG_BR_PROB_BASE
);
3717 info
->account_size_time (e
->size
, add_time
, p
, nonconstp
);
3720 remap_edge_summaries (edge
, edge
->callee
, info
, params_summary
,
3721 callee_info
, operand_map
,
3722 offset_map
, clause
, &toplev_predicate
);
3723 remap_hint_predicate (info
, params_summary
, callee_info
,
3724 &callee_info
->loop_iterations
,
3725 operand_map
, offset_map
, clause
, &toplev_predicate
);
3726 remap_hint_predicate (info
, params_summary
, callee_info
,
3727 &callee_info
->loop_stride
,
3728 operand_map
, offset_map
, clause
, &toplev_predicate
);
3730 HOST_WIDE_INT stack_frame_offset
= ipa_get_stack_frame_offset (edge
->callee
);
3731 HOST_WIDE_INT peak
= stack_frame_offset
+ callee_info
->estimated_stack_size
;
3733 if (info
->estimated_stack_size
< peak
)
3734 info
->estimated_stack_size
= peak
;
3736 inline_update_callee_summaries (edge
->callee
, es
->loop_depth
);
3738 /* Free summaries that are not maintained for inline clones/edges. */
3739 ipa_call_summaries
->remove (edge
);
3740 ipa_fn_summaries
->remove (edge
->callee
);
3741 ipa_remove_from_growth_caches (edge
);
3744 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
3745 overall size and time. Recompute it. */
3748 ipa_update_overall_fn_summary (struct cgraph_node
*node
)
3750 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (node
);
3751 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (node
);
3755 size_info
->size
= 0;
3757 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
3759 size_info
->size
+= e
->size
;
3760 info
->time
+= e
->time
;
3762 info
->min_size
= (*info
->size_time_table
)[0].size
;
3763 estimate_calls_size_and_time (node
, &size_info
->size
, &info
->min_size
,
3765 ~(clause_t
) (1 << predicate::false_condition
),
3766 vNULL
, vNULL
, vNULL
);
3767 size_info
->size
= RDIV (size_info
->size
, ipa_fn_summary::size_scale
);
3768 info
->min_size
= RDIV (info
->min_size
, ipa_fn_summary::size_scale
);
3772 /* This function performs intraprocedural analysis in NODE that is required to
3773 inline indirect calls. */
3776 inline_indirect_intraprocedural_analysis (struct cgraph_node
*node
)
3778 ipa_analyze_node (node
);
3779 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3781 ipa_print_node_params (dump_file
, node
);
3782 ipa_print_node_jump_functions (dump_file
, node
);
3787 /* Note function body size. */
3790 inline_analyze_function (struct cgraph_node
*node
)
3792 push_cfun (DECL_STRUCT_FUNCTION (node
->decl
));
3795 fprintf (dump_file
, "\nAnalyzing function: %s/%u\n",
3796 node
->name (), node
->order
);
3797 if (opt_for_fn (node
->decl
, optimize
) && !node
->thunk
.thunk_p
)
3798 inline_indirect_intraprocedural_analysis (node
);
3799 compute_fn_summary (node
, false);
3802 struct cgraph_edge
*e
;
3803 for (e
= node
->callees
; e
; e
= e
->next_callee
)
3804 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3805 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
3806 e
->inline_failed
= CIF_FUNCTION_NOT_OPTIMIZED
;
3813 /* Called when new function is inserted to callgraph late. */
3816 ipa_fn_summary_t::insert (struct cgraph_node
*node
, ipa_fn_summary
*)
3818 inline_analyze_function (node
);
3821 /* Note function body size. */
3824 ipa_fn_summary_generate (void)
3826 struct cgraph_node
*node
;
3828 FOR_EACH_DEFINED_FUNCTION (node
)
3829 if (DECL_STRUCT_FUNCTION (node
->decl
))
3830 node
->versionable
= tree_versionable_function_p (node
->decl
);
3832 ipa_fn_summary_alloc ();
3834 ipa_fn_summaries
->enable_insertion_hook ();
3836 ipa_register_cgraph_hooks ();
3838 FOR_EACH_DEFINED_FUNCTION (node
)
3840 && (flag_generate_lto
|| flag_generate_offload
|| flag_wpa
3841 || opt_for_fn (node
->decl
, optimize
)))
3842 inline_analyze_function (node
);
3846 /* Write inline summary for edge E to OB. */
3849 read_ipa_call_summary (class lto_input_block
*ib
, struct cgraph_edge
*e
,
3852 class ipa_call_summary
*es
= prevails
3853 ? ipa_call_summaries
->get_create (e
) : NULL
;
3857 int size
= streamer_read_uhwi (ib
);
3858 int time
= streamer_read_uhwi (ib
);
3859 int depth
= streamer_read_uhwi (ib
);
3863 es
->call_stmt_size
= size
;
3864 es
->call_stmt_time
= time
;
3865 es
->loop_depth
= depth
;
3868 bitpack_d bp
= streamer_read_bitpack (ib
);
3870 es
->is_return_callee_uncaptured
= bp_unpack_value (&bp
, 1);
3872 bp_unpack_value (&bp
, 1);
3876 edge_set_predicate (e
, &p
);
3877 length
= streamer_read_uhwi (ib
);
3878 if (length
&& es
&& e
->possibly_call_in_translation_unit_p ())
3880 es
->param
.safe_grow_cleared (length
);
3881 for (i
= 0; i
< length
; i
++)
3882 es
->param
[i
].change_prob
= streamer_read_uhwi (ib
);
3886 for (i
= 0; i
< length
; i
++)
3887 streamer_read_uhwi (ib
);
3892 /* Stream in inline summaries from the section. */
3895 inline_read_section (struct lto_file_decl_data
*file_data
, const char *data
,
3898 const struct lto_function_header
*header
=
3899 (const struct lto_function_header
*) data
;
3900 const int cfg_offset
= sizeof (struct lto_function_header
);
3901 const int main_offset
= cfg_offset
+ header
->cfg_size
;
3902 const int string_offset
= main_offset
+ header
->main_size
;
3903 class data_in
*data_in
;
3904 unsigned int i
, count2
, j
;
3905 unsigned int f_count
;
3907 lto_input_block
ib ((const char *) data
+ main_offset
, header
->main_size
,
3908 file_data
->mode_table
);
3911 lto_data_in_create (file_data
, (const char *) data
+ string_offset
,
3912 header
->string_size
, vNULL
);
3913 f_count
= streamer_read_uhwi (&ib
);
3914 for (i
= 0; i
< f_count
; i
++)
3917 struct cgraph_node
*node
;
3918 class ipa_fn_summary
*info
;
3919 class ipa_node_params
*params_summary
;
3920 class ipa_size_summary
*size_info
;
3921 lto_symtab_encoder_t encoder
;
3922 struct bitpack_d bp
;
3923 struct cgraph_edge
*e
;
3926 index
= streamer_read_uhwi (&ib
);
3927 encoder
= file_data
->symtab_node_encoder
;
3928 node
= dyn_cast
<cgraph_node
*> (lto_symtab_encoder_deref (encoder
,
3930 info
= node
->prevailing_p () ? ipa_fn_summaries
->get_create (node
) : NULL
;
3931 params_summary
= node
->prevailing_p () ? IPA_NODE_REF (node
) : NULL
;
3932 size_info
= node
->prevailing_p ()
3933 ? ipa_size_summaries
->get_create (node
) : NULL
;
3935 int stack_size
= streamer_read_uhwi (&ib
);
3936 int size
= streamer_read_uhwi (&ib
);
3937 sreal time
= sreal::stream_in (&ib
);
3941 info
->estimated_stack_size
3942 = size_info
->estimated_self_stack_size
= stack_size
;
3943 size_info
->size
= size_info
->self_size
= size
;
3947 bp
= streamer_read_bitpack (&ib
);
3950 info
->inlinable
= bp_unpack_value (&bp
, 1);
3951 info
->fp_expressions
= bp_unpack_value (&bp
, 1);
3955 bp_unpack_value (&bp
, 1);
3956 bp_unpack_value (&bp
, 1);
3959 count2
= streamer_read_uhwi (&ib
);
3960 gcc_assert (!info
|| !info
->conds
);
3962 vec_safe_reserve_exact (info
->conds
, count2
);
3963 for (j
= 0; j
< count2
; j
++)
3966 unsigned int k
, count3
;
3967 c
.operand_num
= streamer_read_uhwi (&ib
);
3968 c
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3969 c
.type
= stream_read_tree (&ib
, data_in
);
3970 c
.val
= stream_read_tree (&ib
, data_in
);
3971 bp
= streamer_read_bitpack (&ib
);
3972 c
.agg_contents
= bp_unpack_value (&bp
, 1);
3973 c
.by_ref
= bp_unpack_value (&bp
, 1);
3975 c
.offset
= streamer_read_uhwi (&ib
);
3976 count3
= streamer_read_uhwi (&ib
);
3979 vec_safe_reserve_exact (c
.param_ops
, count3
);
3981 ipa_set_param_used_by_ipa_predicates
3982 (params_summary
, c
.operand_num
, true);
3983 for (k
= 0; k
< count3
; k
++)
3985 struct expr_eval_op op
;
3986 enum gimple_rhs_class rhs_class
;
3987 op
.code
= (enum tree_code
) streamer_read_uhwi (&ib
);
3988 op
.type
= stream_read_tree (&ib
, data_in
);
3989 switch (rhs_class
= get_gimple_rhs_class (op
.code
))
3991 case GIMPLE_UNARY_RHS
:
3993 op
.val
[0] = NULL_TREE
;
3994 op
.val
[1] = NULL_TREE
;
3997 case GIMPLE_BINARY_RHS
:
3998 case GIMPLE_TERNARY_RHS
:
3999 bp
= streamer_read_bitpack (&ib
);
4000 op
.index
= bp_unpack_value (&bp
, 2);
4001 op
.val
[0] = stream_read_tree (&ib
, data_in
);
4002 if (rhs_class
== GIMPLE_BINARY_RHS
)
4003 op
.val
[1] = NULL_TREE
;
4005 op
.val
[1] = stream_read_tree (&ib
, data_in
);
4009 fatal_error (UNKNOWN_LOCATION
,
4010 "invalid fnsummary in LTO stream");
4013 c
.param_ops
->quick_push (op
);
4016 info
->conds
->quick_push (c
);
4018 count2
= streamer_read_uhwi (&ib
);
4019 gcc_assert (!info
|| !info
->size_time_table
);
4021 vec_safe_reserve_exact (info
->size_time_table
, count2
);
4022 for (j
= 0; j
< count2
; j
++)
4024 class size_time_entry e
;
4026 e
.size
= streamer_read_uhwi (&ib
);
4027 e
.time
= sreal::stream_in (&ib
);
4028 e
.exec_predicate
.stream_in (&ib
);
4029 e
.nonconst_predicate
.stream_in (&ib
);
4032 info
->size_time_table
->quick_push (e
);
4037 set_hint_predicate (&info
->loop_iterations
, p
);
4040 set_hint_predicate (&info
->loop_stride
, p
);
4041 for (e
= node
->callees
; e
; e
= e
->next_callee
)
4042 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4043 for (e
= node
->indirect_calls
; e
; e
= e
->next_callee
)
4044 read_ipa_call_summary (&ib
, e
, info
!= NULL
);
4047 lto_free_section_data (file_data
, LTO_section_ipa_fn_summary
, NULL
, data
,
4049 lto_data_in_delete (data_in
);
4053 /* Read inline summary. Jump functions are shared among ipa-cp
4054 and inliner, so when ipa-cp is active, we don't need to write them
4058 ipa_fn_summary_read (void)
4060 struct lto_file_decl_data
**file_data_vec
= lto_get_file_decl_data ();
4061 struct lto_file_decl_data
*file_data
;
4064 ipa_fn_summary_alloc ();
4066 while ((file_data
= file_data_vec
[j
++]))
4070 = lto_get_summary_section_data (file_data
, LTO_section_ipa_fn_summary
,
4073 inline_read_section (file_data
, data
, len
);
4075 /* Fatal error here. We do not want to support compiling ltrans units
4076 with different version of compiler or different flags than the WPA
4077 unit, so this should never happen. */
4078 fatal_error (input_location
,
4079 "ipa inline summary is missing in input file");
4081 ipa_register_cgraph_hooks ();
4083 ipa_prop_read_jump_functions ();
4085 gcc_assert (ipa_fn_summaries
);
4086 ipa_fn_summaries
->enable_insertion_hook ();
4090 /* Write inline summary for edge E to OB. */
4093 write_ipa_call_summary (struct output_block
*ob
, struct cgraph_edge
*e
)
4095 class ipa_call_summary
*es
= ipa_call_summaries
->get (e
);
4098 streamer_write_uhwi (ob
, es
->call_stmt_size
);
4099 streamer_write_uhwi (ob
, es
->call_stmt_time
);
4100 streamer_write_uhwi (ob
, es
->loop_depth
);
4102 bitpack_d bp
= bitpack_create (ob
->main_stream
);
4103 bp_pack_value (&bp
, es
->is_return_callee_uncaptured
, 1);
4104 streamer_write_bitpack (&bp
);
4107 es
->predicate
->stream_out (ob
);
4109 streamer_write_uhwi (ob
, 0);
4110 streamer_write_uhwi (ob
, es
->param
.length ());
4111 for (i
= 0; i
< (int) es
->param
.length (); i
++)
4112 streamer_write_uhwi (ob
, es
->param
[i
].change_prob
);
4116 /* Write inline summary for node in SET.
4117 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4118 active, we don't need to write them twice. */
4121 ipa_fn_summary_write (void)
4123 struct output_block
*ob
= create_output_block (LTO_section_ipa_fn_summary
);
4124 lto_symtab_encoder_t encoder
= ob
->decl_state
->symtab_node_encoder
;
4125 unsigned int count
= 0;
4128 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
4130 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
4131 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
4132 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
4135 streamer_write_uhwi (ob
, count
);
4137 for (i
= 0; i
< lto_symtab_encoder_size (encoder
); i
++)
4139 symtab_node
*snode
= lto_symtab_encoder_deref (encoder
, i
);
4140 cgraph_node
*cnode
= dyn_cast
<cgraph_node
*> (snode
);
4141 if (cnode
&& cnode
->definition
&& !cnode
->alias
)
4143 class ipa_fn_summary
*info
= ipa_fn_summaries
->get (cnode
);
4144 class ipa_size_summary
*size_info
= ipa_size_summaries
->get (cnode
);
4145 struct bitpack_d bp
;
4146 struct cgraph_edge
*edge
;
4149 struct condition
*c
;
4151 streamer_write_uhwi (ob
, lto_symtab_encoder_encode (encoder
, cnode
));
4152 streamer_write_hwi (ob
, size_info
->estimated_self_stack_size
);
4153 streamer_write_hwi (ob
, size_info
->self_size
);
4154 info
->time
.stream_out (ob
);
4155 bp
= bitpack_create (ob
->main_stream
);
4156 bp_pack_value (&bp
, info
->inlinable
, 1);
4157 bp_pack_value (&bp
, false, 1);
4158 bp_pack_value (&bp
, info
->fp_expressions
, 1);
4159 streamer_write_bitpack (&bp
);
4160 streamer_write_uhwi (ob
, vec_safe_length (info
->conds
));
4161 for (i
= 0; vec_safe_iterate (info
->conds
, i
, &c
); i
++)
4164 struct expr_eval_op
*op
;
4166 streamer_write_uhwi (ob
, c
->operand_num
);
4167 streamer_write_uhwi (ob
, c
->code
);
4168 stream_write_tree (ob
, c
->type
, true);
4169 stream_write_tree (ob
, c
->val
, true);
4170 bp
= bitpack_create (ob
->main_stream
);
4171 bp_pack_value (&bp
, c
->agg_contents
, 1);
4172 bp_pack_value (&bp
, c
->by_ref
, 1);
4173 streamer_write_bitpack (&bp
);
4174 if (c
->agg_contents
)
4175 streamer_write_uhwi (ob
, c
->offset
);
4176 streamer_write_uhwi (ob
, vec_safe_length (c
->param_ops
));
4177 for (j
= 0; vec_safe_iterate (c
->param_ops
, j
, &op
); j
++)
4179 streamer_write_uhwi (ob
, op
->code
);
4180 stream_write_tree (ob
, op
->type
, true);
4183 bp
= bitpack_create (ob
->main_stream
);
4184 bp_pack_value (&bp
, op
->index
, 2);
4185 streamer_write_bitpack (&bp
);
4186 stream_write_tree (ob
, op
->val
[0], true);
4188 stream_write_tree (ob
, op
->val
[1], true);
4192 streamer_write_uhwi (ob
, vec_safe_length (info
->size_time_table
));
4193 for (i
= 0; vec_safe_iterate (info
->size_time_table
, i
, &e
); i
++)
4195 streamer_write_uhwi (ob
, e
->size
);
4196 e
->time
.stream_out (ob
);
4197 e
->exec_predicate
.stream_out (ob
);
4198 e
->nonconst_predicate
.stream_out (ob
);
4200 if (info
->loop_iterations
)
4201 info
->loop_iterations
->stream_out (ob
);
4203 streamer_write_uhwi (ob
, 0);
4204 if (info
->loop_stride
)
4205 info
->loop_stride
->stream_out (ob
);
4207 streamer_write_uhwi (ob
, 0);
4208 for (edge
= cnode
->callees
; edge
; edge
= edge
->next_callee
)
4209 write_ipa_call_summary (ob
, edge
);
4210 for (edge
= cnode
->indirect_calls
; edge
; edge
= edge
->next_callee
)
4211 write_ipa_call_summary (ob
, edge
);
4214 streamer_write_char_stream (ob
->main_stream
, 0);
4215 produce_asm (ob
, NULL
);
4216 destroy_output_block (ob
);
4219 ipa_prop_write_jump_functions ();
4223 /* Release function summary. */
4226 ipa_free_fn_summary (void)
4228 if (!ipa_call_summaries
)
4230 ggc_delete (ipa_fn_summaries
);
4231 ipa_fn_summaries
= NULL
;
4232 delete ipa_call_summaries
;
4233 ipa_call_summaries
= NULL
;
4234 edge_predicate_pool
.release ();
4235 /* During IPA this is one of largest datastructures to release. */
4240 /* Release function summary. */
4243 ipa_free_size_summary (void)
4245 if (!ipa_size_summaries
)
4247 delete ipa_size_summaries
;
4248 ipa_size_summaries
= NULL
;
4253 const pass_data pass_data_local_fn_summary
=
4255 GIMPLE_PASS
, /* type */
4256 "local-fnsummary", /* name */
4257 OPTGROUP_INLINE
, /* optinfo_flags */
4258 TV_INLINE_PARAMETERS
, /* tv_id */
4259 0, /* properties_required */
4260 0, /* properties_provided */
4261 0, /* properties_destroyed */
4262 0, /* todo_flags_start */
4263 0, /* todo_flags_finish */
4266 class pass_local_fn_summary
: public gimple_opt_pass
4269 pass_local_fn_summary (gcc::context
*ctxt
)
4270 : gimple_opt_pass (pass_data_local_fn_summary
, ctxt
)
4273 /* opt_pass methods: */
4274 opt_pass
* clone () { return new pass_local_fn_summary (m_ctxt
); }
4275 virtual unsigned int execute (function
*)
4277 return compute_fn_summary_for_current ();
4280 }; // class pass_local_fn_summary
4285 make_pass_local_fn_summary (gcc::context
*ctxt
)
4287 return new pass_local_fn_summary (ctxt
);
4291 /* Free inline summary. */
4295 const pass_data pass_data_ipa_free_fn_summary
=
4297 SIMPLE_IPA_PASS
, /* type */
4298 "free-fnsummary", /* name */
4299 OPTGROUP_NONE
, /* optinfo_flags */
4300 TV_IPA_FREE_INLINE_SUMMARY
, /* tv_id */
4301 0, /* properties_required */
4302 0, /* properties_provided */
4303 0, /* properties_destroyed */
4304 0, /* todo_flags_start */
4305 0, /* todo_flags_finish */
/* Simple IPA pass releasing function summaries (see ipa_free_fn_summary /
   ipa_free_size_summary above).  NOTE(review): this chunk is a gappy
   extraction -- the member initialized by the trailing comma after "ctxt),",
   the assignment inside set_pass_param, the braces, and the return of
   execute are not visible here, so the text below is kept verbatim rather
   than reconstructed; confirm against the full file.  */
4308 class pass_ipa_free_fn_summary
: public simple_ipa_opt_pass
4311 pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4312 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary
, ctxt
),
4316 /* opt_pass methods: */
4317 opt_pass
*clone () { return new pass_ipa_free_fn_summary (m_ctxt
); }
/* set_pass_param: only parameter 0 is accepted (asserted below).  */
4318 void set_pass_param (unsigned int n
, bool param
)
4320 gcc_assert (n
== 0);
4323 virtual bool gate (function
*) { return true; }
4324 virtual unsigned int execute (function
*)
4326 ipa_free_fn_summary ();
4328 ipa_free_size_summary ();
4334 }; // class pass_ipa_free_fn_summary
4338 simple_ipa_opt_pass
*
4339 make_pass_ipa_free_fn_summary (gcc::context
*ctxt
)
4341 return new pass_ipa_free_fn_summary (ctxt
);
4346 const pass_data pass_data_ipa_fn_summary
=
4348 IPA_PASS
, /* type */
4349 "fnsummary", /* name */
4350 OPTGROUP_INLINE
, /* optinfo_flags */
4351 TV_IPA_FNSUMMARY
, /* tv_id */
4352 0, /* properties_required */
4353 0, /* properties_provided */
4354 0, /* properties_destroyed */
4355 0, /* todo_flags_start */
4356 ( TODO_dump_symtab
), /* todo_flags_finish */
4359 class pass_ipa_fn_summary
: public ipa_opt_pass_d
4362 pass_ipa_fn_summary (gcc::context
*ctxt
)
4363 : ipa_opt_pass_d (pass_data_ipa_fn_summary
, ctxt
,
4364 ipa_fn_summary_generate
, /* generate_summary */
4365 ipa_fn_summary_write
, /* write_summary */
4366 ipa_fn_summary_read
, /* read_summary */
4367 NULL
, /* write_optimization_summary */
4368 NULL
, /* read_optimization_summary */
4369 NULL
, /* stmt_fixup */
4370 0, /* function_transform_todo_flags_start */
4371 NULL
, /* function_transform */
4372 NULL
) /* variable_transform */
4375 /* opt_pass methods: */
4376 virtual unsigned int execute (function
*) { return 0; }
4378 }; // class pass_ipa_fn_summary
4383 make_pass_ipa_fn_summary (gcc::context
*ctxt
)
4385 return new pass_ipa_fn_summary (ctxt
);
4388 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4389 within the same process. For use by toplev::finalize. */
4392 ipa_fnsummary_c_finalize (void)
4394 ipa_free_fn_summary ();