/* Function summary pass.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Analysis of function bodies used by inter-procedural passes

   We estimate for each function
   - function body size and size after specializing into given context
   - average function execution time in a given context
   - function frame size
   For each call
   - call statement size, time and how often the parameters change

   ipa_fn_summary data structures store the above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the ipa_fn_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depends on context (i.e. it is known to be optimized away when
   the context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size_and_time can be used to query function size/time in
   the given context.  ipa_merge_fn_summary_after_inlining merges the
   properties of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
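
/* Illustrative sketch (not part of this file): a size/time table entry
   guards its cost with a predicate over conditions on the parameters, and
   a calling context is summarized as a clause bitmask saying which
   conditions may be true.  A minimal standalone model of that evaluation,
   treating a predicate as a single conjunction of condition bits (the real
   predicate class is more general: a conjunction of clauses, each a
   disjunction of conditions):

     #include <stdio.h>

     typedef unsigned clause_t;   // bit i set: condition i may hold

     struct entry { int size; clause_t pred; };

     // Sum the sizes of entries whose predicate may hold in context CTX.
     static int
     eval_size (const struct entry *tab, int n, clause_t ctx)
     {
       int size = 0;
       for (int i = 0; i < n; i++)
         if ((tab[i].pred & ctx) == tab[i].pred)
           size += tab[i].size;
       return size;
     }

     int main (void)
     {
       struct entry tab[] = { { 10, 0 }, { 20, 1u << 2 } };
       printf ("%d\n", eval_size (tab, 2, 0));        // 10: cond 2 known false
       printf ("%d\n", eval_size (tab, 2, 1u << 2));  // 30: cond 2 may hold
       return 0;
     }
  */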

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-streamer.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cilk.h"
#include "cfgexpand.h"
#include "gimplify.h"
#include "stringpool.h"
#include "attribs.h"

/* Summaries.  */
function_summary <ipa_fn_summary *> *ipa_fn_summaries;
call_summary <ipa_call_summary *> *ipa_call_summaries;

/* Edge predicates go here.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");

/* Dump IPA hints.  */
void
ipa_dump_hints (FILE *f, ipa_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "IPA hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_array_index)
    {
      hints &= ~INLINE_HINT_array_index;
      fprintf (f, " array_index");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  gcc_assert (!hints);
}

/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to a constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
                                   const predicate &exec_pred,
                                   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  if (exec_pred == false)
    return;

  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create the initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
        && e->nonconst_predicate == nonconst_pred)
      {
        found = true;
        break;
      }
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "\t\tReached limit on number of entries, "
                 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
               "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
               ((double) size) / ipa_fn_summary::size_scale,
               (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
        {
          fprintf (dump_file, " nonconst:");
          nonconst_pred.dump (dump_file, conds);
        }
      else
        fprintf (dump_file, "\n");
    }
  if (!found)
    {
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
    }
}
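
/* Illustrative example (not from the sources): accounting twice with the
   same predicate pair merges into the existing entry instead of growing
   the table; for some predicate PRED,

     info->account_size_time (4, 2, pred, pred);
     info->account_size_time (6, 1, pred, pred);

   leaves a single size_time_entry with size 10 and time 3.  */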

/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
    (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->frequency = 0;
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}

/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
     be optimized out.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
         just once.  Do it always on the direct edge, so we do not
         attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
        es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
        edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}

/* Set predicate for hint *P.  */

static void
set_hint_predicate (predicate **p, predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
        edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
        *p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}

/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differ - for example the NOT_INLINED condition
   is always false in the second, and also builtin_constant_p tests cannot use
   the fact that a parameter is indeed a constant.

   KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
                                    bool inline_p,
                                    vec<tree> known_vals,
                                    vec<ipa_agg_jump_function_p> known_aggs,
                                    clause_t *ret_clause,
                                    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
         (especially for K&R style programs).  So bound check here (we assume
         known_aggs vector, if non-NULL, has the same length as
         known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
                           || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (c->agg_contents)
        {
          struct ipa_agg_jump_function *agg;

          if (c->code == predicate::changed
              && !c->by_ref
              && (known_vals[c->operand_num] == error_mark_node))
            continue;

          if (known_aggs.exists ())
            {
              agg = known_aggs[c->operand_num];
              val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
                                                c->offset, c->by_ref);
            }
          else
            val = NULL_TREE;
        }
      else
        {
          val = known_vals[c->operand_num];
          if (val == error_mark_node && c->code != predicate::changed)
            val = NULL_TREE;
        }

      if (!val)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::changed)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
        {
          clause |= 1 << (i + predicate::first_dynamic_condition);
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }
      if (c->code == predicate::is_not_constant)
        {
          nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
          continue;
        }

      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
        ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
        : NULL;

      if (res && integer_zerop (res))
        continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
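
/* Illustrative sketch (not part of this file): how a known argument folds a
   condition into the clause bits above.  Standalone model assuming a single
   "param != VAL" style condition per entry and first_dynamic_condition
   equal to 2:

     #include <stdio.h>

     #define FIRST_DYNAMIC 2

     struct cond { int operand_num; int val; };  // models "param != val"

     static unsigned
     eval_clause (const struct cond *c, int n, const int *known, int nknown)
     {
       unsigned clause = 0;
       for (int i = 0; i < n; i++)
         // The bit stays set unless the test folds to false.
         if (c[i].operand_num >= nknown
             || known[c[i].operand_num] != c[i].val)
           clause |= 1u << (i + FIRST_DYNAMIC);
       return clause;
     }

     int main (void)
     {
       struct cond c = { 0, 4 };
       int known[1] = { 4 };
       printf ("%x\n", eval_clause (&c, 1, known, 1));  // 0: known false
       known[0] = 5;
       printf ("%x\n", eval_clause (&c, 1, known, 1));  // 4: may hold
       return 0;
     }
  */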

/* Work out what conditions might be true at invocation of E.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
                              clause_t *clause_ptr,
                              clause_t *nonspec_clause_ptr,
                              vec<tree> *known_vals_ptr,
                              vec<ipa_polymorphic_call_context>
                              *known_contexts_ptr,
                              vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *parms_info;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (e->caller->global.inlined_to)
        parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
        parms_info = IPA_NODE_REF (e->caller);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
        known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
        known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
        {
          struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
          tree cst = ipa_value_from_jfunc (parms_info, jf);

          if (!cst && e->call_stmt
              && i < (int) gimple_call_num_args (e->call_stmt))
            {
              cst = gimple_call_arg (e->call_stmt, i);
              if (!is_gimple_min_invariant (cst))
                cst = NULL;
            }
          if (cst)
            {
              gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
              if (known_vals.exists ())
                known_vals[i] = cst;
            }
          else if (inline_p && !es->param[i].change_prob)
            known_vals[i] = error_mark_node;

          if (known_contexts_ptr)
            (*known_contexts_ptr)[i] = ipa_context_from_jfunc (parms_info, e,
                                                               i, jf);
          /* TODO: When IPA-CP starts propagating and merging aggregate jump
             functions, use its knowledge of the caller too, just like the
             scalar case above.  */
          known_aggs[i] = &jf->agg;
        }
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
           && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int) gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
        known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          tree cst = gimple_call_arg (e->call_stmt, i);
          if (!is_gimple_min_invariant (cst))
            cst = NULL;
          if (cst)
            known_vals[i] = cst;
        }
    }

  evaluate_conditions_for_known_args (callee, inline_p,
                                      known_vals, known_aggs, clause_ptr,
                                      nonspec_clause_ptr);

  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}

/* Allocate the function summary.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab, false);
}

/* We are called multiple times for a given function; clear
   data from previous runs so they are not accumulated.  */

void
ipa_call_summary::reset ()
{
  call_stmt_size = call_stmt_time = 0;
  is_return_callee_uncaptured = false;
  if (predicate)
    edge_predicate_pool.remove (predicate);
  predicate = NULL;
  param.release ();
}

/* We are called multiple times for a given function; clear
   data from previous runs so they are not accumulated.  */

void
ipa_fn_summary::reset (struct cgraph_node *node)
{
  struct cgraph_edge *e;

  self_size = 0;
  estimated_stack_size = 0;
  estimated_self_stack_size = 0;
  stack_frame_offset = 0;
  size = 0;
  time = 0;
  growth = 0;
  scc_no = 0;
  if (loop_iterations)
    {
      edge_predicate_pool.remove (loop_iterations);
      loop_iterations = NULL;
    }
  if (loop_stride)
    {
      edge_predicate_pool.remove (loop_stride);
      loop_stride = NULL;
    }
  if (array_index)
    {
      edge_predicate_pool.remove (array_index);
      array_index = NULL;
    }
  vec_free (conds);
  vec_free (size_time_table);
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->get (e)->reset ();
  fp_expressions = false;
}

/* Hook that is called by cgraph.c when a node is removed.  */

void
ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info)
{
  info->reset (node);
}

/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally care about allocating a new memory slot for the updated
   predicate and set it to NULL when it becomes true or false (and thus
   uninteresting).  */

static void
remap_hint_predicate_after_duplication (predicate **p,
                                        clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free the previous predicate; it is used by the node
     origin.  */
  *p = NULL;
  set_hint_predicate (p, new_predicate);
}

/* Hook that is called by cgraph.c when a node is duplicated.  */

void
ipa_fn_summary_t::duplicate (cgraph_node *src,
                             cgraph_node *dst,
                             ipa_fn_summary *,
                             ipa_fn_summary *info)
{
  memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
        {
          struct ipa_replace_map *r;

          for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
            {
              if (((!r->old_tree && r->parm_num == i)
                   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
                  && r->replace_p && !r->ref_p)
                {
                  known_vals[i] = r->new_tree;
                  break;
                }
            }
        }
      evaluate_conditions_for_known_args (dst, false,
                                          known_vals,
                                          vNULL,
                                          &possible_truths,
                                          /* We are going to specialize,
                                             so ignore nonspec truths.  */
                                          NULL);
      known_vals.release ();

      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
         Simplify the predicate by pruning out alternatives that are known
         to be false.
         TODO: as an optimization, we can also eliminate conditions known
         to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
        {
          predicate new_exec_pred;
          predicate new_nonconst_pred;
          new_exec_pred = e->exec_predicate.remap_after_duplication
            (possible_truths);
          new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
            (possible_truths);
          if (new_exec_pred == false || new_nonconst_pred == false)
            optimized_out_size += e->size;
          else
            info->account_size_time (e->size, e->time, new_exec_pred,
                                     new_nonconst_pred);
        }

      /* Remap edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
        {
          predicate new_predicate;
          struct ipa_call_summary *es = ipa_call_summaries->get (edge);
          next = edge->next_callee;

          if (!edge->inline_failed)
            inlined_to_p = true;
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
          edge_set_predicate (edge, &new_predicate);
        }

      /* Remap indirect edge predicates with the same simplification as above.
         Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
        {
          predicate new_predicate;
          struct ipa_call_summary *es = ipa_call_summaries->get (edge);
          next = edge->next_callee;

          gcc_checking_assert (edge->inline_failed);
          if (!es->predicate)
            continue;
          new_predicate = es->predicate->remap_after_duplication
            (possible_truths);
          if (new_predicate == false && *es->predicate != false)
            optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
          edge_set_predicate (edge, &new_predicate);
        }
      remap_hint_predicate_after_duplication (&info->loop_iterations,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
                                              possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
                                              possible_truths);

      /* If the inliner or someone after the inliner will ever start producing
         non-trivial clones, we will get trouble with lack of information
         about updating self sizes, because the size vectors already contain
         sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
        {
          predicate p = *info->loop_iterations;
          info->loop_iterations = NULL;
          set_hint_predicate (&info->loop_iterations, p);
        }
      if (info->loop_stride)
        {
          predicate p = *info->loop_stride;
          info->loop_stride = NULL;
          set_hint_predicate (&info->loop_stride, p);
        }
      if (info->array_index)
        {
          predicate p = *info->array_index;
          info->array_index = NULL;
          set_hint_predicate (&info->array_index, p);
        }
    }
  if (!dst->global.inlined_to)
    ipa_update_overall_fn_summary (dst);
}

/* Hook that is called by cgraph.c when an edge is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
                               struct cgraph_edge *dst,
                               struct ipa_call_summary *srcinfo,
                               struct ipa_call_summary *info)
{
  *info = *srcinfo;
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
                               - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
                               - eni_time_weights.call_cost);
    }
}

/* Keep edge cache consistent across edge removal.  */

void
ipa_call_summary_t::remove (struct cgraph_edge *,
                            struct ipa_call_summary *sum)
{
  sum->reset ();
}

/* Dump edge summaries associated with NODE and recursively with all of its
   clones.  Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
                       struct ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
               "%*s%s/%i %s\n%*s loop depth:%2i freq:%4i size:%2i"
               " time: %2i callee size:%2i stack:%2i",
               indent, "", callee->name (), callee->order,
               !edge->inline_failed
               ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
               indent, "", es->loop_depth, edge->frequency,
               es->call_stmt_size, es->call_stmt_time,
               (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale,
               (int) ipa_fn_summaries->get (callee)->estimated_stack_size);

      if (es->predicate)
        {
          fprintf (f, " predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
      if (es->param.exists ())
        for (i = 0; i < (int) es->param.length (); i++)
          {
            int prob = es->param[i].change_prob;

            if (!prob)
              fprintf (f, "%*s op%i is compile time invariant\n",
                       indent + 2, "", i);
            else if (prob != REG_BR_PROB_BASE)
              fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
                       prob * 100.0 / REG_BR_PROB_BASE);
          }
      if (!edge->inline_failed)
        {
          fprintf (f, "%*sStack frame offset %i, callee self size %i,"
                   " callee size %i\n",
                   indent + 2, "",
                   (int) ipa_fn_summaries->get (callee)->stack_frame_offset,
                   (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size,
                   (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
          dump_ipa_call_summary (f, indent + 2, callee, info);
        }
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4i size:%2i"
               " time: %2i",
               indent, "",
               es->loop_depth,
               edge->frequency, es->call_stmt_size, es->call_stmt_time);
      if (es->predicate)
        {
          fprintf (f, "predicate: ");
          es->predicate->dump (f, info->conds);
        }
      else
        fprintf (f, "\n");
    }
}

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
      size_time_entry *e;
      int i;
      fprintf (f, "IPA function summary for %s/%i", node->name (),
               node->order);
      if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
        fprintf (f, " always_inline");
      if (s->inlinable)
        fprintf (f, " inlinable");
      if (s->contains_cilk_spawn)
        fprintf (f, " contains_cilk_spawn");
      if (s->fp_expressions)
        fprintf (f, " fp_expression");
      fprintf (f, "\n global time: %f\n", s->time.to_double ());
      fprintf (f, " self size: %i\n", s->self_size);
      fprintf (f, " global size: %i\n", s->size);
      fprintf (f, " min size: %i\n", s->min_size);
      fprintf (f, " self stack: %i\n",
               (int) s->estimated_self_stack_size);
      fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
      if (s->growth)
        fprintf (f, " estimated growth:%i\n", (int) s->growth);
      if (s->scc_no)
        fprintf (f, " In SCC: %i\n", (int) s->scc_no);
      for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
        {
          fprintf (f, " size:%f, time:%f",
                   (double) e->size / ipa_fn_summary::size_scale,
                   e->time.to_double ());
          if (e->exec_predicate != true)
            {
              fprintf (f, ", executed if:");
              e->exec_predicate.dump (f, s->conds, 0);
            }
          if (e->exec_predicate != e->nonconst_predicate)
            {
              fprintf (f, ", nonconst if:");
              e->nonconst_predicate.dump (f, s->conds, 0);
            }
          fprintf (f, "\n");
        }
      if (s->loop_iterations)
        {
          fprintf (f, " loop iterations:");
          s->loop_iterations->dump (f, s->conds);
        }
      if (s->loop_stride)
        {
          fprintf (f, " loop stride:");
          s->loop_stride->dump (f, s->conds);
        }
      if (s->array_index)
        {
          fprintf (f, " array index:");
          s->array_index->dump (f, s->conds);
        }
      fprintf (f, " calls:\n");
      dump_ipa_call_summary (f, 4, node, s);
      fprintf (f, "\n");
    }
}

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      ipa_dump_fn_summary (f, node);
}

/* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
   boolean variable pointed to by DATA.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
               void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}

/* If OP refers to the value of a function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of
   the PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
        *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
                          NULL);
      if (!modified)
        {
          if (size_p)
            *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
          return op;
        }
    }
  return NULL_TREE;
}

/* If OP refers to the value of a function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (SSA_NAME_DEF_STMT (op),
                            gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
                            size_p);
  return NULL_TREE;
}

/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
                                  gimple *stmt, tree op, int *index_p,
                                  HOST_WIDE_INT *size_p,
                                  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
        return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
          || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
        return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
        return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
                                                 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
                                 stmt, op, index_p, &aggpos->offset,
                                 size_p, &aggpos->by_ref);
}

/* See if a statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
        return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
         and stores to return value or parameters are often free after
         inlining due to SRA and further combining.
         Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
          || rhs_code == VIEW_CONVERT_EXPR
          || rhs_code == ADDR_EXPR
          || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          tree lhs = gimple_assign_lhs (stmt);
          tree inner_rhs = get_base_address (rhs);
          tree inner_lhs = get_base_address (lhs);
          bool rhs_free = false;
          bool lhs_free = false;

          if (!inner_rhs)
            inner_rhs = rhs;
          if (!inner_lhs)
            inner_lhs = lhs;

          /* Reads of parameters are expected to be free.  */
          if (unmodified_parm (stmt, inner_rhs, NULL))
            rhs_free = true;
          /* Match expressions of form &this->field.  Those will most likely
             combine with something upstream after inlining.  */
          else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
            {
              tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
              if (TREE_CODE (op) == PARM_DECL)
                rhs_free = true;
              else if (TREE_CODE (op) == MEM_REF
                       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
                rhs_free = true;
            }

          /* When a parameter is not an SSA register because its address is
             taken and it is just copied into one, the statement will be
             completely free after inlining (we will copy propagate
             backward).  */
          if (rhs_free && is_gimple_reg (lhs))
            return 2;

          /* Reads of parameters passed by reference are
             expected to be free (i.e. optimized out after inlining).  */
          if (TREE_CODE (inner_rhs) == MEM_REF
              && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
            rhs_free = true;

          /* Copying a parameter passed by reference into a gimple register
             is probably also going to copy propagate, but we can't be quite
             sure.  */
          if (rhs_free && is_gimple_reg (lhs))
            lhs_free = true;

          /* Writes to parameters, parameters passed by value and return value
             (either directly or passed via invisible reference) are free.

             TODO: We ought to handle testcases like
             struct a {int a,b;};
             struct a
             returnstruct (void)
             {
               struct a a = {1, 2};
               return a;
             }

             This translates into:

             returnstruct ()
             {
               int a$b;
               int a$a;
               struct a a;
               struct a D.2739;

               <bb 2>:
               D.2739.a = 1;
               D.2739.b = 2;
               return D.2739;

             }
             For that we need to copy the ipa-split logic detecting writes
             to the return value.  */
          if (TREE_CODE (inner_lhs) == PARM_DECL
              || TREE_CODE (inner_lhs) == RESULT_DECL
              || (TREE_CODE (inner_lhs) == MEM_REF
                  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
                      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
                          && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
                          && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
                                                      (inner_lhs,
                                                       0))) == RESULT_DECL))))
            lhs_free = true;
          if (lhs_free
              && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
            rhs_free = true;
          if (lhs_free && rhs_free)
            return 1;
        }
      return 0;
    default:
      return 0;
    }
}
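
/* Illustrative examples (not from the sources): for a parameter P whose
   address is taken (so it lives in memory), the copy into an SSA register

     tmp_1 = P;

   is predicted to be eliminated for sure (2) once P's value is copy
   propagated after inlining; a load through a pointer parameter such as

     tmp_2 = ptr_3(D)->a;

   gets 1 (likely combined away by SRA), and ordinary arithmetic on local
   SSA names gets 0.  */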

/* If BB ends with a conditional that we can turn into predicates, attach
   the corresponding predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                   struct ipa_fn_summary *summary,
                                   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
     var = op0 < 4;
     if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
                                      ? code : inverted_code);
          /* invert_tree_comparison will return ERROR_MARK on FP
             comparisons that are not EQ/NE instead of returning the proper
             unordered one.  Be sure it is not confused with NON_CONSTANT.  */
          if (this_code != ERROR_MARK)
            {
              predicate p
                = add_condition (summary, index, size, &aggpos, this_code,
                                 unshare_expr_without_location
                                 (gimple_cond_rhs (last)));
              e->aux = edge_predicate_pool.allocate ();
              *(predicate *) e->aux = p;
            }
        }
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when the inliner doesn't see the operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
                                         &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & EDGE_FALSE_VALUE)
      {
        predicate p = add_condition (summary, index, size, &aggpos,
                                     predicate::is_not_constant, NULL_TREE);
        e->aux = edge_predicate_pool.allocate ();
        *(predicate *) e->aux = p;
      }
}

/* If BB ends with a switch that we can turn into predicates, attach
   the corresponding predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
                                     struct ipa_fn_summary *summary,
                                     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For the default case we might want to construct a predicate that
         none of the cases is met, but it is a bit hard to do without having
         negations of conditionals handy.  */
      if (!min && !max)
        p = true;
      else if (!max)
        p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
                           unshare_expr_without_location (min));
      else
        {
          predicate p1, p2;
          p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
                              unshare_expr_without_location (min));
          p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
                              unshare_expr_without_location (max));
          p = p1 & p2;
        }
      *(struct predicate *) e->aux
        = p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}

/* For each BB in NODE attach to its AUX pointer the predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
                       struct cgraph_node *node,
                       struct ipa_fn_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
        {
          predicate p = false;
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              if (e->src->aux)
                {
                  predicate this_bb_predicate
                    = *(predicate *) e->src->aux;
                  if (e->aux)
                    this_bb_predicate &= (*(struct predicate *) e->aux);
                  p = p.or_with (summary->conds, this_bb_predicate);
                  if (p == true)
                    break;
                }
            }
          if (p == false)
            gcc_checking_assert (!bb->aux);
          else
            {
              if (!bb->aux)
                {
                  done = false;
                  bb->aux = edge_predicate_pool.allocate ();
                  *((predicate *) bb->aux) = p;
                }
              else if (p != *(predicate *) bb->aux)
                {
                  /* This OR operation is needed to ensure monotone dataflow
                     in the case we hit the limit on number of clauses and
                     the and/or operations above give approximate answers.  */
                  p = p.or_with (summary->conds, *(predicate *) bb->aux);
                  if (p != *(predicate *) bb->aux)
                    {
                      done = false;
                      *((predicate *) bb->aux) = p;
                    }
                }
            }
        }
    }
}
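
/* Illustrative example (not from the sources): for a diamond such as

     if (param_1(D) < 4)   // BB2: edges get "param < 4" / "param >= 4"
       foo ();             // BB3
     else
       bar ();             // BB4
     baz ();               // BB5

   the propagation above leaves BB3 guarded by the predicate "param < 4",
   BB4 by "param >= 4" and BB5 by their disjunction, a single clause
   containing both conditions.  */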

/* Return predicate specifying when the expression EXPR might have a result
   that is not a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
                                    struct ipa_fn_summary *summary,
                                    tree expr,
                                    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
                          NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1 = will_be_nonconstant_expr_predicate
        (info, summary, TREE_OPERAND (expr, 0),
         nonconstant_names);
      if (p1 == true)
        return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 1),
                                               nonconstant_names);
      if (p2 == true)
        return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
                                               TREE_OPERAND (expr, 2),
                                               nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}

/* Return predicate specifying when the STMT might have a result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
                               struct ipa_fn_summary *summary,
                               gimple *stmt,
                               vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
          || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
                                             &aggpos))
        return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
        continue;
      if (TREE_CODE (use) != SSA_NAME)
        return p;
      /* If we know when the operand is constant,
         we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
        continue;
      return p;
    }

  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
                     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
        {
          if (index != base_index)
            p = add_condition (summary, index, size, NULL, predicate::changed,
                               NULL_TREE);
          else
            continue;
        }
      else
        p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
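
/* Illustrative example (not from the sources): for a statement such as

     tmp_1 = param_2(D) + 1;

   the routine above returns the "changed" condition for the parameter and
   records it in NONCONSTANT_NAMES for tmp_1, so in specialized copies where
   the parameter is known to be a constant, this statement and anything that
   only consumes tmp_1 can be considered optimized away.  */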

struct record_modified_bb_info
{
  bitmap bb_set;
  gimple *stmt;
};

/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
   the probability that it changes between two executions of USE_BB.
   INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion anyway.  */

static basic_block
get_minimal_bb (basic_block init_bb, basic_block use_bb)
{
  struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
  if (l && l->header->count < init_bb->count)
    return l->header;
  return init_bb;
}

/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may
   be set except for info->stmt.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  bitmap_set_bit (info->bb_set,
                  SSA_NAME_IS_DEFAULT_DEF (vdef)
                  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
                  : get_minimal_bb
                      (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
                       gimple_bb (info->stmt))->index);
  return false;
}

/* Return probability (based on REG_BR_PROB_BASE) that the I-th parameter of
   STMT will change since the last invocation of STMT.

   Value 0 is reserved for compile time invariants.
   For common parameters it is REG_BR_PROB_BASE.  For loop invariants it
   ought to be REG_BR_PROB_BASE / estimated_iters.  */

static int
param_change_prob (gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     the probability of the value changing is (i.e. when the init statement
     is in a sibling loop of the call).

     We make a conservative estimate: when the call is executed N times more
     often than the statement defining the value, we take the frequency
     1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      int init_freq;

      if (!bb->count.to_frequency (cfun))
        return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
        init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
      else
        init_freq = get_minimal_bb
                      (gimple_bb (SSA_NAME_DEF_STMT (base)),
                       gimple_bb (stmt))->count.to_frequency (cfun);

      if (!init_freq)
        init_freq = 1;
      if (init_freq < bb->count.to_frequency (cfun))
        return MAX (GCOV_COMPUTE_SCALE (init_freq,
                                        bb->count.to_frequency (cfun)), 1);
      else
        return REG_BR_PROB_BASE;
    }
  else
    {
      ao_ref refd;
      int max;
      struct record_modified_bb_info info;
      bitmap_iterator bi;
      unsigned index;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
        return 0;
      if (!bb->count.to_frequency (cfun))
        return REG_BR_PROB_BASE;
      ao_ref_init (&refd, op);
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
                          NULL);
      if (bitmap_bit_p (info.bb_set, bb->index))
        {
          BITMAP_FREE (info.bb_set);
          return REG_BR_PROB_BASE;
        }

      /* Assume that every memory is initialized at entry.
         TODO: Can we easily determine if the value is always defined
         and thus we may skip the entry block?  */
      if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
        max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
      else
        max = 1;

      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
        max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun));

      BITMAP_FREE (info.bb_set);
      if (max < bb->count.to_frequency (cfun))
        return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1);
      else
        return REG_BR_PROB_BASE;
    }
}
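
/* Illustrative sketch (not part of this file): the 1/N estimate above,
   scaled to REG_BR_PROB_BASE roughly the way GCOV_COMPUTE_SCALE does, as
   a standalone model:

     #include <stdio.h>

     #define REG_BR_PROB_BASE 10000

     // Probability that a value defined with frequency INIT_FREQ changed
     // since the last execution of a use with frequency USE_FREQ.
     static int
     change_prob (long long init_freq, long long use_freq)
     {
       if (init_freq >= use_freq)
         return REG_BR_PROB_BASE;
       int p = (int) (init_freq * REG_BR_PROB_BASE / use_freq);
       return p > 1 ? p : 1;   // 0 is reserved for invariants
     }

     int main (void)
     {
       // Defined once per call, used 100 times (e.g. in a loop): ~1%.
       printf ("%d\n", change_prob (1, 100));   // 100
       printf ("%d\n", change_prob (50, 10));   // 10000
       return 0;
     }
  */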

/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
                              ipa_fn_summary *summary, basic_block bb,
                              predicate *p,
                              vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
        {
          if (!single_pred_p (e->src))
            return false;
          if (!first_bb)
            first_bb = single_pred (e->src);
          else if (single_pred (e->src) != first_bb)
            return false;
        }
      else
        {
          if (!first_bb)
            first_bb = e->src;
          else if (e->src != first_bb)
            return false;
        }
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  *p = will_be_nonconstant_expr_predicate (info, summary,
                                           gimple_cond_lhs (stmt),
                                           nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}

/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  */

static void
predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
                          predicate *p,
                          vec<predicate> nonconstant_names)
{
  unsigned i;

  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
        {
          gcc_assert (TREE_CODE (arg) == SSA_NAME);
          *p = p->or_with (summary->conds,
                           nonconstant_names[SSA_NAME_VERSION (arg)]);
          if (*p == true)
            return;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}

/* Return predicate specifying when an array index in access OP
   becomes non-constant.  */

static predicate
array_index_predicate (ipa_fn_summary *info,
                       vec<predicate> nonconstant_names, tree op)
{
  predicate p = false;
  while (handled_component_p (op))
    {
      if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
        {
          if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
            p = p.or_with (info->conds,
                           nonconstant_names[SSA_NAME_VERSION
                                             (TREE_OPERAND (op, 1))]);
        }
      op = TREE_OPERAND (op, 0);
    }
  return p;
}
1840
1841 /* For a typical usage of __builtin_expect (a <= b, 1), we
1842 may introduce an extra relation stmt:
1843 With the builtin, we have
1844 t1 = a <= b;
1845 t2 = (long int) t1;
1846 t3 = __builtin_expect (t2, 1);
1847 if (t3 != 0)
1848 goto ...
1849 Without the builtin, we have
1850 if (a <= b)
1851 goto...
1852 This affects the size/time estimation and may have
1853 an impact on the early inliner.
1854 Here we find this pattern and fix it up later. */
1855
1856static gimple *
1857find_foldable_builtin_expect (basic_block bb)
1858{
1859 gimple_stmt_iterator bsi;
1860
1861 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1862 {
1863 gimple *stmt = gsi_stmt (bsi);
1864 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1865 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1866 {
1867 tree var = gimple_call_lhs (stmt);
1868 tree arg = gimple_call_arg (stmt, 0);
1869 use_operand_p use_p;
1870 gimple *use_stmt;
1871 bool match = false;
1872 bool done = false;
1873
1874 if (!var || !arg)
1875 continue;
1876 gcc_assert (TREE_CODE (var) == SSA_NAME);
1877
1878 while (TREE_CODE (arg) == SSA_NAME)
1879 {
1880 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1881 if (!is_gimple_assign (stmt_tmp))
1882 break;
1883 switch (gimple_assign_rhs_code (stmt_tmp))
1884 {
1885 case LT_EXPR:
1886 case LE_EXPR:
1887 case GT_EXPR:
1888 case GE_EXPR:
1889 case EQ_EXPR:
1890 case NE_EXPR:
1891 match = true;
1892 done = true;
1893 break;
1894 CASE_CONVERT:
1895 break;
1896 default:
1897 done = true;
1898 break;
1899 }
1900 if (done)
1901 break;
1902 arg = gimple_assign_rhs1 (stmt_tmp);
1903 }
1904
1905 if (match && single_imm_use (var, &use_p, &use_stmt)
1906 && gimple_code (use_stmt) == GIMPLE_COND)
1907 return use_stmt;
1908 }
1909 }
1910 return NULL;
1911}
1912
1913 /* Return true when the basic block contains only clobbers followed by RESX.
1914 Such BBs are kept around to make removal of dead stores possible with
1915 presence of EH and will be optimized out by optimize_clobbers later in the
1916 game.
1917
1918 NEED_EH is used to recurse in case the clobber has non-EH predecessors
1919 that can be clobber-only, too. When it is false, the RESX is not necessary
1920 at the end of the basic block. */
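
/* A BB matched by this predicate typically looks like (illustrative
   GIMPLE only):

     <bb 7>:
     obj ={v} {CLOBBER};
     tmp ={v} {CLOBBER};
     resx 1  */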
1921
1922static bool
1923clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1924{
1925 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1926 edge_iterator ei;
1927 edge e;
1928
1929 if (need_eh)
1930 {
1931 if (gsi_end_p (gsi))
1932 return false;
1933 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1934 return false;
1935 gsi_prev (&gsi);
1936 }
1937 else if (!single_succ_p (bb))
1938 return false;
1939
1940 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1941 {
1942 gimple *stmt = gsi_stmt (gsi);
1943 if (is_gimple_debug (stmt))
1944 continue;
1945 if (gimple_clobber_p (stmt))
1946 continue;
1947 if (gimple_code (stmt) == GIMPLE_LABEL)
1948 break;
1949 return false;
1950 }
1951
1952 /* See if all predecessors are either throws or clobber-only BBs. */
1953 FOR_EACH_EDGE (e, ei, bb->preds)
1954 if (!(e->flags & EDGE_EH)
1955 && !clobber_only_eh_bb_p (e->src, false))
1956 return false;
1957
1958 return true;
1959}
1960
1961 /* Return true if STMT computes a floating point expression that may be affected
1962 by -ffast-math and similar flags. */
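
/* For instance, an assignment like t_2 = a_1 * 4.0 (a hypothetical example)
   has float-typed SSA operands and is therefore flagged here.  */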
1963
1964static bool
1965fp_expression_p (gimple *stmt)
1966{
1967 ssa_op_iter i;
1968 tree op;
1969
1970 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1971 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1972 return true;
1973 return false;
1974}
1975
1976/* Analyze function body for NODE.
1977 EARLY indicates a run from the early optimization pipeline. */
1978
1979static void
1980 analyze_function_body (struct cgraph_node *node, bool early)
1981{
1982 sreal time = 0;
1983 /* Estimate static overhead for function prologue/epilogue and alignment. */
1984 int size = 2;
1985 /* Benefits are scaled by probability of elimination that is in range
1986 <0,2>. */
1987 basic_block bb;
1988 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1989 int freq;
1990 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1991 predicate bb_predicate;
1992 struct ipa_func_body_info fbi;
1993 vec<predicate> nonconstant_names = vNULL;
1994 int nblocks, n;
1995 int *order;
1996 predicate array_index = true;
1997 gimple *fix_builtin_expect_stmt;
1998
1999 gcc_assert (my_function && my_function->cfg);
2000 gcc_assert (cfun == my_function);
2001
2002 memset (&fbi, 0, sizeof (fbi));
2003 info->conds = NULL;
2004 info->size_time_table = NULL;
2005
2006 /* When optimizing and analyzing for the IPA inliner, initialize the loop
2007 optimizer so we can produce proper inline hints.
2008
2009 When optimizing and analyzing for the early inliner, initialize node params
2010 so we can produce correct BB predicates. */
2011
2012 if (opt_for_fn (node->decl, optimize))
2013 {
2014 calculate_dominance_info (CDI_DOMINATORS);
2015 if (!early)
2016 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2017 else
2018 {
2019 ipa_check_create_node_params ();
2020 ipa_initialize_node_params (node);
2021 }
2022
2023 if (ipa_node_params_sum)
2024 {
2025 fbi.node = node;
2026 fbi.info = IPA_NODE_REF (node);
2027 fbi.bb_infos = vNULL;
2028 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2029 fbi.param_count = count_formal_params (node->decl);
2030 nonconstant_names.safe_grow_cleared
2031 (SSANAMES (my_function)->length ());
2032 }
2033 }
2034
2035 if (dump_file)
2036 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2037 node->name ());
2038
2039 /* When we run into the maximal number of entries, we assign everything to the
2040 constant truth case. Be sure to have it in the list. */
2041 bb_predicate = true;
2042 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2043
2044 bb_predicate = predicate::not_inlined ();
2045 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
2046 bb_predicate);
2047
2048 if (fbi.info)
2049 compute_bb_predicates (&fbi, node, info);
2050 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2051 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2052 for (n = 0; n < nblocks; n++)
2053 {
2054 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2055 freq = compute_call_stmt_bb_frequency (node->decl, bb);
2056 if (clobber_only_eh_bb_p (bb))
2057 {
2058 if (dump_file && (dump_flags & TDF_DETAILS))
2059 fprintf (dump_file, "\n Ignoring BB %i;"
2060 " it will be optimized away by cleanup_clobbers\n",
2061 bb->index);
2062 continue;
2063 }
2064
2065 /* TODO: Obviously predicates can be propagated down across CFG. */
2066 if (fbi.info)
2067 {
2068 if (bb->aux)
2069 bb_predicate = *(predicate *) bb->aux;
2070 else
2071 bb_predicate = false;
2072 }
2073 else
2074 bb_predicate = true;
2075
2076 if (dump_file && (dump_flags & TDF_DETAILS))
2077 {
2078 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2079 bb_predicate.dump (dump_file, info->conds);
2080 }
2081
2082 if (fbi.info && nonconstant_names.exists ())
2083 {
2084 predicate phi_predicate;
2085 bool first_phi = true;
2086
2087 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2088 gsi_next (&bsi))
2089 {
2090 if (first_phi
2091 && !phi_result_unknown_predicate (fbi.info, info, bb,
2092 &phi_predicate,
2093 nonconstant_names))
2094 break;
2095 first_phi = false;
2096 if (dump_file && (dump_flags & TDF_DETAILS))
2097 {
2098 fprintf (dump_file, " ");
2099 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2100 }
2101 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2102 nonconstant_names);
2103 }
2104 }
2105
2106 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2107
2108 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2109 gsi_next (&bsi))
2110 {
2111 gimple *stmt = gsi_stmt (bsi);
2112 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2113 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2114 int prob;
2115 predicate will_be_nonconstant;
2116
2117 /* This relation stmt will be folded after we remove the
2118 __builtin_expect call. Adjust the cost here. */
2119 if (stmt == fix_builtin_expect_stmt)
2120 {
2121 this_size--;
2122 this_time--;
2123 }
2124
2125 if (dump_file && (dump_flags & TDF_DETAILS))
2126 {
2127 fprintf (dump_file, " ");
2128 print_gimple_stmt (dump_file, stmt, 0);
2129 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2130 ((double) freq) / CGRAPH_FREQ_BASE, this_size,
2131 this_time);
2132 }
2133
2134 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2135 {
2136 predicate this_array_index;
2137 this_array_index =
2138 array_index_predicate (info, nonconstant_names,
2139 gimple_assign_rhs1 (stmt));
2140 if (this_array_index != false)
2141 array_index &= this_array_index;
2142 }
2143 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2144 {
2145 predicate this_array_index;
2146 this_array_index =
2147 array_index_predicate (info, nonconstant_names,
2148 gimple_get_lhs (stmt));
2149 if (this_array_index != false)
2150 array_index &= this_array_index;
2151 }
2152
2153
2154 if (is_gimple_call (stmt)
2155 && !gimple_call_internal_p (stmt))
2156 {
2157 struct cgraph_edge *edge = node->get_edge (stmt);
2158 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2159
2160 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2161 resolved as constant. We however don't want to optimize
2162 out the cgraph edges. */
2163 if (nonconstant_names.exists ()
2164 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2165 && gimple_call_lhs (stmt)
2166 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2167 {
2168 predicate false_p = false;
2169 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2170 = false_p;
2171 }
2172 if (ipa_node_params_sum)
2173 {
2174 int count = gimple_call_num_args (stmt);
2175 int i;
2176
2177 if (count)
2178 es->param.safe_grow_cleared (count);
2179 for (i = 0; i < count; i++)
2180 {
2181 int prob = param_change_prob (stmt, i);
2182 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2183 es->param[i].change_prob = prob;
2184 }
2185 }
2186
2187 es->call_stmt_size = this_size;
2188 es->call_stmt_time = this_time;
2189 es->loop_depth = bb_loop_depth (bb);
2190 edge_set_predicate (edge, &bb_predicate);
2191 }
2192
2193 /* TODO: When a conditional jump or switch is known to be constant, but
2194 we did not translate it into the predicates, we really can account
2195 just the maximum of the possible paths. */
2196 if (fbi.info)
2197 will_be_nonconstant
2198 = will_be_nonconstant_predicate (&fbi, info,
2199 stmt, nonconstant_names);
2200 else
2201 will_be_nonconstant = true;
2202 if (this_time || this_size)
2203 {
2204 this_time *= freq;
2205
2206 prob = eliminated_by_inlining_prob (stmt);
2207 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2208 fprintf (dump_file,
2209 "\t\t50%% will be eliminated by inlining\n");
2210 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2211 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2212
2213 struct predicate p = bb_predicate & will_be_nonconstant;
2214
2215 /* We can ignore the statement when we have proved it is never going
2216 to be executed, but we cannot do that for call statements
2217 because edges are accounted specially. */
2218
2219 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2220 {
2221 time += this_time;
2222 size += this_size;
2223 }
2224
2225 /* We account everything but the calls. Calls have their own
2226 size/time info attached to cgraph edges. This is necessary
2227 in order to make the cost disappear after inlining. */
2228 if (!is_gimple_call (stmt))
2229 {
2230 if (prob)
2231 {
2232 predicate ip = bb_predicate & predicate::not_inlined ();
2233 info->account_size_time (this_size * prob,
2234 (sreal)(this_time * prob)
2235 / (CGRAPH_FREQ_BASE * 2), ip,
2236 p);
2237 }
2238 if (prob != 2)
2239 info->account_size_time (this_size * (2 - prob),
2240 (sreal)(this_time * (2 - prob))
2241 / (CGRAPH_FREQ_BASE * 2),
2242 bb_predicate,
2243 p);
2244 }
2245
2246 if (!info->fp_expressions && fp_expression_p (stmt))
2247 {
2248 info->fp_expressions = true;
2249 if (dump_file)
2250 fprintf (dump_file, " fp_expression set\n");
2251 }
2252
2253 gcc_assert (time >= 0);
2254 gcc_assert (size >= 0);
2255 }
2256 }
2257 }
2258 set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index);
2259 time = time / CGRAPH_FREQ_BASE;
2260 free (order);
2261
2262 if (nonconstant_names.exists () && !early)
2263 {
2264 struct loop *loop;
2265 predicate loop_iterations = true;
2266 predicate loop_stride = true;
2267
2268 if (dump_file && (dump_flags & TDF_DETAILS))
2269 flow_loops_dump (dump_file, NULL, 0);
2270 scev_initialize ();
2271 FOR_EACH_LOOP (loop, 0)
2272 {
2273 vec<edge> exits;
2274 edge ex;
2275 unsigned int j;
2276 struct tree_niter_desc niter_desc;
2277 bb_predicate = *(predicate *) loop->header->aux;
2278
2279 exits = get_loop_exit_edges (loop);
2280 FOR_EACH_VEC_ELT (exits, j, ex)
2281 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2282 && !is_gimple_min_invariant (niter_desc.niter))
2283 {
2284 predicate will_be_nonconstant
2285 = will_be_nonconstant_expr_predicate (fbi.info, info,
2286 niter_desc.niter,
2287 nonconstant_names);
2288 if (will_be_nonconstant != true)
2289 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2290 if (will_be_nonconstant != true
2291 && will_be_nonconstant != false)
2292 /* This is slightly imprecise. We may want to represent each
2293 loop with an independent predicate. */
2294 loop_iterations &= will_be_nonconstant;
2295 }
2296 exits.release ();
2297 }
2298
2299 /* To avoid quadratic behavior we analyze stride predicates only
2300 with respect to the containing loop. Thus we simply iterate
2301 over all defs in the outermost loop body. */
2302 for (loop = loops_for_fn (cfun)->tree_root->inner;
2303 loop != NULL; loop = loop->next)
2304 {
2305 basic_block *body = get_loop_body (loop);
2306 for (unsigned i = 0; i < loop->num_nodes; i++)
2307 {
2308 gimple_stmt_iterator gsi;
2309 bb_predicate = *(predicate *) body[i]->aux;
2310 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2311 gsi_next (&gsi))
2312 {
2313 gimple *stmt = gsi_stmt (gsi);
2314
2315 if (!is_gimple_assign (stmt))
2316 continue;
2317
2318 tree def = gimple_assign_lhs (stmt);
2319 if (TREE_CODE (def) != SSA_NAME)
2320 continue;
2321
2322 affine_iv iv;
2323 if (!simple_iv (loop_containing_stmt (stmt),
2324 loop_containing_stmt (stmt),
2325 def, &iv, true)
2326 || is_gimple_min_invariant (iv.step))
2327 continue;
2328
2329 predicate will_be_nonconstant
2330 = will_be_nonconstant_expr_predicate (fbi.info, info,
2331 iv.step,
2332 nonconstant_names);
2333 if (will_be_nonconstant != true)
2334 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2335 if (will_be_nonconstant != true
2336 && will_be_nonconstant != false)
2337 /* This is slightly imprecise. We may want to represent
2338 each loop with an independent predicate. */
2339 loop_stride = loop_stride & will_be_nonconstant;
2340 }
2341 }
2342 free (body);
2343 }
2344 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations,
2345 loop_iterations);
2346 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride,
2347 loop_stride);
2348 scev_finalize ();
2349 }
2350 FOR_ALL_BB_FN (bb, my_function)
2351 {
2352 edge e;
2353 edge_iterator ei;
2354
2355 if (bb->aux)
2356 edge_predicate_pool.remove ((predicate *)bb->aux);
2357 bb->aux = NULL;
2358 FOR_EACH_EDGE (e, ei, bb->succs)
2359 {
2360 if (e->aux)
2361 edge_predicate_pool.remove ((predicate *) e->aux);
2362 e->aux = NULL;
2363 }
2364 }
2365 ipa_fn_summaries->get (node)->time = time;
2366 ipa_fn_summaries->get (node)->self_size = size;
2367 nonconstant_names.release ();
2368 ipa_release_body_info (&fbi);
2369 if (opt_for_fn (node->decl, optimize))
2370 {
2371 if (!early)
2372 loop_optimizer_finalize ();
2373 else if (!ipa_edge_args_sum)
2374 ipa_free_all_node_params ();
2375 free_dominance_info (CDI_DOMINATORS);
2376 }
2377 if (dump_file)
2378 {
2379 fprintf (dump_file, "\n");
2380 ipa_dump_fn_summary (dump_file, node);
2381 }
2382}
2383
2384
2385/* Compute function summary.
2386 EARLY is true when we compute parameters during early opts. */
2387
2388void
2389 compute_fn_summary (struct cgraph_node *node, bool early)
2390{
2391 HOST_WIDE_INT self_stack_size;
2392 struct cgraph_edge *e;
2393 struct ipa_fn_summary *info;
2394
2395 gcc_assert (!node->global.inlined_to);
2396
2397 if (!ipa_fn_summaries)
2398 ipa_fn_summary_alloc ();
2399
2400 info = ipa_fn_summaries->get (node);
2401 info->reset (node);
2402
2403 /* Estimate the stack size for the function if we're optimizing. */
2404 self_stack_size = optimize && !node->thunk.thunk_p
2405 ? estimated_stack_frame_size (node) : 0;
2406 info->estimated_self_stack_size = self_stack_size;
2407 info->estimated_stack_size = self_stack_size;
2408 info->stack_frame_offset = 0;
2409
2410 if (node->thunk.thunk_p)
2411 {
2412 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2413 predicate t = true;
2414
2415 node->local.can_change_signature = false;
2416 es->call_stmt_size = eni_size_weights.call_cost;
2417 es->call_stmt_time = eni_time_weights.call_cost;
2418 info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
2419 t = predicate::not_inlined ();
2420 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2421 ipa_update_overall_fn_summary (node);
2422 info->self_size = info->size;
2423 /* We cannot inline instrumentation clones. */
2424 if (node->thunk.add_pointer_bounds_args)
2425 {
2426 info->inlinable = false;
2427 node->callees->inline_failed = CIF_CHKP;
2428 }
2429 else
2430 info->inlinable = true;
2431 }
2432 else
2433 {
2434 /* Even is_gimple_min_invariant relies on current_function_decl. */
2435 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2436
2437 /* Can this function be inlined at all? */
2438 if (!opt_for_fn (node->decl, optimize)
2439 && !lookup_attribute ("always_inline",
2440 DECL_ATTRIBUTES (node->decl)))
2441 info->inlinable = false;
2442 else
2443 info->inlinable = tree_inlinable_function_p (node->decl);
2444
2445 info->contains_cilk_spawn = fn_contains_cilk_spawn_p (cfun);
2446
2447 /* Type attributes can use parameter indices to describe them. */
2448 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2449 node->local.can_change_signature = false;
2450 else
2451 {
2452 /* Otherwise, inlinable functions can always change their signature. */
2453 if (info->inlinable)
2454 node->local.can_change_signature = true;
2455 else
2456 {
2457 /* Functions calling __builtin_apply_args or va_start cannot change signature. */
2458 for (e = node->callees; e; e = e->next_callee)
2459 {
2460 tree cdecl = e->callee->decl;
2461 if (DECL_BUILT_IN (cdecl)
2462 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2463 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2464 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2465 break;
2466 }
2467 node->local.can_change_signature = !e;
2468 }
2469 }
2470 /* Functions called by an instrumentation thunk can't change their signature
2471 because instrumentation thunk modification is not supported. */
2472 if (node->local.can_change_signature)
2473 for (e = node->callers; e; e = e->next_caller)
2474 if (e->caller->thunk.thunk_p
2475 && e->caller->thunk.add_pointer_bounds_args)
2476 {
2477 node->local.can_change_signature = false;
2478 break;
2479 }
2480 analyze_function_body (node, early);
2481 pop_cfun ();
2482 }
2483 for (e = node->callees; e; e = e->next_callee)
2484 if (e->callee->comdat_local_p ())
2485 break;
2486 node->calls_comdat_local = (e != NULL);
2487
2488 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2489 info->size = info->self_size;
2490 info->stack_frame_offset = 0;
2491 info->estimated_stack_size = info->estimated_self_stack_size;
2492
2493 /* The code above should compute exactly the same result as
2494 ipa_update_overall_fn_summary, but because the computation happens in
2495 a different order, roundoff errors result in slight changes. */
2496 ipa_update_overall_fn_summary (node);
2497 gcc_assert (info->size == info->self_size);
2498}
2499
2500
2501/* Compute parameters of functions used by inliner using
2502 current_function_decl. */
2503
2504static unsigned int
2505 compute_fn_summary_for_current (void)
2506 {
2507 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2508 return 0;
2509}
2510
2511 /* Estimate the benefit of devirtualizing indirect edge IE, provided KNOWN_VALS,
2512 KNOWN_CONTEXTS and KNOWN_AGGS. */
2513
2514static bool
2515estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2516 int *size, int *time,
2517 vec<tree> known_vals,
2518 vec<ipa_polymorphic_call_context> known_contexts,
2519 vec<ipa_agg_jump_function_p> known_aggs)
2520{
2521 tree target;
2522 struct cgraph_node *callee;
2523 struct ipa_fn_summary *isummary;
2524 enum availability avail;
2525 bool speculative;
2526
2527 if (!known_vals.exists () && !known_contexts.exists ())
2528 return false;
2529 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2530 return false;
2531
2532 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2533 known_aggs, &speculative);
2534 if (!target || speculative)
2535 return false;
2536
2537 /* Account for difference in cost between indirect and direct calls. */
2538 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2539 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2540 gcc_checking_assert (*time >= 0);
2541 gcc_checking_assert (*size >= 0);
2542
2543 callee = cgraph_node::get (target);
2544 if (!callee || !callee->definition)
2545 return false;
2546 callee = callee->function_symbol (&avail);
2547 if (avail < AVAIL_AVAILABLE)
2548 return false;
2549 isummary = ipa_fn_summaries->get (callee);
2550 return isummary->inlinable;
2551}
2552
2553/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2554 handle edge E with probability PROB.
2555 Set HINTS if edge may be devirtualized.
2556 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2557 site. */
2558
2559static inline void
2560estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2561 sreal *time,
2562 int prob,
2563 vec<tree> known_vals,
2564 vec<ipa_polymorphic_call_context> known_contexts,
2565 vec<ipa_agg_jump_function_p> known_aggs,
2566 ipa_hints *hints)
2567{
2568 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2569 int call_size = es->call_stmt_size;
2570 int call_time = es->call_stmt_time;
2571 int cur_size;
2572 if (!e->callee
2573 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2574 known_vals, known_contexts, known_aggs)
2575 && hints && e->maybe_hot_p ())
2576 *hints |= INLINE_HINT_indirect_call;
2577 cur_size = call_size * ipa_fn_summary::size_scale;
2578 *size += cur_size;
2579 if (min_size)
2580 *min_size += cur_size;
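  /* Scale the per-call time by the call's execution frequency (in
     CGRAPH_FREQ_BASE units) and, when PROB is below REG_BR_PROB_BASE,
     also by the probability that the call is actually reached.  */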
2581 if (prob == REG_BR_PROB_BASE)
2582 *time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
2583 else
2584 *time += ((sreal)call_time) * (prob * e->frequency)
2585 / (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
2586}
2587
2588
2589
2590/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2591 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2592 describe context of the call site. */
2593
2594static void
2595estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2596 int *min_size, sreal *time,
2597 ipa_hints *hints,
2598 clause_t possible_truths,
2599 vec<tree> known_vals,
2600 vec<ipa_polymorphic_call_context> known_contexts,
2601 vec<ipa_agg_jump_function_p> known_aggs)
2602{
2603 struct cgraph_edge *e;
2604 for (e = node->callees; e; e = e->next_callee)
2605 {
2606 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2607
2608 /* Do not care about zero-sized builtins. */
2609 if (e->inline_failed && !es->call_stmt_size)
2610 {
2611 gcc_checking_assert (!es->call_stmt_time);
2612 continue;
2613 }
2614 if (!es->predicate
2615 || es->predicate->evaluate (possible_truths))
2616 {
2617 if (e->inline_failed)
2618 {
2619 /* Predicates of calls shall not use NOT_CHANGED codes,
2620 so we do not need to compute probabilities. */
2621 estimate_edge_size_and_time (e, size,
2622 es->predicate ? NULL : min_size,
2623 time, REG_BR_PROB_BASE,
2624 known_vals, known_contexts,
2625 known_aggs, hints);
2626 }
2627 else
2628 estimate_calls_size_and_time (e->callee, size, min_size, time,
2629 hints,
2630 possible_truths,
2631 known_vals, known_contexts,
2632 known_aggs);
2633 }
2634 }
2635 for (e = node->indirect_calls; e; e = e->next_callee)
2636 {
2637 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2638 if (!es->predicate
2639 || es->predicate->evaluate (possible_truths))
2640 estimate_edge_size_and_time (e, size,
2641 es->predicate ? NULL : min_size,
2642 time, REG_BR_PROB_BASE,
2643 known_vals, known_contexts, known_aggs,
2644 hints);
2645 }
2646}
2647
2648
2649/* Estimate size and time needed to execute NODE assuming
2650 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2651 information about NODE's arguments. If non-NULL use also probability
2652 information present in INLINE_PARAM_SUMMARY vector.
2653 Additionally determine hints implied by the context. Finally compute
2654 the minimal size needed for the call that is independent of the call context
2655 and can be used for fast estimates. Return the values in RET_SIZE,
2656 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2657
2658void
2659estimate_node_size_and_time (struct cgraph_node *node,
2660 clause_t possible_truths,
2661 clause_t nonspec_possible_truths,
2662 vec<tree> known_vals,
2663 vec<ipa_polymorphic_call_context> known_contexts,
2664 vec<ipa_agg_jump_function_p> known_aggs,
2665 int *ret_size, int *ret_min_size,
2666 sreal *ret_time,
2667 sreal *ret_nonspecialized_time,
2668 ipa_hints *ret_hints,
2669 vec<inline_param_summary>
2670 inline_param_summary)
2671{
2672 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
2673 size_time_entry *e;
2674 int size = 0;
2675 sreal time = 0;
2676 int min_size = 0;
2677 ipa_hints hints = 0;
2678 int i;
2679
2680 if (dump_file && (dump_flags & TDF_DETAILS))
2681 {
2682 bool found = false;
2683 fprintf (dump_file, " Estimating body: %s/%i\n"
2684 " Known to be false: ", node->name (),
2685 node->order);
2686
2687 for (i = predicate::not_inlined_condition;
2688 i < (predicate::first_dynamic_condition
2689 + (int) vec_safe_length (info->conds)); i++)
2690 if (!(possible_truths & (1 << i)))
2691 {
2692 if (found)
2693 fprintf (dump_file, ", ");
2694 found = true;
2695 dump_condition (dump_file, info->conds, i);
2696 }
2697 }
2698
2699 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2700 known_vals, known_contexts, known_aggs);
2701 sreal nonspecialized_time = time;
2702
2703 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2704 {
2705 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2706
2707 /* Because predicates are conservative, it can happen that nonconst is 1
2708 but exec is 0. */
2709 if (exec)
2710 {
2711 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2712
2713 gcc_checking_assert (e->time >= 0);
2714 gcc_checking_assert (time >= 0);
2715
2716 /* We compute the specialized size only because the size of the
2717 nonspecialized copy is context independent.
2718
2719 The difference between nonspecialized and specialized execution is
2720 that the nonspecialized variant will not have optimized out the
2721 computations known to be constant in the specialized setting. */
2722 if (nonconst)
2723 size += e->size;
2724 nonspecialized_time += e->time;
2725 if (!nonconst)
2726 ;
2727 else if (!inline_param_summary.exists ())
2728 {
2729 if (nonconst)
2730 time += e->time;
2731 }
2732 else
2733 {
2734 int prob = e->nonconst_predicate.probability
2735 (info->conds, possible_truths,
2736 inline_param_summary);
2737 gcc_checking_assert (prob >= 0);
2738 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2739 time += e->time * prob / REG_BR_PROB_BASE;
2740 }
2741 gcc_checking_assert (time >= 0);
2742 }
2743 }
2744 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2745 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2746 min_size = (*info->size_time_table)[0].size;
2747 gcc_checking_assert (size >= 0);
2748 gcc_checking_assert (time >= 0);
2749 /* nonspecialized_time should always be bigger than the specialized time.
2750 Roundoff issues however may get in the way. */
2751 gcc_checking_assert ((nonspecialized_time - time) >= -1);
2752
2753 /* Roundoff issues may make the specialized time bigger than the
2754 nonspecialized time. We do not really want that to happen because some
2755 heuristics may get confused by seeing negative speedups. */
2756 if (time > nonspecialized_time)
2757 time = nonspecialized_time;
2758
2759 if (info->loop_iterations
2760 && !info->loop_iterations->evaluate (possible_truths))
2761 hints |= INLINE_HINT_loop_iterations;
2762 if (info->loop_stride
2763 && !info->loop_stride->evaluate (possible_truths))
2764 hints |= INLINE_HINT_loop_stride;
2765 if (info->array_index
2766 && !info->array_index->evaluate (possible_truths))
2767 hints |= INLINE_HINT_array_index;
2768 if (info->scc_no)
2769 hints |= INLINE_HINT_in_scc;
2770 if (DECL_DECLARED_INLINE_P (node->decl))
2771 hints |= INLINE_HINT_declared_inline;
2772
2773 size = RDIV (size, ipa_fn_summary::size_scale);
2774 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2775
2776 if (dump_file && (dump_flags & TDF_DETAILS))
2777 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2778 time.to_double (), nonspecialized_time.to_double ());
2779 if (ret_time)
2780 *ret_time = time;
2781 if (ret_nonspecialized_time)
2782 *ret_nonspecialized_time = nonspecialized_time;
2783 if (ret_size)
2784 *ret_size = size;
2785 if (ret_min_size)
2786 *ret_min_size = min_size;
2787 if (ret_hints)
2788 *ret_hints = hints;
2789 return;
2790}
2791
2792
2793/* Estimate size and time needed to execute callee of EDGE assuming that
2794 parameters known to be constant at caller of EDGE are propagated.
2795 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2796 and types for parameters. */
2797
2798void
2799estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2800 vec<tree> known_vals,
2801 vec<ipa_polymorphic_call_context>
2802 known_contexts,
2803 vec<ipa_agg_jump_function_p> known_aggs,
2804 int *ret_size, sreal *ret_time,
2805 sreal *ret_nonspec_time,
2806 ipa_hints *hints)
2807{
2808 clause_t clause, nonspec_clause;
2809
2810 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2811 &clause, &nonspec_clause);
2812 estimate_node_size_and_time (node, clause, nonspec_clause,
2813 known_vals, known_contexts,
2814 known_aggs, ret_size, NULL, ret_time,
2815 ret_nonspec_time, hints, vNULL);
2816}
2817
2818
2819/* Update summary information of inline clones after inlining.
2820 Compute peak stack usage. */
2821
2822static void
2823inline_update_callee_summaries (struct cgraph_node *node, int depth)
2824{
2825 struct cgraph_edge *e;
2826 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2827 struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2828 HOST_WIDE_INT peak;
2829
2830 callee_info->stack_frame_offset
2831 = caller_info->stack_frame_offset
2832 + caller_info->estimated_self_stack_size;
2833 peak = callee_info->stack_frame_offset
2834 + callee_info->estimated_self_stack_size;
2835 if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2836 ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2837 ipa_propagate_frequency (node);
2838 for (e = node->callees; e; e = e->next_callee)
2839 {
2840 if (!e->inline_failed)
2841 inline_update_callee_summaries (e->callee, depth);
2842 ipa_call_summaries->get (e)->loop_depth += depth;
2843 }
2844 for (e = node->indirect_calls; e; e = e->next_callee)
2845 ipa_call_summaries->get (e)->loop_depth += depth;
2846}
2847
2848 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2849 When function A is inlined in B and A calls C with a parameter that
2850 changes with probability PROB1, and that parameter is known to be a
2851 passthrough of an argument of B that changes with probability PROB2,
2852 the probability of change is now PROB1*PROB2. */
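
/* A worked example, assuming REG_BR_PROB_BASE == 10000 (probabilities
   scaled by 10000): PROB1 == 5000 (50%) combined with PROB2 == 2000 (20%)
   gives 5000 * 2000 / 10000 == 1000, i.e. a 10% probability of change.  */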
2853
2854static void
2855remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2856 struct cgraph_edge *edge)
2857{
2858 if (ipa_node_params_sum)
2859 {
2860 int i;
2861 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2862 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2863 struct ipa_call_summary *inlined_es
2864 = ipa_call_summaries->get (inlined_edge);
2865
2866 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2867 {
2868 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2869 if (jfunc->type == IPA_JF_PASS_THROUGH
2870 || jfunc->type == IPA_JF_ANCESTOR)
2871 {
2872 int id = jfunc->type == IPA_JF_PASS_THROUGH
2873 ? ipa_get_jf_pass_through_formal_id (jfunc)
2874 : ipa_get_jf_ancestor_formal_id (jfunc);
2875 if (id < (int) inlined_es->param.length ())
2876 {
2877 int prob1 = es->param[i].change_prob;
2878 int prob2 = inlined_es->param[id].change_prob;
2879 int prob = combine_probabilities (prob1, prob2);
2880
2881 if (prob1 && prob2 && !prob)
2882 prob = 1;
2883
2884 es->param[i].change_prob = prob;
2885 }
2886 }
2887 }
2888 }
2889}
2890
2891/* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2892
2893 Remap predicates of callees of NODE. Rest of arguments match
2894 remap_predicate.
2895
2896 Also update change probabilities. */
2897
2898static void
2899remap_edge_summaries (struct cgraph_edge *inlined_edge,
2900 struct cgraph_node *node,
2901 struct ipa_fn_summary *info,
2902 struct ipa_fn_summary *callee_info,
2903 vec<int> operand_map,
2904 vec<int> offset_map,
2905 clause_t possible_truths,
2906 predicate *toplev_predicate)
2907{
2908 struct cgraph_edge *e, *next;
2909 for (e = node->callees; e; e = next)
2910 {
2911 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2912 predicate p;
2913 next = e->next_callee;
2914
2915 if (e->inline_failed)
2916 {
2917 remap_edge_change_prob (inlined_edge, e);
2918
2919 if (es->predicate)
2920 {
2921 p = es->predicate->remap_after_inlining
2922 (info, callee_info, operand_map,
2923 offset_map, possible_truths,
2924 *toplev_predicate);
2925 edge_set_predicate (e, &p);
2926 }
2927 else
2928 edge_set_predicate (e, toplev_predicate);
2929 }
2930 else
2931 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2932 operand_map, offset_map, possible_truths,
2933 toplev_predicate);
2934 }
2935 for (e = node->indirect_calls; e; e = next)
2936 {
2937 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2938 predicate p;
2939 next = e->next_callee;
2940
2941 remap_edge_change_prob (inlined_edge, e);
2942 if (es->predicate)
2943 {
2944 p = es->predicate->remap_after_inlining
2945 (info, callee_info, operand_map, offset_map,
2946 possible_truths, *toplev_predicate);
2947 edge_set_predicate (e, &p);
2948 }
2949 else
2950 edge_set_predicate (e, toplev_predicate);
2951 }
2952}
2953
2954 /* Same as remap_predicate, but set the result into hint *HINT. */
2955
2956static void
2957remap_hint_predicate (struct ipa_fn_summary *info,
2958 struct ipa_fn_summary *callee_info,
2959 predicate **hint,
2960 vec<int> operand_map,
2961 vec<int> offset_map,
2962 clause_t possible_truths,
2963 predicate *toplev_predicate)
2964{
2965 predicate p;
2966
2967 if (!*hint)
2968 return;
2969 p = (*hint)->remap_after_inlining
2970 (info, callee_info,
2971 operand_map, offset_map,
2972 possible_truths, *toplev_predicate);
2973 if (p != false && p != true)
2974 {
2975 if (!*hint)
2976 set_hint_predicate (hint, p);
2977 else
2978 **hint &= p;
2979 }
2980}
2981
2982/* We inlined EDGE. Update summary of the function we inlined into. */
2983
2984void
2985 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
2986 {
2987 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
2988 struct cgraph_node *to = (edge->caller->global.inlined_to
2989 ? edge->caller->global.inlined_to : edge->caller);
2990 struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
2991 clause_t clause = 0; /* not_inline is known to be false. */
2992 size_time_entry *e;
2993 vec<int> operand_map = vNULL;
2994 vec<int> offset_map = vNULL;
2995 int i;
2996 predicate toplev_predicate;
2997 predicate true_p = true;
2998 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2999
3000 if (es->predicate)
3001 toplev_predicate = *es->predicate;
3002 else
3003 toplev_predicate = true;
3004
3005 info->fp_expressions |= callee_info->fp_expressions;
3006
3007 if (callee_info->conds)
3008 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3009 if (ipa_node_params_sum && callee_info->conds)
3010 {
3011 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3012 int count = ipa_get_cs_argument_count (args);
3013 int i;
3014
3015 if (count)
3016 {
3017 operand_map.safe_grow_cleared (count);
3018 offset_map.safe_grow_cleared (count);
3019 }
3020 for (i = 0; i < count; i++)
3021 {
3022 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3023 int map = -1;
3024
3025 /* TODO: handle non-NOPs when merging. */
3026 if (jfunc->type == IPA_JF_PASS_THROUGH)
3027 {
3028 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3029 map = ipa_get_jf_pass_through_formal_id (jfunc);
3030 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3031 offset_map[i] = -1;
3032 }
3033 else if (jfunc->type == IPA_JF_ANCESTOR)
3034 {
3035 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3036 if (offset >= 0 && offset < INT_MAX)
3037 {
3038 map = ipa_get_jf_ancestor_formal_id (jfunc);
3039 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3040 offset = -1;
3041 offset_map[i] = offset;
3042 }
3043 }
3044 operand_map[i] = map;
3045 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3046 }
3047 }
3048 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3049 {
3050 predicate p;
3051 p = e->exec_predicate.remap_after_inlining
3052 (info, callee_info, operand_map,
3053 offset_map, clause,
3054 toplev_predicate);
3055 predicate nonconstp;
3056 nonconstp = e->nonconst_predicate.remap_after_inlining
3057 (info, callee_info, operand_map,
3058 offset_map, clause,
3059 toplev_predicate);
3060 if (p != false && nonconstp != false)
3061 {
3062 sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
3063 int prob = e->nonconst_predicate.probability (callee_info->conds,
3064 clause, es->param);
3065 add_time = add_time * prob / REG_BR_PROB_BASE;
3066 if (prob != REG_BR_PROB_BASE
3067 && dump_file && (dump_flags & TDF_DETAILS))
3068 {
3069 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3070 (double) prob / REG_BR_PROB_BASE);
3071 }
3072 info->account_size_time (e->size, add_time, p, nonconstp);
3073 }
3074 }
3075 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3076 offset_map, clause, &toplev_predicate);
3077 remap_hint_predicate (info, callee_info,
3078 &callee_info->loop_iterations,
3079 operand_map, offset_map, clause, &toplev_predicate);
3080 remap_hint_predicate (info, callee_info,
3081 &callee_info->loop_stride,
3082 operand_map, offset_map, clause, &toplev_predicate);
3083 remap_hint_predicate (info, callee_info,
3084 &callee_info->array_index,
3085 operand_map, offset_map, clause, &toplev_predicate);
3086
3087 inline_update_callee_summaries (edge->callee,
3088 ipa_call_summaries->get (edge)->loop_depth);
3089
3090 /* We do not maintain predicates of inlined edges; free them. */
3091 edge_set_predicate (edge, &true_p);
3092 /* Similarly remove param summaries. */
3093 es->param.release ();
3094 operand_map.release ();
3095 offset_map.release ();
3096}
3097
3098 /* For performance reasons ipa_merge_fn_summary_after_inlining does not update
3099 the overall size and time. Recompute them here. */
3100
3101void
3102 ipa_update_overall_fn_summary (struct cgraph_node *node)
3103 {
3104 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
3105 size_time_entry *e;
3106 int i;
3107
3108 info->size = 0;
3109 info->time = 0;
3110 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3111 {
3112 info->size += e->size;
3113 info->time += e->time;
3114 }
3115 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3116 &info->time, NULL,
3117 ~(clause_t) (1 << predicate::false_condition),
3118 vNULL, vNULL, vNULL);
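  /* Convert the accumulated size back to base units, rounding to the
     nearest multiple of ipa_fn_summary::size_scale.  */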
3119 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3120}
3121
3122
3123/* This function performs intraprocedural analysis in NODE that is required to
3124 inline indirect calls. */
3125
3126static void
3127inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3128{
3129 ipa_analyze_node (node);
3130 if (dump_file && (dump_flags & TDF_DETAILS))
3131 {
3132 ipa_print_node_params (dump_file, node);
3133 ipa_print_node_jump_functions (dump_file, node);
3134 }
3135}
3136
3137
3138 /* Analyze the function body and compute the summary used by the inliner. */
3139
3140void
3141inline_analyze_function (struct cgraph_node *node)
3142{
3143 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3144
3145 if (dump_file)
3146 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3147 node->name (), node->order);
3148 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3149 inline_indirect_intraprocedural_analysis (node);
3150 compute_fn_summary (node, false);
3151 if (!optimize)
3152 {
3153 struct cgraph_edge *e;
3154 for (e = node->callees; e; e = e->next_callee)
3155 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3156 for (e = node->indirect_calls; e; e = e->next_callee)
3157 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3158 }
3159
3160 pop_cfun ();
3161}
3162
3163
3164 /* Called when a new function is inserted into the callgraph late. */
3165
3166void
3167 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3168{
3169 inline_analyze_function (node);
3170}
3171
3172 /* Compute function summaries for all suitable functions in the callgraph. */
3173
3174static void
3175ipa_fn_summary_generate (void)
3176{
3177 struct cgraph_node *node;
3178
3179 FOR_EACH_DEFINED_FUNCTION (node)
3180 if (DECL_STRUCT_FUNCTION (node->decl))
3181 node->local.versionable = tree_versionable_function_p (node->decl);
3182
3183 ipa_fn_summary_alloc ();
3184
3185 ipa_fn_summaries->enable_insertion_hook ();
3186
3187 ipa_register_cgraph_hooks ();
3188
3189 FOR_EACH_DEFINED_FUNCTION (node)
3190 if (!node->alias
3191 && (flag_generate_lto || flag_generate_offload || flag_wpa
3192 || opt_for_fn (node->decl, optimize)))
3193 inline_analyze_function (node);
3194}
3195
3196
3197 /* Read inline summary for edge E from IB. */
3198
3199static void
3200read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3201{
3202 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3203 predicate p;
3204 int length, i;
3205
3206 es->call_stmt_size = streamer_read_uhwi (ib);
3207 es->call_stmt_time = streamer_read_uhwi (ib);
3208 es->loop_depth = streamer_read_uhwi (ib);
3209
3210 bitpack_d bp = streamer_read_bitpack (ib);
3211 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3212
3213 p.stream_in (ib);
3214 edge_set_predicate (e, &p);
3215 length = streamer_read_uhwi (ib);
3216 if (length)
3217 {
3218 es->param.safe_grow_cleared (length);
3219 for (i = 0; i < length; i++)
3220 es->param[i].change_prob = streamer_read_uhwi (ib);
3221 }
3222}
3223
3224
3225/* Stream in inline summaries from the section. */
3226
3227static void
3228inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3229 size_t len)
3230{
3231 const struct lto_function_header *header =
3232 (const struct lto_function_header *) data;
3233 const int cfg_offset = sizeof (struct lto_function_header);
3234 const int main_offset = cfg_offset + header->cfg_size;
3235 const int string_offset = main_offset + header->main_size;
3236 struct data_in *data_in;
3237 unsigned int i, count2, j;
3238 unsigned int f_count;
3239
3240 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3241 file_data->mode_table);
3242
3243 data_in =
3244 lto_data_in_create (file_data, (const char *) data + string_offset,
3245 header->string_size, vNULL);
3246 f_count = streamer_read_uhwi (&ib);
3247 for (i = 0; i < f_count; i++)
3248 {
3249 unsigned int index;
3250 struct cgraph_node *node;
3251 struct ipa_fn_summary *info;
3252 lto_symtab_encoder_t encoder;
3253 struct bitpack_d bp;
3254 struct cgraph_edge *e;
3255 predicate p;
3256
3257 index = streamer_read_uhwi (&ib);
3258 encoder = file_data->symtab_node_encoder;
3259 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3260 index));
3261 info = ipa_fn_summaries->get (node);
3262
3263 info->estimated_stack_size
3264 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3265 info->size = info->self_size = streamer_read_uhwi (&ib);
3266 info->time = sreal::stream_in (&ib);
3267
3268 bp = streamer_read_bitpack (&ib);
3269 info->inlinable = bp_unpack_value (&bp, 1);
3270 info->contains_cilk_spawn = bp_unpack_value (&bp, 1);
3271 info->fp_expressions = bp_unpack_value (&bp, 1);
3272
3273 count2 = streamer_read_uhwi (&ib);
3274 gcc_assert (!info->conds);
3275 for (j = 0; j < count2; j++)
3276 {
3277 struct condition c;
3278 c.operand_num = streamer_read_uhwi (&ib);
3279 c.size = streamer_read_uhwi (&ib);
3280 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3281 c.val = stream_read_tree (&ib, data_in);
3282 bp = streamer_read_bitpack (&ib);
3283 c.agg_contents = bp_unpack_value (&bp, 1);
3284 c.by_ref = bp_unpack_value (&bp, 1);
3285 if (c.agg_contents)
3286 c.offset = streamer_read_uhwi (&ib);
3287 vec_safe_push (info->conds, c);
3288 }
3289 count2 = streamer_read_uhwi (&ib);
3290 gcc_assert (!info->size_time_table);
3291 for (j = 0; j < count2; j++)
3292 {
3293 struct size_time_entry e;
3294
3295 e.size = streamer_read_uhwi (&ib);
3296 e.time = sreal::stream_in (&ib);
3297 e.exec_predicate.stream_in (&ib);
3298 e.nonconst_predicate.stream_in (&ib);
3299
3300 vec_safe_push (info->size_time_table, e);
3301 }
3302
3303 p.stream_in (&ib);
3304 set_hint_predicate (&info->loop_iterations, p);
3305 p.stream_in (&ib);
3306 set_hint_predicate (&info->loop_stride, p);
3307 p.stream_in (&ib);
3308 set_hint_predicate (&info->array_index, p);
3309 for (e = node->callees; e; e = e->next_callee)
3310 read_ipa_call_summary (&ib, e);
3311 for (e = node->indirect_calls; e; e = e->next_callee)
3312 read_ipa_call_summary (&ib, e);
3313 }
3314
3315 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3316 len);
3317 lto_data_in_delete (data_in);
3318}
3319
3320
3321 /* Read the inline summary. Jump functions are shared among ipa-cp
3322 and the inliner, so when ipa-cp is active, we don't need to read them
3323 twice. */
3324
3325static void
3326ipa_fn_summary_read (void)
3327{
3328 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3329 struct lto_file_decl_data *file_data;
3330 unsigned int j = 0;
3331
3332 ipa_fn_summary_alloc ();
3333
3334 while ((file_data = file_data_vec[j++]))
3335 {
3336 size_t len;
3337 const char *data = lto_get_section_data (file_data,
3338 LTO_section_ipa_fn_summary,
3339 NULL, &len);
3340 if (data)
3341 inline_read_section (file_data, data, len);
3342 else
3343 /* Fatal error here. We do not want to support compiling ltrans units
3344 with a different version of the compiler or different flags than the WPA
3345 unit, so this should never happen. */
3346 fatal_error (input_location,
3347 "ipa inline summary is missing in input file");
3348 }
3349 ipa_register_cgraph_hooks ();
3350 if (!flag_ipa_cp)
3351 ipa_prop_read_jump_functions ();
3352
3353 gcc_assert (ipa_fn_summaries);
3354 ipa_fn_summaries->enable_insertion_hook ();
3355}
3356
3357
3358/* Write inline summary for edge E to OB. */
3359
3360static void
3361write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3362{
3363 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3364 int i;
3365
3366 streamer_write_uhwi (ob, es->call_stmt_size);
3367 streamer_write_uhwi (ob, es->call_stmt_time);
3368 streamer_write_uhwi (ob, es->loop_depth);
3369
3370 bitpack_d bp = bitpack_create (ob->main_stream);
3371 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
3372 streamer_write_bitpack (&bp);
3373
3374 if (es->predicate)
3375 es->predicate->stream_out (ob);
3376 else
3377 streamer_write_uhwi (ob, 0);
3378 streamer_write_uhwi (ob, es->param.length ());
3379 for (i = 0; i < (int) es->param.length (); i++)
3380 streamer_write_uhwi (ob, es->param[i].change_prob);
3381}
3382
3383
3384 /* Write inline summaries for all nodes in SET.
3385 Jump functions are shared among ipa-cp and the inliner, so when ipa-cp is
3386 active, we don't need to write them twice. */
3387
3388static void
3389ipa_fn_summary_write (void)
3390 {
3391 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3392 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3393 unsigned int count = 0;
3394 int i;
3395
3396 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3397 {
3398 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3399 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3400 if (cnode && cnode->definition && !cnode->alias)
3401 count++;
3402 }
3403 streamer_write_uhwi (ob, count);
3404
3405 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3406 {
3407 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3408 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3409 if (cnode && cnode->definition && !cnode->alias)
3410 {
3411 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3412 struct bitpack_d bp;
3413 struct cgraph_edge *edge;
3414 int i;
3415 size_time_entry *e;
3416 struct condition *c;
3417
3418 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3419 streamer_write_hwi (ob, info->estimated_self_stack_size);
3420 streamer_write_hwi (ob, info->self_size);
3421 info->time.stream_out (ob);
3422 bp = bitpack_create (ob->main_stream);
3423 bp_pack_value (&bp, info->inlinable, 1);
3424 bp_pack_value (&bp, info->contains_cilk_spawn, 1);
3425 bp_pack_value (&bp, info->fp_expressions, 1);
3426 streamer_write_bitpack (&bp);
3427 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3428 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3429 {
3430 streamer_write_uhwi (ob, c->operand_num);
3431 streamer_write_uhwi (ob, c->size);
3432 streamer_write_uhwi (ob, c->code);
3433 stream_write_tree (ob, c->val, true);
3434 bp = bitpack_create (ob->main_stream);
3435 bp_pack_value (&bp, c->agg_contents, 1);
3436 bp_pack_value (&bp, c->by_ref, 1);
3437 streamer_write_bitpack (&bp);
3438 if (c->agg_contents)
3439 streamer_write_uhwi (ob, c->offset);
3440 }
3441 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3442 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3443 {
3444 streamer_write_uhwi (ob, e->size);
3445 e->time.stream_out (ob);
3446 e->exec_predicate.stream_out (ob);
3447 e->nonconst_predicate.stream_out (ob);
3448 }
3449 if (info->loop_iterations)
3450 info->loop_iterations->stream_out (ob);
3451 else
3452 streamer_write_uhwi (ob, 0);
3453 if (info->loop_stride)
3454 info->loop_stride->stream_out (ob);
3455 else
3456 streamer_write_uhwi (ob, 0);
3457 if (info->array_index)
3458 info->array_index->stream_out (ob);
3459 else
3460 streamer_write_uhwi (ob, 0);
3461 for (edge = cnode->callees; edge; edge = edge->next_callee)
3462 write_ipa_call_summary (ob, edge);
3463 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3464 write_ipa_call_summary (ob, edge);
3465 }
3466 }
3467 streamer_write_char_stream (ob->main_stream, 0);
3468 produce_asm (ob, NULL);
3469 destroy_output_block (ob);
3470
3471 if (!flag_ipa_cp)
3472 ipa_prop_write_jump_functions ();
3473}
3474
3475
3476/* Release inline summary. */
3477
3478void
3479 ipa_free_fn_summary (void)
3480{
3481 struct cgraph_node *node;
3482 if (!ipa_call_summaries)
3483 return;
3484 FOR_EACH_DEFINED_FUNCTION (node)
3485 if (!node->alias)
3486 ipa_fn_summaries->get (node)->reset (node);
3487 ipa_fn_summaries->release ();
3488 ipa_fn_summaries = NULL;
3489 ipa_call_summaries->release ();
3490 delete ipa_call_summaries;
3491 ipa_call_summaries = NULL;
3492 edge_predicate_pool.release ();
3493}
3494
3495namespace {
3496
3497const pass_data pass_data_local_fn_summary =
3498{
3499 GIMPLE_PASS, /* type */
3500 "local-fnsummary", /* name */
3501 OPTGROUP_INLINE, /* optinfo_flags */
3502 TV_INLINE_PARAMETERS, /* tv_id */
3503 0, /* properties_required */
3504 0, /* properties_provided */
3505 0, /* properties_destroyed */
3506 0, /* todo_flags_start */
3507 0, /* todo_flags_finish */
3508};
3509
3510class pass_local_fn_summary : public gimple_opt_pass
3511{
3512public:
3513 pass_local_fn_summary (gcc::context *ctxt)
3514 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3515 {}
3516
3517 /* opt_pass methods: */
3518 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3519 virtual unsigned int execute (function *)
3520 {
3521 return compute_fn_summary_for_current ();
3522 }
3523
3524}; // class pass_local_fn_summary
3525
3526} // anon namespace
3527
3528gimple_opt_pass *
3529make_pass_local_fn_summary (gcc::context *ctxt)
3530{
3531 return new pass_local_fn_summary (ctxt);
3532}
3533
3534
3535/* Free inline summary. */
3536
3537namespace {
3538
3539const pass_data pass_data_ipa_free_fn_summary =
3540{
3541 SIMPLE_IPA_PASS, /* type */
3542 "free-fnsummary", /* name */
3543 OPTGROUP_NONE, /* optinfo_flags */
3544 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3545 0, /* properties_required */
3546 0, /* properties_provided */
3547 0, /* properties_destroyed */
3548 0, /* todo_flags_start */
3549 /* Early optimizations may make a function unreachable. We cannot
3550 remove unreachable functions as part of the early opts pass because
3551 TODOs are run before subpasses. Do it here. */
3552 ( TODO_remove_functions | TODO_dump_symtab ), /* todo_flags_finish */
3553};
3554
3555class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3556{
3557public:
3558 pass_ipa_free_fn_summary (gcc::context *ctxt)
3559 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt)
3560 {}
3561
3562 /* opt_pass methods: */
3563 virtual unsigned int execute (function *)
3564 {
3565 ipa_free_fn_summary ();
3566 return 0;
3567 }
3568
3569}; // class pass_ipa_free_fn_summary
3570
3571} // anon namespace
3572
3573simple_ipa_opt_pass *
3574make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3575{
3576 return new pass_ipa_free_fn_summary (ctxt);
3577}
3578
3579namespace {
3580
3581const pass_data pass_data_ipa_fn_summary =
3582{
3583 IPA_PASS, /* type */
3584 "fnsummary", /* name */
3585 OPTGROUP_INLINE, /* optinfo_flags */
3586 TV_IPA_FNSUMMARY, /* tv_id */
3587 0, /* properties_required */
3588 0, /* properties_provided */
3589 0, /* properties_destroyed */
3590 0, /* todo_flags_start */
3591 ( TODO_dump_symtab ), /* todo_flags_finish */
3592};
3593
3594class pass_ipa_fn_summary : public ipa_opt_pass_d
3595{
3596public:
3597 pass_ipa_fn_summary (gcc::context *ctxt)
3598 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3599 ipa_fn_summary_generate, /* generate_summary */
3600 ipa_fn_summary_write, /* write_summary */
3601 ipa_fn_summary_read, /* read_summary */
3602 NULL, /* write_optimization_summary */
3603 NULL, /* read_optimization_summary */
3604 NULL, /* stmt_fixup */
3605 0, /* function_transform_todo_flags_start */
3606 NULL, /* function_transform */
3607 NULL) /* variable_transform */
3608 {}
3609
3610 /* opt_pass methods: */
3611 virtual unsigned int execute (function *) { return 0; }
3612
3613}; // class pass_ipa_fn_summary
3614
3615} // anon namespace
3616
3617ipa_opt_pass_d *
3618make_pass_ipa_fn_summary (gcc::context *ctxt)
3619{
3620 return new pass_ipa_fn_summary (ctxt);
3621}