]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-fnsummary.c
ipa-fnsummary.c (ipa_dump_hints): Do not dump array_index.
[thirdparty/gcc.git] / gcc / ipa-fnsummary.c
1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "params.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86
87 /* Summaries. */
88 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
89 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
90
91 /* Edge predicates goes here. */
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_known_hot)
138 {
139 hints &= ~INLINE_HINT_known_hot;
140 fprintf (f, " known_hot");
141 }
142 gcc_assert (!hints);
143 }
144
145
146 /* Record SIZE and TIME to SUMMARY.
147 The accounted code will be executed when EXEC_PRED is true.
148 When NONCONST_PRED is false the code will evaulate to constant and
149 will get optimized out in specialized clones of the function. */
150
151 void
152 ipa_fn_summary::account_size_time (int size, sreal time,
153 const predicate &exec_pred,
154 const predicate &nonconst_pred_in)
155 {
156 size_time_entry *e;
157 bool found = false;
158 int i;
159 predicate nonconst_pred;
160
161 if (exec_pred == false)
162 return;
163
164 nonconst_pred = nonconst_pred_in & exec_pred;
165
166 if (nonconst_pred == false)
167 return;
168
169 /* We need to create initial empty unconitional clause, but otherwie
170 we don't need to account empty times and sizes. */
171 if (!size && time == 0 && size_time_table)
172 return;
173
174 gcc_assert (time >= 0);
175
176 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
177 if (e->exec_predicate == exec_pred
178 && e->nonconst_predicate == nonconst_pred)
179 {
180 found = true;
181 break;
182 }
183 if (i == 256)
184 {
185 i = 0;
186 found = true;
187 e = &(*size_time_table)[0];
188 if (dump_file && (dump_flags & TDF_DETAILS))
189 fprintf (dump_file,
190 "\t\tReached limit on number of entries, "
191 "ignoring the predicate.");
192 }
193 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
194 {
195 fprintf (dump_file,
196 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
197 ((double) size) / ipa_fn_summary::size_scale,
198 (time.to_double ()), found ? "" : "new ");
199 exec_pred.dump (dump_file, conds, 0);
200 if (exec_pred != nonconst_pred)
201 {
202 fprintf (dump_file, " nonconst:");
203 nonconst_pred.dump (dump_file, conds);
204 }
205 else
206 fprintf (dump_file, "\n");
207 }
208 if (!found)
209 {
210 class size_time_entry new_entry;
211 new_entry.size = size;
212 new_entry.time = time;
213 new_entry.exec_predicate = exec_pred;
214 new_entry.nonconst_predicate = nonconst_pred;
215 vec_safe_push (size_time_table, new_entry);
216 }
217 else
218 {
219 e->size += size;
220 e->time += time;
221 }
222 }
223
224 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
225
226 static struct cgraph_edge *
227 redirect_to_unreachable (struct cgraph_edge *e)
228 {
229 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
230 struct cgraph_node *target = cgraph_node::get_create
231 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
232
233 if (e->speculative)
234 e = e->resolve_speculation (target->decl);
235 else if (!e->callee)
236 e->make_direct (target);
237 else
238 e->redirect_callee (target);
239 class ipa_call_summary *es = ipa_call_summaries->get (e);
240 e->inline_failed = CIF_UNREACHABLE;
241 e->count = profile_count::zero ();
242 es->call_stmt_size = 0;
243 es->call_stmt_time = 0;
244 if (callee)
245 callee->remove_symbol_and_inline_clones ();
246 return e;
247 }
248
249 /* Set predicate for edge E. */
250
251 static void
252 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
253 {
254 /* If the edge is determined to be never executed, redirect it
255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
256 be optimized out. */
257 if (predicate && *predicate == false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e->speculative || e->callee))
262 e = redirect_to_unreachable (e);
263
264 class ipa_call_summary *es = ipa_call_summaries->get (e);
265 if (predicate && *predicate != true)
266 {
267 if (!es->predicate)
268 es->predicate = edge_predicate_pool.allocate ();
269 *es->predicate = *predicate;
270 }
271 else
272 {
273 if (es->predicate)
274 edge_predicate_pool.remove (es->predicate);
275 es->predicate = NULL;
276 }
277 }
278
279 /* Set predicate for hint *P. */
280
281 static void
282 set_hint_predicate (predicate **p, predicate new_predicate)
283 {
284 if (new_predicate == false || new_predicate == true)
285 {
286 if (*p)
287 edge_predicate_pool.remove (*p);
288 *p = NULL;
289 }
290 else
291 {
292 if (!*p)
293 *p = edge_predicate_pool.allocate ();
294 **p = new_predicate;
295 }
296 }
297
298
299 /* Compute what conditions may or may not hold given invormation about
300 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
301 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
302 copy when called in a given context. It is a bitmask of conditions. Bit
303 0 means that condition is known to be false, while bit 1 means that condition
304 may or may not be true. These differs - for example NOT_INLINED condition
305 is always false in the second and also builtin_constant_p tests cannot use
306 the fact that parameter is indeed a constant.
307
308 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
309 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
310 Return clause of possible truths. When INLINE_P is true, assume that we are
311 inlining.
312
313 ERROR_MARK means compile time invariant. */
314
315 static void
316 evaluate_conditions_for_known_args (struct cgraph_node *node,
317 bool inline_p,
318 vec<tree> known_vals,
319 vec<ipa_agg_jump_function_p>
320 known_aggs,
321 clause_t *ret_clause,
322 clause_t *ret_nonspec_clause)
323 {
324 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
325 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
326 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
327 int i;
328 struct condition *c;
329
330 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
331 {
332 tree val;
333 tree res;
334
335 /* We allow call stmt to have fewer arguments than the callee function
336 (especially for K&R style programs). So bound check here (we assume
337 known_aggs vector, if non-NULL, has the same length as
338 known_vals). */
339 gcc_checking_assert (!known_aggs.exists ()
340 || (known_vals.length () == known_aggs.length ()));
341 if (c->operand_num >= (int) known_vals.length ())
342 {
343 clause |= 1 << (i + predicate::first_dynamic_condition);
344 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
345 continue;
346 }
347
348 if (c->agg_contents)
349 {
350 struct ipa_agg_jump_function *agg;
351
352 if (c->code == predicate::changed
353 && !c->by_ref
354 && (known_vals[c->operand_num] == error_mark_node))
355 continue;
356
357 if (known_aggs.exists ())
358 {
359 agg = known_aggs[c->operand_num];
360 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
361 c->offset, c->by_ref);
362 }
363 else
364 val = NULL_TREE;
365 }
366 else
367 {
368 val = known_vals[c->operand_num];
369 if (val == error_mark_node && c->code != predicate::changed)
370 val = NULL_TREE;
371 }
372
373 if (!val)
374 {
375 clause |= 1 << (i + predicate::first_dynamic_condition);
376 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
377 continue;
378 }
379 if (c->code == predicate::changed)
380 {
381 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
382 continue;
383 }
384
385 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
386 {
387 clause |= 1 << (i + predicate::first_dynamic_condition);
388 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
389 continue;
390 }
391 if (c->code == predicate::is_not_constant)
392 {
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
394 continue;
395 }
396
397 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
398 res = val
399 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
400 : NULL;
401
402 if (res && integer_zerop (res))
403 continue;
404
405 clause |= 1 << (i + predicate::first_dynamic_condition);
406 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
407 }
408 *ret_clause = clause;
409 if (ret_nonspec_clause)
410 *ret_nonspec_clause = nonspec_clause;
411 }
412
413
414 /* Work out what conditions might be true at invocation of E. */
415
416 void
417 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
418 clause_t *clause_ptr,
419 clause_t *nonspec_clause_ptr,
420 vec<tree> *known_vals_ptr,
421 vec<ipa_polymorphic_call_context>
422 *known_contexts_ptr,
423 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
424 {
425 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
426 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
427 vec<tree> known_vals = vNULL;
428 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
429
430 if (clause_ptr)
431 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
432 if (known_vals_ptr)
433 known_vals_ptr->create (0);
434 if (known_contexts_ptr)
435 known_contexts_ptr->create (0);
436
437 if (ipa_node_params_sum
438 && !e->call_stmt_cannot_inline_p
439 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
440 {
441 class ipa_node_params *caller_parms_info, *callee_pi;
442 class ipa_edge_args *args = IPA_EDGE_REF (e);
443 class ipa_call_summary *es = ipa_call_summaries->get (e);
444 int i, count = ipa_get_cs_argument_count (args);
445
446 if (e->caller->global.inlined_to)
447 caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
448 else
449 caller_parms_info = IPA_NODE_REF (e->caller);
450 callee_pi = IPA_NODE_REF (e->callee);
451
452 if (count && (info->conds || known_vals_ptr))
453 known_vals.safe_grow_cleared (count);
454 if (count && (info->conds || known_aggs_ptr))
455 known_aggs.safe_grow_cleared (count);
456 if (count && known_contexts_ptr)
457 known_contexts_ptr->safe_grow_cleared (count);
458
459 for (i = 0; i < count; i++)
460 {
461 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
462 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
463 ipa_get_type (callee_pi, i));
464
465 if (!cst && e->call_stmt
466 && i < (int)gimple_call_num_args (e->call_stmt))
467 {
468 cst = gimple_call_arg (e->call_stmt, i);
469 if (!is_gimple_min_invariant (cst))
470 cst = NULL;
471 }
472 if (cst)
473 {
474 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
475 if (known_vals.exists ())
476 known_vals[i] = cst;
477 }
478 else if (inline_p && !es->param[i].change_prob)
479 known_vals[i] = error_mark_node;
480
481 if (known_contexts_ptr)
482 (*known_contexts_ptr)[i]
483 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
484 /* TODO: When IPA-CP starts propagating and merging aggregate jump
485 functions, use its knowledge of the caller too, just like the
486 scalar case above. */
487 known_aggs[i] = &jf->agg;
488 }
489 }
490 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
491 && ((clause_ptr && info->conds) || known_vals_ptr))
492 {
493 int i, count = (int)gimple_call_num_args (e->call_stmt);
494
495 if (count && (info->conds || known_vals_ptr))
496 known_vals.safe_grow_cleared (count);
497 for (i = 0; i < count; i++)
498 {
499 tree cst = gimple_call_arg (e->call_stmt, i);
500 if (!is_gimple_min_invariant (cst))
501 cst = NULL;
502 if (cst)
503 known_vals[i] = cst;
504 }
505 }
506
507 evaluate_conditions_for_known_args (callee, inline_p,
508 known_vals, known_aggs, clause_ptr,
509 nonspec_clause_ptr);
510
511 if (known_vals_ptr)
512 *known_vals_ptr = known_vals;
513 else
514 known_vals.release ();
515
516 if (known_aggs_ptr)
517 *known_aggs_ptr = known_aggs;
518 else
519 known_aggs.release ();
520 }
521
522
523 /* Allocate the function summary. */
524
525 static void
526 ipa_fn_summary_alloc (void)
527 {
528 gcc_checking_assert (!ipa_fn_summaries);
529 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
530 ipa_call_summaries = new ipa_call_summary_t (symtab);
531 }
532
533 ipa_call_summary::~ipa_call_summary ()
534 {
535 if (predicate)
536 edge_predicate_pool.remove (predicate);
537
538 param.release ();
539 }
540
541 ipa_fn_summary::~ipa_fn_summary ()
542 {
543 if (loop_iterations)
544 edge_predicate_pool.remove (loop_iterations);
545 if (loop_stride)
546 edge_predicate_pool.remove (loop_stride);
547 vec_free (conds);
548 vec_free (size_time_table);
549 }
550
551 void
552 ipa_fn_summary_t::remove_callees (cgraph_node *node)
553 {
554 cgraph_edge *e;
555 for (e = node->callees; e; e = e->next_callee)
556 ipa_call_summaries->remove (e);
557 for (e = node->indirect_calls; e; e = e->next_callee)
558 ipa_call_summaries->remove (e);
559 }
560
561 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
562 Additionally care about allocating new memory slot for updated predicate
563 and set it to NULL when it becomes true or false (and thus uninteresting).
564 */
565
566 static void
567 remap_hint_predicate_after_duplication (predicate **p,
568 clause_t possible_truths)
569 {
570 predicate new_predicate;
571
572 if (!*p)
573 return;
574
575 new_predicate = (*p)->remap_after_duplication (possible_truths);
576 /* We do not want to free previous predicate; it is used by node origin. */
577 *p = NULL;
578 set_hint_predicate (p, new_predicate);
579 }
580
581
582 /* Hook that is called by cgraph.c when a node is duplicated. */
583 void
584 ipa_fn_summary_t::duplicate (cgraph_node *src,
585 cgraph_node *dst,
586 ipa_fn_summary *,
587 ipa_fn_summary *info)
588 {
589 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
590 /* TODO: as an optimization, we may avoid copying conditions
591 that are known to be false or true. */
592 info->conds = vec_safe_copy (info->conds);
593
594 /* When there are any replacements in the function body, see if we can figure
595 out that something was optimized out. */
596 if (ipa_node_params_sum && dst->clone.tree_map)
597 {
598 vec<size_time_entry, va_gc> *entry = info->size_time_table;
599 /* Use SRC parm info since it may not be copied yet. */
600 class ipa_node_params *parms_info = IPA_NODE_REF (src);
601 vec<tree> known_vals = vNULL;
602 int count = ipa_get_param_count (parms_info);
603 int i, j;
604 clause_t possible_truths;
605 predicate true_pred = true;
606 size_time_entry *e;
607 int optimized_out_size = 0;
608 bool inlined_to_p = false;
609 struct cgraph_edge *edge, *next;
610
611 info->size_time_table = 0;
612 known_vals.safe_grow_cleared (count);
613 for (i = 0; i < count; i++)
614 {
615 struct ipa_replace_map *r;
616
617 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
618 {
619 if (((!r->old_tree && r->parm_num == i)
620 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
621 && r->replace_p && !r->ref_p)
622 {
623 known_vals[i] = r->new_tree;
624 break;
625 }
626 }
627 }
628 evaluate_conditions_for_known_args (dst, false,
629 known_vals,
630 vNULL,
631 &possible_truths,
632 /* We are going to specialize,
633 so ignore nonspec truths. */
634 NULL);
635 known_vals.release ();
636
637 info->account_size_time (0, 0, true_pred, true_pred);
638
639 /* Remap size_time vectors.
640 Simplify the predicate by prunning out alternatives that are known
641 to be false.
642 TODO: as on optimization, we can also eliminate conditions known
643 to be true. */
644 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
645 {
646 predicate new_exec_pred;
647 predicate new_nonconst_pred;
648 new_exec_pred = e->exec_predicate.remap_after_duplication
649 (possible_truths);
650 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
651 (possible_truths);
652 if (new_exec_pred == false || new_nonconst_pred == false)
653 optimized_out_size += e->size;
654 else
655 info->account_size_time (e->size, e->time, new_exec_pred,
656 new_nonconst_pred);
657 }
658
659 /* Remap edge predicates with the same simplification as above.
660 Also copy constantness arrays. */
661 for (edge = dst->callees; edge; edge = next)
662 {
663 predicate new_predicate;
664 class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
665 next = edge->next_callee;
666
667 if (!edge->inline_failed)
668 inlined_to_p = true;
669 if (!es->predicate)
670 continue;
671 new_predicate = es->predicate->remap_after_duplication
672 (possible_truths);
673 if (new_predicate == false && *es->predicate != false)
674 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
675 edge_set_predicate (edge, &new_predicate);
676 }
677
678 /* Remap indirect edge predicates with the same simplificaiton as above.
679 Also copy constantness arrays. */
680 for (edge = dst->indirect_calls; edge; edge = next)
681 {
682 predicate new_predicate;
683 class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
684 next = edge->next_callee;
685
686 gcc_checking_assert (edge->inline_failed);
687 if (!es->predicate)
688 continue;
689 new_predicate = es->predicate->remap_after_duplication
690 (possible_truths);
691 if (new_predicate == false && *es->predicate != false)
692 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
693 edge_set_predicate (edge, &new_predicate);
694 }
695 remap_hint_predicate_after_duplication (&info->loop_iterations,
696 possible_truths);
697 remap_hint_predicate_after_duplication (&info->loop_stride,
698 possible_truths);
699
700 /* If inliner or someone after inliner will ever start producing
701 non-trivial clones, we will get trouble with lack of information
702 about updating self sizes, because size vectors already contains
703 sizes of the calees. */
704 gcc_assert (!inlined_to_p || !optimized_out_size);
705 }
706 else
707 {
708 info->size_time_table = vec_safe_copy (info->size_time_table);
709 if (info->loop_iterations)
710 {
711 predicate p = *info->loop_iterations;
712 info->loop_iterations = NULL;
713 set_hint_predicate (&info->loop_iterations, p);
714 }
715 if (info->loop_stride)
716 {
717 predicate p = *info->loop_stride;
718 info->loop_stride = NULL;
719 set_hint_predicate (&info->loop_stride, p);
720 }
721 }
722 if (!dst->global.inlined_to)
723 ipa_update_overall_fn_summary (dst);
724 }
725
726
727 /* Hook that is called by cgraph.c when a node is duplicated. */
728
729 void
730 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
731 struct cgraph_edge *dst,
732 class ipa_call_summary *srcinfo,
733 class ipa_call_summary *info)
734 {
735 new (info) ipa_call_summary (*srcinfo);
736 info->predicate = NULL;
737 edge_set_predicate (dst, srcinfo->predicate);
738 info->param = srcinfo->param.copy ();
739 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
740 {
741 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
742 - eni_size_weights.call_cost);
743 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
744 - eni_time_weights.call_cost);
745 }
746 }
747
748 /* Dump edge summaries associated to NODE and recursively to all clones.
749 Indent by INDENT. */
750
751 static void
752 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
753 class ipa_fn_summary *info)
754 {
755 struct cgraph_edge *edge;
756 for (edge = node->callees; edge; edge = edge->next_callee)
757 {
758 class ipa_call_summary *es = ipa_call_summaries->get (edge);
759 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
760 int i;
761
762 fprintf (f,
763 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
764 indent, "", callee->name (), callee->order,
765 !edge->inline_failed
766 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
767 indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
768 es->call_stmt_size, es->call_stmt_time);
769
770 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
771 if (s != NULL)
772 fprintf (f, "callee size:%2i stack:%2i",
773 (int) (s->size / ipa_fn_summary::size_scale),
774 (int) s->estimated_stack_size);
775
776 if (es->predicate)
777 {
778 fprintf (f, " predicate: ");
779 es->predicate->dump (f, info->conds);
780 }
781 else
782 fprintf (f, "\n");
783 if (es->param.exists ())
784 for (i = 0; i < (int) es->param.length (); i++)
785 {
786 int prob = es->param[i].change_prob;
787
788 if (!prob)
789 fprintf (f, "%*s op%i is compile time invariant\n",
790 indent + 2, "", i);
791 else if (prob != REG_BR_PROB_BASE)
792 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
793 prob * 100.0 / REG_BR_PROB_BASE);
794 }
795 if (!edge->inline_failed)
796 {
797 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
798 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
799 " callee size %i\n",
800 indent + 2, "",
801 (int) s->stack_frame_offset,
802 (int) s->estimated_self_stack_size,
803 (int) s->estimated_stack_size);
804 dump_ipa_call_summary (f, indent + 2, callee, info);
805 }
806 }
807 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
808 {
809 class ipa_call_summary *es = ipa_call_summaries->get (edge);
810 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
811 " time: %2i",
812 indent, "",
813 es->loop_depth,
814 edge->sreal_frequency ().to_double (), es->call_stmt_size,
815 es->call_stmt_time);
816 if (es->predicate)
817 {
818 fprintf (f, "predicate: ");
819 es->predicate->dump (f, info->conds);
820 }
821 else
822 fprintf (f, "\n");
823 }
824 }
825
826
827 void
828 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
829 {
830 if (node->definition)
831 {
832 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
833 if (s != NULL)
834 {
835 size_time_entry *e;
836 int i;
837 fprintf (f, "IPA function summary for %s", node->dump_name ());
838 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
839 fprintf (f, " always_inline");
840 if (s->inlinable)
841 fprintf (f, " inlinable");
842 if (s->fp_expressions)
843 fprintf (f, " fp_expression");
844 fprintf (f, "\n global time: %f\n", s->time.to_double ());
845 fprintf (f, " self size: %i\n", s->self_size);
846 fprintf (f, " global size: %i\n", s->size);
847 fprintf (f, " min size: %i\n", s->min_size);
848 fprintf (f, " self stack: %i\n",
849 (int) s->estimated_self_stack_size);
850 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
851 if (s->growth)
852 fprintf (f, " estimated growth:%i\n", (int) s->growth);
853 if (s->scc_no)
854 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
855 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
856 {
857 fprintf (f, " size:%f, time:%f",
858 (double) e->size / ipa_fn_summary::size_scale,
859 e->time.to_double ());
860 if (e->exec_predicate != true)
861 {
862 fprintf (f, ", executed if:");
863 e->exec_predicate.dump (f, s->conds, 0);
864 }
865 if (e->exec_predicate != e->nonconst_predicate)
866 {
867 fprintf (f, ", nonconst if:");
868 e->nonconst_predicate.dump (f, s->conds, 0);
869 }
870 fprintf (f, "\n");
871 }
872 if (s->loop_iterations)
873 {
874 fprintf (f, " loop iterations:");
875 s->loop_iterations->dump (f, s->conds);
876 }
877 if (s->loop_stride)
878 {
879 fprintf (f, " loop stride:");
880 s->loop_stride->dump (f, s->conds);
881 }
882 fprintf (f, " calls:\n");
883 dump_ipa_call_summary (f, 4, node, s);
884 fprintf (f, "\n");
885 }
886 else
887 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
888 }
889 }
890
891 DEBUG_FUNCTION void
892 ipa_debug_fn_summary (struct cgraph_node *node)
893 {
894 ipa_dump_fn_summary (stderr, node);
895 }
896
897 void
898 ipa_dump_fn_summaries (FILE *f)
899 {
900 struct cgraph_node *node;
901
902 FOR_EACH_DEFINED_FUNCTION (node)
903 if (!node->global.inlined_to)
904 ipa_dump_fn_summary (f, node);
905 }
906
907 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
908 boolean variable pointed to by DATA. */
909
910 static bool
911 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
912 void *data)
913 {
914 bool *b = (bool *) data;
915 *b = true;
916 return true;
917 }
918
919 /* If OP refers to value of function parameter, return the corresponding
920 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
921 PARM_DECL) will be stored to *SIZE_P in that case too. */
922
923 static tree
924 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
925 HOST_WIDE_INT *size_p)
926 {
927 /* SSA_NAME referring to parm default def? */
928 if (TREE_CODE (op) == SSA_NAME
929 && SSA_NAME_IS_DEFAULT_DEF (op)
930 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
931 {
932 if (size_p)
933 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
934 return SSA_NAME_VAR (op);
935 }
936 /* Non-SSA parm reference? */
937 if (TREE_CODE (op) == PARM_DECL)
938 {
939 bool modified = false;
940
941 ao_ref refd;
942 ao_ref_init (&refd, op);
943 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
944 mark_modified, &modified, NULL, NULL,
945 fbi->aa_walk_budget + 1);
946 if (walked < 0)
947 {
948 fbi->aa_walk_budget = 0;
949 return NULL_TREE;
950 }
951 if (!modified)
952 {
953 if (size_p)
954 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
955 return op;
956 }
957 }
958 return NULL_TREE;
959 }
960
961 /* If OP refers to value of function parameter, return the corresponding
962 parameter. Also traverse chains of SSA register assignments. If non-NULL,
963 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
964 stored to *SIZE_P in that case too. */
965
966 static tree
967 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
968 HOST_WIDE_INT *size_p)
969 {
970 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
971 if (res)
972 return res;
973
974 if (TREE_CODE (op) == SSA_NAME
975 && !SSA_NAME_IS_DEFAULT_DEF (op)
976 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
977 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
978 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
979 size_p);
980 return NULL_TREE;
981 }
982
983 /* If OP refers to a value of a function parameter or value loaded from an
984 aggregate passed to a parameter (either by value or reference), return TRUE
985 and store the number of the parameter to *INDEX_P, the access size into
986 *SIZE_P, and information whether and how it has been loaded from an
987 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
988 statement in which OP is used or loaded. */
989
990 static bool
991 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
992 gimple *stmt, tree op, int *index_p,
993 HOST_WIDE_INT *size_p,
994 struct agg_position_info *aggpos)
995 {
996 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
997
998 gcc_checking_assert (aggpos);
999 if (res)
1000 {
1001 *index_p = ipa_get_param_decl_index (fbi->info, res);
1002 if (*index_p < 0)
1003 return false;
1004 aggpos->agg_contents = false;
1005 aggpos->by_ref = false;
1006 return true;
1007 }
1008
1009 if (TREE_CODE (op) == SSA_NAME)
1010 {
1011 if (SSA_NAME_IS_DEFAULT_DEF (op)
1012 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1013 return false;
1014 stmt = SSA_NAME_DEF_STMT (op);
1015 op = gimple_assign_rhs1 (stmt);
1016 if (!REFERENCE_CLASS_P (op))
1017 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1018 aggpos);
1019 }
1020
1021 aggpos->agg_contents = true;
1022 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1023 stmt, op, index_p, &aggpos->offset,
1024 size_p, &aggpos->by_ref);
1025 }
1026
1027 /* See if statement might disappear after inlining.
1028 0 - means not eliminated
1029 1 - half of statements goes away
1030 2 - for sure it is eliminated.
1031 We are not terribly sophisticated, basically looking for simple abstraction
1032 penalty wrappers. */
1033
1034 static int
1035 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1036 {
1037 enum gimple_code code = gimple_code (stmt);
1038 enum tree_code rhs_code;
1039
1040 if (!optimize)
1041 return 0;
1042
1043 switch (code)
1044 {
1045 case GIMPLE_RETURN:
1046 return 2;
1047 case GIMPLE_ASSIGN:
1048 if (gimple_num_ops (stmt) != 2)
1049 return 0;
1050
1051 rhs_code = gimple_assign_rhs_code (stmt);
1052
1053 /* Casts of parameters, loads from parameters passed by reference
1054 and stores to return value or parameters are often free after
1055 inlining dua to SRA and further combining.
1056 Assume that half of statements goes away. */
1057 if (CONVERT_EXPR_CODE_P (rhs_code)
1058 || rhs_code == VIEW_CONVERT_EXPR
1059 || rhs_code == ADDR_EXPR
1060 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1061 {
1062 tree rhs = gimple_assign_rhs1 (stmt);
1063 tree lhs = gimple_assign_lhs (stmt);
1064 tree inner_rhs = get_base_address (rhs);
1065 tree inner_lhs = get_base_address (lhs);
1066 bool rhs_free = false;
1067 bool lhs_free = false;
1068
1069 if (!inner_rhs)
1070 inner_rhs = rhs;
1071 if (!inner_lhs)
1072 inner_lhs = lhs;
1073
1074 /* Reads of parameter are expected to be free. */
1075 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1076 rhs_free = true;
1077 /* Match expressions of form &this->field. Those will most likely
1078 combine with something upstream after inlining. */
1079 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1080 {
1081 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1082 if (TREE_CODE (op) == PARM_DECL)
1083 rhs_free = true;
1084 else if (TREE_CODE (op) == MEM_REF
1085 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1086 NULL))
1087 rhs_free = true;
1088 }
1089
1090 /* When parameter is not SSA register because its address is taken
1091 and it is just copied into one, the statement will be completely
1092 free after inlining (we will copy propagate backward). */
1093 if (rhs_free && is_gimple_reg (lhs))
1094 return 2;
1095
1096 /* Reads of parameters passed by reference
1097 expected to be free (i.e. optimized out after inlining). */
1098 if (TREE_CODE (inner_rhs) == MEM_REF
1099 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1100 rhs_free = true;
1101
1102 /* Copying parameter passed by reference into gimple register is
1103 probably also going to copy propagate, but we can't be quite
1104 sure. */
1105 if (rhs_free && is_gimple_reg (lhs))
1106 lhs_free = true;
1107
1108 /* Writes to parameters, parameters passed by value and return value
1109 (either dirrectly or passed via invisible reference) are free.
1110
1111 TODO: We ought to handle testcase like
1112 struct a {int a,b;};
1113 struct a
1114 retrurnsturct (void)
1115 {
1116 struct a a ={1,2};
1117 return a;
1118 }
1119
1120 This translate into:
1121
1122 retrurnsturct ()
1123 {
1124 int a$b;
1125 int a$a;
1126 struct a a;
1127 struct a D.2739;
1128
1129 <bb 2>:
1130 D.2739.a = 1;
1131 D.2739.b = 2;
1132 return D.2739;
1133
1134 }
1135 For that we either need to copy ipa-split logic detecting writes
1136 to return value. */
1137 if (TREE_CODE (inner_lhs) == PARM_DECL
1138 || TREE_CODE (inner_lhs) == RESULT_DECL
1139 || (TREE_CODE (inner_lhs) == MEM_REF
1140 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1141 NULL)
1142 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1143 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1144 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1145 (inner_lhs,
1146 0))) == RESULT_DECL))))
1147 lhs_free = true;
1148 if (lhs_free
1149 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1150 rhs_free = true;
1151 if (lhs_free && rhs_free)
1152 return 1;
1153 }
1154 return 0;
1155 default:
1156 return 0;
1157 }
1158 }
1159
1160
1161 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1162 predicates to the CFG edges. */
1163
1164 static void
1165 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1166 class ipa_fn_summary *summary,
1167 basic_block bb)
1168 {
1169 gimple *last;
1170 tree op;
1171 int index;
1172 HOST_WIDE_INT size;
1173 struct agg_position_info aggpos;
1174 enum tree_code code, inverted_code;
1175 edge e;
1176 edge_iterator ei;
1177 gimple *set_stmt;
1178 tree op2;
1179
1180 last = last_stmt (bb);
1181 if (!last || gimple_code (last) != GIMPLE_COND)
1182 return;
1183 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1184 return;
1185 op = gimple_cond_lhs (last);
1186 /* TODO: handle conditionals like
1187 var = op0 < 4;
1188 if (var != 0). */
1189 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1190 {
1191 code = gimple_cond_code (last);
1192 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1193
1194 FOR_EACH_EDGE (e, ei, bb->succs)
1195 {
1196 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1197 ? code : inverted_code);
1198 /* invert_tree_comparison will return ERROR_MARK on FP
1199 comparsions that are not EQ/NE instead of returning proper
1200 unordered one. Be sure it is not confused with NON_CONSTANT. */
1201 if (this_code != ERROR_MARK)
1202 {
1203 predicate p
1204 = add_condition (summary, index, size, &aggpos, this_code,
1205 unshare_expr_without_location
1206 (gimple_cond_rhs (last)));
1207 e->aux = edge_predicate_pool.allocate ();
1208 *(predicate *) e->aux = p;
1209 }
1210 }
1211 }
1212
1213 if (TREE_CODE (op) != SSA_NAME)
1214 return;
1215 /* Special case
1216 if (builtin_constant_p (op))
1217 constant_code
1218 else
1219 nonconstant_code.
1220 Here we can predicate nonconstant_code. We can't
1221 really handle constant_code since we have no predicate
1222 for this and also the constant code is not known to be
1223 optimized away when inliner doen't see operand is constant.
1224 Other optimizers might think otherwise. */
1225 if (gimple_cond_code (last) != NE_EXPR
1226 || !integer_zerop (gimple_cond_rhs (last)))
1227 return;
1228 set_stmt = SSA_NAME_DEF_STMT (op);
1229 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1230 || gimple_call_num_args (set_stmt) != 1)
1231 return;
1232 op2 = gimple_call_arg (set_stmt, 0);
1233 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1234 &aggpos))
1235 return;
1236 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1237 {
1238 predicate p = add_condition (summary, index, size, &aggpos,
1239 predicate::is_not_constant, NULL_TREE);
1240 e->aux = edge_predicate_pool.allocate ();
1241 *(predicate *) e->aux = p;
1242 }
1243 }
1244
1245
1246 /* If BB ends by a switch we can turn into predicates, attach corresponding
1247 predicates to the CFG edges. */
1248
1249 static void
1250 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1251 class ipa_fn_summary *summary,
1252 basic_block bb)
1253 {
1254 gimple *lastg;
1255 tree op;
1256 int index;
1257 HOST_WIDE_INT size;
1258 struct agg_position_info aggpos;
1259 edge e;
1260 edge_iterator ei;
1261 size_t n;
1262 size_t case_idx;
1263
1264 lastg = last_stmt (bb);
1265 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1266 return;
1267 gswitch *last = as_a <gswitch *> (lastg);
1268 op = gimple_switch_index (last);
1269 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1270 return;
1271
1272 FOR_EACH_EDGE (e, ei, bb->succs)
1273 {
1274 e->aux = edge_predicate_pool.allocate ();
1275 *(predicate *) e->aux = false;
1276 }
1277 n = gimple_switch_num_labels (last);
1278 for (case_idx = 0; case_idx < n; ++case_idx)
1279 {
1280 tree cl = gimple_switch_label (last, case_idx);
1281 tree min, max;
1282 predicate p;
1283
1284 e = gimple_switch_edge (cfun, last, case_idx);
1285 min = CASE_LOW (cl);
1286 max = CASE_HIGH (cl);
1287
1288 /* For default we might want to construct predicate that none
1289 of cases is met, but it is bit hard to do not having negations
1290 of conditionals handy. */
1291 if (!min && !max)
1292 p = true;
1293 else if (!max)
1294 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1295 unshare_expr_without_location (min));
1296 else
1297 {
1298 predicate p1, p2;
1299 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1300 unshare_expr_without_location (min));
1301 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1302 unshare_expr_without_location (max));
1303 p = p1 & p2;
1304 }
1305 *(class predicate *) e->aux
1306 = p.or_with (summary->conds, *(class predicate *) e->aux);
1307 }
1308 }
1309
1310
1311 /* For each BB in NODE attach to its AUX pointer predicate under
1312 which it is executable. */
1313
1314 static void
1315 compute_bb_predicates (struct ipa_func_body_info *fbi,
1316 struct cgraph_node *node,
1317 class ipa_fn_summary *summary)
1318 {
1319 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1320 bool done = false;
1321 basic_block bb;
1322
1323 FOR_EACH_BB_FN (bb, my_function)
1324 {
1325 set_cond_stmt_execution_predicate (fbi, summary, bb);
1326 set_switch_stmt_execution_predicate (fbi, summary, bb);
1327 }
1328
1329 /* Entry block is always executable. */
1330 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1331 = edge_predicate_pool.allocate ();
1332 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1333
1334 /* A simple dataflow propagation of predicates forward in the CFG.
1335 TODO: work in reverse postorder. */
1336 while (!done)
1337 {
1338 done = true;
1339 FOR_EACH_BB_FN (bb, my_function)
1340 {
1341 predicate p = false;
1342 edge e;
1343 edge_iterator ei;
1344 FOR_EACH_EDGE (e, ei, bb->preds)
1345 {
1346 if (e->src->aux)
1347 {
1348 predicate this_bb_predicate
1349 = *(predicate *) e->src->aux;
1350 if (e->aux)
1351 this_bb_predicate &= (*(class predicate *) e->aux);
1352 p = p.or_with (summary->conds, this_bb_predicate);
1353 if (p == true)
1354 break;
1355 }
1356 }
1357 if (p == false)
1358 gcc_checking_assert (!bb->aux);
1359 else
1360 {
1361 if (!bb->aux)
1362 {
1363 done = false;
1364 bb->aux = edge_predicate_pool.allocate ();
1365 *((predicate *) bb->aux) = p;
1366 }
1367 else if (p != *(predicate *) bb->aux)
1368 {
1369 /* This OR operation is needed to ensure monotonous data flow
1370 in the case we hit the limit on number of clauses and the
1371 and/or operations above give approximate answers. */
1372 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1373 if (p != *(predicate *) bb->aux)
1374 {
1375 done = false;
1376 *((predicate *) bb->aux) = p;
1377 }
1378 }
1379 }
1380 }
1381 }
1382 }
1383
1384
1385 /* Return predicate specifying when the STMT might have result that is not
1386 a compile time constant. */
1387
1388 static predicate
1389 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1390 class ipa_fn_summary *summary,
1391 tree expr,
1392 vec<predicate> nonconstant_names)
1393 {
1394 tree parm;
1395 int index;
1396 HOST_WIDE_INT size;
1397
1398 while (UNARY_CLASS_P (expr))
1399 expr = TREE_OPERAND (expr, 0);
1400
1401 parm = unmodified_parm (fbi, NULL, expr, &size);
1402 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1403 return add_condition (summary, index, size, NULL, predicate::changed,
1404 NULL_TREE);
1405 if (is_gimple_min_invariant (expr))
1406 return false;
1407 if (TREE_CODE (expr) == SSA_NAME)
1408 return nonconstant_names[SSA_NAME_VERSION (expr)];
1409 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1410 {
1411 predicate p1
1412 = will_be_nonconstant_expr_predicate (fbi, summary,
1413 TREE_OPERAND (expr, 0),
1414 nonconstant_names);
1415 if (p1 == true)
1416 return p1;
1417
1418 predicate p2
1419 = will_be_nonconstant_expr_predicate (fbi, summary,
1420 TREE_OPERAND (expr, 1),
1421 nonconstant_names);
1422 return p1.or_with (summary->conds, p2);
1423 }
1424 else if (TREE_CODE (expr) == COND_EXPR)
1425 {
1426 predicate p1
1427 = will_be_nonconstant_expr_predicate (fbi, summary,
1428 TREE_OPERAND (expr, 0),
1429 nonconstant_names);
1430 if (p1 == true)
1431 return p1;
1432
1433 predicate p2
1434 = will_be_nonconstant_expr_predicate (fbi, summary,
1435 TREE_OPERAND (expr, 1),
1436 nonconstant_names);
1437 if (p2 == true)
1438 return p2;
1439 p1 = p1.or_with (summary->conds, p2);
1440 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1441 TREE_OPERAND (expr, 2),
1442 nonconstant_names);
1443 return p2.or_with (summary->conds, p1);
1444 }
1445 else if (TREE_CODE (expr) == CALL_EXPR)
1446 return true;
1447 else
1448 {
1449 debug_tree (expr);
1450 gcc_unreachable ();
1451 }
1452 return false;
1453 }
1454
1455
1456 /* Return predicate specifying when the STMT might have result that is not
1457 a compile time constant. */
1458
1459 static predicate
1460 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1461 class ipa_fn_summary *summary,
1462 gimple *stmt,
1463 vec<predicate> nonconstant_names)
1464 {
1465 predicate p = true;
1466 ssa_op_iter iter;
1467 tree use;
1468 predicate op_non_const;
1469 bool is_load;
1470 int base_index;
1471 HOST_WIDE_INT size;
1472 struct agg_position_info aggpos;
1473
1474 /* What statments might be optimized away
1475 when their arguments are constant. */
1476 if (gimple_code (stmt) != GIMPLE_ASSIGN
1477 && gimple_code (stmt) != GIMPLE_COND
1478 && gimple_code (stmt) != GIMPLE_SWITCH
1479 && (gimple_code (stmt) != GIMPLE_CALL
1480 || !(gimple_call_flags (stmt) & ECF_CONST)))
1481 return p;
1482
1483 /* Stores will stay anyway. */
1484 if (gimple_store_p (stmt))
1485 return p;
1486
1487 is_load = gimple_assign_load_p (stmt);
1488
1489 /* Loads can be optimized when the value is known. */
1490 if (is_load)
1491 {
1492 tree op;
1493 gcc_assert (gimple_assign_single_p (stmt));
1494 op = gimple_assign_rhs1 (stmt);
1495 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
1496 &aggpos))
1497 return p;
1498 }
1499 else
1500 base_index = -1;
1501
1502 /* See if we understand all operands before we start
1503 adding conditionals. */
1504 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1505 {
1506 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1507 /* For arguments we can build a condition. */
1508 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1509 continue;
1510 if (TREE_CODE (use) != SSA_NAME)
1511 return p;
1512 /* If we know when operand is constant,
1513 we still can say something useful. */
1514 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1515 continue;
1516 return p;
1517 }
1518
1519 if (is_load)
1520 op_non_const =
1521 add_condition (summary, base_index, size, &aggpos, predicate::changed,
1522 NULL);
1523 else
1524 op_non_const = false;
1525 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1526 {
1527 HOST_WIDE_INT size;
1528 tree parm = unmodified_parm (fbi, stmt, use, &size);
1529 int index;
1530
1531 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1532 {
1533 if (index != base_index)
1534 p = add_condition (summary, index, size, NULL, predicate::changed,
1535 NULL_TREE);
1536 else
1537 continue;
1538 }
1539 else
1540 p = nonconstant_names[SSA_NAME_VERSION (use)];
1541 op_non_const = p.or_with (summary->conds, op_non_const);
1542 }
1543 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1544 && gimple_op (stmt, 0)
1545 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1546 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1547 = op_non_const;
1548 return op_non_const;
1549 }
1550
1551 struct record_modified_bb_info
1552 {
1553 tree op;
1554 bitmap bb_set;
1555 gimple *stmt;
1556 };
1557
1558 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1559 probability how often it changes between USE_BB.
1560 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1561 is in different loop nest, we can do better.
1562 This is all just estimate. In theory we look for minimal cut separating
1563 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1564 anyway. */
1565
1566 static basic_block
1567 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1568 {
1569 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1570 if (l && l->header->count < init_bb->count)
1571 return l->header;
1572 return init_bb;
1573 }
1574
1575 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1576 set except for info->stmt. */
1577
1578 static bool
1579 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1580 {
1581 struct record_modified_bb_info *info =
1582 (struct record_modified_bb_info *) data;
1583 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1584 return false;
1585 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
1586 return false;
1587 bitmap_set_bit (info->bb_set,
1588 SSA_NAME_IS_DEFAULT_DEF (vdef)
1589 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1590 : get_minimal_bb
1591 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1592 gimple_bb (info->stmt))->index);
1593 if (dump_file)
1594 {
1595 fprintf (dump_file, " Param ");
1596 print_generic_expr (dump_file, info->op, TDF_SLIM);
1597 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
1598 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
1599 get_minimal_bb
1600 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1601 gimple_bb (info->stmt))->index);
1602 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
1603 }
1604 return false;
1605 }
1606
1607 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1608 will change since last invocation of STMT.
1609
1610 Value 0 is reserved for compile time invariants.
1611 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1612 ought to be REG_BR_PROB_BASE / estimated_iters. */
1613
1614 static int
1615 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
1616 {
1617 tree op = gimple_call_arg (stmt, i);
1618 basic_block bb = gimple_bb (stmt);
1619
1620 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1621 op = TREE_OPERAND (op, 0);
1622
1623 tree base = get_base_address (op);
1624
1625 /* Global invariants never change. */
1626 if (is_gimple_min_invariant (base))
1627 return 0;
1628
1629 /* We would have to do non-trivial analysis to really work out what
1630 is the probability of value to change (i.e. when init statement
1631 is in a sibling loop of the call).
1632
1633 We do an conservative estimate: when call is executed N times more often
1634 than the statement defining value, we take the frequency 1/N. */
1635 if (TREE_CODE (base) == SSA_NAME)
1636 {
1637 profile_count init_count;
1638
1639 if (!bb->count.nonzero_p ())
1640 return REG_BR_PROB_BASE;
1641
1642 if (SSA_NAME_IS_DEFAULT_DEF (base))
1643 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1644 else
1645 init_count = get_minimal_bb
1646 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1647 gimple_bb (stmt))->count;
1648
1649 if (init_count < bb->count)
1650 return MAX ((init_count.to_sreal_scale (bb->count)
1651 * REG_BR_PROB_BASE).to_int (), 1);
1652 return REG_BR_PROB_BASE;
1653 }
1654 else
1655 {
1656 ao_ref refd;
1657 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1658 struct record_modified_bb_info info;
1659 tree init = ctor_for_folding (base);
1660
1661 if (init != error_mark_node)
1662 return 0;
1663 if (!bb->count.nonzero_p ())
1664 return REG_BR_PROB_BASE;
1665 if (dump_file)
1666 {
1667 fprintf (dump_file, " Analyzing param change probablity of ");
1668 print_generic_expr (dump_file, op, TDF_SLIM);
1669 fprintf (dump_file, "\n");
1670 }
1671 ao_ref_init (&refd, op);
1672 info.op = op;
1673 info.stmt = stmt;
1674 info.bb_set = BITMAP_ALLOC (NULL);
1675 int walked
1676 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1677 NULL, NULL, fbi->aa_walk_budget);
1678 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
1679 {
1680 if (dump_file)
1681 {
1682 if (walked < 0)
1683 fprintf (dump_file, " Ran out of AA walking budget.\n");
1684 else
1685 fprintf (dump_file, " Set in same BB as used.\n");
1686 }
1687 BITMAP_FREE (info.bb_set);
1688 return REG_BR_PROB_BASE;
1689 }
1690
1691 bitmap_iterator bi;
1692 unsigned index;
1693 /* Lookup the most frequent update of the value and believe that
1694 it dominates all the other; precise analysis here is difficult. */
1695 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1696 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
1697 if (dump_file)
1698 {
1699 fprintf (dump_file, " Set with count ");
1700 max.dump (dump_file);
1701 fprintf (dump_file, " and used with count ");
1702 bb->count.dump (dump_file);
1703 fprintf (dump_file, " freq %f\n",
1704 max.to_sreal_scale (bb->count).to_double ());
1705 }
1706
1707 BITMAP_FREE (info.bb_set);
1708 if (max < bb->count)
1709 return MAX ((max.to_sreal_scale (bb->count)
1710 * REG_BR_PROB_BASE).to_int (), 1);
1711 return REG_BR_PROB_BASE;
1712 }
1713 }
1714
1715 /* Find whether a basic block BB is the final block of a (half) diamond CFG
1716 sub-graph and if the predicate the condition depends on is known. If so,
1717 return true and store the pointer the predicate in *P. */
1718
1719 static bool
1720 phi_result_unknown_predicate (ipa_func_body_info *fbi,
1721 ipa_fn_summary *summary, basic_block bb,
1722 predicate *p,
1723 vec<predicate> nonconstant_names)
1724 {
1725 edge e;
1726 edge_iterator ei;
1727 basic_block first_bb = NULL;
1728 gimple *stmt;
1729
1730 if (single_pred_p (bb))
1731 {
1732 *p = false;
1733 return true;
1734 }
1735
1736 FOR_EACH_EDGE (e, ei, bb->preds)
1737 {
1738 if (single_succ_p (e->src))
1739 {
1740 if (!single_pred_p (e->src))
1741 return false;
1742 if (!first_bb)
1743 first_bb = single_pred (e->src);
1744 else if (single_pred (e->src) != first_bb)
1745 return false;
1746 }
1747 else
1748 {
1749 if (!first_bb)
1750 first_bb = e->src;
1751 else if (e->src != first_bb)
1752 return false;
1753 }
1754 }
1755
1756 if (!first_bb)
1757 return false;
1758
1759 stmt = last_stmt (first_bb);
1760 if (!stmt
1761 || gimple_code (stmt) != GIMPLE_COND
1762 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
1763 return false;
1764
1765 *p = will_be_nonconstant_expr_predicate (fbi, summary,
1766 gimple_cond_lhs (stmt),
1767 nonconstant_names);
1768 if (*p == true)
1769 return false;
1770 else
1771 return true;
1772 }
1773
1774 /* Given a PHI statement in a function described by inline properties SUMMARY
1775 and *P being the predicate describing whether the selected PHI argument is
1776 known, store a predicate for the result of the PHI statement into
1777 NONCONSTANT_NAMES, if possible. */
1778
1779 static void
1780 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
1781 predicate *p,
1782 vec<predicate> nonconstant_names)
1783 {
1784 unsigned i;
1785
1786 for (i = 0; i < gimple_phi_num_args (phi); i++)
1787 {
1788 tree arg = gimple_phi_arg (phi, i)->def;
1789 if (!is_gimple_min_invariant (arg))
1790 {
1791 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1792 *p = p->or_with (summary->conds,
1793 nonconstant_names[SSA_NAME_VERSION (arg)]);
1794 if (*p == true)
1795 return;
1796 }
1797 }
1798
1799 if (dump_file && (dump_flags & TDF_DETAILS))
1800 {
1801 fprintf (dump_file, "\t\tphi predicate: ");
1802 p->dump (dump_file, summary->conds);
1803 }
1804 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1805 }
1806
1807 /* For a typical usage of __builtin_expect (a<b, 1), we
1808 may introduce an extra relation stmt:
1809 With the builtin, we have
1810 t1 = a <= b;
1811 t2 = (long int) t1;
1812 t3 = __builtin_expect (t2, 1);
1813 if (t3 != 0)
1814 goto ...
1815 Without the builtin, we have
1816 if (a<=b)
1817 goto...
1818 This affects the size/time estimation and may have
1819 an impact on the earlier inlining.
1820 Here find this pattern and fix it up later. */
1821
1822 static gimple *
1823 find_foldable_builtin_expect (basic_block bb)
1824 {
1825 gimple_stmt_iterator bsi;
1826
1827 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1828 {
1829 gimple *stmt = gsi_stmt (bsi);
1830 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1831 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
1832 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1833 {
1834 tree var = gimple_call_lhs (stmt);
1835 tree arg = gimple_call_arg (stmt, 0);
1836 use_operand_p use_p;
1837 gimple *use_stmt;
1838 bool match = false;
1839 bool done = false;
1840
1841 if (!var || !arg)
1842 continue;
1843 gcc_assert (TREE_CODE (var) == SSA_NAME);
1844
1845 while (TREE_CODE (arg) == SSA_NAME)
1846 {
1847 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1848 if (!is_gimple_assign (stmt_tmp))
1849 break;
1850 switch (gimple_assign_rhs_code (stmt_tmp))
1851 {
1852 case LT_EXPR:
1853 case LE_EXPR:
1854 case GT_EXPR:
1855 case GE_EXPR:
1856 case EQ_EXPR:
1857 case NE_EXPR:
1858 match = true;
1859 done = true;
1860 break;
1861 CASE_CONVERT:
1862 break;
1863 default:
1864 done = true;
1865 break;
1866 }
1867 if (done)
1868 break;
1869 arg = gimple_assign_rhs1 (stmt_tmp);
1870 }
1871
1872 if (match && single_imm_use (var, &use_p, &use_stmt)
1873 && gimple_code (use_stmt) == GIMPLE_COND)
1874 return use_stmt;
1875 }
1876 }
1877 return NULL;
1878 }
1879
1880 /* Return true when the basic blocks contains only clobbers followed by RESX.
1881 Such BBs are kept around to make removal of dead stores possible with
1882 presence of EH and will be optimized out by optimize_clobbers later in the
1883 game.
1884
1885 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1886 that can be clobber only, too.. When it is false, the RESX is not necessary
1887 on the end of basic block. */
1888
1889 static bool
1890 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1891 {
1892 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1893 edge_iterator ei;
1894 edge e;
1895
1896 if (need_eh)
1897 {
1898 if (gsi_end_p (gsi))
1899 return false;
1900 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1901 return false;
1902 gsi_prev (&gsi);
1903 }
1904 else if (!single_succ_p (bb))
1905 return false;
1906
1907 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1908 {
1909 gimple *stmt = gsi_stmt (gsi);
1910 if (is_gimple_debug (stmt))
1911 continue;
1912 if (gimple_clobber_p (stmt))
1913 continue;
1914 if (gimple_code (stmt) == GIMPLE_LABEL)
1915 break;
1916 return false;
1917 }
1918
1919 /* See if all predecestors are either throws or clobber only BBs. */
1920 FOR_EACH_EDGE (e, ei, bb->preds)
1921 if (!(e->flags & EDGE_EH)
1922 && !clobber_only_eh_bb_p (e->src, false))
1923 return false;
1924
1925 return true;
1926 }
1927
1928 /* Return true if STMT compute a floating point expression that may be affected
1929 by -ffast-math and similar flags. */
1930
1931 static bool
1932 fp_expression_p (gimple *stmt)
1933 {
1934 ssa_op_iter i;
1935 tree op;
1936
1937 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1938 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1939 return true;
1940 return false;
1941 }
1942
1943 /* Analyze function body for NODE.
1944 EARLY indicates run from early optimization pipeline. */
1945
1946 static void
1947 analyze_function_body (struct cgraph_node *node, bool early)
1948 {
1949 sreal time = PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME);
1950 /* Estimate static overhead for function prologue/epilogue and alignment. */
1951 int size = PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS);
1952 /* Benefits are scaled by probability of elimination that is in range
1953 <0,2>. */
1954 basic_block bb;
1955 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1956 sreal freq;
1957 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
1958 predicate bb_predicate;
1959 struct ipa_func_body_info fbi;
1960 vec<predicate> nonconstant_names = vNULL;
1961 int nblocks, n;
1962 int *order;
1963 gimple *fix_builtin_expect_stmt;
1964
1965 gcc_assert (my_function && my_function->cfg);
1966 gcc_assert (cfun == my_function);
1967
1968 memset(&fbi, 0, sizeof(fbi));
1969 vec_free (info->conds);
1970 info->conds = NULL;
1971 vec_free (info->size_time_table);
1972 info->size_time_table = NULL;
1973
1974 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
1975 so we can produce proper inline hints.
1976
1977 When optimizing and analyzing for early inliner, initialize node params
1978 so we can produce correct BB predicates. */
1979
1980 if (opt_for_fn (node->decl, optimize))
1981 {
1982 calculate_dominance_info (CDI_DOMINATORS);
1983 if (!early)
1984 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
1985 else
1986 {
1987 ipa_check_create_node_params ();
1988 ipa_initialize_node_params (node);
1989 }
1990
1991 if (ipa_node_params_sum)
1992 {
1993 fbi.node = node;
1994 fbi.info = IPA_NODE_REF (node);
1995 fbi.bb_infos = vNULL;
1996 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
1997 fbi.param_count = count_formal_params (node->decl);
1998 fbi.aa_walk_budget = PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS);
1999
2000 nonconstant_names.safe_grow_cleared
2001 (SSANAMES (my_function)->length ());
2002 }
2003 }
2004
2005 if (dump_file)
2006 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2007 node->name ());
2008
2009 /* When we run into maximal number of entries, we assign everything to the
2010 constant truth case. Be sure to have it in list. */
2011 bb_predicate = true;
2012 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2013
2014 bb_predicate = predicate::not_inlined ();
2015 info->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS)
2016 * ipa_fn_summary::size_scale,
2017 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME),
2018 bb_predicate,
2019 bb_predicate);
2020
2021 if (fbi.info)
2022 compute_bb_predicates (&fbi, node, info);
2023 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2024 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2025 for (n = 0; n < nblocks; n++)
2026 {
2027 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2028 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2029 if (clobber_only_eh_bb_p (bb))
2030 {
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2032 fprintf (dump_file, "\n Ignoring BB %i;"
2033 " it will be optimized away by cleanup_clobbers\n",
2034 bb->index);
2035 continue;
2036 }
2037
2038 /* TODO: Obviously predicates can be propagated down across CFG. */
2039 if (fbi.info)
2040 {
2041 if (bb->aux)
2042 bb_predicate = *(predicate *) bb->aux;
2043 else
2044 bb_predicate = false;
2045 }
2046 else
2047 bb_predicate = true;
2048
2049 if (dump_file && (dump_flags & TDF_DETAILS))
2050 {
2051 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2052 bb_predicate.dump (dump_file, info->conds);
2053 }
2054
2055 if (fbi.info && nonconstant_names.exists ())
2056 {
2057 predicate phi_predicate;
2058 bool first_phi = true;
2059
2060 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2061 gsi_next (&bsi))
2062 {
2063 if (first_phi
2064 && !phi_result_unknown_predicate (&fbi, info, bb,
2065 &phi_predicate,
2066 nonconstant_names))
2067 break;
2068 first_phi = false;
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2070 {
2071 fprintf (dump_file, " ");
2072 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2073 }
2074 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2075 nonconstant_names);
2076 }
2077 }
2078
2079 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2080
2081 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2082 gsi_next (&bsi))
2083 {
2084 gimple *stmt = gsi_stmt (bsi);
2085 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2086 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2087 int prob;
2088 predicate will_be_nonconstant;
2089
2090 /* This relation stmt should be folded after we remove
2091 buildin_expect call. Adjust the cost here. */
2092 if (stmt == fix_builtin_expect_stmt)
2093 {
2094 this_size--;
2095 this_time--;
2096 }
2097
2098 if (dump_file && (dump_flags & TDF_DETAILS))
2099 {
2100 fprintf (dump_file, " ");
2101 print_gimple_stmt (dump_file, stmt, 0);
2102 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2103 freq.to_double (), this_size,
2104 this_time);
2105 }
2106
2107 if (is_gimple_call (stmt)
2108 && !gimple_call_internal_p (stmt))
2109 {
2110 struct cgraph_edge *edge = node->get_edge (stmt);
2111 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2112
2113 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2114 resolved as constant. We however don't want to optimize
2115 out the cgraph edges. */
2116 if (nonconstant_names.exists ()
2117 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2118 && gimple_call_lhs (stmt)
2119 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2120 {
2121 predicate false_p = false;
2122 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2123 = false_p;
2124 }
2125 if (ipa_node_params_sum)
2126 {
2127 int count = gimple_call_num_args (stmt);
2128 int i;
2129
2130 if (count)
2131 es->param.safe_grow_cleared (count);
2132 for (i = 0; i < count; i++)
2133 {
2134 int prob = param_change_prob (&fbi, stmt, i);
2135 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2136 es->param[i].change_prob = prob;
2137 }
2138 }
2139
2140 es->call_stmt_size = this_size;
2141 es->call_stmt_time = this_time;
2142 es->loop_depth = bb_loop_depth (bb);
2143 edge_set_predicate (edge, &bb_predicate);
2144 if (edge->speculative)
2145 {
2146 cgraph_edge *direct, *indirect;
2147 ipa_ref *ref;
2148 edge->speculative_call_info (direct, indirect, ref);
2149 gcc_assert (direct == edge);
2150 ipa_call_summary *es2
2151 = ipa_call_summaries->get_create (indirect);
2152 ipa_call_summaries->duplicate (edge, indirect,
2153 es, es2);
2154 }
2155 }
2156
2157 /* TODO: When conditional jump or swithc is known to be constant, but
2158 we did not translate it into the predicates, we really can account
2159 just maximum of the possible paths. */
2160 if (fbi.info)
2161 will_be_nonconstant
2162 = will_be_nonconstant_predicate (&fbi, info,
2163 stmt, nonconstant_names);
2164 else
2165 will_be_nonconstant = true;
2166 if (this_time || this_size)
2167 {
2168 sreal final_time = (sreal)this_time * freq;
2169
2170 prob = eliminated_by_inlining_prob (&fbi, stmt);
2171 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2172 fprintf (dump_file,
2173 "\t\t50%% will be eliminated by inlining\n");
2174 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2175 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2176
2177 class predicate p = bb_predicate & will_be_nonconstant;
2178
2179 /* We can ignore statement when we proved it is never going
2180 to happen, but we cannot do that for call statements
2181 because edges are accounted specially. */
2182
2183 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2184 {
2185 time += final_time;
2186 size += this_size;
2187 }
2188
2189 /* We account everything but the calls. Calls have their own
2190 size/time info attached to cgraph edges. This is necessary
2191 in order to make the cost disappear after inlining. */
2192 if (!is_gimple_call (stmt))
2193 {
2194 if (prob)
2195 {
2196 predicate ip = bb_predicate & predicate::not_inlined ();
2197 info->account_size_time (this_size * prob,
2198 (final_time * prob) / 2, ip,
2199 p);
2200 }
2201 if (prob != 2)
2202 info->account_size_time (this_size * (2 - prob),
2203 (final_time * (2 - prob) / 2),
2204 bb_predicate,
2205 p);
2206 }
2207
2208 if (!info->fp_expressions && fp_expression_p (stmt))
2209 {
2210 info->fp_expressions = true;
2211 if (dump_file)
2212 fprintf (dump_file, " fp_expression set\n");
2213 }
2214 }
2215
2216 /* Account cost of address calculations in the statements. */
2217 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2218 {
2219 for (tree op = gimple_op (stmt, i);
2220 op && handled_component_p (op);
2221 op = TREE_OPERAND (op, 0))
2222 if ((TREE_CODE (op) == ARRAY_REF
2223 || TREE_CODE (op) == ARRAY_RANGE_REF)
2224 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2225 {
2226 predicate p = bb_predicate;
2227 if (fbi.info)
2228 p = p & will_be_nonconstant_expr_predicate
2229 (&fbi, info, TREE_OPERAND (op, 1),
2230 nonconstant_names);
2231 if (p != false)
2232 {
2233 time += freq;
2234 size += 1;
2235 if (dump_file)
2236 fprintf (dump_file,
2237 "\t\tAccounting address calculation.\n");
2238 info->account_size_time (ipa_fn_summary::size_scale,
2239 freq,
2240 bb_predicate,
2241 p);
2242 }
2243 }
2244 }
2245
2246 }
2247 }
2248 free (order);
2249
2250 if (nonconstant_names.exists () && !early)
2251 {
2252 class loop *loop;
2253 predicate loop_iterations = true;
2254 predicate loop_stride = true;
2255
2256 if (dump_file && (dump_flags & TDF_DETAILS))
2257 flow_loops_dump (dump_file, NULL, 0);
2258 scev_initialize ();
2259 FOR_EACH_LOOP (loop, 0)
2260 {
2261 vec<edge> exits;
2262 edge ex;
2263 unsigned int j;
2264 class tree_niter_desc niter_desc;
2265 bb_predicate = *(predicate *) loop->header->aux;
2266
2267 exits = get_loop_exit_edges (loop);
2268 FOR_EACH_VEC_ELT (exits, j, ex)
2269 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2270 && !is_gimple_min_invariant (niter_desc.niter))
2271 {
2272 predicate will_be_nonconstant
2273 = will_be_nonconstant_expr_predicate (&fbi, info,
2274 niter_desc.niter,
2275 nonconstant_names);
2276 if (will_be_nonconstant != true)
2277 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2278 if (will_be_nonconstant != true
2279 && will_be_nonconstant != false)
2280 /* This is slightly inprecise. We may want to represent each
2281 loop with independent predicate. */
2282 loop_iterations &= will_be_nonconstant;
2283 }
2284 exits.release ();
2285 }
2286
2287 /* To avoid quadratic behavior we analyze stride predicates only
2288 with respect to the containing loop. Thus we simply iterate
2289 over all defs in the outermost loop body. */
2290 for (loop = loops_for_fn (cfun)->tree_root->inner;
2291 loop != NULL; loop = loop->next)
2292 {
2293 basic_block *body = get_loop_body (loop);
2294 for (unsigned i = 0; i < loop->num_nodes; i++)
2295 {
2296 gimple_stmt_iterator gsi;
2297 bb_predicate = *(predicate *) body[i]->aux;
2298 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2299 gsi_next (&gsi))
2300 {
2301 gimple *stmt = gsi_stmt (gsi);
2302
2303 if (!is_gimple_assign (stmt))
2304 continue;
2305
2306 tree def = gimple_assign_lhs (stmt);
2307 if (TREE_CODE (def) != SSA_NAME)
2308 continue;
2309
2310 affine_iv iv;
2311 if (!simple_iv (loop_containing_stmt (stmt),
2312 loop_containing_stmt (stmt),
2313 def, &iv, true)
2314 || is_gimple_min_invariant (iv.step))
2315 continue;
2316
2317 predicate will_be_nonconstant
2318 = will_be_nonconstant_expr_predicate (&fbi, info, iv.step,
2319 nonconstant_names);
2320 if (will_be_nonconstant != true)
2321 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2322 if (will_be_nonconstant != true
2323 && will_be_nonconstant != false)
2324 /* This is slightly inprecise. We may want to represent
2325 each loop with independent predicate. */
2326 loop_stride = loop_stride & will_be_nonconstant;
2327 }
2328 }
2329 free (body);
2330 }
2331 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2332 set_hint_predicate (&s->loop_iterations, loop_iterations);
2333 set_hint_predicate (&s->loop_stride, loop_stride);
2334 scev_finalize ();
2335 }
2336 FOR_ALL_BB_FN (bb, my_function)
2337 {
2338 edge e;
2339 edge_iterator ei;
2340
2341 if (bb->aux)
2342 edge_predicate_pool.remove ((predicate *)bb->aux);
2343 bb->aux = NULL;
2344 FOR_EACH_EDGE (e, ei, bb->succs)
2345 {
2346 if (e->aux)
2347 edge_predicate_pool.remove ((predicate *) e->aux);
2348 e->aux = NULL;
2349 }
2350 }
2351 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2352 s->time = time;
2353 s->self_size = size;
2354 nonconstant_names.release ();
2355 ipa_release_body_info (&fbi);
2356 if (opt_for_fn (node->decl, optimize))
2357 {
2358 if (!early)
2359 loop_optimizer_finalize ();
2360 else if (!ipa_edge_args_sum)
2361 ipa_free_all_node_params ();
2362 free_dominance_info (CDI_DOMINATORS);
2363 }
2364 if (dump_file)
2365 {
2366 fprintf (dump_file, "\n");
2367 ipa_dump_fn_summary (dump_file, node);
2368 }
2369 }
2370
2371
2372 /* Compute function summary.
2373 EARLY is true when we compute parameters during early opts. */
2374
2375 void
2376 compute_fn_summary (struct cgraph_node *node, bool early)
2377 {
2378 HOST_WIDE_INT self_stack_size;
2379 struct cgraph_edge *e;
2380 class ipa_fn_summary *info;
2381
2382 gcc_assert (!node->global.inlined_to);
2383
2384 if (!ipa_fn_summaries)
2385 ipa_fn_summary_alloc ();
2386
2387 /* Create a new ipa_fn_summary. */
2388 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2389 ipa_fn_summaries->remove (node);
2390 info = ipa_fn_summaries->get_create (node);
2391
2392 /* Estimate the stack size for the function if we're optimizing. */
2393 self_stack_size = optimize && !node->thunk.thunk_p
2394 ? estimated_stack_frame_size (node) : 0;
2395 info->estimated_self_stack_size = self_stack_size;
2396 info->estimated_stack_size = self_stack_size;
2397 info->stack_frame_offset = 0;
2398
2399 if (node->thunk.thunk_p)
2400 {
2401 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2402 predicate t = true;
2403
2404 node->local.can_change_signature = false;
2405 es->call_stmt_size = eni_size_weights.call_cost;
2406 es->call_stmt_time = eni_time_weights.call_cost;
2407 info->account_size_time (ipa_fn_summary::size_scale
2408 * PARAM_VALUE
2409 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS),
2410 PARAM_VALUE
2411 (PARAM_UNINLINED_FUNCTION_THUNK_TIME), t, t);
2412 t = predicate::not_inlined ();
2413 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2414 ipa_update_overall_fn_summary (node);
2415 info->self_size = info->size;
2416 if (stdarg_p (TREE_TYPE (node->decl)))
2417 {
2418 info->inlinable = false;
2419 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2420 }
2421 else
2422 info->inlinable = true;
2423 }
2424 else
2425 {
2426 /* Even is_gimple_min_invariant rely on current_function_decl. */
2427 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2428
2429 /* Can this function be inlined at all? */
2430 if (!opt_for_fn (node->decl, optimize)
2431 && !lookup_attribute ("always_inline",
2432 DECL_ATTRIBUTES (node->decl)))
2433 info->inlinable = false;
2434 else
2435 info->inlinable = tree_inlinable_function_p (node->decl);
2436
2437 /* Type attributes can use parameter indices to describe them. */
2438 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2439 /* Likewise for #pragma omp declare simd functions or functions
2440 with simd attribute. */
2441 || lookup_attribute ("omp declare simd",
2442 DECL_ATTRIBUTES (node->decl)))
2443 node->local.can_change_signature = false;
2444 else
2445 {
2446 /* Otherwise, inlinable functions always can change signature. */
2447 if (info->inlinable)
2448 node->local.can_change_signature = true;
2449 else
2450 {
2451 /* Functions calling builtin_apply cannot change signature. */
2452 for (e = node->callees; e; e = e->next_callee)
2453 {
2454 tree cdecl = e->callee->decl;
2455 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2456 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2457 break;
2458 }
2459 node->local.can_change_signature = !e;
2460 }
2461 }
2462 analyze_function_body (node, early);
2463 pop_cfun ();
2464 }
2465 for (e = node->callees; e; e = e->next_callee)
2466 if (e->callee->comdat_local_p ())
2467 break;
2468 node->calls_comdat_local = (e != NULL);
2469
2470 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2471 info->size = info->self_size;
2472 info->stack_frame_offset = 0;
2473 info->estimated_stack_size = info->estimated_self_stack_size;
2474
2475 /* Code above should compute exactly the same result as
2476 ipa_update_overall_fn_summary but because computation happens in
2477 different order the roundoff errors result in slight changes. */
2478 ipa_update_overall_fn_summary (node);
2479 /* In LTO mode we may have speculative edges set. */
2480 gcc_assert (in_lto_p || info->size == info->self_size);
2481 }
2482
2483
2484 /* Compute parameters of functions used by inliner using
2485 current_function_decl. */
2486
2487 static unsigned int
2488 compute_fn_summary_for_current (void)
2489 {
2490 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2491 return 0;
2492 }
2493
2494 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2495 KNOWN_CONTEXTS and KNOWN_AGGS. */
2496
2497 static bool
2498 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2499 int *size, int *time,
2500 vec<tree> known_vals,
2501 vec<ipa_polymorphic_call_context> known_contexts,
2502 vec<ipa_agg_jump_function_p> known_aggs)
2503 {
2504 tree target;
2505 struct cgraph_node *callee;
2506 class ipa_fn_summary *isummary;
2507 enum availability avail;
2508 bool speculative;
2509
2510 if (!known_vals.exists () && !known_contexts.exists ())
2511 return false;
2512 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2513 return false;
2514
2515 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2516 known_aggs, &speculative);
2517 if (!target || speculative)
2518 return false;
2519
2520 /* Account for difference in cost between indirect and direct calls. */
2521 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2522 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2523 gcc_checking_assert (*time >= 0);
2524 gcc_checking_assert (*size >= 0);
2525
2526 callee = cgraph_node::get (target);
2527 if (!callee || !callee->definition)
2528 return false;
2529 callee = callee->function_symbol (&avail);
2530 if (avail < AVAIL_AVAILABLE)
2531 return false;
2532 isummary = ipa_fn_summaries->get (callee);
2533 if (isummary == NULL)
2534 return false;
2535
2536 return isummary->inlinable;
2537 }
2538
2539 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2540 handle edge E with probability PROB.
2541 Set HINTS if edge may be devirtualized.
2542 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2543 site. */
2544
2545 static inline void
2546 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2547 sreal *time,
2548 int prob,
2549 vec<tree> known_vals,
2550 vec<ipa_polymorphic_call_context> known_contexts,
2551 vec<ipa_agg_jump_function_p> known_aggs,
2552 ipa_hints *hints)
2553 {
2554 class ipa_call_summary *es = ipa_call_summaries->get (e);
2555 int call_size = es->call_stmt_size;
2556 int call_time = es->call_stmt_time;
2557 int cur_size;
2558 if (!e->callee
2559 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2560 known_vals, known_contexts, known_aggs)
2561 && hints && e->maybe_hot_p ())
2562 *hints |= INLINE_HINT_indirect_call;
2563 cur_size = call_size * ipa_fn_summary::size_scale;
2564 *size += cur_size;
2565 if (min_size)
2566 *min_size += cur_size;
2567 if (prob == REG_BR_PROB_BASE)
2568 *time += ((sreal)call_time) * e->sreal_frequency ();
2569 else
2570 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2571 }
2572
2573
2574
2575 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2576 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2577 describe context of the call site. */
2578
2579 static void
2580 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2581 int *min_size, sreal *time,
2582 ipa_hints *hints,
2583 clause_t possible_truths,
2584 vec<tree> known_vals,
2585 vec<ipa_polymorphic_call_context> known_contexts,
2586 vec<ipa_agg_jump_function_p> known_aggs)
2587 {
2588 struct cgraph_edge *e;
2589 for (e = node->callees; e; e = e->next_callee)
2590 {
2591 class ipa_call_summary *es = ipa_call_summaries->get_create (e);
2592
2593 /* Do not care about zero sized builtins. */
2594 if (e->inline_failed && !es->call_stmt_size)
2595 {
2596 gcc_checking_assert (!es->call_stmt_time);
2597 continue;
2598 }
2599 if (!es->predicate
2600 || es->predicate->evaluate (possible_truths))
2601 {
2602 if (e->inline_failed)
2603 {
2604 /* Predicates of calls shall not use NOT_CHANGED codes,
2605 sowe do not need to compute probabilities. */
2606 estimate_edge_size_and_time (e, size,
2607 es->predicate ? NULL : min_size,
2608 time, REG_BR_PROB_BASE,
2609 known_vals, known_contexts,
2610 known_aggs, hints);
2611 }
2612 else
2613 estimate_calls_size_and_time (e->callee, size, min_size, time,
2614 hints,
2615 possible_truths,
2616 known_vals, known_contexts,
2617 known_aggs);
2618 }
2619 }
2620 for (e = node->indirect_calls; e; e = e->next_callee)
2621 {
2622 class ipa_call_summary *es = ipa_call_summaries->get_create (e);
2623 if (!es->predicate
2624 || es->predicate->evaluate (possible_truths))
2625 estimate_edge_size_and_time (e, size,
2626 es->predicate ? NULL : min_size,
2627 time, REG_BR_PROB_BASE,
2628 known_vals, known_contexts, known_aggs,
2629 hints);
2630 }
2631 }
2632
2633
2634 /* Estimate size and time needed to execute NODE assuming
2635 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2636 information about NODE's arguments. If non-NULL use also probability
2637 information present in INLINE_PARAM_SUMMARY vector.
2638 Additionally detemine hints determined by the context. Finally compute
2639 minimal size needed for the call that is independent on the call context and
2640 can be used for fast estimates. Return the values in RET_SIZE,
2641 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2642
2643 void
2644 estimate_node_size_and_time (struct cgraph_node *node,
2645 clause_t possible_truths,
2646 clause_t nonspec_possible_truths,
2647 vec<tree> known_vals,
2648 vec<ipa_polymorphic_call_context> known_contexts,
2649 vec<ipa_agg_jump_function_p> known_aggs,
2650 int *ret_size, int *ret_min_size,
2651 sreal *ret_time,
2652 sreal *ret_nonspecialized_time,
2653 ipa_hints *ret_hints,
2654 vec<inline_param_summary>
2655 inline_param_summary)
2656 {
2657 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2658 size_time_entry *e;
2659 int size = 0;
2660 sreal time = 0;
2661 int min_size = 0;
2662 ipa_hints hints = 0;
2663 int i;
2664
2665 if (dump_file && (dump_flags & TDF_DETAILS))
2666 {
2667 bool found = false;
2668 fprintf (dump_file, " Estimating body: %s/%i\n"
2669 " Known to be false: ", node->name (),
2670 node->order);
2671
2672 for (i = predicate::not_inlined_condition;
2673 i < (predicate::first_dynamic_condition
2674 + (int) vec_safe_length (info->conds)); i++)
2675 if (!(possible_truths & (1 << i)))
2676 {
2677 if (found)
2678 fprintf (dump_file, ", ");
2679 found = true;
2680 dump_condition (dump_file, info->conds, i);
2681 }
2682 }
2683
2684 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2685 known_vals, known_contexts, known_aggs);
2686 sreal nonspecialized_time = time;
2687
2688 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2689 {
2690 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2691
2692 /* Because predicates are conservative, it can happen that nonconst is 1
2693 but exec is 0. */
2694 if (exec)
2695 {
2696 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2697
2698 gcc_checking_assert (e->time >= 0);
2699 gcc_checking_assert (time >= 0);
2700
2701 /* We compute specialized size only because size of nonspecialized
2702 copy is context independent.
2703
2704 The difference between nonspecialized execution and specialized is
2705 that nonspecialized is not going to have optimized out computations
2706 known to be constant in a specialized setting. */
2707 if (nonconst)
2708 size += e->size;
2709 nonspecialized_time += e->time;
2710 if (!nonconst)
2711 ;
2712 else if (!inline_param_summary.exists ())
2713 {
2714 if (nonconst)
2715 time += e->time;
2716 }
2717 else
2718 {
2719 int prob = e->nonconst_predicate.probability
2720 (info->conds, possible_truths,
2721 inline_param_summary);
2722 gcc_checking_assert (prob >= 0);
2723 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2724 time += e->time * prob / REG_BR_PROB_BASE;
2725 }
2726 gcc_checking_assert (time >= 0);
2727 }
2728 }
2729 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2730 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2731 min_size = (*info->size_time_table)[0].size;
2732 gcc_checking_assert (size >= 0);
2733 gcc_checking_assert (time >= 0);
2734 /* nonspecialized_time should be always bigger than specialized time.
2735 Roundoff issues however may get into the way. */
2736 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
2737
2738 /* Roundoff issues may make specialized time bigger than nonspecialized
2739 time. We do not really want that to happen because some heurstics
2740 may get confused by seeing negative speedups. */
2741 if (time > nonspecialized_time)
2742 time = nonspecialized_time;
2743
2744 if (info->loop_iterations
2745 && !info->loop_iterations->evaluate (possible_truths))
2746 hints |= INLINE_HINT_loop_iterations;
2747 if (info->loop_stride
2748 && !info->loop_stride->evaluate (possible_truths))
2749 hints |= INLINE_HINT_loop_stride;
2750 if (info->scc_no)
2751 hints |= INLINE_HINT_in_scc;
2752 if (DECL_DECLARED_INLINE_P (node->decl))
2753 hints |= INLINE_HINT_declared_inline;
2754
2755 size = RDIV (size, ipa_fn_summary::size_scale);
2756 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2757
2758 if (dump_file && (dump_flags & TDF_DETAILS))
2759 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2760 time.to_double (), nonspecialized_time.to_double ());
2761 if (ret_time)
2762 *ret_time = time;
2763 if (ret_nonspecialized_time)
2764 *ret_nonspecialized_time = nonspecialized_time;
2765 if (ret_size)
2766 *ret_size = size;
2767 if (ret_min_size)
2768 *ret_min_size = min_size;
2769 if (ret_hints)
2770 *ret_hints = hints;
2771 return;
2772 }
2773
2774
2775 /* Estimate size and time needed to execute callee of EDGE assuming that
2776 parameters known to be constant at caller of EDGE are propagated.
2777 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2778 and types for parameters. */
2779
2780 void
2781 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2782 vec<tree> known_vals,
2783 vec<ipa_polymorphic_call_context>
2784 known_contexts,
2785 vec<ipa_agg_jump_function_p> known_aggs,
2786 int *ret_size, sreal *ret_time,
2787 sreal *ret_nonspec_time,
2788 ipa_hints *hints)
2789 {
2790 clause_t clause, nonspec_clause;
2791
2792 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2793 &clause, &nonspec_clause);
2794 estimate_node_size_and_time (node, clause, nonspec_clause,
2795 known_vals, known_contexts,
2796 known_aggs, ret_size, NULL, ret_time,
2797 ret_nonspec_time, hints, vNULL);
2798 }
2799
2800
2801 /* Update summary information of inline clones after inlining.
2802 Compute peak stack usage. */
2803
2804 static void
2805 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2806 {
2807 struct cgraph_edge *e;
2808 ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2809 ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2810 HOST_WIDE_INT peak;
2811
2812 callee_info->stack_frame_offset
2813 = caller_info->stack_frame_offset
2814 + caller_info->estimated_self_stack_size;
2815 peak = callee_info->stack_frame_offset
2816 + callee_info->estimated_self_stack_size;
2817
2818 ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to);
2819 if (s->estimated_stack_size < peak)
2820 s->estimated_stack_size = peak;
2821 ipa_propagate_frequency (node);
2822 for (e = node->callees; e; e = e->next_callee)
2823 {
2824 if (!e->inline_failed)
2825 inline_update_callee_summaries (e->callee, depth);
2826 ipa_call_summaries->get (e)->loop_depth += depth;
2827 }
2828 for (e = node->indirect_calls; e; e = e->next_callee)
2829 ipa_call_summaries->get (e)->loop_depth += depth;
2830 }
2831
2832 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2833 When functoin A is inlined in B and A calls C with parameter that
2834 changes with probability PROB1 and C is known to be passthroug
2835 of argument if B that change with probability PROB2, the probability
2836 of change is now PROB1*PROB2. */
2837
2838 static void
2839 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2840 struct cgraph_edge *edge)
2841 {
2842 if (ipa_node_params_sum)
2843 {
2844 int i;
2845 class ipa_edge_args *args = IPA_EDGE_REF (edge);
2846 class ipa_call_summary *es = ipa_call_summaries->get (edge);
2847 class ipa_call_summary *inlined_es
2848 = ipa_call_summaries->get (inlined_edge);
2849
2850 if (es->param.length () == 0)
2851 return;
2852
2853 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2854 {
2855 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2856 if (jfunc->type == IPA_JF_PASS_THROUGH
2857 || jfunc->type == IPA_JF_ANCESTOR)
2858 {
2859 int id = jfunc->type == IPA_JF_PASS_THROUGH
2860 ? ipa_get_jf_pass_through_formal_id (jfunc)
2861 : ipa_get_jf_ancestor_formal_id (jfunc);
2862 if (id < (int) inlined_es->param.length ())
2863 {
2864 int prob1 = es->param[i].change_prob;
2865 int prob2 = inlined_es->param[id].change_prob;
2866 int prob = combine_probabilities (prob1, prob2);
2867
2868 if (prob1 && prob2 && !prob)
2869 prob = 1;
2870
2871 es->param[i].change_prob = prob;
2872 }
2873 }
2874 }
2875 }
2876 }
2877
2878 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2879
2880 Remap predicates of callees of NODE. Rest of arguments match
2881 remap_predicate.
2882
2883 Also update change probabilities. */
2884
2885 static void
2886 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2887 struct cgraph_node *node,
2888 class ipa_fn_summary *info,
2889 class ipa_fn_summary *callee_info,
2890 vec<int> operand_map,
2891 vec<int> offset_map,
2892 clause_t possible_truths,
2893 predicate *toplev_predicate)
2894 {
2895 struct cgraph_edge *e, *next;
2896 for (e = node->callees; e; e = next)
2897 {
2898 class ipa_call_summary *es = ipa_call_summaries->get (e);
2899 predicate p;
2900 next = e->next_callee;
2901
2902 if (e->inline_failed)
2903 {
2904 remap_edge_change_prob (inlined_edge, e);
2905
2906 if (es->predicate)
2907 {
2908 p = es->predicate->remap_after_inlining
2909 (info, callee_info, operand_map,
2910 offset_map, possible_truths,
2911 *toplev_predicate);
2912 edge_set_predicate (e, &p);
2913 }
2914 else
2915 edge_set_predicate (e, toplev_predicate);
2916 }
2917 else
2918 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2919 operand_map, offset_map, possible_truths,
2920 toplev_predicate);
2921 }
2922 for (e = node->indirect_calls; e; e = next)
2923 {
2924 class ipa_call_summary *es = ipa_call_summaries->get (e);
2925 predicate p;
2926 next = e->next_callee;
2927
2928 remap_edge_change_prob (inlined_edge, e);
2929 if (es->predicate)
2930 {
2931 p = es->predicate->remap_after_inlining
2932 (info, callee_info, operand_map, offset_map,
2933 possible_truths, *toplev_predicate);
2934 edge_set_predicate (e, &p);
2935 }
2936 else
2937 edge_set_predicate (e, toplev_predicate);
2938 }
2939 }
2940
2941 /* Same as remap_predicate, but set result into hint *HINT. */
2942
2943 static void
2944 remap_hint_predicate (class ipa_fn_summary *info,
2945 class ipa_fn_summary *callee_info,
2946 predicate **hint,
2947 vec<int> operand_map,
2948 vec<int> offset_map,
2949 clause_t possible_truths,
2950 predicate *toplev_predicate)
2951 {
2952 predicate p;
2953
2954 if (!*hint)
2955 return;
2956 p = (*hint)->remap_after_inlining
2957 (info, callee_info,
2958 operand_map, offset_map,
2959 possible_truths, *toplev_predicate);
2960 if (p != false && p != true)
2961 {
2962 if (!*hint)
2963 set_hint_predicate (hint, p);
2964 else
2965 **hint &= p;
2966 }
2967 }
2968
2969 /* We inlined EDGE. Update summary of the function we inlined into. */
2970
2971 void
2972 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
2973 {
2974 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
2975 struct cgraph_node *to = (edge->caller->global.inlined_to
2976 ? edge->caller->global.inlined_to : edge->caller);
2977 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
2978 clause_t clause = 0; /* not_inline is known to be false. */
2979 size_time_entry *e;
2980 vec<int> operand_map = vNULL;
2981 vec<int> offset_map = vNULL;
2982 int i;
2983 predicate toplev_predicate;
2984 predicate true_p = true;
2985 class ipa_call_summary *es = ipa_call_summaries->get (edge);
2986
2987 if (es->predicate)
2988 toplev_predicate = *es->predicate;
2989 else
2990 toplev_predicate = true;
2991
2992 info->fp_expressions |= callee_info->fp_expressions;
2993
2994 if (callee_info->conds)
2995 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
2996 if (ipa_node_params_sum && callee_info->conds)
2997 {
2998 class ipa_edge_args *args = IPA_EDGE_REF (edge);
2999 int count = ipa_get_cs_argument_count (args);
3000 int i;
3001
3002 if (count)
3003 {
3004 operand_map.safe_grow_cleared (count);
3005 offset_map.safe_grow_cleared (count);
3006 }
3007 for (i = 0; i < count; i++)
3008 {
3009 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3010 int map = -1;
3011
3012 /* TODO: handle non-NOPs when merging. */
3013 if (jfunc->type == IPA_JF_PASS_THROUGH)
3014 {
3015 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3016 map = ipa_get_jf_pass_through_formal_id (jfunc);
3017 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3018 offset_map[i] = -1;
3019 }
3020 else if (jfunc->type == IPA_JF_ANCESTOR)
3021 {
3022 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3023 if (offset >= 0 && offset < INT_MAX)
3024 {
3025 map = ipa_get_jf_ancestor_formal_id (jfunc);
3026 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3027 offset = -1;
3028 offset_map[i] = offset;
3029 }
3030 }
3031 operand_map[i] = map;
3032 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3033 }
3034 }
3035 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3036 {
3037 predicate p;
3038 p = e->exec_predicate.remap_after_inlining
3039 (info, callee_info, operand_map,
3040 offset_map, clause,
3041 toplev_predicate);
3042 predicate nonconstp;
3043 nonconstp = e->nonconst_predicate.remap_after_inlining
3044 (info, callee_info, operand_map,
3045 offset_map, clause,
3046 toplev_predicate);
3047 if (p != false && nonconstp != false)
3048 {
3049 sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
3050 int prob = e->nonconst_predicate.probability (callee_info->conds,
3051 clause, es->param);
3052 add_time = add_time * prob / REG_BR_PROB_BASE;
3053 if (prob != REG_BR_PROB_BASE
3054 && dump_file && (dump_flags & TDF_DETAILS))
3055 {
3056 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3057 (double) prob / REG_BR_PROB_BASE);
3058 }
3059 info->account_size_time (e->size, add_time, p, nonconstp);
3060 }
3061 }
3062 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3063 offset_map, clause, &toplev_predicate);
3064 remap_hint_predicate (info, callee_info,
3065 &callee_info->loop_iterations,
3066 operand_map, offset_map, clause, &toplev_predicate);
3067 remap_hint_predicate (info, callee_info,
3068 &callee_info->loop_stride,
3069 operand_map, offset_map, clause, &toplev_predicate);
3070
3071 ipa_call_summary *s = ipa_call_summaries->get (edge);
3072 inline_update_callee_summaries (edge->callee, s->loop_depth);
3073
3074 /* We do not maintain predicates of inlined edges, free it. */
3075 edge_set_predicate (edge, &true_p);
3076 /* Similarly remove param summaries. */
3077 es->param.release ();
3078 operand_map.release ();
3079 offset_map.release ();
3080 }
3081
3082 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3083 and time. Recompute it. */
3084
3085 void
3086 ipa_update_overall_fn_summary (struct cgraph_node *node)
3087 {
3088 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
3089 size_time_entry *e;
3090 int i;
3091
3092 info->size = 0;
3093 info->time = 0;
3094 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3095 {
3096 info->size += e->size;
3097 info->time += e->time;
3098 }
3099 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3100 &info->time, NULL,
3101 ~(clause_t) (1 << predicate::false_condition),
3102 vNULL, vNULL, vNULL);
3103 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3104 }
3105
3106
3107 /* This function performs intraprocedural analysis in NODE that is required to
3108 inline indirect calls. */
3109
3110 static void
3111 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3112 {
3113 ipa_analyze_node (node);
3114 if (dump_file && (dump_flags & TDF_DETAILS))
3115 {
3116 ipa_print_node_params (dump_file, node);
3117 ipa_print_node_jump_functions (dump_file, node);
3118 }
3119 }
3120
3121
3122 /* Note function body size. */
3123
3124 void
3125 inline_analyze_function (struct cgraph_node *node)
3126 {
3127 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3128
3129 if (dump_file)
3130 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3131 node->name (), node->order);
3132 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3133 inline_indirect_intraprocedural_analysis (node);
3134 compute_fn_summary (node, false);
3135 if (!optimize)
3136 {
3137 struct cgraph_edge *e;
3138 for (e = node->callees; e; e = e->next_callee)
3139 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3140 for (e = node->indirect_calls; e; e = e->next_callee)
3141 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3142 }
3143
3144 pop_cfun ();
3145 }
3146
3147
3148 /* Called when new function is inserted to callgraph late. */
3149
3150 void
3151 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3152 {
3153 inline_analyze_function (node);
3154 }
3155
3156 /* Note function body size. */
3157
3158 static void
3159 ipa_fn_summary_generate (void)
3160 {
3161 struct cgraph_node *node;
3162
3163 FOR_EACH_DEFINED_FUNCTION (node)
3164 if (DECL_STRUCT_FUNCTION (node->decl))
3165 node->local.versionable = tree_versionable_function_p (node->decl);
3166
3167 ipa_fn_summary_alloc ();
3168
3169 ipa_fn_summaries->enable_insertion_hook ();
3170
3171 ipa_register_cgraph_hooks ();
3172
3173 FOR_EACH_DEFINED_FUNCTION (node)
3174 if (!node->alias
3175 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3176 || opt_for_fn (node->decl, optimize)))
3177 inline_analyze_function (node);
3178 }
3179
3180
3181 /* Write inline summary for edge E to OB. */
3182
3183 static void
3184 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
3185 bool prevails)
3186 {
3187 class ipa_call_summary *es = prevails
3188 ? ipa_call_summaries->get_create (e) : NULL;
3189 predicate p;
3190 int length, i;
3191
3192 int size = streamer_read_uhwi (ib);
3193 int time = streamer_read_uhwi (ib);
3194 int depth = streamer_read_uhwi (ib);
3195
3196 if (es)
3197 {
3198 es->call_stmt_size = size;
3199 es->call_stmt_time = time;
3200 es->loop_depth = depth;
3201 }
3202
3203 bitpack_d bp = streamer_read_bitpack (ib);
3204 if (es)
3205 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3206 else
3207 bp_unpack_value (&bp, 1);
3208
3209 p.stream_in (ib);
3210 if (es)
3211 edge_set_predicate (e, &p);
3212 length = streamer_read_uhwi (ib);
3213 if (length && es && e->possibly_call_in_translation_unit_p ())
3214 {
3215 es->param.safe_grow_cleared (length);
3216 for (i = 0; i < length; i++)
3217 es->param[i].change_prob = streamer_read_uhwi (ib);
3218 }
3219 else
3220 {
3221 for (i = 0; i < length; i++)
3222 streamer_read_uhwi (ib);
3223 }
3224 }
3225
3226
3227 /* Stream in inline summaries from the section. */
3228
3229 static void
3230 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3231 size_t len)
3232 {
3233 const struct lto_function_header *header =
3234 (const struct lto_function_header *) data;
3235 const int cfg_offset = sizeof (struct lto_function_header);
3236 const int main_offset = cfg_offset + header->cfg_size;
3237 const int string_offset = main_offset + header->main_size;
3238 class data_in *data_in;
3239 unsigned int i, count2, j;
3240 unsigned int f_count;
3241
3242 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3243 file_data->mode_table);
3244
3245 data_in =
3246 lto_data_in_create (file_data, (const char *) data + string_offset,
3247 header->string_size, vNULL);
3248 f_count = streamer_read_uhwi (&ib);
3249 for (i = 0; i < f_count; i++)
3250 {
3251 unsigned int index;
3252 struct cgraph_node *node;
3253 class ipa_fn_summary *info;
3254 lto_symtab_encoder_t encoder;
3255 struct bitpack_d bp;
3256 struct cgraph_edge *e;
3257 predicate p;
3258
3259 index = streamer_read_uhwi (&ib);
3260 encoder = file_data->symtab_node_encoder;
3261 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3262 index));
3263 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
3264
3265 int stack_size = streamer_read_uhwi (&ib);
3266 int size = streamer_read_uhwi (&ib);
3267 sreal time = sreal::stream_in (&ib);
3268
3269 if (info)
3270 {
3271 info->estimated_stack_size
3272 = info->estimated_self_stack_size = stack_size;
3273 info->size = info->self_size = size;
3274 info->time = time;
3275 }
3276
3277 bp = streamer_read_bitpack (&ib);
3278 if (info)
3279 {
3280 info->inlinable = bp_unpack_value (&bp, 1);
3281 info->fp_expressions = bp_unpack_value (&bp, 1);
3282 }
3283 else
3284 {
3285 bp_unpack_value (&bp, 1);
3286 bp_unpack_value (&bp, 1);
3287 }
3288
3289 count2 = streamer_read_uhwi (&ib);
3290 gcc_assert (!info || !info->conds);
3291 for (j = 0; j < count2; j++)
3292 {
3293 struct condition c;
3294 c.operand_num = streamer_read_uhwi (&ib);
3295 c.size = streamer_read_uhwi (&ib);
3296 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3297 c.val = stream_read_tree (&ib, data_in);
3298 bp = streamer_read_bitpack (&ib);
3299 c.agg_contents = bp_unpack_value (&bp, 1);
3300 c.by_ref = bp_unpack_value (&bp, 1);
3301 if (c.agg_contents)
3302 c.offset = streamer_read_uhwi (&ib);
3303 if (info)
3304 vec_safe_push (info->conds, c);
3305 }
3306 count2 = streamer_read_uhwi (&ib);
3307 gcc_assert (!info || !info->size_time_table);
3308 for (j = 0; j < count2; j++)
3309 {
3310 class size_time_entry e;
3311
3312 e.size = streamer_read_uhwi (&ib);
3313 e.time = sreal::stream_in (&ib);
3314 e.exec_predicate.stream_in (&ib);
3315 e.nonconst_predicate.stream_in (&ib);
3316
3317 if (info)
3318 vec_safe_push (info->size_time_table, e);
3319 }
3320
3321 p.stream_in (&ib);
3322 if (info)
3323 set_hint_predicate (&info->loop_iterations, p);
3324 p.stream_in (&ib);
3325 if (info)
3326 set_hint_predicate (&info->loop_stride, p);
3327 for (e = node->callees; e; e = e->next_callee)
3328 read_ipa_call_summary (&ib, e, info != NULL);
3329 for (e = node->indirect_calls; e; e = e->next_callee)
3330 read_ipa_call_summary (&ib, e, info != NULL);
3331 }
3332
3333 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3334 len);
3335 lto_data_in_delete (data_in);
3336 }
3337
3338
3339 /* Read inline summary. Jump functions are shared among ipa-cp
3340 and inliner, so when ipa-cp is active, we don't need to write them
3341 twice. */
3342
3343 static void
3344 ipa_fn_summary_read (void)
3345 {
3346 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3347 struct lto_file_decl_data *file_data;
3348 unsigned int j = 0;
3349
3350 ipa_fn_summary_alloc ();
3351
3352 while ((file_data = file_data_vec[j++]))
3353 {
3354 size_t len;
3355 const char *data = lto_get_section_data (file_data,
3356 LTO_section_ipa_fn_summary,
3357 NULL, &len);
3358 if (data)
3359 inline_read_section (file_data, data, len);
3360 else
3361 /* Fatal error here. We do not want to support compiling ltrans units
3362 with different version of compiler or different flags than the WPA
3363 unit, so this should never happen. */
3364 fatal_error (input_location,
3365 "ipa inline summary is missing in input file");
3366 }
3367 ipa_register_cgraph_hooks ();
3368 if (!flag_ipa_cp)
3369 ipa_prop_read_jump_functions ();
3370
3371 gcc_assert (ipa_fn_summaries);
3372 ipa_fn_summaries->enable_insertion_hook ();
3373 }
3374
3375
3376 /* Write inline summary for edge E to OB. */
3377
3378 static void
3379 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3380 {
3381 class ipa_call_summary *es = ipa_call_summaries->get (e);
3382 int i;
3383
3384 streamer_write_uhwi (ob, es->call_stmt_size);
3385 streamer_write_uhwi (ob, es->call_stmt_time);
3386 streamer_write_uhwi (ob, es->loop_depth);
3387
3388 bitpack_d bp = bitpack_create (ob->main_stream);
3389 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
3390 streamer_write_bitpack (&bp);
3391
3392 if (es->predicate)
3393 es->predicate->stream_out (ob);
3394 else
3395 streamer_write_uhwi (ob, 0);
3396 streamer_write_uhwi (ob, es->param.length ());
3397 for (i = 0; i < (int) es->param.length (); i++)
3398 streamer_write_uhwi (ob, es->param[i].change_prob);
3399 }
3400
3401
3402 /* Write inline summary for node in SET.
3403 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3404 active, we don't need to write them twice. */
3405
3406 static void
3407 ipa_fn_summary_write (void)
3408 {
3409 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3410 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3411 unsigned int count = 0;
3412 int i;
3413
3414 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3415 {
3416 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3417 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3418 if (cnode && cnode->definition && !cnode->alias)
3419 count++;
3420 }
3421 streamer_write_uhwi (ob, count);
3422
3423 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3424 {
3425 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3426 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3427 if (cnode && cnode->definition && !cnode->alias)
3428 {
3429 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3430 struct bitpack_d bp;
3431 struct cgraph_edge *edge;
3432 int i;
3433 size_time_entry *e;
3434 struct condition *c;
3435
3436 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3437 streamer_write_hwi (ob, info->estimated_self_stack_size);
3438 streamer_write_hwi (ob, info->self_size);
3439 info->time.stream_out (ob);
3440 bp = bitpack_create (ob->main_stream);
3441 bp_pack_value (&bp, info->inlinable, 1);
3442 bp_pack_value (&bp, false, 1);
3443 bp_pack_value (&bp, info->fp_expressions, 1);
3444 streamer_write_bitpack (&bp);
3445 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3446 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3447 {
3448 streamer_write_uhwi (ob, c->operand_num);
3449 streamer_write_uhwi (ob, c->size);
3450 streamer_write_uhwi (ob, c->code);
3451 stream_write_tree (ob, c->val, true);
3452 bp = bitpack_create (ob->main_stream);
3453 bp_pack_value (&bp, c->agg_contents, 1);
3454 bp_pack_value (&bp, c->by_ref, 1);
3455 streamer_write_bitpack (&bp);
3456 if (c->agg_contents)
3457 streamer_write_uhwi (ob, c->offset);
3458 }
3459 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3460 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3461 {
3462 streamer_write_uhwi (ob, e->size);
3463 e->time.stream_out (ob);
3464 e->exec_predicate.stream_out (ob);
3465 e->nonconst_predicate.stream_out (ob);
3466 }
3467 if (info->loop_iterations)
3468 info->loop_iterations->stream_out (ob);
3469 else
3470 streamer_write_uhwi (ob, 0);
3471 if (info->loop_stride)
3472 info->loop_stride->stream_out (ob);
3473 else
3474 streamer_write_uhwi (ob, 0);
3475 for (edge = cnode->callees; edge; edge = edge->next_callee)
3476 write_ipa_call_summary (ob, edge);
3477 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3478 write_ipa_call_summary (ob, edge);
3479 }
3480 }
3481 streamer_write_char_stream (ob->main_stream, 0);
3482 produce_asm (ob, NULL);
3483 destroy_output_block (ob);
3484
3485 if (!flag_ipa_cp)
3486 ipa_prop_write_jump_functions ();
3487 }
3488
3489
3490 /* Release inline summary. */
3491
3492 void
3493 ipa_free_fn_summary (void)
3494 {
3495 struct cgraph_node *node;
3496 if (!ipa_call_summaries)
3497 return;
3498 FOR_EACH_DEFINED_FUNCTION (node)
3499 if (!node->alias)
3500 ipa_fn_summaries->remove (node);
3501 ipa_fn_summaries->release ();
3502 ipa_fn_summaries = NULL;
3503 ipa_call_summaries->release ();
3504 delete ipa_call_summaries;
3505 ipa_call_summaries = NULL;
3506 edge_predicate_pool.release ();
3507 }
3508
3509 namespace {
3510
3511 const pass_data pass_data_local_fn_summary =
3512 {
3513 GIMPLE_PASS, /* type */
3514 "local-fnsummary", /* name */
3515 OPTGROUP_INLINE, /* optinfo_flags */
3516 TV_INLINE_PARAMETERS, /* tv_id */
3517 0, /* properties_required */
3518 0, /* properties_provided */
3519 0, /* properties_destroyed */
3520 0, /* todo_flags_start */
3521 0, /* todo_flags_finish */
3522 };
3523
3524 class pass_local_fn_summary : public gimple_opt_pass
3525 {
3526 public:
3527 pass_local_fn_summary (gcc::context *ctxt)
3528 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3529 {}
3530
3531 /* opt_pass methods: */
3532 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3533 virtual unsigned int execute (function *)
3534 {
3535 return compute_fn_summary_for_current ();
3536 }
3537
3538 }; // class pass_local_fn_summary
3539
3540 } // anon namespace
3541
3542 gimple_opt_pass *
3543 make_pass_local_fn_summary (gcc::context *ctxt)
3544 {
3545 return new pass_local_fn_summary (ctxt);
3546 }
3547
3548
3549 /* Free inline summary. */
3550
3551 namespace {
3552
3553 const pass_data pass_data_ipa_free_fn_summary =
3554 {
3555 SIMPLE_IPA_PASS, /* type */
3556 "free-fnsummary", /* name */
3557 OPTGROUP_NONE, /* optinfo_flags */
3558 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3559 0, /* properties_required */
3560 0, /* properties_provided */
3561 0, /* properties_destroyed */
3562 0, /* todo_flags_start */
3563 0, /* todo_flags_finish */
3564 };
3565
3566 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3567 {
3568 public:
3569 pass_ipa_free_fn_summary (gcc::context *ctxt)
3570 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
3571 small_p (false)
3572 {}
3573
3574 /* opt_pass methods: */
3575 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
3576 void set_pass_param (unsigned int n, bool param)
3577 {
3578 gcc_assert (n == 0);
3579 small_p = param;
3580 }
3581 virtual bool gate (function *) { return small_p || !flag_wpa; }
3582 virtual unsigned int execute (function *)
3583 {
3584 ipa_free_fn_summary ();
3585 return 0;
3586 }
3587
3588 private:
3589 bool small_p;
3590 }; // class pass_ipa_free_fn_summary
3591
3592 } // anon namespace
3593
3594 simple_ipa_opt_pass *
3595 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3596 {
3597 return new pass_ipa_free_fn_summary (ctxt);
3598 }
3599
3600 namespace {
3601
3602 const pass_data pass_data_ipa_fn_summary =
3603 {
3604 IPA_PASS, /* type */
3605 "fnsummary", /* name */
3606 OPTGROUP_INLINE, /* optinfo_flags */
3607 TV_IPA_FNSUMMARY, /* tv_id */
3608 0, /* properties_required */
3609 0, /* properties_provided */
3610 0, /* properties_destroyed */
3611 0, /* todo_flags_start */
3612 ( TODO_dump_symtab ), /* todo_flags_finish */
3613 };
3614
3615 class pass_ipa_fn_summary : public ipa_opt_pass_d
3616 {
3617 public:
3618 pass_ipa_fn_summary (gcc::context *ctxt)
3619 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3620 ipa_fn_summary_generate, /* generate_summary */
3621 ipa_fn_summary_write, /* write_summary */
3622 ipa_fn_summary_read, /* read_summary */
3623 NULL, /* write_optimization_summary */
3624 NULL, /* read_optimization_summary */
3625 NULL, /* stmt_fixup */
3626 0, /* function_transform_todo_flags_start */
3627 NULL, /* function_transform */
3628 NULL) /* variable_transform */
3629 {}
3630
3631 /* opt_pass methods: */
3632 virtual unsigned int execute (function *) { return 0; }
3633
3634 }; // class pass_ipa_fn_summary
3635
3636 } // anon namespace
3637
3638 ipa_opt_pass_d *
3639 make_pass_ipa_fn_summary (gcc::context *ctxt)
3640 {
3641 return new pass_ipa_fn_summary (ctxt);
3642 }
3643
3644 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3645 within the same process. For use by toplev::finalize. */
3646
3647 void
3648 ipa_fnsummary_c_finalize (void)
3649 {
3650 ipa_free_fn_summary ();
3651 }