]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-fnsummary.c
Update copyright years.
[thirdparty/gcc.git] / gcc / ipa-fnsummary.c
1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52    inliner performs analysis via its analyze_function method. */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "params.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86
87 /* Summaries. */
88 function_summary <ipa_fn_summary *> *ipa_fn_summaries;
89 call_summary <ipa_call_summary *> *ipa_call_summaries;
90
91 /* Edge predicates goes here. */
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_array_index)
138 {
139 hints &= ~INLINE_HINT_array_index;
140 fprintf (f, " array_index");
141 }
142 if (hints & INLINE_HINT_known_hot)
143 {
144 hints &= ~INLINE_HINT_known_hot;
145 fprintf (f, " known_hot");
146 }
147 gcc_assert (!hints);
148 }
149
150
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  /* Code that never executes needs no accounting at all.  */
  if (exec_pred == false)
    return;

  /* Code can only evaluate non-constantly when it is executed at all.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  /* Entries with equal predicate pairs are accumulated together; look for
     an existing one first.  */
  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  /* Cap the table at 256 entries: once full, fold further contributions
     into the first (unconditional) entry rather than growing further.  */
  if (i == 256)
    {
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      struct size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      /* Accumulate into the matching (or fallback) entry.  */
      e->size += size;
      e->time += time;
    }
}
228
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* For an already inlined edge remember the callee; its body (and inline
     clones) must be removed once the edge is redirected away from it.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		  (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  /* Speculative edges must be resolved first; indirect edges are made
     direct, and direct ones simply redirected.  */
  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  /* The call never executes: zero its profile and summary costs.  */
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
253
254 /* Set predicate for edge E. */
255
256 static void
257 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
258 {
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
261 be optimized out. */
262 if (predicate && *predicate == false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e->speculative || e->callee))
267 e = redirect_to_unreachable (e);
268
269 struct ipa_call_summary *es = ipa_call_summaries->get (e);
270 if (predicate && *predicate != true)
271 {
272 if (!es->predicate)
273 es->predicate = edge_predicate_pool.allocate ();
274 *es->predicate = *predicate;
275 }
276 else
277 {
278 if (es->predicate)
279 edge_predicate_pool.remove (es->predicate);
280 es->predicate = NULL;
281 }
282 }
283
284 /* Set predicate for hint *P. */
285
286 static void
287 set_hint_predicate (predicate **p, predicate new_predicate)
288 {
289 if (new_predicate == false || new_predicate == true)
290 {
291 if (*p)
292 edge_predicate_pool.remove (*p);
293 *p = NULL;
294 }
295 else
296 {
297 if (!*p)
298 *p = edge_predicate_pool.allocate ();
299 **p = new_predicate;
300 }
301 }
302
303
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that condition is known to be false, while bit 1 means that condition
   may or may not be true.  These differ - for example NOT_INLINED condition
   is always false in the second and also builtin_constant_p tests can not use
   the fact that parameter is indeed a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate jump functions for each parameter.
   Return clause of possible truths.  When INLINE_P is true, assume that we are
   inlining.

   ERROR_MARK means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<ipa_agg_jump_function_p>
				    known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  /* When not inlining, NOT_INLINED is known to hold in the specialized
     copy; in the nonspecialized clause it is always set.  */
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;

      /* We allow call stmt to have fewer arguments than the callee function
         (especially for K&R style programs).  So bound check here (we assume
         known_aggs vector, if non-NULL, has the same length as
         known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  /* Nothing known about this operand: condition may be true in
	     both clauses.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  struct ipa_agg_jump_function *agg;

	  /* A by-value aggregate whose scalar value is compile-time
	     invariant (ERROR_MARK) cannot have "changed".  */
	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = known_vals[c->operand_num];
	  /* ERROR_MARK (compile time invariant) only satisfies the
	     "changed" test; treat it as unknown otherwise.  */
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  /* A known value did not change in the specialized copy, but may
	     have in the nonspecialized one.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* A size mismatch means the condition cannot be evaluated.  */
      if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::is_not_constant)
	{
	  /* builtin_constant_p cannot rely on specialization in the
	     nonspecialized copy.  */
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Fold the condition on the known constant; only a definite zero
	 result proves the condition false.  */
      val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
      res = val
	? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	: NULL;

      if (res && integer_zerop (res))
	continue;

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
417
418
/* Work out what conditions might be true at invocation of E.
   Computes the clause/nonspec-clause of the callee and, when requested,
   fills in vectors of known scalar values, polymorphic contexts and
   aggregate jump functions for the call's arguments.  Output vectors are
   owned by the caller.  When INLINE_P is true, assume we are inlining.  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_jump_function_p> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  vec<ipa_agg_jump_function_p> known_aggs = vNULL;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  /* With IPA parameter analysis available, derive argument information
     from the jump functions of the edge.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
    {
      struct ipa_node_params *caller_parms_info, *callee_pi;
      struct ipa_edge_args *args = IPA_EDGE_REF (e);
      struct ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      /* Jump functions are evaluated in the context of the function the
	 caller was eventually inlined into.  */
      if (e->caller->global.inlined_to)
	caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
      else
	caller_parms_info = IPA_NODE_REF (e->caller);
      callee_pi = IPA_NODE_REF (e->callee);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	  tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
					   ipa_get_type (callee_pi, i));

	  /* Fall back to a constant taken directly from the call
	     statement when the jump function gives nothing.  */
	  if (!cst && e->call_stmt
	      && i < (int)gimple_call_num_args (e->call_stmt))
	    {
	      cst = gimple_call_arg (e->call_stmt, i);
	      if (!is_gimple_min_invariant (cst))
		cst = NULL;
	    }
	  if (cst)
	    {
	      gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
	      if (known_vals.exists ())
		known_vals[i] = cst;
	    }
	  /* ERROR_MARK marks a compile time invariant argument (its value
	     is unknown but it never changes across the call).  */
	  else if (inline_p && !es->param[i].change_prob)
	    known_vals[i] = error_mark_node;

	  if (known_contexts_ptr)
	    (*known_contexts_ptr)[i]
	      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
	  /* TODO: When IPA-CP starts propagating and merging aggregate jump
	     functions, use its knowledge of the caller too, just like the
	     scalar case above.  */
	  known_aggs[i] = &jf->agg;
	}
    }
  /* Without IPA parameter analysis, only constants appearing directly in
     the call statement can be used.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals, known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  /* Hand the vectors to the caller or release them.  */
  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    known_aggs.release ();
}
526
527
/* Allocate the function and call summaries.  Must be called exactly once
   before the summaries are used.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab, false);
}
537
/* Release resources held by a call summary: its pooled predicate (if any)
   and the per-parameter information vector.  */

ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);

  param.release ();
}
545
/* Release resources held by a function summary: pooled hint predicates and
   the conditions and size/time tables.  */

ipa_fn_summary::~ipa_fn_summary ()
{
  if (loop_iterations)
    edge_predicate_pool.remove (loop_iterations);
  if (loop_stride)
    edge_predicate_pool.remove (loop_stride);
  if (array_index)
    edge_predicate_pool.remove (array_index);
  vec_free (conds);
  vec_free (size_time_table);
}
557
558 void
559 ipa_fn_summary_t::remove_callees (cgraph_node *node)
560 {
561 cgraph_edge *e;
562 for (e = node->callees; e; e = e->next_callee)
563 ipa_call_summaries->remove (e);
564 for (e = node->indirect_calls; e; e = e->next_callee)
565 ipa_call_summaries->remove (e);
566 }
567
568 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
569 Additionally care about allocating new memory slot for updated predicate
570 and set it to NULL when it becomes true or false (and thus uninteresting).
571 */
572
573 static void
574 remap_hint_predicate_after_duplication (predicate **p,
575 clause_t possible_truths)
576 {
577 predicate new_predicate;
578
579 if (!*p)
580 return;
581
582 new_predicate = (*p)->remap_after_duplication (possible_truths);
583 /* We do not want to free previous predicate; it is used by node origin. */
584 *p = NULL;
585 set_hint_predicate (p, new_predicate);
586 }
587
588
/* Hook that is called by cgraph.c when a node is duplicated.
   Copies SRC's summary into INFO (the summary of DST) and, when DST is a
   clone with parameter replacements, re-evaluates all predicates under the
   truths implied by those replacements.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      /* Keep the old size/time table aside; a remapped one is rebuilt
	 below into info->size_time_table.  */
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      struct ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Collect the replacement value (if any) for each parameter.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (((!r->old_tree && r->parm_num == i)
		   || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
		  && r->replace_p && !r->ref_p)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Start the new table with the unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as on optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
			  (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
			  (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  struct ipa_call_summary *es = ipa_call_summaries->get_create (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->array_index,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* No replacements: a plain copy of the tables suffices, but hint
	 predicates must get their own pool slots.  */
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
      if (info->array_index)
	{
	  predicate p = *info->array_index;
	  info->array_index = NULL;
	  set_hint_predicate (&info->array_index, p);
	}
    }
  if (!dst->global.inlined_to)
    ipa_update_overall_fn_summary (dst);
}
740
741
742 /* Hook that is called by cgraph.c when a node is duplicated. */
743
744 void
745 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
746 struct cgraph_edge *dst,
747 struct ipa_call_summary *srcinfo,
748 struct ipa_call_summary *info)
749 {
750 new (info) ipa_call_summary (*srcinfo);
751 info->predicate = NULL;
752 edge_set_predicate (dst, srcinfo->predicate);
753 info->param = srcinfo->param.copy ();
754 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
755 {
756 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
757 - eni_size_weights.call_cost);
758 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
759 - eni_time_weights.call_cost);
760 }
761 }
762
/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  INFO supplies the condition vector used when dumping
   predicates.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       struct ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s  loop depth:%2i freq:%4.2f size:%2i time: %2i",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
	       indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
	       es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      if (s != NULL)
	fprintf (f, "callee size:%2i stack:%2i",
		 (int) (s->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      /* predicate::dump terminates the line; otherwise do it here.  */
      if (es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      if (es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      /* Recurse into inlined callees so the whole inline tree is shown.  */
      if (!edge->inline_failed)
	{
	  ipa_fn_summary *s = ipa_fn_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i,"
		   " callee size %i\n",
		   indent + 2, "",
		   (int) s->stack_frame_offset,
		   (int) s->estimated_self_stack_size,
		   (int) s->estimated_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      struct ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
840
841
/* Dump the function summary of NODE (sizes, times, size/time table with
   predicates, hints and the call summaries) to F.  Does nothing for
   declarations without a body.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
	  fprintf (f, "  self size:       %i\n", s->self_size);
	  fprintf (f, "  global size:     %i\n", s->size);
	  fprintf (f, "  min size:       %i\n", s->min_size);
	  fprintf (f, "  self stack:      %i\n",
		   (int) s->estimated_self_stack_size);
	  fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, "  estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, "  In SCC:          %i\n", (int) s->scc_no);
	  /* Dump each size/time entry together with the predicates
	     guarding it.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, "    size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ",  executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ",  nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  if (s->loop_iterations)
	    {
	      fprintf (f, "  loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, "  loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  if (s->array_index)
	    {
	      fprintf (f, "  array index:");
	      s->array_index->dump (f, s->conds);
	    }
	  fprintf (f, "  calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
910
/* Debugger-callable wrapper dumping NODE's summary to stderr.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
916
/* Dump summaries of all defined functions to F.  Inline clones are dumped
   as part of their inline-to root, hence skipped here.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
      ipa_dump_fn_summary (f, node);
}
926
927 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
928 boolean variable pointed to by DATA. */
929
930 static bool
931 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
932 void *data)
933 {
934 bool *b = (bool *) data;
935 *b = true;
936 return true;
937 }
938
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      /* Walk the virtual defs reaching STMT; any aliased store marks the
	 parameter as modified and disqualifies it.  */
      ao_ref refd;
      ao_ref_init (&refd, op);
      walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
			  NULL);
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
973
974 /* If OP refers to value of function parameter, return the corresponding
975 parameter. Also traverse chains of SSA register assignments. If non-NULL,
976 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
977 stored to *SIZE_P in that case too. */
978
979 static tree
980 unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
981 {
982 tree res = unmodified_parm_1 (stmt, op, size_p);
983 if (res)
984 return res;
985
986 if (TREE_CODE (op) == SSA_NAME
987 && !SSA_NAME_IS_DEFAULT_DEF (op)
988 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
989 return unmodified_parm (SSA_NAME_DEF_STMT (op),
990 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
991 size_p);
992 return NULL_TREE;
993 }
994
/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  HOST_WIDE_INT *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (stmt, op, size_p);

  gcc_checking_assert (aggpos);
  /* Direct scalar parameter reference.  */
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  /* For an SSA name, step to its (single-rhs) defining assignment; a
     non-reference rhs is retried as a plain parameter use.  */
  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* Otherwise OP is a memory reference; see if it loads from an aggregate
     passed in a parameter.  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
1038
/* See if statement might disappear after inlining.
   0 - means not eliminated
   1 - half of statements goes away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  /* get_base_address returns NULL for non-reference operands;
	     fall back to the operand itself in that case.  */
	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  NOTE(review): this can only trigger for RHS_FREE set by
	     the MEM_REF case just above; earlier rhs_free causes return 2.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     retrurnsturct (void)
	     {
	     struct a a ={1,2};
	     return a;
	     }

	     This translate into:

	     retrurnsturct ()
	     {
	     int a$b;
	     int a$a;
	     struct a a;
	     struct a D.2739;

	     <bb 2>:
	     D.2739.a = 1;
	     D.2739.b = 2;
	     return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  /* A free store of a register or invariant makes the whole
	     statement free.  */
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
1169
1170
/* If BB ends by a conditional we can turn into predicates, attach corresponding
   predicates to the CFG edges.  Also handles the special pattern
   "if (__builtin_constant_p (param) != 0)" by predicating the false edge.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   struct ipa_fn_summary *summary,
				   basic_block bb)
{
  gimple *last;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree op2;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  /* Only conditions against an interprocedural invariant RHS can become
     predicates.  */
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);
  /* TODO: handle conditionals like
     var = op0 < 4;
     if (var != 0).  */
  if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.  */
	  if (this_code != ERROR_MARK)
	    {
	      predicate p
		= add_condition (summary, index, size, &aggpos, this_code,
				 unshare_expr_without_location
				 (gimple_cond_rhs (last)));
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
     constant_code
     else
     nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
					 &aggpos))
    return;
  /* Predicate only the false edge: it is not executed when the operand
     is known constant.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, index, size, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
1254
1255
/* If BB ends by a switch we can turn into predicates, attach corresponding
   predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     struct ipa_fn_summary *summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  /* Only switches on (something derived from) a parameter can be turned
     into predicates.  */
  if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
    return;

  /* Start with "never executed" on every edge; case labels below OR in
     their conditions (one edge can serve several labels).  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }
  n = gimple_switch_num_labels (last);
  for (case_idx = 0; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min, max;
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);
      min = CASE_LOW (cl);
      max = CASE_HIGH (cl);

      /* For default we might want to construct predicate that none
	 of cases is met, but it is bit hard to do not having negations
	 of conditionals handy.  */
      if (!min && !max)
	p = true;
      else if (!max)
	/* Single-value label: OP == MIN.  */
	p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
			   unshare_expr_without_location (min));
      else
	{
	  /* Range label: MIN <= OP && OP <= MAX.  */
	  predicate p1, p2;
	  p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
			      unshare_expr_without_location (min));
	  p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
			      unshare_expr_without_location (max));
	  p = p1 & p2;
	}
      *(struct predicate *) e->aux
	= p.or_with (summary->conds, *(struct predicate *) e->aux);
    }
}
1319
1320
/* For each BB in NODE attach to its AUX pointer predicate under
   which it is executable.  Implemented as a forward dataflow iteration
   to a fixed point: a block's predicate is the OR over its incoming
   edges of (source block predicate AND edge predicate).  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       struct ipa_fn_summary *summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* First attach predicates to edges leaving conditional and switch
     statements.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      /* NULL aux means "not yet known executable"; skip.  */
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(struct predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* TRUE is the top element; no point accumulating more.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p == false)
	    gcc_checking_assert (!bb->aux);
	  else
	    {
	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}
	    }
	}
    }
}
1393
1394
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  Recurses structurally over EXPR: unary nodes
   are stripped, binary/comparison/COND_EXPR combine operand predicates
   with OR, invariants yield FALSE and unhandled CALL_EXPRs yield TRUE.  */

static predicate
will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
				    struct ipa_fn_summary *summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;
  HOST_WIDE_INT size;

  /* Unary operations do not change constancy; look through them.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (NULL, expr, &size);
  if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
    return add_condition (summary, index, size, NULL, predicate::changed,
			  NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      /* TRUE is absorbing for OR; avoid recursing further.  */
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1 = will_be_nonconstant_expr_predicate
	(info, summary, TREE_OPERAND (expr, 0),
	 nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2;
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 1),
					       nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (info, summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
1462
1463
/* Return predicate specifying when the STMT might have result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       struct ipa_fn_summary *summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  predicate op_non_const;
  bool is_load;
  int base_index;
  HOST_WIDE_INT size;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op;
      gcc_assert (gimple_assign_single_p (stmt));
      op = gimple_assign_rhs1 (stmt);
      if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
					     &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Now build the OR of per-operand "will change" predicates; for a load
     start with the predicate of the loaded location.  */
  if (is_load)
    op_non_const =
      add_condition (summary, base_index, size, &aggpos, predicate::changed,
		     NULL);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      HOST_WIDE_INT size;
      tree parm = unmodified_parm (stmt, use, &size);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, index, size, NULL, predicate::changed,
			       NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the result predicate for the SSA name this statement defines
     so later uses can pick it up.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
1558
/* Bookkeeping shared between param_change_prob and its walk_aliased_vdefs
   callback record_modified.  */
struct record_modified_bb_info
{
  /* The value being tracked (used only for dumping).  */
  tree op;
  /* Indices of basic blocks where the value may be modified.  */
  bitmap bb_set;
  /* The statement using the value; its own vdef is not a modification.  */
  gimple *stmt;
};
1565
1566 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1567 probability how often it changes between USE_BB.
1568 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1569 is in different loop nest, we can do better.
1570 This is all just estimate. In theory we look for minimal cut separating
1571 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1572 anyway. */
1573
1574 static basic_block
1575 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1576 {
1577 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1578 if (l && l->header->count < init_bb->count)
1579 return l->header;
1580 return init_bb;
1581 }
1582
/* Callback of walk_aliased_vdefs.  Records basic blocks where the value may be
   set except for info->stmt.  Always returns false so the walk visits all
   vdefs.  */

static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* The using statement's own vdef is not a modification we care about.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers only mark end of lifetime; they do not change the value.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  /* Default defs are attributed to the entry block; others to the
     cheapest dominating block get_minimal_bb finds.  */
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, "     Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  return false;
}
1614
1615 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1616 will change since last invocation of STMT.
1617
1618 Value 0 is reserved for compile time invariants.
1619 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1620 ought to be REG_BR_PROB_BASE / estimated_iters. */
1621
1622 static int
1623 param_change_prob (gimple *stmt, int i)
1624 {
1625 tree op = gimple_call_arg (stmt, i);
1626 basic_block bb = gimple_bb (stmt);
1627
1628 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1629 op = TREE_OPERAND (op, 0);
1630
1631 tree base = get_base_address (op);
1632
1633 /* Global invariants never change. */
1634 if (is_gimple_min_invariant (base))
1635 return 0;
1636
1637 /* We would have to do non-trivial analysis to really work out what
1638 is the probability of value to change (i.e. when init statement
1639 is in a sibling loop of the call).
1640
1641 We do an conservative estimate: when call is executed N times more often
1642 than the statement defining value, we take the frequency 1/N. */
1643 if (TREE_CODE (base) == SSA_NAME)
1644 {
1645 profile_count init_count;
1646
1647 if (!bb->count.nonzero_p ())
1648 return REG_BR_PROB_BASE;
1649
1650 if (SSA_NAME_IS_DEFAULT_DEF (base))
1651 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1652 else
1653 init_count = get_minimal_bb
1654 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1655 gimple_bb (stmt))->count;
1656
1657 if (init_count < bb->count)
1658 return MAX ((init_count.to_sreal_scale (bb->count)
1659 * REG_BR_PROB_BASE).to_int (), 1);
1660 return REG_BR_PROB_BASE;
1661 }
1662 else
1663 {
1664 ao_ref refd;
1665 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1666 struct record_modified_bb_info info;
1667 tree init = ctor_for_folding (base);
1668
1669 if (init != error_mark_node)
1670 return 0;
1671 if (!bb->count.nonzero_p ())
1672 return REG_BR_PROB_BASE;
1673 if (dump_file)
1674 {
1675 fprintf (dump_file, " Analyzing param change probablity of ");
1676 print_generic_expr (dump_file, op, TDF_SLIM);
1677 fprintf (dump_file, "\n");
1678 }
1679 ao_ref_init (&refd, op);
1680 info.op = op;
1681 info.stmt = stmt;
1682 info.bb_set = BITMAP_ALLOC (NULL);
1683 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1684 NULL);
1685 if (bitmap_bit_p (info.bb_set, bb->index))
1686 {
1687 if (dump_file)
1688 fprintf (dump_file, " Set in same BB as used.\n");
1689 BITMAP_FREE (info.bb_set);
1690 return REG_BR_PROB_BASE;
1691 }
1692
1693 bitmap_iterator bi;
1694 unsigned index;
1695 /* Lookup the most frequent update of the value and believe that
1696 it dominates all the other; precise analysis here is difficult. */
1697 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1698 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
1699 if (dump_file)
1700 {
1701 fprintf (dump_file, " Set with count ");
1702 max.dump (dump_file);
1703 fprintf (dump_file, " and used with count ");
1704 bb->count.dump (dump_file);
1705 fprintf (dump_file, " freq %f\n",
1706 max.to_sreal_scale (bb->count).to_double ());
1707 }
1708
1709 BITMAP_FREE (info.bb_set);
1710 if (max < bb->count)
1711 return MAX ((max.to_sreal_scale (bb->count)
1712 * REG_BR_PROB_BASE).to_int (), 1);
1713 return REG_BR_PROB_BASE;
1714 }
1715 }
1716
/* Find whether a basic block BB is the final block of a (half) diamond CFG
   sub-graph and if the predicate the condition depends on is known.  If so,
   return true and store the pointer the predicate in *P.  */

static bool
phi_result_unknown_predicate (struct ipa_node_params *info,
			      ipa_fn_summary *summary, basic_block bb,
			      predicate *p,
			      vec<predicate> nonconstant_names)
{
  edge e;
  edge_iterator ei;
  basic_block first_bb = NULL;
  gimple *stmt;

  /* A single predecessor means no join, so the PHI result is trivially
     known (predicate FALSE).  */
  if (single_pred_p (bb))
    {
      *p = false;
      return true;
    }

  /* Verify all predecessors funnel from one common conditional block
     FIRST_BB, possibly through forwarder blocks (half diamond / diamond).  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (single_succ_p (e->src))
	{
	  if (!single_pred_p (e->src))
	    return false;
	  if (!first_bb)
	    first_bb = single_pred (e->src);
	  else if (single_pred (e->src) != first_bb)
	    return false;
	}
      else
	{
	  if (!first_bb)
	    first_bb = e->src;
	  else if (e->src != first_bb)
	    return false;
	}
    }

  if (!first_bb)
    return false;

  stmt = last_stmt (first_bb);
  if (!stmt
      || gimple_code (stmt) != GIMPLE_COND
      || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
    return false;

  /* The PHI argument selection depends on the condition's LHS; a TRUE
     predicate carries no information, so report failure then.  */
  *p = will_be_nonconstant_expr_predicate (info, summary,
					   gimple_cond_lhs (stmt),
					   nonconstant_names);
  if (*p == true)
    return false;
  else
    return true;
}
1775
/* Given a PHI statement in a function described by inline properties SUMMARY
   and *P being the predicate describing whether the selected PHI argument is
   known, store a predicate for the result of the PHI statement into
   NONCONSTANT_NAMES, if possible.  On return *P is the OR of the incoming
   predicate and those of all non-invariant PHI arguments.  */

static void
predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
			  predicate *p,
			  vec<predicate> nonconstant_names)
{
  unsigned i;

  /* OR in the nonconstancy predicate of every non-invariant argument;
     invariant arguments can never make the result nonconstant.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg (phi, i)->def;
      if (!is_gimple_min_invariant (arg))
	{
	  gcc_assert (TREE_CODE (arg) == SSA_NAME);
	  *p = p->or_with (summary->conds,
			   nonconstant_names[SSA_NAME_VERSION (arg)]);
	  /* TRUE is absorbing; nothing more can change and there is no
	     point in recording it.  */
	  if (*p == true)
	    return;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\t\tphi predicate: ");
      p->dump (dump_file, summary->conds);
    }
  nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
}
1808
1809 /* Return predicate specifying when array index in access OP becomes non-constant. */
1810
1811 static predicate
1812 array_index_predicate (ipa_fn_summary *info,
1813 vec< predicate> nonconstant_names, tree op)
1814 {
1815 predicate p = false;
1816 while (handled_component_p (op))
1817 {
1818 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1819 {
1820 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1821 p = p.or_with (info->conds,
1822 nonconstant_names[SSA_NAME_VERSION
1823 (TREE_OPERAND (op, 1))]);
1824 }
1825 op = TREE_OPERAND (op, 0);
1826 }
1827 return p;
1828 }
1829
/* For a typical usage of __builtin_expect (a<b, 1), we
   may introduce an extra relation stmt:
   With the builtin, we have
     t1 = a <= b;
     t2 = (long int) t1;
     t3 = __builtin_expect (t2, 1);
     if (t3 != 0)
       goto ...
   Without the builtin, we have
     if (a<=b)
       goto...
   This affects the size/time estimation and may have
   an impact on the earlier inlining.
   Here find this pattern and fix it up later.  */

static gimple *
find_foldable_builtin_expect (basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
	  || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
	  || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
        {
          tree var = gimple_call_lhs (stmt);
          tree arg = gimple_call_arg (stmt, 0);
          use_operand_p use_p;
	  gimple *use_stmt;
          bool match = false;
          bool done = false;

          if (!var || !arg)
            continue;
          gcc_assert (TREE_CODE (var) == SSA_NAME);

	  /* Walk backwards through conversions to see whether the
	     builtin's argument originates from a comparison.  */
          while (TREE_CODE (arg) == SSA_NAME)
            {
	      gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
              if (!is_gimple_assign (stmt_tmp))
                break;
              switch (gimple_assign_rhs_code (stmt_tmp))
                {
                  case LT_EXPR:
                  case LE_EXPR:
                  case GT_EXPR:
                  case GE_EXPR:
                  case EQ_EXPR:
                  case NE_EXPR:
                    match = true;
                    done = true;
                    break;
                  CASE_CONVERT:
                    break;
                  default:
                    done = true;
                    break;
                }
              if (done)
                break;
              arg = gimple_assign_rhs1 (stmt_tmp);
            }

	  /* Only a result consumed by exactly one GIMPLE_COND matches the
	     pattern; return that conditional for later cost adjustment.  */
          if (match && single_imm_use (var, &use_p, &use_stmt)
              && gimple_code (use_stmt) == GIMPLE_COND)
            return use_stmt;
        }
    }
  return NULL;
}
1902
/* Return true when the basic blocks contains only clobbers followed by RESX.
   Such BBs are kept around to make removal of dead stores possible with
   presence of EH and will be optimized out by optimize_clobbers later in the
   game.

   NEED_EH is used to recurse in case the clobber has non-EH predecessors
   that can be clobber only, too.  When it is false, the RESX is not necessary
   on the end of basic block.  */

static bool
clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
{
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  edge_iterator ei;
  edge e;

  if (need_eh)
    {
      /* The last statement must be a RESX; start scanning before it.  */
      if (gsi_end_p (gsi))
	return false;
      if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
	return false;
      gsi_prev (&gsi);
    }
  else if (!single_succ_p (bb))
    return false;

  /* Scan backwards: only debug statements, clobbers and an optional
     leading label are allowed.  */
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      if (gimple_clobber_p (stmt))
	continue;
      if (gimple_code (stmt) == GIMPLE_LABEL)
	break;
      return false;
    }

  /* See if all predecessors are either throws or clobber only BBs.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (!(e->flags & EDGE_EH)
	&& !clobber_only_eh_bb_p (e->src, false))
      return false;

  return true;
}
1950
1951 /* Return true if STMT compute a floating point expression that may be affected
1952 by -ffast-math and similar flags. */
1953
1954 static bool
1955 fp_expression_p (gimple *stmt)
1956 {
1957 ssa_op_iter i;
1958 tree op;
1959
1960 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1961 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1962 return true;
1963 return false;
1964 }
1965
1966 /* Analyze function body for NODE.
1967 EARLY indicates run from early optimization pipeline. */
1968
1969 static void
1970 analyze_function_body (struct cgraph_node *node, bool early)
1971 {
1972 sreal time = 0;
1973 /* Estimate static overhead for function prologue/epilogue and alignment. */
1974 int size = 2;
1975 /* Benefits are scaled by probability of elimination that is in range
1976 <0,2>. */
1977 basic_block bb;
1978 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1979 sreal freq;
1980 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
1981 predicate bb_predicate;
1982 struct ipa_func_body_info fbi;
1983 vec<predicate> nonconstant_names = vNULL;
1984 int nblocks, n;
1985 int *order;
1986 predicate array_index = true;
1987 gimple *fix_builtin_expect_stmt;
1988
1989 gcc_assert (my_function && my_function->cfg);
1990 gcc_assert (cfun == my_function);
1991
1992 memset(&fbi, 0, sizeof(fbi));
1993 vec_free (info->conds);
1994 info->conds = NULL;
1995 vec_free (info->size_time_table);
1996 info->size_time_table = NULL;
1997
1998 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
1999 so we can produce proper inline hints.
2000
2001 When optimizing and analyzing for early inliner, initialize node params
2002 so we can produce correct BB predicates. */
2003
2004 if (opt_for_fn (node->decl, optimize))
2005 {
2006 calculate_dominance_info (CDI_DOMINATORS);
2007 if (!early)
2008 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2009 else
2010 {
2011 ipa_check_create_node_params ();
2012 ipa_initialize_node_params (node);
2013 }
2014
2015 if (ipa_node_params_sum)
2016 {
2017 fbi.node = node;
2018 fbi.info = IPA_NODE_REF (node);
2019 fbi.bb_infos = vNULL;
2020 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2021 fbi.param_count = count_formal_params(node->decl);
2022 nonconstant_names.safe_grow_cleared
2023 (SSANAMES (my_function)->length ());
2024 }
2025 }
2026
2027 if (dump_file)
2028 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2029 node->name ());
2030
2031 /* When we run into maximal number of entries, we assign everything to the
2032 constant truth case. Be sure to have it in list. */
2033 bb_predicate = true;
2034 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2035
2036 bb_predicate = predicate::not_inlined ();
2037 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
2038 bb_predicate);
2039
2040 if (fbi.info)
2041 compute_bb_predicates (&fbi, node, info);
2042 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2043 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2044 for (n = 0; n < nblocks; n++)
2045 {
2046 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2047 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2048 if (clobber_only_eh_bb_p (bb))
2049 {
2050 if (dump_file && (dump_flags & TDF_DETAILS))
2051 fprintf (dump_file, "\n Ignoring BB %i;"
2052 " it will be optimized away by cleanup_clobbers\n",
2053 bb->index);
2054 continue;
2055 }
2056
2057 /* TODO: Obviously predicates can be propagated down across CFG. */
2058 if (fbi.info)
2059 {
2060 if (bb->aux)
2061 bb_predicate = *(predicate *) bb->aux;
2062 else
2063 bb_predicate = false;
2064 }
2065 else
2066 bb_predicate = true;
2067
2068 if (dump_file && (dump_flags & TDF_DETAILS))
2069 {
2070 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2071 bb_predicate.dump (dump_file, info->conds);
2072 }
2073
2074 if (fbi.info && nonconstant_names.exists ())
2075 {
2076 predicate phi_predicate;
2077 bool first_phi = true;
2078
2079 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2080 gsi_next (&bsi))
2081 {
2082 if (first_phi
2083 && !phi_result_unknown_predicate (fbi.info, info, bb,
2084 &phi_predicate,
2085 nonconstant_names))
2086 break;
2087 first_phi = false;
2088 if (dump_file && (dump_flags & TDF_DETAILS))
2089 {
2090 fprintf (dump_file, " ");
2091 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2092 }
2093 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2094 nonconstant_names);
2095 }
2096 }
2097
2098 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2099
2100 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2101 gsi_next (&bsi))
2102 {
2103 gimple *stmt = gsi_stmt (bsi);
2104 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2105 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2106 int prob;
2107 predicate will_be_nonconstant;
2108
2109 /* This relation stmt should be folded after we remove
2110 buildin_expect call. Adjust the cost here. */
2111 if (stmt == fix_builtin_expect_stmt)
2112 {
2113 this_size--;
2114 this_time--;
2115 }
2116
2117 if (dump_file && (dump_flags & TDF_DETAILS))
2118 {
2119 fprintf (dump_file, " ");
2120 print_gimple_stmt (dump_file, stmt, 0);
2121 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2122 freq.to_double (), this_size,
2123 this_time);
2124 }
2125
2126 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2127 {
2128 predicate this_array_index;
2129 this_array_index =
2130 array_index_predicate (info, nonconstant_names,
2131 gimple_assign_rhs1 (stmt));
2132 if (this_array_index != false)
2133 array_index &= this_array_index;
2134 }
2135 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2136 {
2137 predicate this_array_index;
2138 this_array_index =
2139 array_index_predicate (info, nonconstant_names,
2140 gimple_get_lhs (stmt));
2141 if (this_array_index != false)
2142 array_index &= this_array_index;
2143 }
2144
2145
2146 if (is_gimple_call (stmt)
2147 && !gimple_call_internal_p (stmt))
2148 {
2149 struct cgraph_edge *edge = node->get_edge (stmt);
2150 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2151
2152 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2153 resolved as constant. We however don't want to optimize
2154 out the cgraph edges. */
2155 if (nonconstant_names.exists ()
2156 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2157 && gimple_call_lhs (stmt)
2158 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2159 {
2160 predicate false_p = false;
2161 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2162 = false_p;
2163 }
2164 if (ipa_node_params_sum)
2165 {
2166 int count = gimple_call_num_args (stmt);
2167 int i;
2168
2169 if (count)
2170 es->param.safe_grow_cleared (count);
2171 for (i = 0; i < count; i++)
2172 {
2173 int prob = param_change_prob (stmt, i);
2174 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2175 es->param[i].change_prob = prob;
2176 }
2177 }
2178
2179 es->call_stmt_size = this_size;
2180 es->call_stmt_time = this_time;
2181 es->loop_depth = bb_loop_depth (bb);
2182 edge_set_predicate (edge, &bb_predicate);
2183 if (edge->speculative)
2184 {
2185 cgraph_edge *direct, *indirect;
2186 ipa_ref *ref;
2187 edge->speculative_call_info (direct, indirect, ref);
2188 gcc_assert (direct == edge);
2189 ipa_call_summary *es2
2190 = ipa_call_summaries->get_create (indirect);
2191 ipa_call_summaries->duplicate (edge, indirect,
2192 es, es2);
2193 }
2194 }
2195
2196 /* TODO: When conditional jump or swithc is known to be constant, but
2197 we did not translate it into the predicates, we really can account
2198 just maximum of the possible paths. */
2199 if (fbi.info)
2200 will_be_nonconstant
2201 = will_be_nonconstant_predicate (&fbi, info,
2202 stmt, nonconstant_names);
2203 else
2204 will_be_nonconstant = true;
2205 if (this_time || this_size)
2206 {
2207 sreal final_time = (sreal)this_time * freq;
2208
2209 prob = eliminated_by_inlining_prob (stmt);
2210 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2211 fprintf (dump_file,
2212 "\t\t50%% will be eliminated by inlining\n");
2213 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2214 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2215
2216 struct predicate p = bb_predicate & will_be_nonconstant;
2217
2218 /* We can ignore statement when we proved it is never going
2219 to happen, but we can not do that for call statements
2220 because edges are accounted specially. */
2221
2222 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2223 {
2224 time += final_time;
2225 size += this_size;
2226 }
2227
2228 /* We account everything but the calls. Calls have their own
2229 size/time info attached to cgraph edges. This is necessary
2230 in order to make the cost disappear after inlining. */
2231 if (!is_gimple_call (stmt))
2232 {
2233 if (prob)
2234 {
2235 predicate ip = bb_predicate & predicate::not_inlined ();
2236 info->account_size_time (this_size * prob,
2237 (this_time * prob) / 2, ip,
2238 p);
2239 }
2240 if (prob != 2)
2241 info->account_size_time (this_size * (2 - prob),
2242 (this_time * (2 - prob) / 2),
2243 bb_predicate,
2244 p);
2245 }
2246
2247 if (!info->fp_expressions && fp_expression_p (stmt))
2248 {
2249 info->fp_expressions = true;
2250 if (dump_file)
2251 fprintf (dump_file, " fp_expression set\n");
2252 }
2253
2254 gcc_assert (time >= 0);
2255 gcc_assert (size >= 0);
2256 }
2257 }
2258 }
2259 set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index,
2260 array_index);
2261 free (order);
2262
2263 if (nonconstant_names.exists () && !early)
2264 {
2265 struct loop *loop;
2266 predicate loop_iterations = true;
2267 predicate loop_stride = true;
2268
2269 if (dump_file && (dump_flags & TDF_DETAILS))
2270 flow_loops_dump (dump_file, NULL, 0);
2271 scev_initialize ();
2272 FOR_EACH_LOOP (loop, 0)
2273 {
2274 vec<edge> exits;
2275 edge ex;
2276 unsigned int j;
2277 struct tree_niter_desc niter_desc;
2278 bb_predicate = *(predicate *) loop->header->aux;
2279
2280 exits = get_loop_exit_edges (loop);
2281 FOR_EACH_VEC_ELT (exits, j, ex)
2282 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2283 && !is_gimple_min_invariant (niter_desc.niter))
2284 {
2285 predicate will_be_nonconstant
2286 = will_be_nonconstant_expr_predicate (fbi.info, info,
2287 niter_desc.niter,
2288 nonconstant_names);
2289 if (will_be_nonconstant != true)
2290 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2291 if (will_be_nonconstant != true
2292 && will_be_nonconstant != false)
2293 /* This is slightly inprecise. We may want to represent each
2294 loop with independent predicate. */
2295 loop_iterations &= will_be_nonconstant;
2296 }
2297 exits.release ();
2298 }
2299
2300 /* To avoid quadratic behavior we analyze stride predicates only
2301 with respect to the containing loop. Thus we simply iterate
2302 over all defs in the outermost loop body. */
2303 for (loop = loops_for_fn (cfun)->tree_root->inner;
2304 loop != NULL; loop = loop->next)
2305 {
2306 basic_block *body = get_loop_body (loop);
2307 for (unsigned i = 0; i < loop->num_nodes; i++)
2308 {
2309 gimple_stmt_iterator gsi;
2310 bb_predicate = *(predicate *) body[i]->aux;
2311 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2312 gsi_next (&gsi))
2313 {
2314 gimple *stmt = gsi_stmt (gsi);
2315
2316 if (!is_gimple_assign (stmt))
2317 continue;
2318
2319 tree def = gimple_assign_lhs (stmt);
2320 if (TREE_CODE (def) != SSA_NAME)
2321 continue;
2322
2323 affine_iv iv;
2324 if (!simple_iv (loop_containing_stmt (stmt),
2325 loop_containing_stmt (stmt),
2326 def, &iv, true)
2327 || is_gimple_min_invariant (iv.step))
2328 continue;
2329
2330 predicate will_be_nonconstant
2331 = will_be_nonconstant_expr_predicate (fbi.info, info,
2332 iv.step,
2333 nonconstant_names);
2334 if (will_be_nonconstant != true)
2335 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2336 if (will_be_nonconstant != true
2337 && will_be_nonconstant != false)
2338 /* This is slightly inprecise. We may want to represent
2339 each loop with independent predicate. */
2340 loop_stride = loop_stride & will_be_nonconstant;
2341 }
2342 }
2343 free (body);
2344 }
2345 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2346 set_hint_predicate (&s->loop_iterations, loop_iterations);
2347 set_hint_predicate (&s->loop_stride, loop_stride);
2348 scev_finalize ();
2349 }
2350 FOR_ALL_BB_FN (bb, my_function)
2351 {
2352 edge e;
2353 edge_iterator ei;
2354
2355 if (bb->aux)
2356 edge_predicate_pool.remove ((predicate *)bb->aux);
2357 bb->aux = NULL;
2358 FOR_EACH_EDGE (e, ei, bb->succs)
2359 {
2360 if (e->aux)
2361 edge_predicate_pool.remove ((predicate *) e->aux);
2362 e->aux = NULL;
2363 }
2364 }
2365 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2366 s->time = time;
2367 s->self_size = size;
2368 nonconstant_names.release ();
2369 ipa_release_body_info (&fbi);
2370 if (opt_for_fn (node->decl, optimize))
2371 {
2372 if (!early)
2373 loop_optimizer_finalize ();
2374 else if (!ipa_edge_args_sum)
2375 ipa_free_all_node_params ();
2376 free_dominance_info (CDI_DOMINATORS);
2377 }
2378 if (dump_file)
2379 {
2380 fprintf (dump_file, "\n");
2381 ipa_dump_fn_summary (dump_file, node);
2382 }
2383 }
2384
2385
2386 /* Compute function summary.
2387 EARLY is true when we compute parameters during early opts. */
2388
void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;
  struct ipa_fn_summary *info;

  /* Summaries are only computed for offline bodies; inline clones are
     updated incrementally by ipa_merge_fn_summary_after_inlining.  */
  gcc_assert (!node->global.inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  Any stale summary for NODE (and the
     call summaries of its outgoing edges) is dropped first so we start
     from a clean slate.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  info = ipa_fn_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;
  info->stack_frame_offset = 0;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no gimple body to analyze; synthesize a summary:
	 one call of standard cost, plus a small size/time charge that
	 is doubled when the thunk is not inlined.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->local.can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      info->self_size = info->size;
      /* We can not inline instrumentation clones.  */
      if (node->thunk.add_pointer_bounds_args)
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_CHKP;
	}
      else if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  /* Variadic thunks cannot be inlined either.  */
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.
	 Changing the signature would invalidate those indices.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->local.can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->local.can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply can not change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      /* E is non-NULL iff the loop above found such a call.  */
	      node->local.can_change_signature = !e;
	    }
	}
      /* Functions called by instrumentation thunk can't change signature
	 because instrumentation thunk modification is not supported.  */
      if (node->local.can_change_signature)
	for (e = node->callers; e; e = e->next_caller)
	  if (e->caller->thunk.thunk_p
	      && e->caller->thunk.add_pointer_bounds_args)
	    {
	      node->local.can_change_signature = false;
	      break;
	    }
      analyze_function_body (node, early);
      pop_cfun ();
    }
  /* Record whether NODE calls a comdat-local symbol; inliner needs this
     to keep such calls within the comdat group.  */
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  info->size = info->self_size;
  info->stack_frame_offset = 0;
  info->estimated_stack_size = info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  /* In LTO mode we may have speculative edges set.  */
  gcc_assert (in_lto_p || info->size == info->self_size);
}
2508
2509
2510 /* Compute parameters of functions used by inliner using
2511 current_function_decl. */
2512
2513 static unsigned int
2514 compute_fn_summary_for_current (void)
2515 {
2516 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2517 return 0;
2518 }
2519
2520 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2521 KNOWN_CONTEXTS and KNOWN_AGGS. */
2522
2523 static bool
2524 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2525 int *size, int *time,
2526 vec<tree> known_vals,
2527 vec<ipa_polymorphic_call_context> known_contexts,
2528 vec<ipa_agg_jump_function_p> known_aggs)
2529 {
2530 tree target;
2531 struct cgraph_node *callee;
2532 struct ipa_fn_summary *isummary;
2533 enum availability avail;
2534 bool speculative;
2535
2536 if (!known_vals.exists () && !known_contexts.exists ())
2537 return false;
2538 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2539 return false;
2540
2541 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2542 known_aggs, &speculative);
2543 if (!target || speculative)
2544 return false;
2545
2546 /* Account for difference in cost between indirect and direct calls. */
2547 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2548 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2549 gcc_checking_assert (*time >= 0);
2550 gcc_checking_assert (*size >= 0);
2551
2552 callee = cgraph_node::get (target);
2553 if (!callee || !callee->definition)
2554 return false;
2555 callee = callee->function_symbol (&avail);
2556 if (avail < AVAIL_AVAILABLE)
2557 return false;
2558 isummary = ipa_fn_summaries->get (callee);
2559 return isummary->inlinable;
2560 }
2561
2562 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2563 handle edge E with probability PROB.
2564 Set HINTS if edge may be devirtualized.
2565 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2566 site. */
2567
2568 static inline void
2569 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2570 sreal *time,
2571 int prob,
2572 vec<tree> known_vals,
2573 vec<ipa_polymorphic_call_context> known_contexts,
2574 vec<ipa_agg_jump_function_p> known_aggs,
2575 ipa_hints *hints)
2576 {
2577 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2578 int call_size = es->call_stmt_size;
2579 int call_time = es->call_stmt_time;
2580 int cur_size;
2581 if (!e->callee
2582 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2583 known_vals, known_contexts, known_aggs)
2584 && hints && e->maybe_hot_p ())
2585 *hints |= INLINE_HINT_indirect_call;
2586 cur_size = call_size * ipa_fn_summary::size_scale;
2587 *size += cur_size;
2588 if (min_size)
2589 *min_size += cur_size;
2590 if (prob == REG_BR_PROB_BASE)
2591 *time += ((sreal)call_time) * e->sreal_frequency ();
2592 else
2593 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2594 }
2595
2596
2597
2598 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2599 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2600 describe context of the call site. */
2601
2602 static void
2603 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2604 int *min_size, sreal *time,
2605 ipa_hints *hints,
2606 clause_t possible_truths,
2607 vec<tree> known_vals,
2608 vec<ipa_polymorphic_call_context> known_contexts,
2609 vec<ipa_agg_jump_function_p> known_aggs)
2610 {
2611 struct cgraph_edge *e;
2612 for (e = node->callees; e; e = e->next_callee)
2613 {
2614 struct ipa_call_summary *es = ipa_call_summaries->get_create (e);
2615
2616 /* Do not care about zero sized builtins. */
2617 if (e->inline_failed && !es->call_stmt_size)
2618 {
2619 gcc_checking_assert (!es->call_stmt_time);
2620 continue;
2621 }
2622 if (!es->predicate
2623 || es->predicate->evaluate (possible_truths))
2624 {
2625 if (e->inline_failed)
2626 {
2627 /* Predicates of calls shall not use NOT_CHANGED codes,
2628 sowe do not need to compute probabilities. */
2629 estimate_edge_size_and_time (e, size,
2630 es->predicate ? NULL : min_size,
2631 time, REG_BR_PROB_BASE,
2632 known_vals, known_contexts,
2633 known_aggs, hints);
2634 }
2635 else
2636 estimate_calls_size_and_time (e->callee, size, min_size, time,
2637 hints,
2638 possible_truths,
2639 known_vals, known_contexts,
2640 known_aggs);
2641 }
2642 }
2643 for (e = node->indirect_calls; e; e = e->next_callee)
2644 {
2645 struct ipa_call_summary *es = ipa_call_summaries->get_create (e);
2646 if (!es->predicate
2647 || es->predicate->evaluate (possible_truths))
2648 estimate_edge_size_and_time (e, size,
2649 es->predicate ? NULL : min_size,
2650 time, REG_BR_PROB_BASE,
2651 known_vals, known_contexts, known_aggs,
2652 hints);
2653 }
2654 }
2655
2656
/* Estimate size and time needed to execute NODE assuming
   POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
   information about NODE's arguments.  If non-NULL use also probability
   information present in INLINE_PARAM_SUMMARY vector.
   Additionally determine hints implied by the context.  Finally compute
   the minimal size needed for the call that is independent of the call
   context and can be used for fast estimates.  Return the values in
   RET_SIZE, RET_MIN_SIZE, RET_TIME and RET_HINTS.  */
2665
2666 void
2667 estimate_node_size_and_time (struct cgraph_node *node,
2668 clause_t possible_truths,
2669 clause_t nonspec_possible_truths,
2670 vec<tree> known_vals,
2671 vec<ipa_polymorphic_call_context> known_contexts,
2672 vec<ipa_agg_jump_function_p> known_aggs,
2673 int *ret_size, int *ret_min_size,
2674 sreal *ret_time,
2675 sreal *ret_nonspecialized_time,
2676 ipa_hints *ret_hints,
2677 vec<inline_param_summary>
2678 inline_param_summary)
2679 {
2680 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2681 size_time_entry *e;
2682 int size = 0;
2683 sreal time = 0;
2684 int min_size = 0;
2685 ipa_hints hints = 0;
2686 int i;
2687
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 {
2690 bool found = false;
2691 fprintf (dump_file, " Estimating body: %s/%i\n"
2692 " Known to be false: ", node->name (),
2693 node->order);
2694
2695 for (i = predicate::not_inlined_condition;
2696 i < (predicate::first_dynamic_condition
2697 + (int) vec_safe_length (info->conds)); i++)
2698 if (!(possible_truths & (1 << i)))
2699 {
2700 if (found)
2701 fprintf (dump_file, ", ");
2702 found = true;
2703 dump_condition (dump_file, info->conds, i);
2704 }
2705 }
2706
2707 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2708 known_vals, known_contexts, known_aggs);
2709 sreal nonspecialized_time = time;
2710
2711 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2712 {
2713 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2714
2715 /* Because predicates are conservative, it can happen that nonconst is 1
2716 but exec is 0. */
2717 if (exec)
2718 {
2719 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2720
2721 gcc_checking_assert (e->time >= 0);
2722 gcc_checking_assert (time >= 0);
2723
2724 /* We compute specialized size only because size of nonspecialized
2725 copy is context independent.
2726
2727 The difference between nonspecialized execution and specialized is
2728 that nonspecialized is not going to have optimized out computations
2729 known to be constant in a specialized setting. */
2730 if (nonconst)
2731 size += e->size;
2732 nonspecialized_time += e->time;
2733 if (!nonconst)
2734 ;
2735 else if (!inline_param_summary.exists ())
2736 {
2737 if (nonconst)
2738 time += e->time;
2739 }
2740 else
2741 {
2742 int prob = e->nonconst_predicate.probability
2743 (info->conds, possible_truths,
2744 inline_param_summary);
2745 gcc_checking_assert (prob >= 0);
2746 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2747 time += e->time * prob / REG_BR_PROB_BASE;
2748 }
2749 gcc_checking_assert (time >= 0);
2750 }
2751 }
2752 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2753 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2754 min_size = (*info->size_time_table)[0].size;
2755 gcc_checking_assert (size >= 0);
2756 gcc_checking_assert (time >= 0);
2757 /* nonspecialized_time should be always bigger than specialized time.
2758 Roundoff issues however may get into the way. */
2759 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
2760
2761 /* Roundoff issues may make specialized time bigger than nonspecialized
2762 time. We do not really want that to happen because some heurstics
2763 may get confused by seeing negative speedups. */
2764 if (time > nonspecialized_time)
2765 time = nonspecialized_time;
2766
2767 if (info->loop_iterations
2768 && !info->loop_iterations->evaluate (possible_truths))
2769 hints |= INLINE_HINT_loop_iterations;
2770 if (info->loop_stride
2771 && !info->loop_stride->evaluate (possible_truths))
2772 hints |= INLINE_HINT_loop_stride;
2773 if (info->array_index
2774 && !info->array_index->evaluate (possible_truths))
2775 hints |= INLINE_HINT_array_index;
2776 if (info->scc_no)
2777 hints |= INLINE_HINT_in_scc;
2778 if (DECL_DECLARED_INLINE_P (node->decl))
2779 hints |= INLINE_HINT_declared_inline;
2780
2781 size = RDIV (size, ipa_fn_summary::size_scale);
2782 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2783
2784 if (dump_file && (dump_flags & TDF_DETAILS))
2785 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2786 time.to_double (), nonspecialized_time.to_double ());
2787 if (ret_time)
2788 *ret_time = time;
2789 if (ret_nonspecialized_time)
2790 *ret_nonspecialized_time = nonspecialized_time;
2791 if (ret_size)
2792 *ret_size = size;
2793 if (ret_min_size)
2794 *ret_min_size = min_size;
2795 if (ret_hints)
2796 *ret_hints = hints;
2797 return;
2798 }
2799
2800
2801 /* Estimate size and time needed to execute callee of EDGE assuming that
2802 parameters known to be constant at caller of EDGE are propagated.
2803 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2804 and types for parameters. */
2805
2806 void
2807 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2808 vec<tree> known_vals,
2809 vec<ipa_polymorphic_call_context>
2810 known_contexts,
2811 vec<ipa_agg_jump_function_p> known_aggs,
2812 int *ret_size, sreal *ret_time,
2813 sreal *ret_nonspec_time,
2814 ipa_hints *hints)
2815 {
2816 clause_t clause, nonspec_clause;
2817
2818 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2819 &clause, &nonspec_clause);
2820 estimate_node_size_and_time (node, clause, nonspec_clause,
2821 known_vals, known_contexts,
2822 known_aggs, ret_size, NULL, ret_time,
2823 ret_nonspec_time, hints, vNULL);
2824 }
2825
2826
2827 /* Update summary information of inline clones after inlining.
2828 Compute peak stack usage. */
2829
2830 static void
2831 inline_update_callee_summaries (struct cgraph_node *node, int depth)
2832 {
2833 struct cgraph_edge *e;
2834 ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2835 ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2836 HOST_WIDE_INT peak;
2837
2838 callee_info->stack_frame_offset
2839 = caller_info->stack_frame_offset
2840 + caller_info->estimated_self_stack_size;
2841 peak = callee_info->stack_frame_offset
2842 + callee_info->estimated_self_stack_size;
2843
2844 ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to);
2845 if (s->estimated_stack_size < peak)
2846 s->estimated_stack_size = peak;
2847 ipa_propagate_frequency (node);
2848 for (e = node->callees; e; e = e->next_callee)
2849 {
2850 if (!e->inline_failed)
2851 inline_update_callee_summaries (e->callee, depth);
2852 ipa_call_summaries->get (e)->loop_depth += depth;
2853 }
2854 for (e = node->indirect_calls; e; e = e->next_callee)
2855 ipa_call_summaries->get (e)->loop_depth += depth;
2856 }
2857
/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
   When function A is inlined in B and A calls C with a parameter that
   changes with probability PROB1, and C is known to be a passthrough
   of an argument of B that changes with probability PROB2, the
   probability of change is now PROB1*PROB2.  */
2863
2864 static void
2865 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2866 struct cgraph_edge *edge)
2867 {
2868 if (ipa_node_params_sum)
2869 {
2870 int i;
2871 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2872 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2873 struct ipa_call_summary *inlined_es
2874 = ipa_call_summaries->get (inlined_edge);
2875
2876 if (es->param.length () == 0)
2877 return;
2878
2879 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2880 {
2881 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2882 if (jfunc->type == IPA_JF_PASS_THROUGH
2883 || jfunc->type == IPA_JF_ANCESTOR)
2884 {
2885 int id = jfunc->type == IPA_JF_PASS_THROUGH
2886 ? ipa_get_jf_pass_through_formal_id (jfunc)
2887 : ipa_get_jf_ancestor_formal_id (jfunc);
2888 if (id < (int) inlined_es->param.length ())
2889 {
2890 int prob1 = es->param[i].change_prob;
2891 int prob2 = inlined_es->param[id].change_prob;
2892 int prob = combine_probabilities (prob1, prob2);
2893
2894 if (prob1 && prob2 && !prob)
2895 prob = 1;
2896
2897 es->param[i].change_prob = prob;
2898 }
2899 }
2900 }
2901 }
2902 }
2903
2904 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2905
2906 Remap predicates of callees of NODE. Rest of arguments match
2907 remap_predicate.
2908
2909 Also update change probabilities. */
2910
2911 static void
2912 remap_edge_summaries (struct cgraph_edge *inlined_edge,
2913 struct cgraph_node *node,
2914 struct ipa_fn_summary *info,
2915 struct ipa_fn_summary *callee_info,
2916 vec<int> operand_map,
2917 vec<int> offset_map,
2918 clause_t possible_truths,
2919 predicate *toplev_predicate)
2920 {
2921 struct cgraph_edge *e, *next;
2922 for (e = node->callees; e; e = next)
2923 {
2924 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2925 predicate p;
2926 next = e->next_callee;
2927
2928 if (e->inline_failed)
2929 {
2930 remap_edge_change_prob (inlined_edge, e);
2931
2932 if (es->predicate)
2933 {
2934 p = es->predicate->remap_after_inlining
2935 (info, callee_info, operand_map,
2936 offset_map, possible_truths,
2937 *toplev_predicate);
2938 edge_set_predicate (e, &p);
2939 }
2940 else
2941 edge_set_predicate (e, toplev_predicate);
2942 }
2943 else
2944 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2945 operand_map, offset_map, possible_truths,
2946 toplev_predicate);
2947 }
2948 for (e = node->indirect_calls; e; e = next)
2949 {
2950 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2951 predicate p;
2952 next = e->next_callee;
2953
2954 remap_edge_change_prob (inlined_edge, e);
2955 if (es->predicate)
2956 {
2957 p = es->predicate->remap_after_inlining
2958 (info, callee_info, operand_map, offset_map,
2959 possible_truths, *toplev_predicate);
2960 edge_set_predicate (e, &p);
2961 }
2962 else
2963 edge_set_predicate (e, toplev_predicate);
2964 }
2965 }
2966
2967 /* Same as remap_predicate, but set result into hint *HINT. */
2968
2969 static void
2970 remap_hint_predicate (struct ipa_fn_summary *info,
2971 struct ipa_fn_summary *callee_info,
2972 predicate **hint,
2973 vec<int> operand_map,
2974 vec<int> offset_map,
2975 clause_t possible_truths,
2976 predicate *toplev_predicate)
2977 {
2978 predicate p;
2979
2980 if (!*hint)
2981 return;
2982 p = (*hint)->remap_after_inlining
2983 (info, callee_info,
2984 operand_map, offset_map,
2985 possible_truths, *toplev_predicate);
2986 if (p != false && p != true)
2987 {
2988 if (!*hint)
2989 set_hint_predicate (hint, p);
2990 else
2991 **hint &= p;
2992 }
2993 }
2994
2995 /* We inlined EDGE. Update summary of the function we inlined into. */
2996
void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* Merge into the function everything was ultimately inlined into.  */
  struct cgraph_node *to = (edge->caller->global.inlined_to
			    ? edge->caller->global.inlined_to : edge->caller);
  struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  vec<int> operand_map = vNULL;
  vec<int> offset_map = vNULL;
  int i;
  predicate toplev_predicate;
  predicate true_p = true;
  struct ipa_call_summary *es = ipa_call_summaries->get (edge);

  /* All of the callee's entries become conditional on the predicate
     guarding the call itself.  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  if (callee_info->conds)
    evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
  if (ipa_node_params_sum && callee_info->conds)
    {
      /* Build maps translating callee parameter indices (and aggregate
	 offsets) to the caller's, via the jump functions of EDGE, so the
	 callee's predicates can be rewritten in the caller's terms.  */
      struct ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = ipa_get_cs_argument_count (args);
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;		/* -1 means "not translatable".  */

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
	}
    }
  /* Remap every size/time entry of the callee into the caller's summary,
     dropping entries whose predicates became false.  */
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
			     (info, callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  /* Scale the callee time by how often the call is executed and
	     by the probability the entry's condition holds.  */
	  sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  /* Remap predicates of outgoing edges and the hint predicates of the
     callee so they keep making sense in the merged body.  */
  remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, callee_info,
			&callee_info->array_index,
			operand_map, offset_map, clause, &toplev_predicate);

  ipa_call_summary *s = ipa_call_summaries->get (edge);
  inline_update_callee_summaries (edge->callee, s->loop_depth);

  /* We do not maintain predicates of inlined edges, free it.  */
  edge_set_predicate (edge, &true_p);
  /* Similarly remove param summaries.  */
  es->param.release ();
  operand_map.release ();
  offset_map.release ();
}
3110
3111 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3112 and time. Recompute it. */
3113
3114 void
3115 ipa_update_overall_fn_summary (struct cgraph_node *node)
3116 {
3117 struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
3118 size_time_entry *e;
3119 int i;
3120
3121 info->size = 0;
3122 info->time = 0;
3123 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3124 {
3125 info->size += e->size;
3126 info->time += e->time;
3127 }
3128 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3129 &info->time, NULL,
3130 ~(clause_t) (1 << predicate::false_condition),
3131 vNULL, vNULL, vNULL);
3132 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3133 }
3134
3135
3136 /* This function performs intraprocedural analysis in NODE that is required to
3137 inline indirect calls. */
3138
3139 static void
3140 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3141 {
3142 ipa_analyze_node (node);
3143 if (dump_file && (dump_flags & TDF_DETAILS))
3144 {
3145 ipa_print_node_params (dump_file, node);
3146 ipa_print_node_jump_functions (dump_file, node);
3147 }
3148 }
3149
3150
3151 /* Note function body size. */
3152
3153 void
3154 inline_analyze_function (struct cgraph_node *node)
3155 {
3156 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3157
3158 if (dump_file)
3159 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3160 node->name (), node->order);
3161 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3162 inline_indirect_intraprocedural_analysis (node);
3163 compute_fn_summary (node, false);
3164 if (!optimize)
3165 {
3166 struct cgraph_edge *e;
3167 for (e = node->callees; e; e = e->next_callee)
3168 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3169 for (e = node->indirect_calls; e; e = e->next_callee)
3170 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3171 }
3172
3173 pop_cfun ();
3174 }
3175
3176
/* Called when new function is inserted to callgraph late.  The freshly
   allocated summary argument is ignored; inline_analyze_function computes
   the data for NODE itself.  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}
3184
3185 /* Note function body size. */
3186
3187 static void
3188 ipa_fn_summary_generate (void)
3189 {
3190 struct cgraph_node *node;
3191
3192 FOR_EACH_DEFINED_FUNCTION (node)
3193 if (DECL_STRUCT_FUNCTION (node->decl))
3194 node->local.versionable = tree_versionable_function_p (node->decl);
3195
3196 ipa_fn_summary_alloc ();
3197
3198 ipa_fn_summaries->enable_insertion_hook ();
3199
3200 ipa_register_cgraph_hooks ();
3201
3202 FOR_EACH_DEFINED_FUNCTION (node)
3203 if (!node->alias
3204 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3205 || opt_for_fn (node->decl, optimize)))
3206 inline_analyze_function (node);
3207 }
3208
3209
/* Read the summary of call edge E from IB; the counterpart of
   write_ipa_call_summary, so field order must match it exactly.  When
   PREVAILS is false the edge belongs to a non-prevailing symbol; the
   data is then consumed from the stream but discarded.  */

static void
read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  struct ipa_call_summary *es = prevails
				? ipa_call_summaries->get_create (e) : NULL;
  predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    /* Keep the stream position consistent even when discarding.  */
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  /* Per-parameter change probabilities are only kept when the edge may
     be a call within this translation unit.  */
  if (length && es && e->possibly_call_in_translation_unit_p ())
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
  else
    {
      /* Skip the per-parameter data without storing it.  */
      for (i = 0; i < length; i++)
	streamer_read_uhwi (ib);
    }
}
3254
3255
/* Stream in inline summaries from the section.  DATA/LEN is the
   LTO_section_ipa_fn_summary payload of FILE_DATA; decode one record per
   function, filling in summaries only for prevailing nodes while still
   consuming the stream for the others.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  /* The section layout is: header, CFG, main stream, string table.  */
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      struct ipa_fn_summary *info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* Only prevailing nodes get a summary; INFO stays NULL otherwise
	 and everything read below is discarded.  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = info->estimated_self_stack_size = stack_size;
	  info->size = info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  /* Keep the stream position in sync when discarding.  */
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Table of conditions referenced by the predicates below.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.size = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  /* OFFSET is only streamed for aggregate conditions.  */
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  if (info)
	    vec_safe_push (info->conds, c);
	}
      /* Size/time table with execution and non-const predicates.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table);
      for (j = 0; j < count2; j++)
	{
	  struct size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    vec_safe_push (info->size_time_table, e);
	}

      /* The three loop hint predicates, in the order ipa_fn_summary_write
	 emits them.  */
      p.stream_in (&ib);
      if (info)
	set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      if (info)
	set_hint_predicate (&info->loop_stride, p);
      p.stream_in (&ib);
      if (info)
	set_hint_predicate (&info->array_index, p);
      /* Finally the summaries of all outgoing edges.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
3369
3370
3371 /* Read inline summary. Jump functions are shared among ipa-cp
3372 and inliner, so when ipa-cp is active, we don't need to write them
3373 twice. */
3374
3375 static void
3376 ipa_fn_summary_read (void)
3377 {
3378 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3379 struct lto_file_decl_data *file_data;
3380 unsigned int j = 0;
3381
3382 ipa_fn_summary_alloc ();
3383
3384 while ((file_data = file_data_vec[j++]))
3385 {
3386 size_t len;
3387 const char *data = lto_get_section_data (file_data,
3388 LTO_section_ipa_fn_summary,
3389 NULL, &len);
3390 if (data)
3391 inline_read_section (file_data, data, len);
3392 else
3393 /* Fatal error here. We do not want to support compiling ltrans units
3394 with different version of compiler or different flags than the WPA
3395 unit, so this should never happen. */
3396 fatal_error (input_location,
3397 "ipa inline summary is missing in input file");
3398 }
3399 ipa_register_cgraph_hooks ();
3400 if (!flag_ipa_cp)
3401 ipa_prop_read_jump_functions ();
3402
3403 gcc_assert (ipa_fn_summaries);
3404 ipa_fn_summaries->enable_insertion_hook ();
3405 }
3406
3407
/* Write inline summary for edge E to OB.  Field order must mirror
   read_ipa_call_summary exactly.  */

static void
write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
{
  struct ipa_call_summary *es = ipa_call_summaries->get (e);
  int i;

  streamer_write_uhwi (ob, es->call_stmt_size);
  streamer_write_uhwi (ob, es->call_stmt_time);
  streamer_write_uhwi (ob, es->loop_depth);

  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
  streamer_write_bitpack (&bp);

  /* A missing predicate is streamed as a lone 0 uhwi — presumably a
     real predicate stream never starts with 0; confirm against
     predicate::stream_out.  */
  if (es->predicate)
    es->predicate->stream_out (ob);
  else
    streamer_write_uhwi (ob, 0);
  /* Per-parameter change probabilities, preceded by their count.  */
  streamer_write_uhwi (ob, es->param.length ());
  for (i = 0; i < (int) es->param.length (); i++)
    streamer_write_uhwi (ob, es->param[i].change_prob);
}
3432
3433
3434 /* Write inline summary for node in SET.
3435 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3436 active, we don't need to write them twice. */
3437
3438 static void
3439 ipa_fn_summary_write (void)
3440 {
3441 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3442 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3443 unsigned int count = 0;
3444 int i;
3445
3446 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3447 {
3448 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3449 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3450 if (cnode && cnode->definition && !cnode->alias)
3451 count++;
3452 }
3453 streamer_write_uhwi (ob, count);
3454
3455 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3456 {
3457 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3458 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3459 if (cnode && cnode->definition && !cnode->alias)
3460 {
3461 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3462 struct bitpack_d bp;
3463 struct cgraph_edge *edge;
3464 int i;
3465 size_time_entry *e;
3466 struct condition *c;
3467
3468 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3469 streamer_write_hwi (ob, info->estimated_self_stack_size);
3470 streamer_write_hwi (ob, info->self_size);
3471 info->time.stream_out (ob);
3472 bp = bitpack_create (ob->main_stream);
3473 bp_pack_value (&bp, info->inlinable, 1);
3474 bp_pack_value (&bp, false, 1);
3475 bp_pack_value (&bp, info->fp_expressions, 1);
3476 streamer_write_bitpack (&bp);
3477 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3478 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3479 {
3480 streamer_write_uhwi (ob, c->operand_num);
3481 streamer_write_uhwi (ob, c->size);
3482 streamer_write_uhwi (ob, c->code);
3483 stream_write_tree (ob, c->val, true);
3484 bp = bitpack_create (ob->main_stream);
3485 bp_pack_value (&bp, c->agg_contents, 1);
3486 bp_pack_value (&bp, c->by_ref, 1);
3487 streamer_write_bitpack (&bp);
3488 if (c->agg_contents)
3489 streamer_write_uhwi (ob, c->offset);
3490 }
3491 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3492 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3493 {
3494 streamer_write_uhwi (ob, e->size);
3495 e->time.stream_out (ob);
3496 e->exec_predicate.stream_out (ob);
3497 e->nonconst_predicate.stream_out (ob);
3498 }
3499 if (info->loop_iterations)
3500 info->loop_iterations->stream_out (ob);
3501 else
3502 streamer_write_uhwi (ob, 0);
3503 if (info->loop_stride)
3504 info->loop_stride->stream_out (ob);
3505 else
3506 streamer_write_uhwi (ob, 0);
3507 if (info->array_index)
3508 info->array_index->stream_out (ob);
3509 else
3510 streamer_write_uhwi (ob, 0);
3511 for (edge = cnode->callees; edge; edge = edge->next_callee)
3512 write_ipa_call_summary (ob, edge);
3513 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3514 write_ipa_call_summary (ob, edge);
3515 }
3516 }
3517 streamer_write_char_stream (ob->main_stream, 0);
3518 produce_asm (ob, NULL);
3519 destroy_output_block (ob);
3520
3521 if (!flag_ipa_cp)
3522 ipa_prop_write_jump_functions ();
3523 }
3524
3525
/* Release inline summary: remove and free the per-function summaries,
   the per-edge call summaries, and the edge predicate pool.  */

void
ipa_free_fn_summary (void)
{
  struct cgraph_node *node;
  /* NOTE(review): the guard tests ipa_call_summaries although the code
     below first touches ipa_fn_summaries — the two appear to be
     allocated and freed together; confirm.  */
  if (!ipa_call_summaries)
    return;
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->alias)
      ipa_fn_summaries->remove (node);
  ipa_fn_summaries->release ();
  ipa_fn_summaries = NULL;
  ipa_call_summaries->release ();
  delete ipa_call_summaries;
  ipa_call_summaries = NULL;
  edge_predicate_pool.release ();
}
3544
namespace {

/* Pass descriptor for the per-function (GIMPLE) summary computation.  */
const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass computing the function summary for the current function
   only; clonable so it can appear more than once in the pipeline.  */
class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
3577
/* Create a new instance of the local function summary pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
3583
3584
/* Free inline summary.  */

namespace {

/* Pass descriptor for the simple IPA pass releasing function
   summaries.  */
const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* Pass that frees the fn summaries.  small_p is set through
   set_pass_param (presumably from passes.def — confirm); unless it is
   set, the pass is gated out during WPA where the summaries are still
   needed.  */
class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Only parameter 0 exists; it selects the small_p variant.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return small_p || !flag_wpa; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      return 0;
    }

private:
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
3629
/* Create a new instance of the summary-freeing pass.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
3635
namespace {

/* Pass descriptor for the fnsummary IPA pass.  */
const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* IPA pass whose work happens entirely in the summary hooks:
   generation (ipa_fn_summary_generate) and LTO streaming
   (ipa_fn_summary_write / ipa_fn_summary_read); execute is a no-op.  */
class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
3673
/* Create a new instance of the fnsummary IPA pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
3679
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_fnsummary_c_finalize (void)
{
  /* Dropping the summaries releases the state this file holds.  */
  ipa_free_fn_summary ();
}