]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-fnsummary.c
Apply mechanical replacement (generated patch).
[thirdparty/gcc.git] / gcc / ipa-fnsummary.c
1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "params.h"
71 #include "cfganal.h"
72 #include "gimple-iterator.h"
73 #include "tree-cfg.h"
74 #include "tree-ssa-loop-niter.h"
75 #include "tree-ssa-loop.h"
76 #include "symbol-summary.h"
77 #include "ipa-prop.h"
78 #include "ipa-fnsummary.h"
79 #include "cfgloop.h"
80 #include "tree-scalar-evolution.h"
81 #include "ipa-utils.h"
82 #include "cfgexpand.h"
83 #include "gimplify.h"
84 #include "stringpool.h"
85 #include "attribs.h"
86
87 /* Summaries. */
88 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
89 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
90 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
91
92 /* Edge predicates goes here. */
93 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
94
95
96 /* Dump IPA hints. */
97 void
98 ipa_dump_hints (FILE *f, ipa_hints hints)
99 {
100 if (!hints)
101 return;
102 fprintf (f, "IPA hints:");
103 if (hints & INLINE_HINT_indirect_call)
104 {
105 hints &= ~INLINE_HINT_indirect_call;
106 fprintf (f, " indirect_call");
107 }
108 if (hints & INLINE_HINT_loop_iterations)
109 {
110 hints &= ~INLINE_HINT_loop_iterations;
111 fprintf (f, " loop_iterations");
112 }
113 if (hints & INLINE_HINT_loop_stride)
114 {
115 hints &= ~INLINE_HINT_loop_stride;
116 fprintf (f, " loop_stride");
117 }
118 if (hints & INLINE_HINT_same_scc)
119 {
120 hints &= ~INLINE_HINT_same_scc;
121 fprintf (f, " same_scc");
122 }
123 if (hints & INLINE_HINT_in_scc)
124 {
125 hints &= ~INLINE_HINT_in_scc;
126 fprintf (f, " in_scc");
127 }
128 if (hints & INLINE_HINT_cross_module)
129 {
130 hints &= ~INLINE_HINT_cross_module;
131 fprintf (f, " cross_module");
132 }
133 if (hints & INLINE_HINT_declared_inline)
134 {
135 hints &= ~INLINE_HINT_declared_inline;
136 fprintf (f, " declared_inline");
137 }
138 if (hints & INLINE_HINT_known_hot)
139 {
140 hints &= ~INLINE_HINT_known_hot;
141 fprintf (f, " known_hot");
142 }
143 gcc_assert (!hints);
144 }
145
146
147 /* Record SIZE and TIME to SUMMARY.
148 The accounted code will be executed when EXEC_PRED is true.
149 When NONCONST_PRED is false the code will evaulate to constant and
150 will get optimized out in specialized clones of the function. */
151
152 void
153 ipa_fn_summary::account_size_time (int size, sreal time,
154 const predicate &exec_pred,
155 const predicate &nonconst_pred_in)
156 {
157 size_time_entry *e;
158 bool found = false;
159 int i;
160 predicate nonconst_pred;
161
162 if (exec_pred == false)
163 return;
164
165 nonconst_pred = nonconst_pred_in & exec_pred;
166
167 if (nonconst_pred == false)
168 return;
169
170 /* We need to create initial empty unconitional clause, but otherwie
171 we don't need to account empty times and sizes. */
172 if (!size && time == 0 && size_time_table)
173 return;
174
175 gcc_assert (time >= 0);
176
177 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
178 if (e->exec_predicate == exec_pred
179 && e->nonconst_predicate == nonconst_pred)
180 {
181 found = true;
182 break;
183 }
184 if (i == 256)
185 {
186 i = 0;
187 found = true;
188 e = &(*size_time_table)[0];
189 if (dump_file && (dump_flags & TDF_DETAILS))
190 fprintf (dump_file,
191 "\t\tReached limit on number of entries, "
192 "ignoring the predicate.");
193 }
194 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
195 {
196 fprintf (dump_file,
197 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
198 ((double) size) / ipa_fn_summary::size_scale,
199 (time.to_double ()), found ? "" : "new ");
200 exec_pred.dump (dump_file, conds, 0);
201 if (exec_pred != nonconst_pred)
202 {
203 fprintf (dump_file, " nonconst:");
204 nonconst_pred.dump (dump_file, conds);
205 }
206 else
207 fprintf (dump_file, "\n");
208 }
209 if (!found)
210 {
211 class size_time_entry new_entry;
212 new_entry.size = size;
213 new_entry.time = time;
214 new_entry.exec_predicate = exec_pred;
215 new_entry.nonconst_predicate = nonconst_pred;
216 vec_safe_push (size_time_table, new_entry);
217 }
218 else
219 {
220 e->size += size;
221 e->time += time;
222 }
223 }
224
225 /* We proved E to be unreachable, redirect it to __bultin_unreachable. */
226
227 static struct cgraph_edge *
228 redirect_to_unreachable (struct cgraph_edge *e)
229 {
230 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
231 struct cgraph_node *target = cgraph_node::get_create
232 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
233
234 if (e->speculative)
235 e = e->resolve_speculation (target->decl);
236 else if (!e->callee)
237 e->make_direct (target);
238 else
239 e->redirect_callee (target);
240 class ipa_call_summary *es = ipa_call_summaries->get (e);
241 e->inline_failed = CIF_UNREACHABLE;
242 e->count = profile_count::zero ();
243 es->call_stmt_size = 0;
244 es->call_stmt_time = 0;
245 if (callee)
246 callee->remove_symbol_and_inline_clones ();
247 return e;
248 }
249
250 /* Set predicate for edge E. */
251
252 static void
253 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
254 {
255 /* If the edge is determined to be never executed, redirect it
256 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
257 be optimized out. */
258 if (predicate && *predicate == false
259 /* When handling speculative edges, we need to do the redirection
260 just once. Do it always on the direct edge, so we do not
261 attempt to resolve speculation while duplicating the edge. */
262 && (!e->speculative || e->callee))
263 e = redirect_to_unreachable (e);
264
265 class ipa_call_summary *es = ipa_call_summaries->get (e);
266 if (predicate && *predicate != true)
267 {
268 if (!es->predicate)
269 es->predicate = edge_predicate_pool.allocate ();
270 *es->predicate = *predicate;
271 }
272 else
273 {
274 if (es->predicate)
275 edge_predicate_pool.remove (es->predicate);
276 es->predicate = NULL;
277 }
278 }
279
280 /* Set predicate for hint *P. */
281
282 static void
283 set_hint_predicate (predicate **p, predicate new_predicate)
284 {
285 if (new_predicate == false || new_predicate == true)
286 {
287 if (*p)
288 edge_predicate_pool.remove (*p);
289 *p = NULL;
290 }
291 else
292 {
293 if (!*p)
294 *p = edge_predicate_pool.allocate ();
295 **p = new_predicate;
296 }
297 }
298
299
300 /* Compute what conditions may or may not hold given invormation about
301 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
302 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
303 copy when called in a given context. It is a bitmask of conditions. Bit
304 0 means that condition is known to be false, while bit 1 means that condition
305 may or may not be true. These differs - for example NOT_INLINED condition
306 is always false in the second and also builtin_constant_p tests cannot use
307 the fact that parameter is indeed a constant.
308
309 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
310 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
311 Return clause of possible truths. When INLINE_P is true, assume that we are
312 inlining.
313
314 ERROR_MARK means compile time invariant. */
315
316 static void
317 evaluate_conditions_for_known_args (struct cgraph_node *node,
318 bool inline_p,
319 vec<tree> known_vals,
320 vec<ipa_agg_jump_function_p>
321 known_aggs,
322 clause_t *ret_clause,
323 clause_t *ret_nonspec_clause)
324 {
325 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
326 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
327 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
328 int i;
329 struct condition *c;
330
331 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
332 {
333 tree val;
334 tree res;
335 int j;
336 struct expr_eval_op *op;
337
338 /* We allow call stmt to have fewer arguments than the callee function
339 (especially for K&R style programs). So bound check here (we assume
340 known_aggs vector, if non-NULL, has the same length as
341 known_vals). */
342 gcc_checking_assert (!known_aggs.exists ()
343 || (known_vals.length () == known_aggs.length ()));
344 if (c->operand_num >= (int) known_vals.length ())
345 {
346 clause |= 1 << (i + predicate::first_dynamic_condition);
347 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
348 continue;
349 }
350
351 if (c->agg_contents)
352 {
353 struct ipa_agg_jump_function *agg;
354
355 if (c->code == predicate::changed
356 && !c->by_ref
357 && (known_vals[c->operand_num] == error_mark_node))
358 continue;
359
360 if (known_aggs.exists ())
361 {
362 agg = known_aggs[c->operand_num];
363 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
364 c->offset, c->by_ref);
365 }
366 else
367 val = NULL_TREE;
368 }
369 else
370 {
371 val = known_vals[c->operand_num];
372 if (val == error_mark_node && c->code != predicate::changed)
373 val = NULL_TREE;
374 }
375
376 if (!val)
377 {
378 clause |= 1 << (i + predicate::first_dynamic_condition);
379 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
380 continue;
381 }
382 if (c->code == predicate::changed)
383 {
384 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
385 continue;
386 }
387
388 if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val)))
389 {
390 clause |= 1 << (i + predicate::first_dynamic_condition);
391 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
392 continue;
393 }
394 if (c->code == predicate::is_not_constant)
395 {
396 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
397 continue;
398 }
399
400 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
401 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
402 {
403 if (!val)
404 break;
405 if (!op->val[0])
406 val = fold_unary (op->code, op->type, val);
407 else if (!op->val[1])
408 val = fold_binary (op->code, op->type,
409 op->index ? op->val[0] : val,
410 op->index ? val : op->val[0]);
411 else if (op->index == 0)
412 val = fold_ternary (op->code, op->type,
413 val, op->val[0], op->val[1]);
414 else if (op->index == 1)
415 val = fold_ternary (op->code, op->type,
416 op->val[0], val, op->val[1]);
417 else if (op->index == 2)
418 val = fold_ternary (op->code, op->type,
419 op->val[0], op->val[1], val);
420 else
421 val = NULL_TREE;
422 }
423
424 res = val
425 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
426 : NULL;
427
428 if (res && integer_zerop (res))
429 continue;
430
431 clause |= 1 << (i + predicate::first_dynamic_condition);
432 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
433 }
434 *ret_clause = clause;
435 if (ret_nonspec_clause)
436 *ret_nonspec_clause = nonspec_clause;
437 }
438
439
440 /* Work out what conditions might be true at invocation of E. */
441
442 void
443 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
444 clause_t *clause_ptr,
445 clause_t *nonspec_clause_ptr,
446 vec<tree> *known_vals_ptr,
447 vec<ipa_polymorphic_call_context>
448 *known_contexts_ptr,
449 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
450 {
451 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
452 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
453 vec<tree> known_vals = vNULL;
454 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
455 class ipa_edge_args *args;
456
457 if (clause_ptr)
458 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
459 if (known_vals_ptr)
460 known_vals_ptr->create (0);
461 if (known_contexts_ptr)
462 known_contexts_ptr->create (0);
463
464 if (ipa_node_params_sum
465 && !e->call_stmt_cannot_inline_p
466 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)
467 && (args = IPA_EDGE_REF (e)) != NULL)
468 {
469 class ipa_node_params *caller_parms_info, *callee_pi;
470 class ipa_call_summary *es = ipa_call_summaries->get (e);
471 int i, count = ipa_get_cs_argument_count (args);
472
473 if (e->caller->inlined_to)
474 caller_parms_info = IPA_NODE_REF (e->caller->inlined_to);
475 else
476 caller_parms_info = IPA_NODE_REF (e->caller);
477 callee_pi = IPA_NODE_REF (callee);
478
479 if (count && (info->conds || known_vals_ptr))
480 known_vals.safe_grow_cleared (count);
481 if (count && (info->conds || known_aggs_ptr))
482 known_aggs.safe_grow_cleared (count);
483 if (count && known_contexts_ptr)
484 known_contexts_ptr->safe_grow_cleared (count);
485
486 if (callee_pi)
487 for (i = 0; i < count; i++)
488 {
489 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
490 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
491 ipa_get_type (callee_pi, i));
492
493 if (!cst && e->call_stmt
494 && i < (int)gimple_call_num_args (e->call_stmt))
495 {
496 cst = gimple_call_arg (e->call_stmt, i);
497 if (!is_gimple_min_invariant (cst))
498 cst = NULL;
499 }
500 if (cst)
501 {
502 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
503 if (known_vals.exists ())
504 known_vals[i] = cst;
505 }
506 else if (inline_p && !es->param[i].change_prob)
507 known_vals[i] = error_mark_node;
508
509 if (known_contexts_ptr)
510 (*known_contexts_ptr)[i]
511 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
512 /* TODO: When IPA-CP starts propagating and merging aggregate jump
513 functions, use its knowledge of the caller too, just like the
514 scalar case above. */
515 known_aggs[i] = &jf->agg;
516 }
517 else
518 gcc_assert (callee->thunk.thunk_p);
519 }
520 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
521 && ((clause_ptr && info->conds) || known_vals_ptr))
522 {
523 int i, count = (int)gimple_call_num_args (e->call_stmt);
524
525 if (count && (info->conds || known_vals_ptr))
526 known_vals.safe_grow_cleared (count);
527 for (i = 0; i < count; i++)
528 {
529 tree cst = gimple_call_arg (e->call_stmt, i);
530 if (!is_gimple_min_invariant (cst))
531 cst = NULL;
532 if (cst)
533 known_vals[i] = cst;
534 }
535 }
536
537 evaluate_conditions_for_known_args (callee, inline_p,
538 known_vals, known_aggs, clause_ptr,
539 nonspec_clause_ptr);
540
541 if (known_vals_ptr)
542 *known_vals_ptr = known_vals;
543 else
544 known_vals.release ();
545
546 if (known_aggs_ptr)
547 *known_aggs_ptr = known_aggs;
548 else
549 known_aggs.release ();
550 }
551
552
553 /* Allocate the function summary. */
554
555 static void
556 ipa_fn_summary_alloc (void)
557 {
558 gcc_checking_assert (!ipa_fn_summaries);
559 ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
560 (symtab);
561 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
562 ipa_call_summaries = new ipa_call_summary_t (symtab);
563 }
564
565 ipa_call_summary::~ipa_call_summary ()
566 {
567 if (predicate)
568 edge_predicate_pool.remove (predicate);
569
570 param.release ();
571 }
572
573 ipa_fn_summary::~ipa_fn_summary ()
574 {
575 if (loop_iterations)
576 edge_predicate_pool.remove (loop_iterations);
577 if (loop_stride)
578 edge_predicate_pool.remove (loop_stride);
579 vec_free (conds);
580 vec_free (size_time_table);
581 }
582
583 void
584 ipa_fn_summary_t::remove_callees (cgraph_node *node)
585 {
586 cgraph_edge *e;
587 for (e = node->callees; e; e = e->next_callee)
588 ipa_call_summaries->remove (e);
589 for (e = node->indirect_calls; e; e = e->next_callee)
590 ipa_call_summaries->remove (e);
591 }
592
593 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
594 Additionally care about allocating new memory slot for updated predicate
595 and set it to NULL when it becomes true or false (and thus uninteresting).
596 */
597
598 static void
599 remap_hint_predicate_after_duplication (predicate **p,
600 clause_t possible_truths)
601 {
602 predicate new_predicate;
603
604 if (!*p)
605 return;
606
607 new_predicate = (*p)->remap_after_duplication (possible_truths);
608 /* We do not want to free previous predicate; it is used by node origin. */
609 *p = NULL;
610 set_hint_predicate (p, new_predicate);
611 }
612
613
614 /* Hook that is called by cgraph.c when a node is duplicated. */
615 void
616 ipa_fn_summary_t::duplicate (cgraph_node *src,
617 cgraph_node *dst,
618 ipa_fn_summary *,
619 ipa_fn_summary *info)
620 {
621 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
622 /* TODO: as an optimization, we may avoid copying conditions
623 that are known to be false or true. */
624 info->conds = vec_safe_copy (info->conds);
625
626 /* When there are any replacements in the function body, see if we can figure
627 out that something was optimized out. */
628 if (ipa_node_params_sum && dst->clone.tree_map)
629 {
630 vec<size_time_entry, va_gc> *entry = info->size_time_table;
631 /* Use SRC parm info since it may not be copied yet. */
632 class ipa_node_params *parms_info = IPA_NODE_REF (src);
633 vec<tree> known_vals = vNULL;
634 int count = ipa_get_param_count (parms_info);
635 int i, j;
636 clause_t possible_truths;
637 predicate true_pred = true;
638 size_time_entry *e;
639 int optimized_out_size = 0;
640 bool inlined_to_p = false;
641 struct cgraph_edge *edge, *next;
642
643 info->size_time_table = 0;
644 known_vals.safe_grow_cleared (count);
645 for (i = 0; i < count; i++)
646 {
647 struct ipa_replace_map *r;
648
649 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
650 {
651 if (r->parm_num == i)
652 {
653 known_vals[i] = r->new_tree;
654 break;
655 }
656 }
657 }
658 evaluate_conditions_for_known_args (dst, false,
659 known_vals,
660 vNULL,
661 &possible_truths,
662 /* We are going to specialize,
663 so ignore nonspec truths. */
664 NULL);
665 known_vals.release ();
666
667 info->account_size_time (0, 0, true_pred, true_pred);
668
669 /* Remap size_time vectors.
670 Simplify the predicate by prunning out alternatives that are known
671 to be false.
672 TODO: as on optimization, we can also eliminate conditions known
673 to be true. */
674 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
675 {
676 predicate new_exec_pred;
677 predicate new_nonconst_pred;
678 new_exec_pred = e->exec_predicate.remap_after_duplication
679 (possible_truths);
680 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
681 (possible_truths);
682 if (new_exec_pred == false || new_nonconst_pred == false)
683 optimized_out_size += e->size;
684 else
685 info->account_size_time (e->size, e->time, new_exec_pred,
686 new_nonconst_pred);
687 }
688
689 /* Remap edge predicates with the same simplification as above.
690 Also copy constantness arrays. */
691 for (edge = dst->callees; edge; edge = next)
692 {
693 predicate new_predicate;
694 class ipa_call_summary *es = ipa_call_summaries->get (edge);
695 next = edge->next_callee;
696
697 if (!edge->inline_failed)
698 inlined_to_p = true;
699 if (!es->predicate)
700 continue;
701 new_predicate = es->predicate->remap_after_duplication
702 (possible_truths);
703 if (new_predicate == false && *es->predicate != false)
704 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
705 edge_set_predicate (edge, &new_predicate);
706 }
707
708 /* Remap indirect edge predicates with the same simplificaiton as above.
709 Also copy constantness arrays. */
710 for (edge = dst->indirect_calls; edge; edge = next)
711 {
712 predicate new_predicate;
713 class ipa_call_summary *es = ipa_call_summaries->get (edge);
714 next = edge->next_callee;
715
716 gcc_checking_assert (edge->inline_failed);
717 if (!es->predicate)
718 continue;
719 new_predicate = es->predicate->remap_after_duplication
720 (possible_truths);
721 if (new_predicate == false && *es->predicate != false)
722 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
723 edge_set_predicate (edge, &new_predicate);
724 }
725 remap_hint_predicate_after_duplication (&info->loop_iterations,
726 possible_truths);
727 remap_hint_predicate_after_duplication (&info->loop_stride,
728 possible_truths);
729
730 /* If inliner or someone after inliner will ever start producing
731 non-trivial clones, we will get trouble with lack of information
732 about updating self sizes, because size vectors already contains
733 sizes of the calees. */
734 gcc_assert (!inlined_to_p || !optimized_out_size);
735 }
736 else
737 {
738 info->size_time_table = vec_safe_copy (info->size_time_table);
739 if (info->loop_iterations)
740 {
741 predicate p = *info->loop_iterations;
742 info->loop_iterations = NULL;
743 set_hint_predicate (&info->loop_iterations, p);
744 }
745 if (info->loop_stride)
746 {
747 predicate p = *info->loop_stride;
748 info->loop_stride = NULL;
749 set_hint_predicate (&info->loop_stride, p);
750 }
751 }
752 if (!dst->inlined_to)
753 ipa_update_overall_fn_summary (dst);
754 }
755
756
757 /* Hook that is called by cgraph.c when a node is duplicated. */
758
759 void
760 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
761 struct cgraph_edge *dst,
762 class ipa_call_summary *srcinfo,
763 class ipa_call_summary *info)
764 {
765 new (info) ipa_call_summary (*srcinfo);
766 info->predicate = NULL;
767 edge_set_predicate (dst, srcinfo->predicate);
768 info->param = srcinfo->param.copy ();
769 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
770 {
771 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
772 - eni_size_weights.call_cost);
773 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
774 - eni_time_weights.call_cost);
775 }
776 }
777
778 /* Dump edge summaries associated to NODE and recursively to all clones.
779 Indent by INDENT. */
780
781 static void
782 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
783 class ipa_fn_summary *info)
784 {
785 struct cgraph_edge *edge;
786 for (edge = node->callees; edge; edge = edge->next_callee)
787 {
788 class ipa_call_summary *es = ipa_call_summaries->get (edge);
789 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
790 int i;
791
792 fprintf (f,
793 "%*s%s/%i %s\n%*s freq:%4.2f",
794 indent, "", callee->name (), callee->order,
795 !edge->inline_failed
796 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
797 indent, "", edge->sreal_frequency ().to_double ());
798
799 if (es)
800 fprintf (f, " loop depth:%2i size:%2i time: %2i",
801 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
802
803 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
804 ipa_size_summary *ss = ipa_size_summaries->get (callee);
805 if (s != NULL)
806 fprintf (f, " callee size:%2i stack:%2i",
807 (int) (ss->size / ipa_fn_summary::size_scale),
808 (int) s->estimated_stack_size);
809
810 if (es && es->predicate)
811 {
812 fprintf (f, " predicate: ");
813 es->predicate->dump (f, info->conds);
814 }
815 else
816 fprintf (f, "\n");
817 if (es && es->param.exists ())
818 for (i = 0; i < (int) es->param.length (); i++)
819 {
820 int prob = es->param[i].change_prob;
821
822 if (!prob)
823 fprintf (f, "%*s op%i is compile time invariant\n",
824 indent + 2, "", i);
825 else if (prob != REG_BR_PROB_BASE)
826 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
827 prob * 100.0 / REG_BR_PROB_BASE);
828 }
829 if (!edge->inline_failed)
830 {
831 ipa_size_summary *ss = ipa_size_summaries->get (callee);
832 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
833 indent + 2, "",
834 (int) ipa_get_stack_frame_offset (callee),
835 (int) ss->estimated_self_stack_size);
836 dump_ipa_call_summary (f, indent + 2, callee, info);
837 }
838 }
839 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
840 {
841 class ipa_call_summary *es = ipa_call_summaries->get (edge);
842 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
843 " time: %2i",
844 indent, "",
845 es->loop_depth,
846 edge->sreal_frequency ().to_double (), es->call_stmt_size,
847 es->call_stmt_time);
848 if (es->predicate)
849 {
850 fprintf (f, "predicate: ");
851 es->predicate->dump (f, info->conds);
852 }
853 else
854 fprintf (f, "\n");
855 }
856 }
857
858
859 void
860 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
861 {
862 if (node->definition)
863 {
864 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
865 class ipa_size_summary *ss = ipa_size_summaries->get (node);
866 if (s != NULL)
867 {
868 size_time_entry *e;
869 int i;
870 fprintf (f, "IPA function summary for %s", node->dump_name ());
871 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
872 fprintf (f, " always_inline");
873 if (s->inlinable)
874 fprintf (f, " inlinable");
875 if (s->fp_expressions)
876 fprintf (f, " fp_expression");
877 fprintf (f, "\n global time: %f\n", s->time.to_double ());
878 fprintf (f, " self size: %i\n", ss->self_size);
879 fprintf (f, " global size: %i\n", ss->size);
880 fprintf (f, " min size: %i\n", s->min_size);
881 fprintf (f, " self stack: %i\n",
882 (int) ss->estimated_self_stack_size);
883 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
884 if (s->growth)
885 fprintf (f, " estimated growth:%i\n", (int) s->growth);
886 if (s->scc_no)
887 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
888 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
889 {
890 fprintf (f, " size:%f, time:%f",
891 (double) e->size / ipa_fn_summary::size_scale,
892 e->time.to_double ());
893 if (e->exec_predicate != true)
894 {
895 fprintf (f, ", executed if:");
896 e->exec_predicate.dump (f, s->conds, 0);
897 }
898 if (e->exec_predicate != e->nonconst_predicate)
899 {
900 fprintf (f, ", nonconst if:");
901 e->nonconst_predicate.dump (f, s->conds, 0);
902 }
903 fprintf (f, "\n");
904 }
905 if (s->loop_iterations)
906 {
907 fprintf (f, " loop iterations:");
908 s->loop_iterations->dump (f, s->conds);
909 }
910 if (s->loop_stride)
911 {
912 fprintf (f, " loop stride:");
913 s->loop_stride->dump (f, s->conds);
914 }
915 fprintf (f, " calls:\n");
916 dump_ipa_call_summary (f, 4, node, s);
917 fprintf (f, "\n");
918 }
919 else
920 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
921 }
922 }
923
924 DEBUG_FUNCTION void
925 ipa_debug_fn_summary (struct cgraph_node *node)
926 {
927 ipa_dump_fn_summary (stderr, node);
928 }
929
930 void
931 ipa_dump_fn_summaries (FILE *f)
932 {
933 struct cgraph_node *node;
934
935 FOR_EACH_DEFINED_FUNCTION (node)
936 if (!node->inlined_to)
937 ipa_dump_fn_summary (f, node);
938 }
939
940 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
941 boolean variable pointed to by DATA. */
942
943 static bool
944 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
945 void *data)
946 {
947 bool *b = (bool *) data;
948 *b = true;
949 return true;
950 }
951
952 /* If OP refers to value of function parameter, return the corresponding
953 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
954 PARM_DECL) will be stored to *SIZE_P in that case too. */
955
956 static tree
957 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
958 poly_int64 *size_p)
959 {
960 /* SSA_NAME referring to parm default def? */
961 if (TREE_CODE (op) == SSA_NAME
962 && SSA_NAME_IS_DEFAULT_DEF (op)
963 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
964 {
965 if (size_p)
966 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
967 return SSA_NAME_VAR (op);
968 }
969 /* Non-SSA parm reference? */
970 if (TREE_CODE (op) == PARM_DECL)
971 {
972 bool modified = false;
973
974 ao_ref refd;
975 ao_ref_init (&refd, op);
976 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
977 mark_modified, &modified, NULL, NULL,
978 fbi->aa_walk_budget + 1);
979 if (walked < 0)
980 {
981 fbi->aa_walk_budget = 0;
982 return NULL_TREE;
983 }
984 if (!modified)
985 {
986 if (size_p)
987 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
988 return op;
989 }
990 }
991 return NULL_TREE;
992 }
993
994 /* If OP refers to value of function parameter, return the corresponding
995 parameter. Also traverse chains of SSA register assignments. If non-NULL,
996 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
997 stored to *SIZE_P in that case too. */
998
999 static tree
1000 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1001 poly_int64 *size_p)
1002 {
1003 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1004 if (res)
1005 return res;
1006
1007 if (TREE_CODE (op) == SSA_NAME
1008 && !SSA_NAME_IS_DEFAULT_DEF (op)
1009 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1010 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1011 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1012 size_p);
1013 return NULL_TREE;
1014 }
1015
1016 /* If OP refers to a value of a function parameter or value loaded from an
1017 aggregate passed to a parameter (either by value or reference), return TRUE
1018 and store the number of the parameter to *INDEX_P, the access size into
1019 *SIZE_P, and information whether and how it has been loaded from an
1020 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1021 statement in which OP is used or loaded. */
1022
1023 static bool
1024 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1025 gimple *stmt, tree op, int *index_p,
1026 poly_int64 *size_p,
1027 struct agg_position_info *aggpos)
1028 {
1029 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1030
1031 gcc_checking_assert (aggpos);
1032 if (res)
1033 {
1034 *index_p = ipa_get_param_decl_index (fbi->info, res);
1035 if (*index_p < 0)
1036 return false;
1037 aggpos->agg_contents = false;
1038 aggpos->by_ref = false;
1039 return true;
1040 }
1041
1042 if (TREE_CODE (op) == SSA_NAME)
1043 {
1044 if (SSA_NAME_IS_DEFAULT_DEF (op)
1045 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1046 return false;
1047 stmt = SSA_NAME_DEF_STMT (op);
1048 op = gimple_assign_rhs1 (stmt);
1049 if (!REFERENCE_CLASS_P (op))
1050 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1051 aggpos);
1052 }
1053
1054 aggpos->agg_contents = true;
1055 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1056 stmt, op, index_p, &aggpos->offset,
1057 size_p, &aggpos->by_ref);
1058 }
1059
1060 /* See if statement might disappear after inlining.
1061 0 - means not eliminated
1062 1 - half of statements goes away
1063 2 - for sure it is eliminated.
1064 We are not terribly sophisticated, basically looking for simple abstraction
1065 penalty wrappers. */
1066
1067 static int
1068 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1069 {
1070 enum gimple_code code = gimple_code (stmt);
1071 enum tree_code rhs_code;
1072
1073 if (!optimize)
1074 return 0;
1075
1076 switch (code)
1077 {
1078 case GIMPLE_RETURN:
1079 return 2;
1080 case GIMPLE_ASSIGN:
1081 if (gimple_num_ops (stmt) != 2)
1082 return 0;
1083
1084 rhs_code = gimple_assign_rhs_code (stmt);
1085
1086 /* Casts of parameters, loads from parameters passed by reference
1087 and stores to return value or parameters are often free after
1088 inlining dua to SRA and further combining.
1089 Assume that half of statements goes away. */
1090 if (CONVERT_EXPR_CODE_P (rhs_code)
1091 || rhs_code == VIEW_CONVERT_EXPR
1092 || rhs_code == ADDR_EXPR
1093 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1094 {
1095 tree rhs = gimple_assign_rhs1 (stmt);
1096 tree lhs = gimple_assign_lhs (stmt);
1097 tree inner_rhs = get_base_address (rhs);
1098 tree inner_lhs = get_base_address (lhs);
1099 bool rhs_free = false;
1100 bool lhs_free = false;
1101
1102 if (!inner_rhs)
1103 inner_rhs = rhs;
1104 if (!inner_lhs)
1105 inner_lhs = lhs;
1106
1107 /* Reads of parameter are expected to be free. */
1108 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1109 rhs_free = true;
1110 /* Match expressions of form &this->field. Those will most likely
1111 combine with something upstream after inlining. */
1112 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1113 {
1114 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1115 if (TREE_CODE (op) == PARM_DECL)
1116 rhs_free = true;
1117 else if (TREE_CODE (op) == MEM_REF
1118 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1119 NULL))
1120 rhs_free = true;
1121 }
1122
1123 /* When parameter is not SSA register because its address is taken
1124 and it is just copied into one, the statement will be completely
1125 free after inlining (we will copy propagate backward). */
1126 if (rhs_free && is_gimple_reg (lhs))
1127 return 2;
1128
1129 /* Reads of parameters passed by reference
1130 expected to be free (i.e. optimized out after inlining). */
1131 if (TREE_CODE (inner_rhs) == MEM_REF
1132 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1133 rhs_free = true;
1134
1135 /* Copying parameter passed by reference into gimple register is
1136 probably also going to copy propagate, but we can't be quite
1137 sure. */
1138 if (rhs_free && is_gimple_reg (lhs))
1139 lhs_free = true;
1140
1141 /* Writes to parameters, parameters passed by value and return value
1142 (either dirrectly or passed via invisible reference) are free.
1143
1144 TODO: We ought to handle testcase like
1145 struct a {int a,b;};
1146 struct a
1147 retrurnsturct (void)
1148 {
1149 struct a a ={1,2};
1150 return a;
1151 }
1152
1153 This translate into:
1154
1155 retrurnsturct ()
1156 {
1157 int a$b;
1158 int a$a;
1159 struct a a;
1160 struct a D.2739;
1161
1162 <bb 2>:
1163 D.2739.a = 1;
1164 D.2739.b = 2;
1165 return D.2739;
1166
1167 }
1168 For that we either need to copy ipa-split logic detecting writes
1169 to return value. */
1170 if (TREE_CODE (inner_lhs) == PARM_DECL
1171 || TREE_CODE (inner_lhs) == RESULT_DECL
1172 || (TREE_CODE (inner_lhs) == MEM_REF
1173 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1174 NULL)
1175 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1176 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1177 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1178 (inner_lhs,
1179 0))) == RESULT_DECL))))
1180 lhs_free = true;
1181 if (lhs_free
1182 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1183 rhs_free = true;
1184 if (lhs_free && rhs_free)
1185 return 1;
1186 }
1187 return 0;
1188 default:
1189 return 0;
1190 }
1191 }
1192
1193 /* Analyze EXPR if it represents a series of simple operations performed on
1194 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1195 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1196 Type of the parameter or load from an aggregate via the parameter is
1197 stored in *TYPE_P. Operations on the parameter are recorded to
1198 PARAM_OPS_P if it is not NULL. */
1199
1200 static bool
1201 decompose_param_expr (struct ipa_func_body_info *fbi,
1202 gimple *stmt, tree expr,
1203 int *index_p, tree *type_p,
1204 struct agg_position_info *aggpos,
1205 expr_eval_ops *param_ops_p = NULL)
1206 {
1207 int op_limit = param_ipa_max_param_expr_ops;
1208 int op_count = 0;
1209
1210 if (param_ops_p)
1211 *param_ops_p = NULL;
1212
1213 while (true)
1214 {
1215 expr_eval_op eval_op;
1216 unsigned rhs_count;
1217 unsigned cst_count = 0;
1218
1219 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1220 aggpos))
1221 {
1222 tree type = TREE_TYPE (expr);
1223
1224 if (aggpos->agg_contents)
1225 {
1226 /* Stop if containing bit-field. */
1227 if (TREE_CODE (expr) == BIT_FIELD_REF
1228 || contains_bitfld_component_ref_p (expr))
1229 break;
1230 }
1231
1232 *type_p = type;
1233 return true;
1234 }
1235
1236 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1237 break;
1238
1239 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1240 break;
1241
1242 switch (gimple_assign_rhs_class (stmt))
1243 {
1244 case GIMPLE_SINGLE_RHS:
1245 expr = gimple_assign_rhs1 (stmt);
1246 continue;
1247
1248 case GIMPLE_UNARY_RHS:
1249 rhs_count = 1;
1250 break;
1251
1252 case GIMPLE_BINARY_RHS:
1253 rhs_count = 2;
1254 break;
1255
1256 case GIMPLE_TERNARY_RHS:
1257 rhs_count = 3;
1258 break;
1259
1260 default:
1261 goto fail;
1262 }
1263
1264 /* Stop if expression is too complex. */
1265 if (op_count++ == op_limit)
1266 break;
1267
1268 if (param_ops_p)
1269 {
1270 eval_op.code = gimple_assign_rhs_code (stmt);
1271 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1272 eval_op.val[0] = NULL_TREE;
1273 eval_op.val[1] = NULL_TREE;
1274 }
1275
1276 expr = NULL_TREE;
1277 for (unsigned i = 0; i < rhs_count; i++)
1278 {
1279 tree op = gimple_op (stmt, i + 1);
1280
1281 gcc_assert (op && !TYPE_P (op));
1282 if (is_gimple_ip_invariant (op))
1283 {
1284 if (++cst_count == rhs_count)
1285 goto fail;
1286
1287 eval_op.val[cst_count - 1] = op;
1288 }
1289 else if (!expr)
1290 {
1291 /* Found a non-constant operand, and record its index in rhs
1292 operands. */
1293 eval_op.index = i;
1294 expr = op;
1295 }
1296 else
1297 {
1298 /* Found more than one non-constant operands. */
1299 goto fail;
1300 }
1301 }
1302
1303 if (param_ops_p)
1304 vec_safe_insert (*param_ops_p, 0, eval_op);
1305 }
1306
1307 /* Failed to decompose, free resource and return. */
1308 fail:
1309 if (param_ops_p)
1310 vec_free (*param_ops_p);
1311
1312 return false;
1313 }
1314
1315 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1316 predicates to the CFG edges. */
1317
1318 static void
1319 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1320 class ipa_fn_summary *summary,
1321 class ipa_node_params *params_summary,
1322 basic_block bb)
1323 {
1324 gimple *last;
1325 tree op, op2;
1326 int index;
1327 struct agg_position_info aggpos;
1328 enum tree_code code, inverted_code;
1329 edge e;
1330 edge_iterator ei;
1331 gimple *set_stmt;
1332 tree param_type;
1333 expr_eval_ops param_ops;
1334
1335 last = last_stmt (bb);
1336 if (!last || gimple_code (last) != GIMPLE_COND)
1337 return;
1338 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1339 return;
1340 op = gimple_cond_lhs (last);
1341
1342 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1343 &param_ops))
1344 {
1345 code = gimple_cond_code (last);
1346 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1347
1348 FOR_EACH_EDGE (e, ei, bb->succs)
1349 {
1350 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1351 ? code : inverted_code);
1352 /* invert_tree_comparison will return ERROR_MARK on FP
1353 comparsions that are not EQ/NE instead of returning proper
1354 unordered one. Be sure it is not confused with NON_CONSTANT.
1355
1356 And if the edge's target is the final block of diamond CFG graph
1357 of this conditional statement, we do not need to compute
1358 predicate for the edge because the final block's predicate must
1359 be at least as that of the first block of the statement. */
1360 if (this_code != ERROR_MARK
1361 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1362 {
1363 predicate p
1364 = add_condition (summary, params_summary, index,
1365 param_type, &aggpos,
1366 this_code, gimple_cond_rhs (last), param_ops);
1367 e->aux = edge_predicate_pool.allocate ();
1368 *(predicate *) e->aux = p;
1369 }
1370 }
1371 vec_free (param_ops);
1372 }
1373
1374 if (TREE_CODE (op) != SSA_NAME)
1375 return;
1376 /* Special case
1377 if (builtin_constant_p (op))
1378 constant_code
1379 else
1380 nonconstant_code.
1381 Here we can predicate nonconstant_code. We can't
1382 really handle constant_code since we have no predicate
1383 for this and also the constant code is not known to be
1384 optimized away when inliner doen't see operand is constant.
1385 Other optimizers might think otherwise. */
1386 if (gimple_cond_code (last) != NE_EXPR
1387 || !integer_zerop (gimple_cond_rhs (last)))
1388 return;
1389 set_stmt = SSA_NAME_DEF_STMT (op);
1390 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1391 || gimple_call_num_args (set_stmt) != 1)
1392 return;
1393 op2 = gimple_call_arg (set_stmt, 0);
1394 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1395 return;
1396 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1397 {
1398 predicate p = add_condition (summary, params_summary, index,
1399 param_type, &aggpos,
1400 predicate::is_not_constant, NULL_TREE);
1401 e->aux = edge_predicate_pool.allocate ();
1402 *(predicate *) e->aux = p;
1403 }
1404 }
1405
1406
1407 /* If BB ends by a switch we can turn into predicates, attach corresponding
1408 predicates to the CFG edges. */
1409
1410 static void
1411 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1412 class ipa_fn_summary *summary,
1413 class ipa_node_params *params_summary,
1414 basic_block bb)
1415 {
1416 gimple *lastg;
1417 tree op;
1418 int index;
1419 struct agg_position_info aggpos;
1420 edge e;
1421 edge_iterator ei;
1422 size_t n;
1423 size_t case_idx;
1424 tree param_type;
1425 expr_eval_ops param_ops;
1426
1427 lastg = last_stmt (bb);
1428 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1429 return;
1430 gswitch *last = as_a <gswitch *> (lastg);
1431 op = gimple_switch_index (last);
1432 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1433 &param_ops))
1434 return;
1435
1436 auto_vec<std::pair<tree, tree> > ranges;
1437 tree type = TREE_TYPE (op);
1438 int bound_limit = param_ipa_max_switch_predicate_bounds;
1439 int bound_count = 0;
1440 wide_int vr_wmin, vr_wmax;
1441 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
1442
1443 FOR_EACH_EDGE (e, ei, bb->succs)
1444 {
1445 e->aux = edge_predicate_pool.allocate ();
1446 *(predicate *) e->aux = false;
1447 }
1448
1449 e = gimple_switch_edge (cfun, last, 0);
1450 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1451 default case if its target basic block is in convergence point of all
1452 switch cases, which can be determined by checking whether it
1453 post-dominates the switch statement. */
1454 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1455 bound_count = INT_MAX;
1456
1457 n = gimple_switch_num_labels (last);
1458 for (case_idx = 1; case_idx < n; ++case_idx)
1459 {
1460 tree cl = gimple_switch_label (last, case_idx);
1461 tree min = CASE_LOW (cl);
1462 tree max = CASE_HIGH (cl);
1463 predicate p;
1464
1465 e = gimple_switch_edge (cfun, last, case_idx);
1466
1467 /* The case value might not have same type as switch expression,
1468 extend the value based on the expression type. */
1469 if (TREE_TYPE (min) != type)
1470 min = wide_int_to_tree (type, wi::to_wide (min));
1471
1472 if (!max)
1473 max = min;
1474 else if (TREE_TYPE (max) != type)
1475 max = wide_int_to_tree (type, wi::to_wide (max));
1476
1477 /* The case's target basic block is in convergence point of all switch
1478 cases, its predicate should be at least as that of the switch
1479 statement. */
1480 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1481 p = true;
1482 else if (min == max)
1483 p = add_condition (summary, params_summary, index, param_type,
1484 &aggpos, EQ_EXPR, min, param_ops);
1485 else
1486 {
1487 predicate p1, p2;
1488 p1 = add_condition (summary, params_summary, index, param_type,
1489 &aggpos, GE_EXPR, min, param_ops);
1490 p2 = add_condition (summary, params_summary,index, param_type,
1491 &aggpos, LE_EXPR, max, param_ops);
1492 p = p1 & p2;
1493 }
1494 *(class predicate *) e->aux
1495 = p.or_with (summary->conds, *(class predicate *) e->aux);
1496
1497 /* If there are too many disjoint case ranges, predicate for default
1498 case might become too complicated. So add a limit here. */
1499 if (bound_count > bound_limit)
1500 continue;
1501
1502 bool new_range = true;
1503
1504 if (!ranges.is_empty ())
1505 {
1506 wide_int curr_wmin = wi::to_wide (min);
1507 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1508
1509 /* Merge case ranges if they are continuous. */
1510 if (curr_wmin == last_wmax + 1)
1511 new_range = false;
1512 else if (vr_type == VR_ANTI_RANGE)
1513 {
1514 /* If two disjoint case ranges can be connected by anti-range
1515 of switch index, combine them to one range. */
1516 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1517 vr_type = VR_UNDEFINED;
1518 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1519 new_range = false;
1520 }
1521 }
1522
1523 /* Create/extend a case range. And we count endpoints of range set,
1524 this number nearly equals to number of conditions that we will create
1525 for predicate of default case. */
1526 if (new_range)
1527 {
1528 bound_count += (min == max) ? 1 : 2;
1529 ranges.safe_push (std::make_pair (min, max));
1530 }
1531 else
1532 {
1533 bound_count += (ranges.last ().first == ranges.last ().second);
1534 ranges.last ().second = max;
1535 }
1536 }
1537
1538 e = gimple_switch_edge (cfun, last, 0);
1539 if (bound_count > bound_limit)
1540 {
1541 *(class predicate *) e->aux = true;
1542 vec_free (param_ops);
1543 return;
1544 }
1545
1546 predicate p_seg = true;
1547 predicate p_all = false;
1548
1549 if (vr_type != VR_RANGE)
1550 {
1551 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1552 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1553 }
1554
1555 /* Construct predicate to represent default range set that is negation of
1556 all case ranges. Case range is classified as containing single/non-single
1557 values. Suppose a piece of case ranges in the following.
1558
1559 [D1...D2] [S1] ... [Sn] [D3...D4]
1560
1561 To represent default case's range sets between two non-single value
1562 case ranges (From D2 to D3), we construct predicate as:
1563
1564 D2 < x < D3 && x != S1 && ... && x != Sn
1565 */
1566 for (size_t i = 0; i < ranges.length (); i++)
1567 {
1568 tree min = ranges[i].first;
1569 tree max = ranges[i].second;
1570
1571 if (min == max)
1572 p_seg &= add_condition (summary, params_summary, index,
1573 param_type, &aggpos, NE_EXPR,
1574 min, param_ops);
1575 else
1576 {
1577 /* Do not create sub-predicate for range that is beyond low bound
1578 of switch index. */
1579 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1580 {
1581 p_seg &= add_condition (summary, params_summary, index,
1582 param_type, &aggpos,
1583 LT_EXPR, min, param_ops);
1584 p_all = p_all.or_with (summary->conds, p_seg);
1585 }
1586
1587 /* Do not create sub-predicate for range that is beyond up bound
1588 of switch index. */
1589 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1590 {
1591 p_seg = false;
1592 break;
1593 }
1594
1595 p_seg = add_condition (summary, params_summary, index,
1596 param_type, &aggpos, GT_EXPR,
1597 max, param_ops);
1598 }
1599 }
1600
1601 p_all = p_all.or_with (summary->conds, p_seg);
1602 *(class predicate *) e->aux
1603 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
1604
1605 vec_free (param_ops);
1606 }
1607
1608
1609 /* For each BB in NODE attach to its AUX pointer predicate under
1610 which it is executable. */
1611
1612 static void
1613 compute_bb_predicates (struct ipa_func_body_info *fbi,
1614 struct cgraph_node *node,
1615 class ipa_fn_summary *summary,
1616 class ipa_node_params *params_summary)
1617 {
1618 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1619 bool done = false;
1620 basic_block bb;
1621
1622 FOR_EACH_BB_FN (bb, my_function)
1623 {
1624 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1625 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1626 }
1627
1628 /* Entry block is always executable. */
1629 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1630 = edge_predicate_pool.allocate ();
1631 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1632
1633 /* A simple dataflow propagation of predicates forward in the CFG.
1634 TODO: work in reverse postorder. */
1635 while (!done)
1636 {
1637 done = true;
1638 FOR_EACH_BB_FN (bb, my_function)
1639 {
1640 predicate p = false;
1641 edge e;
1642 edge_iterator ei;
1643 FOR_EACH_EDGE (e, ei, bb->preds)
1644 {
1645 if (e->src->aux)
1646 {
1647 predicate this_bb_predicate
1648 = *(predicate *) e->src->aux;
1649 if (e->aux)
1650 this_bb_predicate &= (*(class predicate *) e->aux);
1651 p = p.or_with (summary->conds, this_bb_predicate);
1652 if (p == true)
1653 break;
1654 }
1655 }
1656 if (p != false)
1657 {
1658 basic_block pdom_bb;
1659
1660 if (!bb->aux)
1661 {
1662 done = false;
1663 bb->aux = edge_predicate_pool.allocate ();
1664 *((predicate *) bb->aux) = p;
1665 }
1666 else if (p != *(predicate *) bb->aux)
1667 {
1668 /* This OR operation is needed to ensure monotonous data flow
1669 in the case we hit the limit on number of clauses and the
1670 and/or operations above give approximate answers. */
1671 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1672 if (p != *(predicate *) bb->aux)
1673 {
1674 done = false;
1675 *((predicate *) bb->aux) = p;
1676 }
1677 }
1678
1679 /* For switch/if statement, we can OR-combine predicates of all
1680 its cases/branches to get predicate for basic block in their
1681 convergence point, but sometimes this will generate very
1682 complicated predicate. Actually, we can get simplified
1683 predicate in another way by using the fact that predicate
1684 for a basic block must also hold true for its post dominators.
1685 To be specific, basic block in convergence point of
1686 conditional statement should include predicate of the
1687 statement. */
1688 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1689 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1690 ;
1691 else if (!pdom_bb->aux)
1692 {
1693 done = false;
1694 pdom_bb->aux = edge_predicate_pool.allocate ();
1695 *((predicate *) pdom_bb->aux) = p;
1696 }
1697 else if (p != *(predicate *) pdom_bb->aux)
1698 {
1699 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1700 if (p != *(predicate *) pdom_bb->aux)
1701 {
1702 done = false;
1703 *((predicate *) pdom_bb->aux) = p;
1704 }
1705 }
1706 }
1707 }
1708 }
1709 }
1710
1711
1712 /* Return predicate specifying when the STMT might have result that is not
1713 a compile time constant. */
1714
1715 static predicate
1716 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1717 class ipa_fn_summary *summary,
1718 class ipa_node_params *params_summary,
1719 tree expr,
1720 vec<predicate> nonconstant_names)
1721 {
1722 tree parm;
1723 int index;
1724
1725 while (UNARY_CLASS_P (expr))
1726 expr = TREE_OPERAND (expr, 0);
1727
1728 parm = unmodified_parm (fbi, NULL, expr, NULL);
1729 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1730 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
1731 predicate::changed, NULL_TREE);
1732 if (is_gimple_min_invariant (expr))
1733 return false;
1734 if (TREE_CODE (expr) == SSA_NAME)
1735 return nonconstant_names[SSA_NAME_VERSION (expr)];
1736 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1737 {
1738 predicate p1
1739 = will_be_nonconstant_expr_predicate (fbi, summary,
1740 params_summary,
1741 TREE_OPERAND (expr, 0),
1742 nonconstant_names);
1743 if (p1 == true)
1744 return p1;
1745
1746 predicate p2
1747 = will_be_nonconstant_expr_predicate (fbi, summary,
1748 params_summary,
1749 TREE_OPERAND (expr, 1),
1750 nonconstant_names);
1751 return p1.or_with (summary->conds, p2);
1752 }
1753 else if (TREE_CODE (expr) == COND_EXPR)
1754 {
1755 predicate p1
1756 = will_be_nonconstant_expr_predicate (fbi, summary,
1757 params_summary,
1758 TREE_OPERAND (expr, 0),
1759 nonconstant_names);
1760 if (p1 == true)
1761 return p1;
1762
1763 predicate p2
1764 = will_be_nonconstant_expr_predicate (fbi, summary,
1765 params_summary,
1766 TREE_OPERAND (expr, 1),
1767 nonconstant_names);
1768 if (p2 == true)
1769 return p2;
1770 p1 = p1.or_with (summary->conds, p2);
1771 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1772 params_summary,
1773 TREE_OPERAND (expr, 2),
1774 nonconstant_names);
1775 return p2.or_with (summary->conds, p1);
1776 }
1777 else if (TREE_CODE (expr) == CALL_EXPR)
1778 return true;
1779 else
1780 {
1781 debug_tree (expr);
1782 gcc_unreachable ();
1783 }
1784 return false;
1785 }
1786
1787
1788 /* Return predicate specifying when the STMT might have result that is not
1789 a compile time constant. */
1790
1791 static predicate
1792 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1793 class ipa_fn_summary *summary,
1794 class ipa_node_params *params_summary,
1795 gimple *stmt,
1796 vec<predicate> nonconstant_names)
1797 {
1798 predicate p = true;
1799 ssa_op_iter iter;
1800 tree use;
1801 tree param_type = NULL_TREE;
1802 predicate op_non_const;
1803 bool is_load;
1804 int base_index;
1805 struct agg_position_info aggpos;
1806
1807 /* What statments might be optimized away
1808 when their arguments are constant. */
1809 if (gimple_code (stmt) != GIMPLE_ASSIGN
1810 && gimple_code (stmt) != GIMPLE_COND
1811 && gimple_code (stmt) != GIMPLE_SWITCH
1812 && (gimple_code (stmt) != GIMPLE_CALL
1813 || !(gimple_call_flags (stmt) & ECF_CONST)))
1814 return p;
1815
1816 /* Stores will stay anyway. */
1817 if (gimple_store_p (stmt))
1818 return p;
1819
1820 is_load = gimple_assign_load_p (stmt);
1821
1822 /* Loads can be optimized when the value is known. */
1823 if (is_load)
1824 {
1825 tree op = gimple_assign_rhs1 (stmt);
1826 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
1827 &aggpos))
1828 return p;
1829 }
1830 else
1831 base_index = -1;
1832
1833 /* See if we understand all operands before we start
1834 adding conditionals. */
1835 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1836 {
1837 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1838 /* For arguments we can build a condition. */
1839 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1840 continue;
1841 if (TREE_CODE (use) != SSA_NAME)
1842 return p;
1843 /* If we know when operand is constant,
1844 we still can say something useful. */
1845 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1846 continue;
1847 return p;
1848 }
1849
1850 if (is_load)
1851 op_non_const =
1852 add_condition (summary, params_summary,
1853 base_index, param_type, &aggpos,
1854 predicate::changed, NULL_TREE);
1855 else
1856 op_non_const = false;
1857 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1858 {
1859 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1860 int index;
1861
1862 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1863 {
1864 if (index != base_index)
1865 p = add_condition (summary, params_summary, index,
1866 TREE_TYPE (parm), NULL,
1867 predicate::changed, NULL_TREE);
1868 else
1869 continue;
1870 }
1871 else
1872 p = nonconstant_names[SSA_NAME_VERSION (use)];
1873 op_non_const = p.or_with (summary->conds, op_non_const);
1874 }
1875 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1876 && gimple_op (stmt, 0)
1877 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1878 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1879 = op_non_const;
1880 return op_non_const;
1881 }
1882
1883 struct record_modified_bb_info
1884 {
1885 tree op;
1886 bitmap bb_set;
1887 gimple *stmt;
1888 };
1889
1890 /* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1891 probability how often it changes between USE_BB.
1892 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1893 is in different loop nest, we can do better.
1894 This is all just estimate. In theory we look for minimal cut separating
1895 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1896 anyway. */
1897
1898 static basic_block
1899 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1900 {
1901 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1902 if (l && l->header->count < init_bb->count)
1903 return l->header;
1904 return init_bb;
1905 }
1906
1907 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1908 set except for info->stmt. */
1909
1910 static bool
1911 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1912 {
1913 struct record_modified_bb_info *info =
1914 (struct record_modified_bb_info *) data;
1915 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1916 return false;
1917 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
1918 return false;
1919 bitmap_set_bit (info->bb_set,
1920 SSA_NAME_IS_DEFAULT_DEF (vdef)
1921 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1922 : get_minimal_bb
1923 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1924 gimple_bb (info->stmt))->index);
1925 if (dump_file)
1926 {
1927 fprintf (dump_file, " Param ");
1928 print_generic_expr (dump_file, info->op, TDF_SLIM);
1929 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
1930 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
1931 get_minimal_bb
1932 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1933 gimple_bb (info->stmt))->index);
1934 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
1935 }
1936 return false;
1937 }
1938
1939 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1940 will change since last invocation of STMT.
1941
1942 Value 0 is reserved for compile time invariants.
1943 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1944 ought to be REG_BR_PROB_BASE / estimated_iters. */
1945
1946 static int
1947 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
1948 {
1949 tree op = gimple_call_arg (stmt, i);
1950 basic_block bb = gimple_bb (stmt);
1951
1952 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1953 op = TREE_OPERAND (op, 0);
1954
1955 tree base = get_base_address (op);
1956
1957 /* Global invariants never change. */
1958 if (is_gimple_min_invariant (base))
1959 return 0;
1960
1961 /* We would have to do non-trivial analysis to really work out what
1962 is the probability of value to change (i.e. when init statement
1963 is in a sibling loop of the call).
1964
1965 We do an conservative estimate: when call is executed N times more often
1966 than the statement defining value, we take the frequency 1/N. */
1967 if (TREE_CODE (base) == SSA_NAME)
1968 {
1969 profile_count init_count;
1970
1971 if (!bb->count.nonzero_p ())
1972 return REG_BR_PROB_BASE;
1973
1974 if (SSA_NAME_IS_DEFAULT_DEF (base))
1975 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1976 else
1977 init_count = get_minimal_bb
1978 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1979 gimple_bb (stmt))->count;
1980
1981 if (init_count < bb->count)
1982 return MAX ((init_count.to_sreal_scale (bb->count)
1983 * REG_BR_PROB_BASE).to_int (), 1);
1984 return REG_BR_PROB_BASE;
1985 }
1986 else
1987 {
1988 ao_ref refd;
1989 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1990 struct record_modified_bb_info info;
1991 tree init = ctor_for_folding (base);
1992
1993 if (init != error_mark_node)
1994 return 0;
1995 if (!bb->count.nonzero_p ())
1996 return REG_BR_PROB_BASE;
1997 if (dump_file)
1998 {
1999 fprintf (dump_file, " Analyzing param change probability of ");
2000 print_generic_expr (dump_file, op, TDF_SLIM);
2001 fprintf (dump_file, "\n");
2002 }
2003 ao_ref_init (&refd, op);
2004 info.op = op;
2005 info.stmt = stmt;
2006 info.bb_set = BITMAP_ALLOC (NULL);
2007 int walked
2008 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2009 NULL, NULL, fbi->aa_walk_budget);
2010 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2011 {
2012 if (dump_file)
2013 {
2014 if (walked < 0)
2015 fprintf (dump_file, " Ran out of AA walking budget.\n");
2016 else
2017 fprintf (dump_file, " Set in same BB as used.\n");
2018 }
2019 BITMAP_FREE (info.bb_set);
2020 return REG_BR_PROB_BASE;
2021 }
2022
2023 bitmap_iterator bi;
2024 unsigned index;
2025 /* Lookup the most frequent update of the value and believe that
2026 it dominates all the other; precise analysis here is difficult. */
2027 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2028 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2029 if (dump_file)
2030 {
2031 fprintf (dump_file, " Set with count ");
2032 max.dump (dump_file);
2033 fprintf (dump_file, " and used with count ");
2034 bb->count.dump (dump_file);
2035 fprintf (dump_file, " freq %f\n",
2036 max.to_sreal_scale (bb->count).to_double ());
2037 }
2038
2039 BITMAP_FREE (info.bb_set);
2040 if (max < bb->count)
2041 return MAX ((max.to_sreal_scale (bb->count)
2042 * REG_BR_PROB_BASE).to_int (), 1);
2043 return REG_BR_PROB_BASE;
2044 }
2045 }
2046
2047 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2048 sub-graph and if the predicate the condition depends on is known. If so,
2049 return true and store the pointer the predicate in *P. */
2050
2051 static bool
2052 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2053 ipa_fn_summary *summary,
2054 class ipa_node_params *params_summary,
2055 basic_block bb,
2056 predicate *p,
2057 vec<predicate> nonconstant_names)
2058 {
2059 edge e;
2060 edge_iterator ei;
2061 basic_block first_bb = NULL;
2062 gimple *stmt;
2063
2064 if (single_pred_p (bb))
2065 {
2066 *p = false;
2067 return true;
2068 }
2069
2070 FOR_EACH_EDGE (e, ei, bb->preds)
2071 {
2072 if (single_succ_p (e->src))
2073 {
2074 if (!single_pred_p (e->src))
2075 return false;
2076 if (!first_bb)
2077 first_bb = single_pred (e->src);
2078 else if (single_pred (e->src) != first_bb)
2079 return false;
2080 }
2081 else
2082 {
2083 if (!first_bb)
2084 first_bb = e->src;
2085 else if (e->src != first_bb)
2086 return false;
2087 }
2088 }
2089
2090 if (!first_bb)
2091 return false;
2092
2093 stmt = last_stmt (first_bb);
2094 if (!stmt
2095 || gimple_code (stmt) != GIMPLE_COND
2096 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2097 return false;
2098
2099 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2100 gimple_cond_lhs (stmt),
2101 nonconstant_names);
2102 if (*p == true)
2103 return false;
2104 else
2105 return true;
2106 }
2107
2108 /* Given a PHI statement in a function described by inline properties SUMMARY
2109 and *P being the predicate describing whether the selected PHI argument is
2110 known, store a predicate for the result of the PHI statement into
2111 NONCONSTANT_NAMES, if possible. */
2112
2113 static void
2114 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2115 predicate *p,
2116 vec<predicate> nonconstant_names)
2117 {
2118 unsigned i;
2119
2120 for (i = 0; i < gimple_phi_num_args (phi); i++)
2121 {
2122 tree arg = gimple_phi_arg (phi, i)->def;
2123 if (!is_gimple_min_invariant (arg))
2124 {
2125 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2126 *p = p->or_with (summary->conds,
2127 nonconstant_names[SSA_NAME_VERSION (arg)]);
2128 if (*p == true)
2129 return;
2130 }
2131 }
2132
2133 if (dump_file && (dump_flags & TDF_DETAILS))
2134 {
2135 fprintf (dump_file, "\t\tphi predicate: ");
2136 p->dump (dump_file, summary->conds);
2137 }
2138 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2139 }
2140
2141 /* For a typical usage of __builtin_expect (a<b, 1), we
2142 may introduce an extra relation stmt:
2143 With the builtin, we have
2144 t1 = a <= b;
2145 t2 = (long int) t1;
2146 t3 = __builtin_expect (t2, 1);
2147 if (t3 != 0)
2148 goto ...
2149 Without the builtin, we have
2150 if (a<=b)
2151 goto...
2152 This affects the size/time estimation and may have
2153 an impact on the earlier inlining.
2154 Here find this pattern and fix it up later. */
2155
2156 static gimple *
2157 find_foldable_builtin_expect (basic_block bb)
2158 {
2159 gimple_stmt_iterator bsi;
2160
2161 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2162 {
2163 gimple *stmt = gsi_stmt (bsi);
2164 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2165 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2166 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2167 {
2168 tree var = gimple_call_lhs (stmt);
2169 tree arg = gimple_call_arg (stmt, 0);
2170 use_operand_p use_p;
2171 gimple *use_stmt;
2172 bool match = false;
2173 bool done = false;
2174
2175 if (!var || !arg)
2176 continue;
2177 gcc_assert (TREE_CODE (var) == SSA_NAME);
2178
2179 while (TREE_CODE (arg) == SSA_NAME)
2180 {
2181 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2182 if (!is_gimple_assign (stmt_tmp))
2183 break;
2184 switch (gimple_assign_rhs_code (stmt_tmp))
2185 {
2186 case LT_EXPR:
2187 case LE_EXPR:
2188 case GT_EXPR:
2189 case GE_EXPR:
2190 case EQ_EXPR:
2191 case NE_EXPR:
2192 match = true;
2193 done = true;
2194 break;
2195 CASE_CONVERT:
2196 break;
2197 default:
2198 done = true;
2199 break;
2200 }
2201 if (done)
2202 break;
2203 arg = gimple_assign_rhs1 (stmt_tmp);
2204 }
2205
2206 if (match && single_imm_use (var, &use_p, &use_stmt)
2207 && gimple_code (use_stmt) == GIMPLE_COND)
2208 return use_stmt;
2209 }
2210 }
2211 return NULL;
2212 }
2213
2214 /* Return true when the basic blocks contains only clobbers followed by RESX.
2215 Such BBs are kept around to make removal of dead stores possible with
2216 presence of EH and will be optimized out by optimize_clobbers later in the
2217 game.
2218
2219 NEED_EH is used to recurse in case the clobber has non-EH predecestors
2220 that can be clobber only, too.. When it is false, the RESX is not necessary
2221 on the end of basic block. */
2222
2223 static bool
2224 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2225 {
2226 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2227 edge_iterator ei;
2228 edge e;
2229
2230 if (need_eh)
2231 {
2232 if (gsi_end_p (gsi))
2233 return false;
2234 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2235 return false;
2236 gsi_prev (&gsi);
2237 }
2238 else if (!single_succ_p (bb))
2239 return false;
2240
2241 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2242 {
2243 gimple *stmt = gsi_stmt (gsi);
2244 if (is_gimple_debug (stmt))
2245 continue;
2246 if (gimple_clobber_p (stmt))
2247 continue;
2248 if (gimple_code (stmt) == GIMPLE_LABEL)
2249 break;
2250 return false;
2251 }
2252
2253 /* See if all predecestors are either throws or clobber only BBs. */
2254 FOR_EACH_EDGE (e, ei, bb->preds)
2255 if (!(e->flags & EDGE_EH)
2256 && !clobber_only_eh_bb_p (e->src, false))
2257 return false;
2258
2259 return true;
2260 }
2261
2262 /* Return true if STMT compute a floating point expression that may be affected
2263 by -ffast-math and similar flags. */
2264
2265 static bool
2266 fp_expression_p (gimple *stmt)
2267 {
2268 ssa_op_iter i;
2269 tree op;
2270
2271 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2272 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2273 return true;
2274 return false;
2275 }
2276
2277 /* Analyze function body for NODE.
2278 EARLY indicates run from early optimization pipeline. */
2279
2280 static void
2281 analyze_function_body (struct cgraph_node *node, bool early)
2282 {
2283 sreal time = param_uninlined_function_time;
2284 /* Estimate static overhead for function prologue/epilogue and alignment. */
2285 int size = param_uninlined_function_insns;
2286 /* Benefits are scaled by probability of elimination that is in range
2287 <0,2>. */
2288 basic_block bb;
2289 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2290 sreal freq;
2291 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2292 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2293 predicate bb_predicate;
2294 struct ipa_func_body_info fbi;
2295 vec<predicate> nonconstant_names = vNULL;
2296 int nblocks, n;
2297 int *order;
2298 gimple *fix_builtin_expect_stmt;
2299
2300 gcc_assert (my_function && my_function->cfg);
2301 gcc_assert (cfun == my_function);
2302
2303 memset(&fbi, 0, sizeof(fbi));
2304 vec_free (info->conds);
2305 info->conds = NULL;
2306 vec_free (info->size_time_table);
2307 info->size_time_table = NULL;
2308
2309 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2310 so we can produce proper inline hints.
2311
2312 When optimizing and analyzing for early inliner, initialize node params
2313 so we can produce correct BB predicates. */
2314
2315 if (opt_for_fn (node->decl, optimize))
2316 {
2317 calculate_dominance_info (CDI_DOMINATORS);
2318 calculate_dominance_info (CDI_POST_DOMINATORS);
2319 if (!early)
2320 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2321 else
2322 {
2323 ipa_check_create_node_params ();
2324 ipa_initialize_node_params (node);
2325 }
2326
2327 if (ipa_node_params_sum)
2328 {
2329 fbi.node = node;
2330 fbi.info = IPA_NODE_REF (node);
2331 fbi.bb_infos = vNULL;
2332 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2333 fbi.param_count = count_formal_params (node->decl);
2334 fbi.aa_walk_budget = param_ipa_max_aa_steps;
2335
2336 nonconstant_names.safe_grow_cleared
2337 (SSANAMES (my_function)->length ());
2338 }
2339 }
2340
2341 if (dump_file)
2342 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2343 node->name ());
2344
2345 /* When we run into maximal number of entries, we assign everything to the
2346 constant truth case. Be sure to have it in list. */
2347 bb_predicate = true;
2348 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2349
2350 bb_predicate = predicate::not_inlined ();
2351 info->account_size_time (param_uninlined_function_insns
2352 * ipa_fn_summary::size_scale,
2353 param_uninlined_function_time,
2354 bb_predicate,
2355 bb_predicate);
2356
2357 if (fbi.info)
2358 compute_bb_predicates (&fbi, node, info, params_summary);
2359 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2360 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2361 for (n = 0; n < nblocks; n++)
2362 {
2363 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2364 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2365 if (clobber_only_eh_bb_p (bb))
2366 {
2367 if (dump_file && (dump_flags & TDF_DETAILS))
2368 fprintf (dump_file, "\n Ignoring BB %i;"
2369 " it will be optimized away by cleanup_clobbers\n",
2370 bb->index);
2371 continue;
2372 }
2373
2374 /* TODO: Obviously predicates can be propagated down across CFG. */
2375 if (fbi.info)
2376 {
2377 if (bb->aux)
2378 bb_predicate = *(predicate *) bb->aux;
2379 else
2380 bb_predicate = false;
2381 }
2382 else
2383 bb_predicate = true;
2384
2385 if (dump_file && (dump_flags & TDF_DETAILS))
2386 {
2387 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2388 bb_predicate.dump (dump_file, info->conds);
2389 }
2390
2391 if (fbi.info && nonconstant_names.exists ())
2392 {
2393 predicate phi_predicate;
2394 bool first_phi = true;
2395
2396 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2397 gsi_next (&bsi))
2398 {
2399 if (first_phi
2400 && !phi_result_unknown_predicate (&fbi, info,
2401 params_summary,
2402 bb,
2403 &phi_predicate,
2404 nonconstant_names))
2405 break;
2406 first_phi = false;
2407 if (dump_file && (dump_flags & TDF_DETAILS))
2408 {
2409 fprintf (dump_file, " ");
2410 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2411 }
2412 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2413 nonconstant_names);
2414 }
2415 }
2416
2417 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2418
2419 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2420 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2421 {
2422 gimple *stmt = gsi_stmt (bsi);
2423 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2424 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2425 int prob;
2426 predicate will_be_nonconstant;
2427
2428 /* This relation stmt should be folded after we remove
2429 buildin_expect call. Adjust the cost here. */
2430 if (stmt == fix_builtin_expect_stmt)
2431 {
2432 this_size--;
2433 this_time--;
2434 }
2435
2436 if (dump_file && (dump_flags & TDF_DETAILS))
2437 {
2438 fprintf (dump_file, " ");
2439 print_gimple_stmt (dump_file, stmt, 0);
2440 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2441 freq.to_double (), this_size,
2442 this_time);
2443 }
2444
2445 if (is_gimple_call (stmt)
2446 && !gimple_call_internal_p (stmt))
2447 {
2448 struct cgraph_edge *edge = node->get_edge (stmt);
2449 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2450
2451 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2452 resolved as constant. We however don't want to optimize
2453 out the cgraph edges. */
2454 if (nonconstant_names.exists ()
2455 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2456 && gimple_call_lhs (stmt)
2457 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2458 {
2459 predicate false_p = false;
2460 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2461 = false_p;
2462 }
2463 if (ipa_node_params_sum)
2464 {
2465 int count = gimple_call_num_args (stmt);
2466 int i;
2467
2468 if (count)
2469 es->param.safe_grow_cleared (count);
2470 for (i = 0; i < count; i++)
2471 {
2472 int prob = param_change_prob (&fbi, stmt, i);
2473 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2474 es->param[i].change_prob = prob;
2475 }
2476 }
2477
2478 es->call_stmt_size = this_size;
2479 es->call_stmt_time = this_time;
2480 es->loop_depth = bb_loop_depth (bb);
2481 edge_set_predicate (edge, &bb_predicate);
2482 if (edge->speculative)
2483 {
2484 cgraph_edge *direct, *indirect;
2485 ipa_ref *ref;
2486 edge->speculative_call_info (direct, indirect, ref);
2487 gcc_assert (direct == edge);
2488 ipa_call_summary *es2
2489 = ipa_call_summaries->get_create (indirect);
2490 ipa_call_summaries->duplicate (edge, indirect,
2491 es, es2);
2492 }
2493 }
2494
2495 /* TODO: When conditional jump or swithc is known to be constant, but
2496 we did not translate it into the predicates, we really can account
2497 just maximum of the possible paths. */
2498 if (fbi.info)
2499 will_be_nonconstant
2500 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2501 stmt, nonconstant_names);
2502 else
2503 will_be_nonconstant = true;
2504 if (this_time || this_size)
2505 {
2506 sreal final_time = (sreal)this_time * freq;
2507
2508 prob = eliminated_by_inlining_prob (&fbi, stmt);
2509 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2510 fprintf (dump_file,
2511 "\t\t50%% will be eliminated by inlining\n");
2512 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2514
2515 class predicate p = bb_predicate & will_be_nonconstant;
2516
2517 /* We can ignore statement when we proved it is never going
2518 to happen, but we cannot do that for call statements
2519 because edges are accounted specially. */
2520
2521 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2522 {
2523 time += final_time;
2524 size += this_size;
2525 }
2526
2527 /* We account everything but the calls. Calls have their own
2528 size/time info attached to cgraph edges. This is necessary
2529 in order to make the cost disappear after inlining. */
2530 if (!is_gimple_call (stmt))
2531 {
2532 if (prob)
2533 {
2534 predicate ip = bb_predicate & predicate::not_inlined ();
2535 info->account_size_time (this_size * prob,
2536 (final_time * prob) / 2, ip,
2537 p);
2538 }
2539 if (prob != 2)
2540 info->account_size_time (this_size * (2 - prob),
2541 (final_time * (2 - prob) / 2),
2542 bb_predicate,
2543 p);
2544 }
2545
2546 if (!info->fp_expressions && fp_expression_p (stmt))
2547 {
2548 info->fp_expressions = true;
2549 if (dump_file)
2550 fprintf (dump_file, " fp_expression set\n");
2551 }
2552 }
2553
2554 /* Account cost of address calculations in the statements. */
2555 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2556 {
2557 for (tree op = gimple_op (stmt, i);
2558 op && handled_component_p (op);
2559 op = TREE_OPERAND (op, 0))
2560 if ((TREE_CODE (op) == ARRAY_REF
2561 || TREE_CODE (op) == ARRAY_RANGE_REF)
2562 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2563 {
2564 predicate p = bb_predicate;
2565 if (fbi.info)
2566 p = p & will_be_nonconstant_expr_predicate
2567 (&fbi, info, params_summary,
2568 TREE_OPERAND (op, 1),
2569 nonconstant_names);
2570 if (p != false)
2571 {
2572 time += freq;
2573 size += 1;
2574 if (dump_file)
2575 fprintf (dump_file,
2576 "\t\tAccounting address calculation.\n");
2577 info->account_size_time (ipa_fn_summary::size_scale,
2578 freq,
2579 bb_predicate,
2580 p);
2581 }
2582 }
2583 }
2584
2585 }
2586 }
2587 free (order);
2588
2589 if (nonconstant_names.exists () && !early)
2590 {
2591 class loop *loop;
2592 predicate loop_iterations = true;
2593 predicate loop_stride = true;
2594
2595 if (dump_file && (dump_flags & TDF_DETAILS))
2596 flow_loops_dump (dump_file, NULL, 0);
2597 scev_initialize ();
2598 FOR_EACH_LOOP (loop, 0)
2599 {
2600 vec<edge> exits;
2601 edge ex;
2602 unsigned int j;
2603 class tree_niter_desc niter_desc;
2604 bb_predicate = *(predicate *) loop->header->aux;
2605
2606 exits = get_loop_exit_edges (loop);
2607 FOR_EACH_VEC_ELT (exits, j, ex)
2608 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2609 && !is_gimple_min_invariant (niter_desc.niter))
2610 {
2611 predicate will_be_nonconstant
2612 = will_be_nonconstant_expr_predicate (&fbi, info,
2613 params_summary,
2614 niter_desc.niter,
2615 nonconstant_names);
2616 if (will_be_nonconstant != true)
2617 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2618 if (will_be_nonconstant != true
2619 && will_be_nonconstant != false)
2620 /* This is slightly inprecise. We may want to represent each
2621 loop with independent predicate. */
2622 loop_iterations &= will_be_nonconstant;
2623 }
2624 exits.release ();
2625 }
2626
2627 /* To avoid quadratic behavior we analyze stride predicates only
2628 with respect to the containing loop. Thus we simply iterate
2629 over all defs in the outermost loop body. */
2630 for (loop = loops_for_fn (cfun)->tree_root->inner;
2631 loop != NULL; loop = loop->next)
2632 {
2633 basic_block *body = get_loop_body (loop);
2634 for (unsigned i = 0; i < loop->num_nodes; i++)
2635 {
2636 gimple_stmt_iterator gsi;
2637 bb_predicate = *(predicate *) body[i]->aux;
2638 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2639 gsi_next (&gsi))
2640 {
2641 gimple *stmt = gsi_stmt (gsi);
2642
2643 if (!is_gimple_assign (stmt))
2644 continue;
2645
2646 tree def = gimple_assign_lhs (stmt);
2647 if (TREE_CODE (def) != SSA_NAME)
2648 continue;
2649
2650 affine_iv iv;
2651 if (!simple_iv (loop_containing_stmt (stmt),
2652 loop_containing_stmt (stmt),
2653 def, &iv, true)
2654 || is_gimple_min_invariant (iv.step))
2655 continue;
2656
2657 predicate will_be_nonconstant
2658 = will_be_nonconstant_expr_predicate (&fbi, info,
2659 params_summary,
2660 iv.step,
2661 nonconstant_names);
2662 if (will_be_nonconstant != true)
2663 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2664 if (will_be_nonconstant != true
2665 && will_be_nonconstant != false)
2666 /* This is slightly inprecise. We may want to represent
2667 each loop with independent predicate. */
2668 loop_stride = loop_stride & will_be_nonconstant;
2669 }
2670 }
2671 free (body);
2672 }
2673 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2674 set_hint_predicate (&s->loop_iterations, loop_iterations);
2675 set_hint_predicate (&s->loop_stride, loop_stride);
2676 scev_finalize ();
2677 }
2678 FOR_ALL_BB_FN (bb, my_function)
2679 {
2680 edge e;
2681 edge_iterator ei;
2682
2683 if (bb->aux)
2684 edge_predicate_pool.remove ((predicate *)bb->aux);
2685 bb->aux = NULL;
2686 FOR_EACH_EDGE (e, ei, bb->succs)
2687 {
2688 if (e->aux)
2689 edge_predicate_pool.remove ((predicate *) e->aux);
2690 e->aux = NULL;
2691 }
2692 }
2693 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2694 ipa_size_summary *ss = ipa_size_summaries->get (node);
2695 s->time = time;
2696 ss->self_size = size;
2697 nonconstant_names.release ();
2698 ipa_release_body_info (&fbi);
2699 if (opt_for_fn (node->decl, optimize))
2700 {
2701 if (!early)
2702 loop_optimizer_finalize ();
2703 else if (!ipa_edge_args_sum)
2704 ipa_free_all_node_params ();
2705 free_dominance_info (CDI_DOMINATORS);
2706 free_dominance_info (CDI_POST_DOMINATORS);
2707 }
2708 if (dump_file)
2709 {
2710 fprintf (dump_file, "\n");
2711 ipa_dump_fn_summary (dump_file, node);
2712 }
2713 }
2714
2715
2716 /* Compute function summary.
2717 EARLY is true when we compute parameters during early opts. */
2718
2719 void
2720 compute_fn_summary (struct cgraph_node *node, bool early)
2721 {
2722 HOST_WIDE_INT self_stack_size;
2723 struct cgraph_edge *e;
2724
2725 gcc_assert (!node->inlined_to);
2726
2727 if (!ipa_fn_summaries)
2728 ipa_fn_summary_alloc ();
2729
2730 /* Create a new ipa_fn_summary. */
2731 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2732 ipa_fn_summaries->remove (node);
2733 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2734 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
2735
2736 /* Estimate the stack size for the function if we're optimizing. */
2737 self_stack_size = optimize && !node->thunk.thunk_p
2738 ? estimated_stack_frame_size (node) : 0;
2739 size_info->estimated_self_stack_size = self_stack_size;
2740 info->estimated_stack_size = self_stack_size;
2741
2742 if (node->thunk.thunk_p)
2743 {
2744 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2745 predicate t = true;
2746
2747 node->can_change_signature = false;
2748 es->call_stmt_size = eni_size_weights.call_cost;
2749 es->call_stmt_time = eni_time_weights.call_cost;
2750 info->account_size_time (ipa_fn_summary::size_scale
2751 * param_uninlined_function_thunk_insns,
2752 param_uninlined_function_thunk_time, t, t);
2753 t = predicate::not_inlined ();
2754 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2755 ipa_update_overall_fn_summary (node);
2756 size_info->self_size = size_info->size;
2757 if (stdarg_p (TREE_TYPE (node->decl)))
2758 {
2759 info->inlinable = false;
2760 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2761 }
2762 else
2763 info->inlinable = true;
2764 }
2765 else
2766 {
2767 /* Even is_gimple_min_invariant rely on current_function_decl. */
2768 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2769
2770 /* Can this function be inlined at all? */
2771 if (!opt_for_fn (node->decl, optimize)
2772 && !lookup_attribute ("always_inline",
2773 DECL_ATTRIBUTES (node->decl)))
2774 info->inlinable = false;
2775 else
2776 info->inlinable = tree_inlinable_function_p (node->decl);
2777
2778 /* Type attributes can use parameter indices to describe them. */
2779 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2780 /* Likewise for #pragma omp declare simd functions or functions
2781 with simd attribute. */
2782 || lookup_attribute ("omp declare simd",
2783 DECL_ATTRIBUTES (node->decl)))
2784 node->can_change_signature = false;
2785 else
2786 {
2787 /* Otherwise, inlinable functions always can change signature. */
2788 if (info->inlinable)
2789 node->can_change_signature = true;
2790 else
2791 {
2792 /* Functions calling builtin_apply cannot change signature. */
2793 for (e = node->callees; e; e = e->next_callee)
2794 {
2795 tree cdecl = e->callee->decl;
2796 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2797 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2798 break;
2799 }
2800 node->can_change_signature = !e;
2801 }
2802 }
2803 analyze_function_body (node, early);
2804 pop_cfun ();
2805 }
2806 for (e = node->callees; e; e = e->next_callee)
2807 if (e->callee->comdat_local_p ())
2808 break;
2809 node->calls_comdat_local = (e != NULL);
2810
2811 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2812 size_info->size = size_info->self_size;
2813 info->estimated_stack_size = size_info->estimated_self_stack_size;
2814
2815 /* Code above should compute exactly the same result as
2816 ipa_update_overall_fn_summary but because computation happens in
2817 different order the roundoff errors result in slight changes. */
2818 ipa_update_overall_fn_summary (node);
2819 /* In LTO mode we may have speculative edges set. */
2820 gcc_assert (in_lto_p || size_info->size == size_info->self_size);
2821 }
2822
2823
2824 /* Compute parameters of functions used by inliner using
2825 current_function_decl. */
2826
2827 static unsigned int
2828 compute_fn_summary_for_current (void)
2829 {
2830 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2831 return 0;
2832 }
2833
2834 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2835 KNOWN_CONTEXTS and KNOWN_AGGS. */
2836
2837 static bool
2838 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2839 int *size, int *time,
2840 vec<tree> known_vals,
2841 vec<ipa_polymorphic_call_context> known_contexts,
2842 vec<ipa_agg_jump_function_p> known_aggs)
2843 {
2844 tree target;
2845 struct cgraph_node *callee;
2846 class ipa_fn_summary *isummary;
2847 enum availability avail;
2848 bool speculative;
2849
2850 if (!known_vals.exists () && !known_contexts.exists ())
2851 return false;
2852 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2853 return false;
2854
2855 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2856 known_aggs, &speculative);
2857 if (!target || speculative)
2858 return false;
2859
2860 /* Account for difference in cost between indirect and direct calls. */
2861 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2862 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2863 gcc_checking_assert (*time >= 0);
2864 gcc_checking_assert (*size >= 0);
2865
2866 callee = cgraph_node::get (target);
2867 if (!callee || !callee->definition)
2868 return false;
2869 callee = callee->function_symbol (&avail);
2870 if (avail < AVAIL_AVAILABLE)
2871 return false;
2872 isummary = ipa_fn_summaries->get (callee);
2873 if (isummary == NULL)
2874 return false;
2875
2876 return isummary->inlinable;
2877 }
2878
2879 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2880 handle edge E with probability PROB.
2881 Set HINTS if edge may be devirtualized.
2882 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2883 site. */
2884
2885 static inline void
2886 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2887 sreal *time,
2888 int prob,
2889 vec<tree> known_vals,
2890 vec<ipa_polymorphic_call_context> known_contexts,
2891 vec<ipa_agg_jump_function_p> known_aggs,
2892 ipa_hints *hints)
2893 {
2894 class ipa_call_summary *es = ipa_call_summaries->get (e);
2895 int call_size = es->call_stmt_size;
2896 int call_time = es->call_stmt_time;
2897 int cur_size;
2898 if (!e->callee && hints && e->maybe_hot_p ()
2899 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2900 known_vals, known_contexts, known_aggs))
2901 *hints |= INLINE_HINT_indirect_call;
2902 cur_size = call_size * ipa_fn_summary::size_scale;
2903 *size += cur_size;
2904 if (min_size)
2905 *min_size += cur_size;
2906 if (!time)
2907 ;
2908 else if (prob == REG_BR_PROB_BASE)
2909 *time += ((sreal)call_time) * e->sreal_frequency ();
2910 else
2911 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2912 }
2913
2914
2915
2916 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2917 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2918 describe context of the call site. */
2919
2920 static void
2921 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2922 int *min_size, sreal *time,
2923 ipa_hints *hints,
2924 clause_t possible_truths,
2925 vec<tree> known_vals,
2926 vec<ipa_polymorphic_call_context> known_contexts,
2927 vec<ipa_agg_jump_function_p> known_aggs)
2928 {
2929 struct cgraph_edge *e;
2930 for (e = node->callees; e; e = e->next_callee)
2931 {
2932 if (!e->inline_failed)
2933 {
2934 gcc_checking_assert (!ipa_call_summaries->get (e));
2935 estimate_calls_size_and_time (e->callee, size, min_size, time,
2936 hints,
2937 possible_truths,
2938 known_vals, known_contexts,
2939 known_aggs);
2940 continue;
2941 }
2942 class ipa_call_summary *es = ipa_call_summaries->get (e);
2943
2944 /* Do not care about zero sized builtins. */
2945 if (!es->call_stmt_size)
2946 {
2947 gcc_checking_assert (!es->call_stmt_time);
2948 continue;
2949 }
2950 if (!es->predicate
2951 || es->predicate->evaluate (possible_truths))
2952 {
2953 /* Predicates of calls shall not use NOT_CHANGED codes,
2954 sowe do not need to compute probabilities. */
2955 estimate_edge_size_and_time (e, size,
2956 es->predicate ? NULL : min_size,
2957 time, REG_BR_PROB_BASE,
2958 known_vals, known_contexts,
2959 known_aggs, hints);
2960 }
2961 }
2962 for (e = node->indirect_calls; e; e = e->next_callee)
2963 {
2964 class ipa_call_summary *es = ipa_call_summaries->get (e);
2965 if (!es->predicate
2966 || es->predicate->evaluate (possible_truths))
2967 estimate_edge_size_and_time (e, size,
2968 es->predicate ? NULL : min_size,
2969 time, REG_BR_PROB_BASE,
2970 known_vals, known_contexts, known_aggs,
2971 hints);
2972 }
2973 }
2974
2975 /* Default constructor for ipa call context.
2976 Memory alloction of known_vals, known_contexts
2977 and known_aggs vectors is owned by the caller, but can
2978 be release by ipa_call_context::release.
2979
2980 inline_param_summary is owned by the caller. */
2981 ipa_call_context::ipa_call_context (cgraph_node *node,
2982 clause_t possible_truths,
2983 clause_t nonspec_possible_truths,
2984 vec<tree> known_vals,
2985 vec<ipa_polymorphic_call_context>
2986 known_contexts,
2987 vec<ipa_agg_jump_function_p> known_aggs,
2988 vec<inline_param_summary>
2989 inline_param_summary)
2990 : m_node (node), m_possible_truths (possible_truths),
2991 m_nonspec_possible_truths (nonspec_possible_truths),
2992 m_inline_param_summary (inline_param_summary),
2993 m_known_vals (known_vals),
2994 m_known_contexts (known_contexts),
2995 m_known_aggs (known_aggs)
2996 {
2997 }
2998
2999 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3000
3001 void
3002 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3003 {
3004 m_node = ctx.m_node;
3005 m_possible_truths = ctx.m_possible_truths;
3006 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3007 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3008 unsigned int nargs = params_summary
3009 ? ipa_get_param_count (params_summary) : 0;
3010
3011 m_inline_param_summary = vNULL;
3012 /* Copy the info only if there is at least one useful entry. */
3013 if (ctx.m_inline_param_summary.exists ())
3014 {
3015 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3016
3017 for (unsigned int i = 0; i < n; i++)
3018 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3019 && !ctx.m_inline_param_summary[i].useless_p ())
3020 {
3021 m_inline_param_summary
3022 = ctx.m_inline_param_summary.copy ();
3023 break;
3024 }
3025 }
3026 m_known_vals = vNULL;
3027 if (ctx.m_known_vals.exists ())
3028 {
3029 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3030
3031 for (unsigned int i = 0; i < n; i++)
3032 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3033 && ctx.m_known_vals[i])
3034 {
3035 m_known_vals = ctx.m_known_vals.copy ();
3036 break;
3037 }
3038 }
3039
3040 m_known_contexts = vNULL;
3041 if (ctx.m_known_contexts.exists ())
3042 {
3043 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3044
3045 for (unsigned int i = 0; i < n; i++)
3046 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3047 && !ctx.m_known_contexts[i].useless_p ())
3048 {
3049 m_known_contexts = ctx.m_known_contexts.copy ();
3050 break;
3051 }
3052 }
3053
3054 m_known_aggs = vNULL;
3055 if (ctx.m_known_aggs.exists ())
3056 {
3057 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3058
3059 for (unsigned int i = 0; i < n; i++)
3060 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3061 && ctx.m_known_aggs[i])
3062 {
3063 m_known_aggs = ctx.m_known_aggs.copy ();
3064 break;
3065 }
3066 }
3067 }
3068
3069 /* Release memory used by known_vals/contexts/aggs vectors.
3070 If ALL is true release also inline_param_summary.
3071 This happens when context was previously duplciated to be stored
3072 into cache. */
3073
3074 void
3075 ipa_call_context::release (bool all)
3076 {
3077 /* See if context is initialized at first place. */
3078 if (!m_node)
3079 return;
3080 m_known_vals.release ();
3081 m_known_contexts.release ();
3082 m_known_aggs.release ();
3083 if (all)
3084 m_inline_param_summary.release ();
3085 }
3086
3087 /* Return true if CTX describes the same call context as THIS. */
3088
3089 bool
3090 ipa_call_context::equal_to (const ipa_call_context &ctx)
3091 {
3092 if (m_node != ctx.m_node
3093 || m_possible_truths != ctx.m_possible_truths
3094 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3095 return false;
3096
3097 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3098 unsigned int nargs = params_summary
3099 ? ipa_get_param_count (params_summary) : 0;
3100
3101 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3102 {
3103 for (unsigned int i = 0; i < nargs; i++)
3104 {
3105 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3106 continue;
3107 if (i >= m_inline_param_summary.length ()
3108 || m_inline_param_summary[i].useless_p ())
3109 {
3110 if (i < ctx.m_inline_param_summary.length ()
3111 && !ctx.m_inline_param_summary[i].useless_p ())
3112 return false;
3113 continue;
3114 }
3115 if (i >= ctx.m_inline_param_summary.length ()
3116 || ctx.m_inline_param_summary[i].useless_p ())
3117 {
3118 if (i < m_inline_param_summary.length ()
3119 && !m_inline_param_summary[i].useless_p ())
3120 return false;
3121 continue;
3122 }
3123 if (!m_inline_param_summary[i].equal_to
3124 (ctx.m_inline_param_summary[i]))
3125 return false;
3126 }
3127 }
3128 if (m_known_vals.exists () || ctx.m_known_vals.exists ())
3129 {
3130 for (unsigned int i = 0; i < nargs; i++)
3131 {
3132 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3133 continue;
3134 if (i >= m_known_vals.length () || !m_known_vals[i])
3135 {
3136 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
3137 return false;
3138 continue;
3139 }
3140 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
3141 {
3142 if (i < m_known_vals.length () && m_known_vals[i])
3143 return false;
3144 continue;
3145 }
3146 if (m_known_vals[i] != ctx.m_known_vals[i])
3147 return false;
3148 }
3149 }
3150 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
3151 {
3152 for (unsigned int i = 0; i < nargs; i++)
3153 {
3154 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3155 continue;
3156 if (i >= m_known_contexts.length ()
3157 || m_known_contexts[i].useless_p ())
3158 {
3159 if (i < ctx.m_known_contexts.length ()
3160 && !ctx.m_known_contexts[i].useless_p ())
3161 return false;
3162 continue;
3163 }
3164 if (i >= ctx.m_known_contexts.length ()
3165 || ctx.m_known_contexts[i].useless_p ())
3166 {
3167 if (i < m_known_contexts.length ()
3168 && !m_known_contexts[i].useless_p ())
3169 return false;
3170 continue;
3171 }
3172 if (!m_known_contexts[i].equal_to
3173 (ctx.m_known_contexts[i]))
3174 return false;
3175 }
3176 }
3177 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
3178 {
3179 for (unsigned int i = 0; i < nargs; i++)
3180 {
3181 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3182 continue;
3183 if (i >= m_known_aggs.length () || !m_known_aggs[i])
3184 {
3185 if (i < ctx.m_known_aggs.length () && ctx.m_known_aggs[i])
3186 return false;
3187 continue;
3188 }
3189 if (i >= ctx.m_known_aggs.length () || !ctx.m_known_aggs[i])
3190 {
3191 if (i < m_known_aggs.length () && m_known_aggs[i])
3192 return false;
3193 continue;
3194 }
3195 if (m_known_aggs[i] != ctx.m_known_aggs[i])
3196 return false;
3197 }
3198 }
3199 return true;
3200 }
3201
3202 /* Estimate size and time needed to execute call in the given context.
3203 Additionally detemine hints determined by the context. Finally compute
3204 minimal size needed for the call that is independent on the call context and
3205 can be used for fast estimates. Return the values in RET_SIZE,
3206 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3207
3208 void
3209 ipa_call_context::estimate_size_and_time (int *ret_size,
3210 int *ret_min_size,
3211 sreal *ret_time,
3212 sreal *ret_nonspecialized_time,
3213 ipa_hints *ret_hints)
3214 {
3215 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3216 size_time_entry *e;
3217 int size = 0;
3218 sreal time = 0;
3219 int min_size = 0;
3220 ipa_hints hints = 0;
3221 int i;
3222
3223 if (dump_file && (dump_flags & TDF_DETAILS))
3224 {
3225 bool found = false;
3226 fprintf (dump_file, " Estimating body: %s/%i\n"
3227 " Known to be false: ", m_node->name (),
3228 m_node->order);
3229
3230 for (i = predicate::not_inlined_condition;
3231 i < (predicate::first_dynamic_condition
3232 + (int) vec_safe_length (info->conds)); i++)
3233 if (!(m_possible_truths & (1 << i)))
3234 {
3235 if (found)
3236 fprintf (dump_file, ", ");
3237 found = true;
3238 dump_condition (dump_file, info->conds, i);
3239 }
3240 }
3241
3242 estimate_calls_size_and_time (m_node, &size, &min_size,
3243 ret_time ? &time : NULL,
3244 ret_hints ? &hints : NULL, m_possible_truths,
3245 m_known_vals, m_known_contexts, m_known_aggs);
3246
3247 sreal nonspecialized_time = time;
3248
3249 min_size += (*info->size_time_table)[0].size;
3250 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3251 {
3252 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3253
3254 /* Because predicates are conservative, it can happen that nonconst is 1
3255 but exec is 0. */
3256 if (exec)
3257 {
3258 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3259
3260 gcc_checking_assert (e->time >= 0);
3261 gcc_checking_assert (time >= 0);
3262
3263 /* We compute specialized size only because size of nonspecialized
3264 copy is context independent.
3265
3266 The difference between nonspecialized execution and specialized is
3267 that nonspecialized is not going to have optimized out computations
3268 known to be constant in a specialized setting. */
3269 if (nonconst)
3270 size += e->size;
3271 if (!ret_time)
3272 continue;
3273 nonspecialized_time += e->time;
3274 if (!nonconst)
3275 ;
3276 else if (!m_inline_param_summary.exists ())
3277 {
3278 if (nonconst)
3279 time += e->time;
3280 }
3281 else
3282 {
3283 int prob = e->nonconst_predicate.probability
3284 (info->conds, m_possible_truths,
3285 m_inline_param_summary);
3286 gcc_checking_assert (prob >= 0);
3287 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3288 time += e->time * prob / REG_BR_PROB_BASE;
3289 }
3290 gcc_checking_assert (time >= 0);
3291 }
3292 }
3293 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3294 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3295 gcc_checking_assert (min_size >= 0);
3296 gcc_checking_assert (size >= 0);
3297 gcc_checking_assert (time >= 0);
3298 /* nonspecialized_time should be always bigger than specialized time.
3299 Roundoff issues however may get into the way. */
3300 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3301
3302 /* Roundoff issues may make specialized time bigger than nonspecialized
3303 time. We do not really want that to happen because some heurstics
3304 may get confused by seeing negative speedups. */
3305 if (time > nonspecialized_time)
3306 time = nonspecialized_time;
3307
3308 if (ret_hints)
3309 {
3310 if (info->loop_iterations
3311 && !info->loop_iterations->evaluate (m_possible_truths))
3312 hints |= INLINE_HINT_loop_iterations;
3313 if (info->loop_stride
3314 && !info->loop_stride->evaluate (m_possible_truths))
3315 hints |= INLINE_HINT_loop_stride;
3316 if (info->scc_no)
3317 hints |= INLINE_HINT_in_scc;
3318 if (DECL_DECLARED_INLINE_P (m_node->decl))
3319 hints |= INLINE_HINT_declared_inline;
3320 }
3321
3322 size = RDIV (size, ipa_fn_summary::size_scale);
3323 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3324
3325 if (dump_file && (dump_flags & TDF_DETAILS))
3326 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3327 time.to_double (), nonspecialized_time.to_double ());
3328 if (ret_time)
3329 *ret_time = time;
3330 if (ret_nonspecialized_time)
3331 *ret_nonspecialized_time = nonspecialized_time;
3332 if (ret_size)
3333 *ret_size = size;
3334 if (ret_min_size)
3335 *ret_min_size = min_size;
3336 if (ret_hints)
3337 *ret_hints = hints;
3338 return;
3339 }
3340
3341
3342 /* Estimate size and time needed to execute callee of EDGE assuming that
3343 parameters known to be constant at caller of EDGE are propagated.
3344 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3345 and types for parameters. */
3346
3347 void
3348 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3349 vec<tree> known_vals,
3350 vec<ipa_polymorphic_call_context>
3351 known_contexts,
3352 vec<ipa_agg_jump_function_p> known_aggs,
3353 int *ret_size, sreal *ret_time,
3354 sreal *ret_nonspec_time,
3355 ipa_hints *hints)
3356 {
3357 clause_t clause, nonspec_clause;
3358
3359 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
3360 &clause, &nonspec_clause);
3361 ipa_call_context ctx (node, clause, nonspec_clause,
3362 known_vals, known_contexts,
3363 known_aggs, vNULL);
3364 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3365 ret_nonspec_time, hints);
3366 }
3367
3368 /* Return stack frame offset where frame of NODE is supposed to start inside
3369 of the function it is inlined to.
3370 Return 0 for functions that are not inlined. */
3371
3372 HOST_WIDE_INT
3373 ipa_get_stack_frame_offset (struct cgraph_node *node)
3374 {
3375 HOST_WIDE_INT offset = 0;
3376 if (!node->inlined_to)
3377 return 0;
3378 node = node->callers->caller;
3379 while (true)
3380 {
3381 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3382 if (!node->inlined_to)
3383 return offset;
3384 node = node->callers->caller;
3385 }
3386 }
3387
3388
3389 /* Update summary information of inline clones after inlining.
3390 Compute peak stack usage. */
3391
3392 static void
3393 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3394 {
3395 struct cgraph_edge *e;
3396
3397 ipa_propagate_frequency (node);
3398 for (e = node->callees; e; e = e->next_callee)
3399 {
3400 if (!e->inline_failed)
3401 inline_update_callee_summaries (e->callee, depth);
3402 else
3403 ipa_call_summaries->get (e)->loop_depth += depth;
3404 }
3405 for (e = node->indirect_calls; e; e = e->next_callee)
3406 ipa_call_summaries->get (e)->loop_depth += depth;
3407 }
3408
3409 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3410 When function A is inlined in B and A calls C with parameter that
3411 changes with probability PROB1 and C is known to be passthroug
3412 of argument if B that change with probability PROB2, the probability
3413 of change is now PROB1*PROB2. */
3414
3415 static void
3416 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3417 struct cgraph_edge *edge)
3418 {
3419 if (ipa_node_params_sum)
3420 {
3421 int i;
3422 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3423 if (!args)
3424 return;
3425 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3426 class ipa_call_summary *inlined_es
3427 = ipa_call_summaries->get (inlined_edge);
3428
3429 if (es->param.length () == 0)
3430 return;
3431
3432 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3433 {
3434 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3435 if (jfunc->type == IPA_JF_PASS_THROUGH
3436 || jfunc->type == IPA_JF_ANCESTOR)
3437 {
3438 int id = jfunc->type == IPA_JF_PASS_THROUGH
3439 ? ipa_get_jf_pass_through_formal_id (jfunc)
3440 : ipa_get_jf_ancestor_formal_id (jfunc);
3441 if (id < (int) inlined_es->param.length ())
3442 {
3443 int prob1 = es->param[i].change_prob;
3444 int prob2 = inlined_es->param[id].change_prob;
3445 int prob = combine_probabilities (prob1, prob2);
3446
3447 if (prob1 && prob2 && !prob)
3448 prob = 1;
3449
3450 es->param[i].change_prob = prob;
3451 }
3452 }
3453 }
3454 }
3455 }
3456
3457 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3458
3459 Remap predicates of callees of NODE. Rest of arguments match
3460 remap_predicate.
3461
3462 Also update change probabilities. */
3463
3464 static void
3465 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3466 struct cgraph_node *node,
3467 class ipa_fn_summary *info,
3468 class ipa_node_params *params_summary,
3469 class ipa_fn_summary *callee_info,
3470 vec<int> operand_map,
3471 vec<int> offset_map,
3472 clause_t possible_truths,
3473 predicate *toplev_predicate)
3474 {
3475 struct cgraph_edge *e, *next;
3476 for (e = node->callees; e; e = next)
3477 {
3478 predicate p;
3479 next = e->next_callee;
3480
3481 if (e->inline_failed)
3482 {
3483 class ipa_call_summary *es = ipa_call_summaries->get (e);
3484 remap_edge_change_prob (inlined_edge, e);
3485
3486 if (es->predicate)
3487 {
3488 p = es->predicate->remap_after_inlining
3489 (info, params_summary,
3490 callee_info, operand_map,
3491 offset_map, possible_truths,
3492 *toplev_predicate);
3493 edge_set_predicate (e, &p);
3494 }
3495 else
3496 edge_set_predicate (e, toplev_predicate);
3497 }
3498 else
3499 remap_edge_summaries (inlined_edge, e->callee, info,
3500 params_summary, callee_info,
3501 operand_map, offset_map, possible_truths,
3502 toplev_predicate);
3503 }
3504 for (e = node->indirect_calls; e; e = next)
3505 {
3506 class ipa_call_summary *es = ipa_call_summaries->get (e);
3507 predicate p;
3508 next = e->next_callee;
3509
3510 remap_edge_change_prob (inlined_edge, e);
3511 if (es->predicate)
3512 {
3513 p = es->predicate->remap_after_inlining
3514 (info, params_summary,
3515 callee_info, operand_map, offset_map,
3516 possible_truths, *toplev_predicate);
3517 edge_set_predicate (e, &p);
3518 }
3519 else
3520 edge_set_predicate (e, toplev_predicate);
3521 }
3522 }
3523
3524 /* Same as remap_predicate, but set result into hint *HINT. */
3525
3526 static void
3527 remap_hint_predicate (class ipa_fn_summary *info,
3528 class ipa_node_params *params_summary,
3529 class ipa_fn_summary *callee_info,
3530 predicate **hint,
3531 vec<int> operand_map,
3532 vec<int> offset_map,
3533 clause_t possible_truths,
3534 predicate *toplev_predicate)
3535 {
3536 predicate p;
3537
3538 if (!*hint)
3539 return;
3540 p = (*hint)->remap_after_inlining
3541 (info, params_summary, callee_info,
3542 operand_map, offset_map,
3543 possible_truths, *toplev_predicate);
3544 if (p != false && p != true)
3545 {
3546 if (!*hint)
3547 set_hint_predicate (hint, p);
3548 else
3549 **hint &= p;
3550 }
3551 }
3552
3553 /* We inlined EDGE. Update summary of the function we inlined into. */
3554
3555 void
3556 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3557 {
3558 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3559 struct cgraph_node *to = (edge->caller->inlined_to
3560 ? edge->caller->inlined_to : edge->caller);
3561 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
3562 clause_t clause = 0; /* not_inline is known to be false. */
3563 size_time_entry *e;
3564 auto_vec<int, 8> operand_map;
3565 auto_vec<int, 8> offset_map;
3566 int i;
3567 predicate toplev_predicate;
3568 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3569 class ipa_node_params *params_summary = (ipa_node_params_sum
3570 ? IPA_NODE_REF (to) : NULL);
3571
3572 if (es->predicate)
3573 toplev_predicate = *es->predicate;
3574 else
3575 toplev_predicate = true;
3576
3577 info->fp_expressions |= callee_info->fp_expressions;
3578
3579 if (callee_info->conds)
3580 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3581 if (ipa_node_params_sum && callee_info->conds)
3582 {
3583 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3584 int count = args ? ipa_get_cs_argument_count (args) : 0;
3585 int i;
3586
3587 if (count)
3588 {
3589 operand_map.safe_grow_cleared (count);
3590 offset_map.safe_grow_cleared (count);
3591 }
3592 for (i = 0; i < count; i++)
3593 {
3594 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3595 int map = -1;
3596
3597 /* TODO: handle non-NOPs when merging. */
3598 if (jfunc->type == IPA_JF_PASS_THROUGH)
3599 {
3600 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3601 map = ipa_get_jf_pass_through_formal_id (jfunc);
3602 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3603 offset_map[i] = -1;
3604 }
3605 else if (jfunc->type == IPA_JF_ANCESTOR)
3606 {
3607 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3608 if (offset >= 0 && offset < INT_MAX)
3609 {
3610 map = ipa_get_jf_ancestor_formal_id (jfunc);
3611 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3612 offset = -1;
3613 offset_map[i] = offset;
3614 }
3615 }
3616 operand_map[i] = map;
3617 gcc_assert (map < ipa_get_param_count (params_summary));
3618 }
3619 }
3620 sreal freq = edge->sreal_frequency ();
3621 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3622 {
3623 predicate p;
3624 p = e->exec_predicate.remap_after_inlining
3625 (info, params_summary,
3626 callee_info, operand_map,
3627 offset_map, clause,
3628 toplev_predicate);
3629 predicate nonconstp;
3630 nonconstp = e->nonconst_predicate.remap_after_inlining
3631 (info, params_summary,
3632 callee_info, operand_map,
3633 offset_map, clause,
3634 toplev_predicate);
3635 if (p != false && nonconstp != false)
3636 {
3637 sreal add_time = ((sreal)e->time * freq);
3638 int prob = e->nonconst_predicate.probability (callee_info->conds,
3639 clause, es->param);
3640 add_time = add_time * prob / REG_BR_PROB_BASE;
3641 if (prob != REG_BR_PROB_BASE
3642 && dump_file && (dump_flags & TDF_DETAILS))
3643 {
3644 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3645 (double) prob / REG_BR_PROB_BASE);
3646 }
3647 info->account_size_time (e->size, add_time, p, nonconstp);
3648 }
3649 }
3650 remap_edge_summaries (edge, edge->callee, info, params_summary,
3651 callee_info, operand_map,
3652 offset_map, clause, &toplev_predicate);
3653 remap_hint_predicate (info, params_summary, callee_info,
3654 &callee_info->loop_iterations,
3655 operand_map, offset_map, clause, &toplev_predicate);
3656 remap_hint_predicate (info, params_summary, callee_info,
3657 &callee_info->loop_stride,
3658 operand_map, offset_map, clause, &toplev_predicate);
3659
3660 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
3661 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
3662
3663 if (info->estimated_stack_size < peak)
3664 info->estimated_stack_size = peak;
3665
3666 inline_update_callee_summaries (edge->callee, es->loop_depth);
3667
3668 /* Free summaries that are not maintained for inline clones/edges. */
3669 ipa_call_summaries->remove (edge);
3670 ipa_fn_summaries->remove (edge->callee);
3671 ipa_remove_from_growth_caches (edge);
3672 }
3673
3674 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
3675 overall size and time. Recompute it. */
3676
3677 void
3678 ipa_update_overall_fn_summary (struct cgraph_node *node)
3679 {
3680 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
3681 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
3682 size_time_entry *e;
3683 int i;
3684
3685 size_info->size = 0;
3686 info->time = 0;
3687 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3688 {
3689 size_info->size += e->size;
3690 info->time += e->time;
3691 }
3692 info->min_size = (*info->size_time_table)[0].size;
3693 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
3694 &info->time, NULL,
3695 ~(clause_t) (1 << predicate::false_condition),
3696 vNULL, vNULL, vNULL);
3697 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
3698 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
3699 }
3700
3701
3702 /* This function performs intraprocedural analysis in NODE that is required to
3703 inline indirect calls. */
3704
3705 static void
3706 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3707 {
3708 ipa_analyze_node (node);
3709 if (dump_file && (dump_flags & TDF_DETAILS))
3710 {
3711 ipa_print_node_params (dump_file, node);
3712 ipa_print_node_jump_functions (dump_file, node);
3713 }
3714 }
3715
3716
3717 /* Note function body size. */
3718
3719 void
3720 inline_analyze_function (struct cgraph_node *node)
3721 {
3722 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3723
3724 if (dump_file)
3725 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3726 node->name (), node->order);
3727 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3728 inline_indirect_intraprocedural_analysis (node);
3729 compute_fn_summary (node, false);
3730 if (!optimize)
3731 {
3732 struct cgraph_edge *e;
3733 for (e = node->callees; e; e = e->next_callee)
3734 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3735 for (e = node->indirect_calls; e; e = e->next_callee)
3736 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3737 }
3738
3739 pop_cfun ();
3740 }
3741
3742
3743 /* Called when new function is inserted to callgraph late. */
3744
3745 void
3746 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3747 {
3748 inline_analyze_function (node);
3749 }
3750
3751 /* Note function body size. */
3752
3753 static void
3754 ipa_fn_summary_generate (void)
3755 {
3756 struct cgraph_node *node;
3757
3758 FOR_EACH_DEFINED_FUNCTION (node)
3759 if (DECL_STRUCT_FUNCTION (node->decl))
3760 node->versionable = tree_versionable_function_p (node->decl);
3761
3762 ipa_fn_summary_alloc ();
3763
3764 ipa_fn_summaries->enable_insertion_hook ();
3765
3766 ipa_register_cgraph_hooks ();
3767
3768 FOR_EACH_DEFINED_FUNCTION (node)
3769 if (!node->alias
3770 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3771 || opt_for_fn (node->decl, optimize)))
3772 inline_analyze_function (node);
3773 }
3774
3775
3776 /* Write inline summary for edge E to OB. */
3777
3778 static void
3779 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
3780 bool prevails)
3781 {
3782 class ipa_call_summary *es = prevails
3783 ? ipa_call_summaries->get_create (e) : NULL;
3784 predicate p;
3785 int length, i;
3786
3787 int size = streamer_read_uhwi (ib);
3788 int time = streamer_read_uhwi (ib);
3789 int depth = streamer_read_uhwi (ib);
3790
3791 if (es)
3792 {
3793 es->call_stmt_size = size;
3794 es->call_stmt_time = time;
3795 es->loop_depth = depth;
3796 }
3797
3798 bitpack_d bp = streamer_read_bitpack (ib);
3799 if (es)
3800 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3801 else
3802 bp_unpack_value (&bp, 1);
3803
3804 p.stream_in (ib);
3805 if (es)
3806 edge_set_predicate (e, &p);
3807 length = streamer_read_uhwi (ib);
3808 if (length && es && e->possibly_call_in_translation_unit_p ())
3809 {
3810 es->param.safe_grow_cleared (length);
3811 for (i = 0; i < length; i++)
3812 es->param[i].change_prob = streamer_read_uhwi (ib);
3813 }
3814 else
3815 {
3816 for (i = 0; i < length; i++)
3817 streamer_read_uhwi (ib);
3818 }
3819 }
3820
3821
3822 /* Stream in inline summaries from the section. */
3823
3824 static void
3825 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3826 size_t len)
3827 {
3828 const struct lto_function_header *header =
3829 (const struct lto_function_header *) data;
3830 const int cfg_offset = sizeof (struct lto_function_header);
3831 const int main_offset = cfg_offset + header->cfg_size;
3832 const int string_offset = main_offset + header->main_size;
3833 class data_in *data_in;
3834 unsigned int i, count2, j;
3835 unsigned int f_count;
3836
3837 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3838 file_data->mode_table);
3839
3840 data_in =
3841 lto_data_in_create (file_data, (const char *) data + string_offset,
3842 header->string_size, vNULL);
3843 f_count = streamer_read_uhwi (&ib);
3844 for (i = 0; i < f_count; i++)
3845 {
3846 unsigned int index;
3847 struct cgraph_node *node;
3848 class ipa_fn_summary *info;
3849 class ipa_node_params *params_summary;
3850 class ipa_size_summary *size_info;
3851 lto_symtab_encoder_t encoder;
3852 struct bitpack_d bp;
3853 struct cgraph_edge *e;
3854 predicate p;
3855
3856 index = streamer_read_uhwi (&ib);
3857 encoder = file_data->symtab_node_encoder;
3858 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3859 index));
3860 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
3861 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
3862 size_info = node->prevailing_p ()
3863 ? ipa_size_summaries->get_create (node) : NULL;
3864
3865 int stack_size = streamer_read_uhwi (&ib);
3866 int size = streamer_read_uhwi (&ib);
3867 sreal time = sreal::stream_in (&ib);
3868
3869 if (info)
3870 {
3871 info->estimated_stack_size
3872 = size_info->estimated_self_stack_size = stack_size;
3873 size_info->size = size_info->self_size = size;
3874 info->time = time;
3875 }
3876
3877 bp = streamer_read_bitpack (&ib);
3878 if (info)
3879 {
3880 info->inlinable = bp_unpack_value (&bp, 1);
3881 info->fp_expressions = bp_unpack_value (&bp, 1);
3882 }
3883 else
3884 {
3885 bp_unpack_value (&bp, 1);
3886 bp_unpack_value (&bp, 1);
3887 }
3888
3889 count2 = streamer_read_uhwi (&ib);
3890 gcc_assert (!info || !info->conds);
3891 if (info)
3892 vec_safe_reserve_exact (info->conds, count2);
3893 for (j = 0; j < count2; j++)
3894 {
3895 struct condition c;
3896 unsigned int k, count3;
3897 c.operand_num = streamer_read_uhwi (&ib);
3898 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3899 c.type = stream_read_tree (&ib, data_in);
3900 c.val = stream_read_tree (&ib, data_in);
3901 bp = streamer_read_bitpack (&ib);
3902 c.agg_contents = bp_unpack_value (&bp, 1);
3903 c.by_ref = bp_unpack_value (&bp, 1);
3904 if (c.agg_contents)
3905 c.offset = streamer_read_uhwi (&ib);
3906 count3 = streamer_read_uhwi (&ib);
3907 c.param_ops = NULL;
3908 if (info)
3909 vec_safe_reserve_exact (c.param_ops, count3);
3910 if (params_summary)
3911 ipa_set_param_used_by_ipa_predicates
3912 (params_summary, c.operand_num, true);
3913 for (k = 0; k < count3; k++)
3914 {
3915 struct expr_eval_op op;
3916 enum gimple_rhs_class rhs_class;
3917 op.code = (enum tree_code) streamer_read_uhwi (&ib);
3918 op.type = stream_read_tree (&ib, data_in);
3919 switch (rhs_class = get_gimple_rhs_class (op.code))
3920 {
3921 case GIMPLE_UNARY_RHS:
3922 op.index = 0;
3923 op.val[0] = NULL_TREE;
3924 op.val[1] = NULL_TREE;
3925 break;
3926
3927 case GIMPLE_BINARY_RHS:
3928 case GIMPLE_TERNARY_RHS:
3929 bp = streamer_read_bitpack (&ib);
3930 op.index = bp_unpack_value (&bp, 2);
3931 op.val[0] = stream_read_tree (&ib, data_in);
3932 if (rhs_class == GIMPLE_BINARY_RHS)
3933 op.val[1] = NULL_TREE;
3934 else
3935 op.val[1] = stream_read_tree (&ib, data_in);
3936 break;
3937
3938 default:
3939 fatal_error (UNKNOWN_LOCATION,
3940 "invalid fnsummary in LTO stream");
3941 }
3942 if (info)
3943 c.param_ops->quick_push (op);
3944 }
3945 if (info)
3946 info->conds->quick_push (c);
3947 }
3948 count2 = streamer_read_uhwi (&ib);
3949 gcc_assert (!info || !info->size_time_table);
3950 if (info && count2)
3951 vec_safe_reserve_exact (info->size_time_table, count2);
3952 for (j = 0; j < count2; j++)
3953 {
3954 class size_time_entry e;
3955
3956 e.size = streamer_read_uhwi (&ib);
3957 e.time = sreal::stream_in (&ib);
3958 e.exec_predicate.stream_in (&ib);
3959 e.nonconst_predicate.stream_in (&ib);
3960
3961 if (info)
3962 info->size_time_table->quick_push (e);
3963 }
3964
3965 p.stream_in (&ib);
3966 if (info)
3967 set_hint_predicate (&info->loop_iterations, p);
3968 p.stream_in (&ib);
3969 if (info)
3970 set_hint_predicate (&info->loop_stride, p);
3971 for (e = node->callees; e; e = e->next_callee)
3972 read_ipa_call_summary (&ib, e, info != NULL);
3973 for (e = node->indirect_calls; e; e = e->next_callee)
3974 read_ipa_call_summary (&ib, e, info != NULL);
3975 }
3976
3977 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3978 len);
3979 lto_data_in_delete (data_in);
3980 }
3981
3982
3983 /* Read inline summary. Jump functions are shared among ipa-cp
3984 and inliner, so when ipa-cp is active, we don't need to write them
3985 twice. */
3986
3987 static void
3988 ipa_fn_summary_read (void)
3989 {
3990 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3991 struct lto_file_decl_data *file_data;
3992 unsigned int j = 0;
3993
3994 ipa_fn_summary_alloc ();
3995
3996 while ((file_data = file_data_vec[j++]))
3997 {
3998 size_t len;
3999 const char *data
4000 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4001 &len);
4002 if (data)
4003 inline_read_section (file_data, data, len);
4004 else
4005 /* Fatal error here. We do not want to support compiling ltrans units
4006 with different version of compiler or different flags than the WPA
4007 unit, so this should never happen. */
4008 fatal_error (input_location,
4009 "ipa inline summary is missing in input file");
4010 }
4011 ipa_register_cgraph_hooks ();
4012 if (!flag_ipa_cp)
4013 ipa_prop_read_jump_functions ();
4014
4015 gcc_assert (ipa_fn_summaries);
4016 ipa_fn_summaries->enable_insertion_hook ();
4017 }
4018
4019
4020 /* Write inline summary for edge E to OB. */
4021
4022 static void
4023 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4024 {
4025 class ipa_call_summary *es = ipa_call_summaries->get (e);
4026 int i;
4027
4028 streamer_write_uhwi (ob, es->call_stmt_size);
4029 streamer_write_uhwi (ob, es->call_stmt_time);
4030 streamer_write_uhwi (ob, es->loop_depth);
4031
4032 bitpack_d bp = bitpack_create (ob->main_stream);
4033 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4034 streamer_write_bitpack (&bp);
4035
4036 if (es->predicate)
4037 es->predicate->stream_out (ob);
4038 else
4039 streamer_write_uhwi (ob, 0);
4040 streamer_write_uhwi (ob, es->param.length ());
4041 for (i = 0; i < (int) es->param.length (); i++)
4042 streamer_write_uhwi (ob, es->param[i].change_prob);
4043 }
4044
4045
4046 /* Write inline summary for node in SET.
4047 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4048 active, we don't need to write them twice. */
4049
4050 static void
4051 ipa_fn_summary_write (void)
4052 {
4053 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4054 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4055 unsigned int count = 0;
4056 int i;
4057
4058 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4059 {
4060 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4061 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4062 if (cnode && cnode->definition && !cnode->alias)
4063 count++;
4064 }
4065 streamer_write_uhwi (ob, count);
4066
4067 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4068 {
4069 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4070 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4071 if (cnode && cnode->definition && !cnode->alias)
4072 {
4073 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4074 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4075 struct bitpack_d bp;
4076 struct cgraph_edge *edge;
4077 int i;
4078 size_time_entry *e;
4079 struct condition *c;
4080
4081 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4082 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4083 streamer_write_hwi (ob, size_info->self_size);
4084 info->time.stream_out (ob);
4085 bp = bitpack_create (ob->main_stream);
4086 bp_pack_value (&bp, info->inlinable, 1);
4087 bp_pack_value (&bp, false, 1);
4088 bp_pack_value (&bp, info->fp_expressions, 1);
4089 streamer_write_bitpack (&bp);
4090 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4091 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4092 {
4093 int j;
4094 struct expr_eval_op *op;
4095
4096 streamer_write_uhwi (ob, c->operand_num);
4097 streamer_write_uhwi (ob, c->code);
4098 stream_write_tree (ob, c->type, true);
4099 stream_write_tree (ob, c->val, true);
4100 bp = bitpack_create (ob->main_stream);
4101 bp_pack_value (&bp, c->agg_contents, 1);
4102 bp_pack_value (&bp, c->by_ref, 1);
4103 streamer_write_bitpack (&bp);
4104 if (c->agg_contents)
4105 streamer_write_uhwi (ob, c->offset);
4106 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4107 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4108 {
4109 streamer_write_uhwi (ob, op->code);
4110 stream_write_tree (ob, op->type, true);
4111 if (op->val[0])
4112 {
4113 bp = bitpack_create (ob->main_stream);
4114 bp_pack_value (&bp, op->index, 2);
4115 streamer_write_bitpack (&bp);
4116 stream_write_tree (ob, op->val[0], true);
4117 if (op->val[1])
4118 stream_write_tree (ob, op->val[1], true);
4119 }
4120 }
4121 }
4122 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4123 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4124 {
4125 streamer_write_uhwi (ob, e->size);
4126 e->time.stream_out (ob);
4127 e->exec_predicate.stream_out (ob);
4128 e->nonconst_predicate.stream_out (ob);
4129 }
4130 if (info->loop_iterations)
4131 info->loop_iterations->stream_out (ob);
4132 else
4133 streamer_write_uhwi (ob, 0);
4134 if (info->loop_stride)
4135 info->loop_stride->stream_out (ob);
4136 else
4137 streamer_write_uhwi (ob, 0);
4138 for (edge = cnode->callees; edge; edge = edge->next_callee)
4139 write_ipa_call_summary (ob, edge);
4140 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4141 write_ipa_call_summary (ob, edge);
4142 }
4143 }
4144 streamer_write_char_stream (ob->main_stream, 0);
4145 produce_asm (ob, NULL);
4146 destroy_output_block (ob);
4147
4148 if (!flag_ipa_cp)
4149 ipa_prop_write_jump_functions ();
4150 }
4151
4152
4153 /* Release function summary. */
4154
4155 void
4156 ipa_free_fn_summary (void)
4157 {
4158 if (!ipa_call_summaries)
4159 return;
4160 ggc_delete (ipa_fn_summaries);
4161 ipa_fn_summaries = NULL;
4162 delete ipa_call_summaries;
4163 ipa_call_summaries = NULL;
4164 edge_predicate_pool.release ();
4165 /* During IPA this is one of largest datastructures to release. */
4166 if (flag_wpa)
4167 ggc_trim ();
4168 }
4169
4170 /* Release function summary. */
4171
4172 void
4173 ipa_free_size_summary (void)
4174 {
4175 if (!ipa_size_summaries)
4176 return;
4177 delete ipa_size_summaries;
4178 ipa_size_summaries = NULL;
4179 }
4180
4181 namespace {
4182
4183 const pass_data pass_data_local_fn_summary =
4184 {
4185 GIMPLE_PASS, /* type */
4186 "local-fnsummary", /* name */
4187 OPTGROUP_INLINE, /* optinfo_flags */
4188 TV_INLINE_PARAMETERS, /* tv_id */
4189 0, /* properties_required */
4190 0, /* properties_provided */
4191 0, /* properties_destroyed */
4192 0, /* todo_flags_start */
4193 0, /* todo_flags_finish */
4194 };
4195
4196 class pass_local_fn_summary : public gimple_opt_pass
4197 {
4198 public:
4199 pass_local_fn_summary (gcc::context *ctxt)
4200 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4201 {}
4202
4203 /* opt_pass methods: */
4204 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4205 virtual unsigned int execute (function *)
4206 {
4207 return compute_fn_summary_for_current ();
4208 }
4209
4210 }; // class pass_local_fn_summary
4211
4212 } // anon namespace
4213
4214 gimple_opt_pass *
4215 make_pass_local_fn_summary (gcc::context *ctxt)
4216 {
4217 return new pass_local_fn_summary (ctxt);
4218 }
4219
4220
4221 /* Free inline summary. */
4222
4223 namespace {
4224
4225 const pass_data pass_data_ipa_free_fn_summary =
4226 {
4227 SIMPLE_IPA_PASS, /* type */
4228 "free-fnsummary", /* name */
4229 OPTGROUP_NONE, /* optinfo_flags */
4230 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4231 0, /* properties_required */
4232 0, /* properties_provided */
4233 0, /* properties_destroyed */
4234 0, /* todo_flags_start */
4235 0, /* todo_flags_finish */
4236 };
4237
4238 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4239 {
4240 public:
4241 pass_ipa_free_fn_summary (gcc::context *ctxt)
4242 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4243 small_p (false)
4244 {}
4245
4246 /* opt_pass methods: */
4247 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4248 void set_pass_param (unsigned int n, bool param)
4249 {
4250 gcc_assert (n == 0);
4251 small_p = param;
4252 }
4253 virtual bool gate (function *) { return true; }
4254 virtual unsigned int execute (function *)
4255 {
4256 ipa_free_fn_summary ();
4257 if (!flag_wpa)
4258 ipa_free_size_summary ();
4259 return 0;
4260 }
4261
4262 private:
4263 bool small_p;
4264 }; // class pass_ipa_free_fn_summary
4265
4266 } // anon namespace
4267
4268 simple_ipa_opt_pass *
4269 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4270 {
4271 return new pass_ipa_free_fn_summary (ctxt);
4272 }
4273
4274 namespace {
4275
4276 const pass_data pass_data_ipa_fn_summary =
4277 {
4278 IPA_PASS, /* type */
4279 "fnsummary", /* name */
4280 OPTGROUP_INLINE, /* optinfo_flags */
4281 TV_IPA_FNSUMMARY, /* tv_id */
4282 0, /* properties_required */
4283 0, /* properties_provided */
4284 0, /* properties_destroyed */
4285 0, /* todo_flags_start */
4286 ( TODO_dump_symtab ), /* todo_flags_finish */
4287 };
4288
4289 class pass_ipa_fn_summary : public ipa_opt_pass_d
4290 {
4291 public:
4292 pass_ipa_fn_summary (gcc::context *ctxt)
4293 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4294 ipa_fn_summary_generate, /* generate_summary */
4295 ipa_fn_summary_write, /* write_summary */
4296 ipa_fn_summary_read, /* read_summary */
4297 NULL, /* write_optimization_summary */
4298 NULL, /* read_optimization_summary */
4299 NULL, /* stmt_fixup */
4300 0, /* function_transform_todo_flags_start */
4301 NULL, /* function_transform */
4302 NULL) /* variable_transform */
4303 {}
4304
4305 /* opt_pass methods: */
4306 virtual unsigned int execute (function *) { return 0; }
4307
4308 }; // class pass_ipa_fn_summary
4309
4310 } // anon namespace
4311
4312 ipa_opt_pass_d *
4313 make_pass_ipa_fn_summary (gcc::context *ctxt)
4314 {
4315 return new pass_ipa_fn_summary (ctxt);
4316 }
4317
4318 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4319 within the same process. For use by toplev::finalize. */
4320
4321 void
4322 ipa_fnsummary_c_finalize (void)
4323 {
4324 ipa_free_fn_summary ();
4325 }