/* Function summary pass.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Analysis of function bodies used by inter-procedural passes

   We estimate for each function
   - function body size and size after specializing into given context
   - average function execution time in a given context
   - function frame size
   For each call
   - call statement size, time and how often the parameters change

   ipa_fn_summary data structures store the above information locally (i.e.
   parameters of the function itself) and globally (i.e. parameters of
   the function created by applying all the inline decisions already
   present in the callgraph).

   We provide access to the ipa_fn_summary data structure and
   basic logic updating the parameters when inlining is performed.

   The summaries are context sensitive.  Context means
     1) partial assignment of known constant values of operands
     2) whether the function is inlined into the call or not.
   It is easy to add more variants.  To represent function size and time
   that depend on context (i.e. it is known to be optimized away when
   the context is known either by inlining or from IP-CP and cloning),
   we use predicates.

   estimate_edge_size_and_time can be used to query function size/time in
   the given context.  ipa_merge_fn_summary_after_inlining merges
   properties of caller and callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
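
/* Illustrative example (an editorial sketch, not part of the pass): for

     int
     f (int x)
     {
       if (x)
	 g ();
       return 0;
     }

   the size/time table conceptually contains one entry guarded by the
   always-true predicate (entry and return code) and one entry for the
   call to g guarded by a predicate testing that parameter 0 may be
   nonzero.  A clone specialized for x == 0 evaluates that predicate to
   false and drops the guarded size and time from its estimates.  */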

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "tree-streamer.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-inline.h"
#include "gimple-pretty-print.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "ipa-utils.h"
#include "cfgexpand.h"
#include "gimplify.h"
#include "stringpool.h"
#include "attribs.h"

/* Summaries.  */
fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;

/* Edge predicates go here.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");


/* Dump IPA hints.  */
void
ipa_dump_hints (FILE *f, ipa_hints hints)
{
  if (!hints)
    return;
  fprintf (f, "IPA hints:");
  if (hints & INLINE_HINT_indirect_call)
    {
      hints &= ~INLINE_HINT_indirect_call;
      fprintf (f, " indirect_call");
    }
  if (hints & INLINE_HINT_loop_iterations)
    {
      hints &= ~INLINE_HINT_loop_iterations;
      fprintf (f, " loop_iterations");
    }
  if (hints & INLINE_HINT_loop_stride)
    {
      hints &= ~INLINE_HINT_loop_stride;
      fprintf (f, " loop_stride");
    }
  if (hints & INLINE_HINT_same_scc)
    {
      hints &= ~INLINE_HINT_same_scc;
      fprintf (f, " same_scc");
    }
  if (hints & INLINE_HINT_in_scc)
    {
      hints &= ~INLINE_HINT_in_scc;
      fprintf (f, " in_scc");
    }
  if (hints & INLINE_HINT_cross_module)
    {
      hints &= ~INLINE_HINT_cross_module;
      fprintf (f, " cross_module");
    }
  if (hints & INLINE_HINT_declared_inline)
    {
      hints &= ~INLINE_HINT_declared_inline;
      fprintf (f, " declared_inline");
    }
  if (hints & INLINE_HINT_known_hot)
    {
      hints &= ~INLINE_HINT_known_hot;
      fprintf (f, " known_hot");
    }
  gcc_assert (!hints);
}


/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to a constant and
   will get optimized out in specialized clones of the function.
   If CALL is true, account to call_size_time_table rather than
   size_time_table.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in,
				   bool call)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;
  vec<size_time_entry, va_gc> *table = call
				       ? call_size_time_table
				       : size_time_table;

  if (exec_pred == false)
    return;

  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create the initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && table)
    return;

  /* Only for calls do we unaccount what we previously recorded.  */
  gcc_checking_assert (time >= 0 || call);

  for (i = 0; vec_safe_iterate (table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == max_size_time_table_size)
    {
      i = 0;
      found = true;
      e = &(*table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      if (call)
	vec_safe_push (call_size_time_table, new_entry);
      else
	vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      e->size += size;
      e->time += time;
      /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
      /* Tolerate small roundoff issues.  */
      if (e->time < 0)
	e->time = 0;
    }
}

/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		   (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->count = profile_count::zero ();
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}

/* Set predicate for edge E.  */

static void
edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
{
  /* If the edge is determined to be never executed, redirect it
     to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
     be optimized out.  */
  if (predicate && *predicate == false
      /* When handling speculative edges, we need to do the redirection
	 just once.  Do it always on the direct edge, so we do not
	 attempt to resolve speculation while duplicating the edge.  */
      && (!e->speculative || e->callee))
    e = redirect_to_unreachable (e);

  class ipa_call_summary *es = ipa_call_summaries->get (e);
  if (predicate && *predicate != true)
    {
      if (!es->predicate)
	es->predicate = edge_predicate_pool.allocate ();
      *es->predicate = *predicate;
    }
  else
    {
      if (es->predicate)
	edge_predicate_pool.remove (es->predicate);
      es->predicate = NULL;
    }
}

/* Set predicate for hint *P.  */

static void
set_hint_predicate (predicate **p, predicate new_predicate)
{
  if (new_predicate == false || new_predicate == true)
    {
      if (*p)
	edge_predicate_pool.remove (*p);
      *p = NULL;
    }
  else
    {
      if (!*p)
	*p = edge_predicate_pool.allocate ();
      **p = new_predicate;
    }
}


/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  It is a bitmask of conditions.  Bit
   0 means that the condition is known to be false, while bit 1 means that the
   condition may or may not be true.  These differ - for example the
   NOT_INLINED condition is always false in the second case, and
   builtin_constant_p tests cannot use the fact that a parameter is indeed a
   constant.

   KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate known offset/value set for each
   parameter.  Return clause of possible truths.  When INLINE_P is true, assume
   that we are inlining.

   An ERROR_MARK value means compile time invariant.  */
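
/* Worked example (editorial sketch): with two conditions recorded in
   INFO->conds, say C0 "param 0 == 0" and C1 "param 1 changed", and a
   context where param 0 is known to be 4, the resulting clause has bit
   (predicate::first_dynamic_condition + 0) clear (C0 is known false) and
   bit (predicate::first_dynamic_condition + 1) set (C1 may be true);
   predicates are then evaluated against this bitmask.  */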

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<value_range> known_value_ranges,
				    vec<ipa_agg_value_set> known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val = NULL;
      tree res;
      int j;
      struct expr_eval_op *op;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.length () || !known_vals.length ()
			   || (known_vals.length () == known_aggs.length ()));

      if (c->agg_contents)
	{
	  struct ipa_agg_value_set *agg;

	  if (c->code == predicate::changed
	      && !c->by_ref
	      && c->operand_num < (int)known_vals.length ()
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (c->operand_num < (int)known_aggs.length ())
	    {
	      agg = &known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg,
						c->operand_num
						   < (int) known_vals.length ()
						? known_vals[c->operand_num]
						: NULL,
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else if (c->operand_num < (int) known_vals.length ())
	{
	  val = known_vals[c->operand_num];
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val
	  && (c->code == predicate::changed
	      || c->code == predicate::is_not_constant))
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      if (c->code == predicate::changed)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->code == predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
	{
	  if (c->type != TREE_TYPE (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (!val)
		break;
	      if (!op->val[0])
		val = fold_unary (op->code, op->type, val);
	      else if (!op->val[1])
		val = fold_binary (op->code, op->type,
				   op->index ? op->val[0] : val,
				   op->index ? val : op->val[0]);
	      else if (op->index == 0)
		val = fold_ternary (op->code, op->type,
				    val, op->val[0], op->val[1]);
	      else if (op->index == 1)
		val = fold_ternary (op->code, op->type,
				    op->val[0], val, op->val[1]);
	      else if (op->index == 2)
		val = fold_ternary (op->code, op->type,
				    op->val[0], op->val[1], val);
	      else
		val = NULL_TREE;
	    }

	  res = val
	    ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	    : NULL;

	  if (res && integer_zerop (res))
	    continue;
	  if (res && integer_onep (res))
	    {
	      clause |= 1 << (i + predicate::first_dynamic_condition);
	      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	      continue;
	    }
	}
      if (c->operand_num < (int) known_value_ranges.length ()
	  && !c->agg_contents
	  && !known_value_ranges[c->operand_num].undefined_p ()
	  && !known_value_ranges[c->operand_num].varying_p ()
	  && TYPE_SIZE (c->type)
	     == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
	  && (!val || TREE_CODE (val) != INTEGER_CST))
	{
	  value_range vr = known_value_ranges[c->operand_num];
	  if (!useless_type_conversion_p (c->type, vr.type ()))
	    {
	      value_range res;
	      range_fold_unary_expr (&res, NOP_EXPR,
				     c->type, &vr, vr.type ());
	      vr = res;
	    }
	  tree type = c->type;

	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (vr.varying_p () || vr.undefined_p ())
		break;

	      value_range res;
	      if (!op->val[0])
		range_fold_unary_expr (&res, op->code, op->type, &vr, type);
	      else if (!op->val[1])
		{
		  value_range op0 (op->val[0], op->val[0]);
		  range_fold_binary_expr (&res, op->code, op->type,
					  op->index ? &op0 : &vr,
					  op->index ? &vr : &op0);
		}
	      else
		gcc_unreachable ();
	      type = op->type;
	      vr = res;
	    }
	  if (!vr.varying_p () && !vr.undefined_p ())
	    {
	      value_range res;
	      value_range val_vr (c->val, c->val);
	      range_fold_binary_expr (&res, c->code, boolean_type_node,
				      &vr,
				      &val_vr);
	      if (res.zero_p ())
		continue;
	    }
	}

      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}


/* Work out what conditions might be true at invocation of E.
   Compute costs for inlined edge if INLINE_P is true.

   Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
   (if non-NULL) conditions evaluated for a nonspecialized clone called
   in a given context.

   KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by
   known constant and aggregate values of parameters.

   KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts
   of parameters used by a polymorphic call.  */
void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_value_set> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  auto_vec<value_range, 32> known_value_ranges;
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;

  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && (info->conds || known_contexts_ptr)
      && (args = IPA_EDGE_REF (e)) != NULL)
    {
      struct cgraph_node *caller;
      class ipa_node_params *caller_parms_info, *callee_pi = NULL;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      if (count)
	{
	  if (e->caller->inlined_to)
	    caller = e->caller->inlined_to;
	  else
	    caller = e->caller;
	  caller_parms_info = IPA_NODE_REF (caller);
	  callee_pi = IPA_NODE_REF (callee);

	  /* Watch for thunks.  */
	  if (callee_pi)
	    /* Watch for variadic functions.  */
	    count = MIN (count, ipa_get_param_count (callee_pi));
	}

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);

	    if (ipa_is_param_used_by_indirect_call (callee_pi, i)
		|| ipa_is_param_used_by_ipa_predicates (callee_pi, i))
	      {
		/* Determine if we know constant value of the parameter.  */
		tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
						 ipa_get_type (callee_pi, i));

		if (!cst && e->call_stmt
		    && i < (int)gimple_call_num_args (e->call_stmt))
		  {
		    cst = gimple_call_arg (e->call_stmt, i);
		    if (!is_gimple_min_invariant (cst))
		      cst = NULL;
		  }
		if (cst)
		  {
		    gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		    if (!known_vals_ptr->length ())
		      vec_safe_grow_cleared (known_vals_ptr, count);
		    (*known_vals_ptr)[i] = cst;
		  }
		else if (inline_p && !es->param[i].change_prob)
		  {
		    if (!known_vals_ptr->length ())
		      vec_safe_grow_cleared (known_vals_ptr, count);
		    (*known_vals_ptr)[i] = error_mark_node;
		  }

		/* If we failed to get simple constant, try value range.  */
		if ((!cst || TREE_CODE (cst) != INTEGER_CST)
		    && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
		  {
		    value_range vr
		      = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
						    ipa_get_type (callee_pi,
								  i));
		    if (!vr.undefined_p () && !vr.varying_p ())
		      {
			if (!known_value_ranges.length ())
			  known_value_ranges.safe_grow_cleared (count);
			known_value_ranges[i] = vr;
		      }
		  }

		/* Determine known aggregate values.  */
		ipa_agg_value_set agg
		  = ipa_agg_value_set_from_jfunc (caller_parms_info,
						  caller, &jf->agg);
		if (agg.items.length ())
		  {
		    if (!known_aggs_ptr->length ())
		      vec_safe_grow_cleared (known_aggs_ptr, count);
		    (*known_aggs_ptr)[i] = agg;
		  }
	      }

	    /* For calls used in polymorphic calls we further determine
	       polymorphic call context.  */
	    if (known_contexts_ptr
		&& ipa_is_param_used_by_polymorphic_call (callee_pi, i))
	      {
		ipa_polymorphic_call_context
		  ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		if (!ctx.useless_p ())
		  {
		    if (!known_contexts_ptr->length ())
		      known_contexts_ptr->safe_grow_cleared (count);
		    (*known_contexts_ptr)[i]
		      = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
		  }
	      }
	  }
      else
	gcc_assert (!count || callee->thunk.thunk_p);
    }
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    {
	      if (!known_vals_ptr->length ())
		vec_safe_grow_cleared (known_vals_ptr, count);
	      (*known_vals_ptr)[i] = cst;
	    }
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      *known_vals_ptr,
				      known_value_ranges,
				      *known_aggs_ptr,
				      clause_ptr,
				      nonspec_clause_ptr);
}


/* Allocate the function summary.  */

static void
ipa_fn_summary_alloc (void)
{
  gcc_checking_assert (!ipa_fn_summaries);
  ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
			 (symtab);
  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
  ipa_call_summaries = new ipa_call_summary_t (symtab);
}

ipa_call_summary::~ipa_call_summary ()
{
  if (predicate)
    edge_predicate_pool.remove (predicate);

  param.release ();
}

ipa_fn_summary::~ipa_fn_summary ()
{
  if (loop_iterations)
    edge_predicate_pool.remove (loop_iterations);
  if (loop_stride)
    edge_predicate_pool.remove (loop_stride);
  vec_free (conds);
  vec_free (size_time_table);
  vec_free (call_size_time_table);
}

void
ipa_fn_summary_t::remove_callees (cgraph_node *node)
{
  cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
  for (e = node->indirect_calls; e; e = e->next_callee)
    ipa_call_summaries->remove (e);
}

/* Same as remap_predicate_after_duplication but handle hint predicate *P.
   Additionally, take care of allocating a new memory slot for the updated
   predicate and set it to NULL when it becomes true or false (and thus
   uninteresting).  */

static void
remap_hint_predicate_after_duplication (predicate **p,
					clause_t possible_truths)
{
  predicate new_predicate;

  if (!*p)
    return;

  new_predicate = (*p)->remap_after_duplication (possible_truths);
  /* We do not want to free the previous predicate; it is used by the node
     origin.  */
  *p = NULL;
  set_hint_predicate (p, new_predicate);
}


/* Hook that is called by cgraph.c when a node is duplicated.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      class ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
	    (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
	    (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);

      /* If the inliner or someone after the inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because the size vectors already contain
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
    }
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}


/* Hook that is called by cgraph.c when an edge is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       class ipa_call_summary *srcinfo,
			       class ipa_call_summary *info)
{
  new (info) ipa_call_summary (*srcinfo);
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}

/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       class ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s freq:%4.2f",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
	       indent, "", edge->sreal_frequency ().to_double ());

      if (cross_module_call_p (edge))
	fprintf (f, " cross module");

      if (es)
	fprintf (f, " loop depth:%2i size:%2i time: %2i",
		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      ipa_size_summary *ss = ipa_size_summaries->get (callee);
      if (s != NULL)
	fprintf (f, " callee size:%2i stack:%2i",
		 (int) (ss->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      if (es && es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      if (es && es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      if (!edge->inline_failed)
	{
	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
		   indent + 2, "",
		   (int) ipa_get_stack_frame_offset (callee),
		   (int) ss->estimated_self_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}


void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n global time: %f\n", s->time.to_double ());
	  fprintf (f, " self size: %i\n", ss->self_size);
	  fprintf (f, " global size: %i\n", ss->size);
	  fprintf (f, " min size: %i\n", s->min_size);
	  fprintf (f, " self stack: %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, " estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, " In SCC: %i\n", (int) s->scc_no);
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, " size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ", executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ", nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  if (s->loop_iterations)
	    {
	      fprintf (f, " loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, " loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  fprintf (f, " calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}

/* Callback of walk_aliased_vdefs.  Flags that it has been invoked to the
   boolean variable pointed to by DATA.  */

static bool
mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
	       void *data)
{
  bool *b = (bool *) data;
  *b = true;
  return true;
}

/* If OP refers to the value of a function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of
   the PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget + 1);
      if (walked < 0)
	{
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}

/* If OP refers to the value of a function parameter, return the corresponding
   parameter.  Also traverse chains of SSA register assignments.  If non-NULL,
   the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
   stored to *SIZE_P in that case too.  */

static tree
unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
		 poly_int64 *size_p)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
  if (res)
    return res;

  if (TREE_CODE (op) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (op)
      && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
    return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
			    gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
			    size_p);
  return NULL_TREE;
}

/* If OP refers to a value of a function parameter or value loaded from an
   aggregate passed to a parameter (either by value or reference), return TRUE
   and store the number of the parameter to *INDEX_P, the access size into
   *SIZE_P, and information whether and how it has been loaded from an
   aggregate into *AGGPOS.  INFO describes the function parameters, STMT is the
   statement in which OP is used or loaded.  */

static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}

/* See if a statement might disappear after inlining.
   0 - means not eliminated
   1 - half of the statements go away
   2 - for sure it is eliminated.
   We are not terribly sophisticated, basically looking for simple abstraction
   penalty wrappers.  */

static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of the statements go away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameters are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When a parameter is not an SSA register because its address is
	     taken and it is just copied into one, the statement will be
	     completely free after inlining (we will copy propagate
	     backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference are
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying a parameter passed by reference into a gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle a testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	       struct a a ={1,2};
	       return a;
	     }

	     This translates into:

	     returnstruct ()
	     {
	       int a$b;
	       int a$a;
	       struct a a;
	       struct a D.2739;

	       <bb 2>:
	       D.2739.a = 1;
	       D.2739.b = 2;
	       return D.2739;

	     }
	     For that we would need to copy the ipa-split logic detecting
	     writes to the return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
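
/* Editorial example: a typical abstraction-penalty wrapper such as

     int get_val (struct box *b) { return b->val; }

   reduces in GIMPLE to a load through an unmodified parameter passed by
   reference followed by a GIMPLE_RETURN; both statements are classified
   above as likely or surely eliminated after inlining.  */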

/* Analyze EXPR if it represents a series of simple operations performed on
   a function parameter and return true if so.  FBI, STMT, EXPR, INDEX_P and
   AGGPOS have the same meaning as in unmodified_parm_or_parm_agg_item.
   The type of the parameter or load from an aggregate via the parameter is
   stored in *TYPE_P.  Operations on the parameter are recorded to
   PARAM_OPS_P if it is not NULL.  */

static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  int op_limit = param_ipa_max_param_expr_ops;
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if the expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand; record its index in the rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operand.  */
	      goto fail;
	    }
	}

      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resources and return.  */
 fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
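
/* Editorial example: for a condition like "if ((n & 3) == 0)" on an
   unmodified parameter n, decompose_param_expr returns the index of n
   and records one expr_eval_op for the BIT_AND_EXPR with constant 3;
   evaluate_conditions_for_known_args later replays that operation on the
   known constant or value range of n before comparing against 0.  */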

/* If BB ends with a conditional that we can turn into predicates, attach the
   corresponding predicates to the CFG edges.  */

static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning the proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of the diamond CFG
	     of this conditional statement, we do not need to compute a
	     predicate for the edge because the final block's predicate must
	     be at least as broad as that of the block containing the
	     statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
       constant_code
     else
       nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when the inliner doesn't see the operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
				   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}


/* If BB ends with a switch that we can turn into predicates, attach the
   corresponding predicates to the CFG edges.  */

static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = param_ipa_max_switch_predicate_bounds;
  int bound_count = 0;
  wide_int vr_wmin, vr_wmax;
  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to the maximum count to bypass computing the predicate
     for the default case if its target basic block is the convergence point
     of all switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have the same type as the switch expression;
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is the convergence point of all switch
	 cases; its predicate should be at least as broad as that of the
	 switch statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, the predicate for the
	 default case might become too complicated, so add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are contiguous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by the
		 anti-range of the switch index, combine them into one
		 range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  We also count the endpoints of the
	 range set; this number is close to the number of conditions we will
	 create for the predicate of the default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct the predicate to represent the default range set, which is the
     negation of all case ranges.  A case range is classified as containing
     single or non-single values.  Suppose a piece of case ranges in the
     following:

       [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent the default case's range sets between two non-single value
     case ranges (from D2 to D3), we construct the predicate as:

       D2 < x < D3 && x != S1 && ... && x != Sn
   */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create a sub-predicate for a range that is below the
	     lower bound of the switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create a sub-predicate for a range that is beyond the
	     upper bound of the switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}


/* For each BB in NODE attach to its AUX pointer the predicate under
   which it is executable.  */

static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotone dataflow
		     in the case we hit the limit on the number of clauses
		     and the and/or operations above give approximate
		     answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For a switch/if statement, we can OR-combine the predicates
		 of all its cases/branches to get the predicate for the basic
		 block at their convergence point, but sometimes this will
		 generate a very complicated predicate.  Actually, we can get
		 a simpler predicate in another way by using the fact that
		 the predicate for a basic block must also hold true for its
		 post dominators.  To be specific, the basic block at the
		 convergence point of a conditional statement should include
		 the predicate of the statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}


/* Return the predicate specifying when EXPR might have a result that is not
   a compile time constant.  */

static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm),
			  NULL, predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}


1908 /* Return a predicate specifying when STMT might have a result that is
1909 not a compile-time constant. */
1910
1911 static predicate
1912 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1913 class ipa_fn_summary *summary,
1914 class ipa_node_params *params_summary,
1915 gimple *stmt,
1916 vec<predicate> nonconstant_names)
1917 {
1918 predicate p = true;
1919 ssa_op_iter iter;
1920 tree use;
1921 tree param_type = NULL_TREE;
1922 predicate op_non_const;
1923 bool is_load;
1924 int base_index;
1925 struct agg_position_info aggpos;
1926
1927 /* What statements might be optimized away
1928 when their arguments are constant. */
1929 if (gimple_code (stmt) != GIMPLE_ASSIGN
1930 && gimple_code (stmt) != GIMPLE_COND
1931 && gimple_code (stmt) != GIMPLE_SWITCH
1932 && (gimple_code (stmt) != GIMPLE_CALL
1933 || !(gimple_call_flags (stmt) & ECF_CONST)))
1934 return p;
1935
1936 /* Stores will stay anyway. */
1937 if (gimple_store_p (stmt))
1938 return p;
1939
1940 is_load = gimple_assign_load_p (stmt);
1941
1942 /* Loads can be optimized when the value is known. */
1943 if (is_load)
1944 {
1945 tree op = gimple_assign_rhs1 (stmt);
1946 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
1947 &aggpos))
1948 return p;
1949 }
1950 else
1951 base_index = -1;
1952
1953 /* See if we understand all operands before we start
1954 adding conditionals. */
1955 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1956 {
1957 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1958 /* For arguments we can build a condition. */
1959 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1960 continue;
1961 if (TREE_CODE (use) != SSA_NAME)
1962 return p;
1963 /* If we know when the operand is constant,
1964 we can still say something useful. */
1965 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1966 continue;
1967 return p;
1968 }
1969
1970 if (is_load)
1971 op_non_const =
1972 add_condition (summary, params_summary,
1973 base_index, param_type, &aggpos,
1974 predicate::changed, NULL_TREE);
1975 else
1976 op_non_const = false;
1977 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1978 {
1979 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1980 int index;
1981
1982 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1983 {
1984 if (index != base_index)
1985 p = add_condition (summary, params_summary, index,
1986 TREE_TYPE (parm), NULL,
1987 predicate::changed, NULL_TREE);
1988 else
1989 continue;
1990 }
1991 else
1992 p = nonconstant_names[SSA_NAME_VERSION (use)];
1993 op_non_const = p.or_with (summary->conds, op_non_const);
1994 }
1995 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1996 && gimple_op (stmt, 0)
1997 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1998 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1999 = op_non_const;
2000 return op_non_const;
2001 }
2002
2003 struct record_modified_bb_info
2004 {
2005 tree op;
2006 bitmap bb_set;
2007 gimple *stmt;
2008 };
2009
2010 /* A value is initialized in INIT_BB and used in USE_BB. We want to
2011 compute the probability that it changes between two executions of USE_BB.
2012 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2013 is in a different loop nest, we can do better.
2014 This is all just an estimate. In theory we would look for a minimal cut
2015 separating INIT_BB and USE_BB, but we only want to anticipate loop
2016 invariant motion anyway. */
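/* A hypothetical sketch of the case where the common-loop heuristic
   below helps:

     for (i = 0; i < n; i++)
       {
         for (j = 0; j < m; j++)
           v = f (j);               /* init_bb, runs about n * m times  */
         for (j = 0; j < m; j++)
           use (v);                 /* use_bb, runs about n * m times  */
       }

   Relative to USE_BB the value effectively changes once per outer
   iteration; the count of the common loop's header (n) captures that,
   while INIT_BB's own count (n * m) would not.  */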
2017
2018 static basic_block
2019 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2020 {
2021 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2022 if (l && l->header->count < init_bb->count)
2023 return l->header;
2024 return init_bb;
2025 }
2026
2027 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may
2028 be set, except for info->stmt. */
2029
2030 static bool
2031 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2032 {
2033 struct record_modified_bb_info *info =
2034 (struct record_modified_bb_info *) data;
2035 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2036 return false;
2037 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2038 return false;
2039 bitmap_set_bit (info->bb_set,
2040 SSA_NAME_IS_DEFAULT_DEF (vdef)
2041 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2042 : get_minimal_bb
2043 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2044 gimple_bb (info->stmt))->index);
2045 if (dump_file)
2046 {
2047 fprintf (dump_file, " Param ");
2048 print_generic_expr (dump_file, info->op, TDF_SLIM);
2049 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2050 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2051 get_minimal_bb
2052 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2053 gimple_bb (info->stmt))->index);
2054 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2055 }
2056 return false;
2057 }
2058
2059 /* Return the probability (based on REG_BR_PROB_BASE) that the I-th
2060 parameter of STMT will have changed since the last invocation of STMT.
2061
2062 Value 0 is reserved for compile-time invariants.
2063 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2064 ought to be REG_BR_PROB_BASE / estimated_iters. */
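/* A worked example with hypothetical counts: if the block defining the
   argument executes 10 times and the block containing the call executes
   1000 times, the function returns 10/1000 * REG_BR_PROB_BASE, i.e. the
   argument is expected to change roughly once per hundred calls.  */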
2065
2066 static int
2067 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2068 {
2069 tree op = gimple_call_arg (stmt, i);
2070 basic_block bb = gimple_bb (stmt);
2071
2072 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2073 op = TREE_OPERAND (op, 0);
2074
2075 tree base = get_base_address (op);
2076
2077 /* Global invariants never change. */
2078 if (is_gimple_min_invariant (base))
2079 return 0;
2080
2081 /* We would have to do non-trivial analysis to really work out the
2082 probability of the value changing (i.e. when the init statement
2083 is in a sibling loop of the call).
2084
2085 We make a conservative estimate: when the call is executed N times more
2086 often than the statement defining the value, we take the frequency 1/N. */
2087 if (TREE_CODE (base) == SSA_NAME)
2088 {
2089 profile_count init_count;
2090
2091 if (!bb->count.nonzero_p ())
2092 return REG_BR_PROB_BASE;
2093
2094 if (SSA_NAME_IS_DEFAULT_DEF (base))
2095 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2096 else
2097 init_count = get_minimal_bb
2098 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2099 gimple_bb (stmt))->count;
2100
2101 if (init_count < bb->count)
2102 return MAX ((init_count.to_sreal_scale (bb->count)
2103 * REG_BR_PROB_BASE).to_int (), 1);
2104 return REG_BR_PROB_BASE;
2105 }
2106 else
2107 {
2108 ao_ref refd;
2109 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2110 struct record_modified_bb_info info;
2111 tree init = ctor_for_folding (base);
2112
2113 if (init != error_mark_node)
2114 return 0;
2115 if (!bb->count.nonzero_p ())
2116 return REG_BR_PROB_BASE;
2117 if (dump_file)
2118 {
2119 fprintf (dump_file, " Analyzing param change probability of ");
2120 print_generic_expr (dump_file, op, TDF_SLIM);
2121 fprintf (dump_file, "\n");
2122 }
2123 ao_ref_init (&refd, op);
2124 info.op = op;
2125 info.stmt = stmt;
2126 info.bb_set = BITMAP_ALLOC (NULL);
2127 int walked
2128 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2129 NULL, NULL, fbi->aa_walk_budget);
2130 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2131 {
2132 if (dump_file)
2133 {
2134 if (walked < 0)
2135 fprintf (dump_file, " Ran out of AA walking budget.\n");
2136 else
2137 fprintf (dump_file, " Set in same BB as used.\n");
2138 }
2139 BITMAP_FREE (info.bb_set);
2140 return REG_BR_PROB_BASE;
2141 }
2142
2143 bitmap_iterator bi;
2144 unsigned index;
2145 /* Look up the most frequent update of the value and assume that
2146 it dominates all the others; precise analysis here is difficult. */
2147 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2148 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2149 if (dump_file)
2150 {
2151 fprintf (dump_file, " Set with count ");
2152 max.dump (dump_file);
2153 fprintf (dump_file, " and used with count ");
2154 bb->count.dump (dump_file);
2155 fprintf (dump_file, " freq %f\n",
2156 max.to_sreal_scale (bb->count).to_double ());
2157 }
2158
2159 BITMAP_FREE (info.bb_set);
2160 if (max < bb->count)
2161 return MAX ((max.to_sreal_scale (bb->count)
2162 * REG_BR_PROB_BASE).to_int (), 1);
2163 return REG_BR_PROB_BASE;
2164 }
2165 }
2166
2167 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2168 sub-graph and whether the predicate the condition depends on is known.
2169 If so, return true and store the predicate in *P. */
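/* The matched shape, as a hypothetical GIMPLE sketch:

     <bb 1>:
       if (a_1 > 0)            // first_bb, ends in GIMPLE_COND
         goto <bb 3>;
     <bb 2>:                   // single-predecessor, single-successor arm
       ...
     <bb 3>:                   // BB, the convergence point
       x_2 = PHI <0 (1), 1 (2)>

   When the condition depends only on values we track, the PHI result
   becomes known once they are, and *P is derived from the condition's
   left-hand side.  */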
2170
2171 static bool
2172 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2173 ipa_fn_summary *summary,
2174 class ipa_node_params *params_summary,
2175 basic_block bb,
2176 predicate *p,
2177 vec<predicate> nonconstant_names)
2178 {
2179 edge e;
2180 edge_iterator ei;
2181 basic_block first_bb = NULL;
2182 gimple *stmt;
2183
2184 if (single_pred_p (bb))
2185 {
2186 *p = false;
2187 return true;
2188 }
2189
2190 FOR_EACH_EDGE (e, ei, bb->preds)
2191 {
2192 if (single_succ_p (e->src))
2193 {
2194 if (!single_pred_p (e->src))
2195 return false;
2196 if (!first_bb)
2197 first_bb = single_pred (e->src);
2198 else if (single_pred (e->src) != first_bb)
2199 return false;
2200 }
2201 else
2202 {
2203 if (!first_bb)
2204 first_bb = e->src;
2205 else if (e->src != first_bb)
2206 return false;
2207 }
2208 }
2209
2210 if (!first_bb)
2211 return false;
2212
2213 stmt = last_stmt (first_bb);
2214 if (!stmt
2215 || gimple_code (stmt) != GIMPLE_COND
2216 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2217 return false;
2218
2219 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2220 gimple_cond_lhs (stmt),
2221 nonconstant_names);
2222 if (*p == true)
2223 return false;
2224 else
2225 return true;
2226 }
2227
2228 /* Given a PHI statement in a function described by inline properties SUMMARY
2229 and *P being the predicate describing whether the selected PHI argument is
2230 known, store a predicate for the result of the PHI statement into
2231 NONCONSTANT_NAMES, if possible. */
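/* For instance (hypothetical PHI), for  x_3 = PHI <x_1 (2), 5 (3)>  the
   stored predicate is *P ORed with the predicate of x_1; the constant
   argument 5 contributes nothing.  */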
2232
2233 static void
2234 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2235 predicate *p,
2236 vec<predicate> nonconstant_names)
2237 {
2238 unsigned i;
2239
2240 for (i = 0; i < gimple_phi_num_args (phi); i++)
2241 {
2242 tree arg = gimple_phi_arg (phi, i)->def;
2243 if (!is_gimple_min_invariant (arg))
2244 {
2245 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2246 *p = p->or_with (summary->conds,
2247 nonconstant_names[SSA_NAME_VERSION (arg)]);
2248 if (*p == true)
2249 return;
2250 }
2251 }
2252
2253 if (dump_file && (dump_flags & TDF_DETAILS))
2254 {
2255 fprintf (dump_file, "\t\tphi predicate: ");
2256 p->dump (dump_file, summary->conds);
2257 }
2258 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2259 }
2260
2261 /* For a typical usage of __builtin_expect (a <= b, 1), we
2262 may introduce an extra relation stmt:
2263 With the builtin, we have
2264 t1 = a <= b;
2265 t2 = (long int) t1;
2266 t3 = __builtin_expect (t2, 1);
2267 if (t3 != 0)
2268 goto ...
2269 Without the builtin, we have
2270 if (a <= b)
2271 goto ...
2272 This affects the size/time estimation and may have
2273 an impact on early inlining.
2274 Find this pattern here and adjust the cost later. */
2275
2276 static gimple *
2277 find_foldable_builtin_expect (basic_block bb)
2278 {
2279 gimple_stmt_iterator bsi;
2280
2281 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2282 {
2283 gimple *stmt = gsi_stmt (bsi);
2284 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2285 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2286 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2287 {
2288 tree var = gimple_call_lhs (stmt);
2289 tree arg = gimple_call_arg (stmt, 0);
2290 use_operand_p use_p;
2291 gimple *use_stmt;
2292 bool match = false;
2293 bool done = false;
2294
2295 if (!var || !arg)
2296 continue;
2297 gcc_assert (TREE_CODE (var) == SSA_NAME);
2298
2299 while (TREE_CODE (arg) == SSA_NAME)
2300 {
2301 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2302 if (!is_gimple_assign (stmt_tmp))
2303 break;
2304 switch (gimple_assign_rhs_code (stmt_tmp))
2305 {
2306 case LT_EXPR:
2307 case LE_EXPR:
2308 case GT_EXPR:
2309 case GE_EXPR:
2310 case EQ_EXPR:
2311 case NE_EXPR:
2312 match = true;
2313 done = true;
2314 break;
2315 CASE_CONVERT:
2316 break;
2317 default:
2318 done = true;
2319 break;
2320 }
2321 if (done)
2322 break;
2323 arg = gimple_assign_rhs1 (stmt_tmp);
2324 }
2325
2326 if (match && single_imm_use (var, &use_p, &use_stmt)
2327 && gimple_code (use_stmt) == GIMPLE_COND)
2328 return use_stmt;
2329 }
2330 }
2331 return NULL;
2332 }
2333
2334 /* Return true when the basic block contains only clobbers followed by RESX.
2335 Such BBs are kept around to make removal of dead stores possible in the
2336 presence of EH and will be optimized out by optimize_clobbers later in the
2337 game.
2338
2339 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2340 that can be clobber-only, too. When it is false, the RESX is not required
2341 at the end of the basic block. */
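/* Such a block typically looks like this hypothetical GIMPLE sketch:

     <bb 7> (EH landing pad):
       x ={v} {CLOBBER};
       y ={v} {CLOBBER};
       resx 1;

   Since optimize_clobbers will delete it, it should not be counted into
   the function's size and time.  */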
2342
2343 static bool
2344 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2345 {
2346 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2347 edge_iterator ei;
2348 edge e;
2349
2350 if (need_eh)
2351 {
2352 if (gsi_end_p (gsi))
2353 return false;
2354 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2355 return false;
2356 gsi_prev (&gsi);
2357 }
2358 else if (!single_succ_p (bb))
2359 return false;
2360
2361 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2362 {
2363 gimple *stmt = gsi_stmt (gsi);
2364 if (is_gimple_debug (stmt))
2365 continue;
2366 if (gimple_clobber_p (stmt))
2367 continue;
2368 if (gimple_code (stmt) == GIMPLE_LABEL)
2369 break;
2370 return false;
2371 }
2372
2373 /* See if all predecessors are either throws or clobber only BBs. */
2374 FOR_EACH_EDGE (e, ei, bb->preds)
2375 if (!(e->flags & EDGE_EH)
2376 && !clobber_only_eh_bb_p (e->src, false))
2377 return false;
2378
2379 return true;
2380 }
2381
2382 /* Return true if STMT computes a floating point expression that may be
2383 affected by -ffast-math and similar flags. */
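/* For instance, an assignment such as  tmp_1 = a_2 * b_3  with operands
   of double type qualifies; statements touching only integral operands
   do not. (Illustrative example.)  */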
2384
2385 static bool
2386 fp_expression_p (gimple *stmt)
2387 {
2388 ssa_op_iter i;
2389 tree op;
2390
2391 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2392 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2393 return true;
2394 return false;
2395 }
2396
2397 /* Analyze the function body of NODE.
2398 EARLY indicates a run from the early optimization pipeline. */
2399
2400 static void
2401 analyze_function_body (struct cgraph_node *node, bool early)
2402 {
2403 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2404 /* Estimate static overhead for function prologue/epilogue and alignment. */
2405 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2406 /* Benefits are scaled by the probability of elimination, which is in the
2407 range [0, 2]. */
2408 basic_block bb;
2409 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2410 sreal freq;
2411 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2412 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2413 predicate bb_predicate;
2414 struct ipa_func_body_info fbi;
2415 vec<predicate> nonconstant_names = vNULL;
2416 int nblocks, n;
2417 int *order;
2418 gimple *fix_builtin_expect_stmt;
2419
2420 gcc_assert (my_function && my_function->cfg);
2421 gcc_assert (cfun == my_function);
2422
2423 memset(&fbi, 0, sizeof(fbi));
2424 vec_free (info->conds);
2425 info->conds = NULL;
2426 vec_free (info->size_time_table);
2427 info->size_time_table = NULL;
2428
2429 /* When optimizing and analyzing for the IPA inliner, initialize the loop
2430 optimizer so we can produce proper inline hints.
2431
2432 When optimizing and analyzing for the early inliner, initialize node params
2433 so we can produce correct BB predicates. */
2434
2435 if (opt_for_fn (node->decl, optimize))
2436 {
2437 calculate_dominance_info (CDI_DOMINATORS);
2438 calculate_dominance_info (CDI_POST_DOMINATORS);
2439 if (!early)
2440 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2441 else
2442 {
2443 ipa_check_create_node_params ();
2444 ipa_initialize_node_params (node);
2445 }
2446
2447 if (ipa_node_params_sum)
2448 {
2449 fbi.node = node;
2450 fbi.info = IPA_NODE_REF (node);
2451 fbi.bb_infos = vNULL;
2452 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2453 fbi.param_count = count_formal_params (node->decl);
2454 fbi.aa_walk_budget = param_ipa_max_aa_steps;
2455
2456 nonconstant_names.safe_grow_cleared
2457 (SSANAMES (my_function)->length ());
2458 }
2459 }
2460
2461 if (dump_file)
2462 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2463 node->name ());
2464
2465 /* When we run into the maximal number of entries, we assign everything to
2466 the constant truth case. Be sure to have it in the list. */
2467 bb_predicate = true;
2468 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2469
2470 bb_predicate = predicate::not_inlined ();
2471 info->account_size_time (opt_for_fn (node->decl,
2472 param_uninlined_function_insns)
2473 * ipa_fn_summary::size_scale,
2474 opt_for_fn (node->decl,
2475 param_uninlined_function_time),
2476 bb_predicate,
2477 bb_predicate);
2478
2479 if (fbi.info)
2480 compute_bb_predicates (&fbi, node, info, params_summary);
2481 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2482 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2483 for (n = 0; n < nblocks; n++)
2484 {
2485 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2486 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2487 if (clobber_only_eh_bb_p (bb))
2488 {
2489 if (dump_file && (dump_flags & TDF_DETAILS))
2490 fprintf (dump_file, "\n Ignoring BB %i;"
2491 " it will be optimized away by cleanup_clobbers\n",
2492 bb->index);
2493 continue;
2494 }
2495
2496 /* TODO: Obviously predicates can be propagated down across CFG. */
2497 if (fbi.info)
2498 {
2499 if (bb->aux)
2500 bb_predicate = *(predicate *) bb->aux;
2501 else
2502 bb_predicate = false;
2503 }
2504 else
2505 bb_predicate = true;
2506
2507 if (dump_file && (dump_flags & TDF_DETAILS))
2508 {
2509 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2510 bb_predicate.dump (dump_file, info->conds);
2511 }
2512
2513 if (fbi.info && nonconstant_names.exists ())
2514 {
2515 predicate phi_predicate;
2516 bool first_phi = true;
2517
2518 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2519 gsi_next (&bsi))
2520 {
2521 if (first_phi
2522 && !phi_result_unknown_predicate (&fbi, info,
2523 params_summary,
2524 bb,
2525 &phi_predicate,
2526 nonconstant_names))
2527 break;
2528 first_phi = false;
2529 if (dump_file && (dump_flags & TDF_DETAILS))
2530 {
2531 fprintf (dump_file, " ");
2532 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2533 }
2534 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2535 nonconstant_names);
2536 }
2537 }
2538
2539 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2540
2541 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2542 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2543 {
2544 gimple *stmt = gsi_stmt (bsi);
2545 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2546 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2547 int prob;
2548 predicate will_be_nonconstant;
2549
2550 /* This relation stmt should be folded after we remove
2551 __builtin_expect call. Adjust the cost here. */
2552 if (stmt == fix_builtin_expect_stmt)
2553 {
2554 this_size--;
2555 this_time--;
2556 }
2557
2558 if (dump_file && (dump_flags & TDF_DETAILS))
2559 {
2560 fprintf (dump_file, " ");
2561 print_gimple_stmt (dump_file, stmt, 0);
2562 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2563 freq.to_double (), this_size,
2564 this_time);
2565 }
2566
2567 if (is_gimple_call (stmt)
2568 && !gimple_call_internal_p (stmt))
2569 {
2570 struct cgraph_edge *edge = node->get_edge (stmt);
2571 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2572
2573 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2574 resolved as constant. We, however, don't want to optimize
2575 out the cgraph edges. */
2576 if (nonconstant_names.exists ()
2577 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2578 && gimple_call_lhs (stmt)
2579 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2580 {
2581 predicate false_p = false;
2582 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2583 = false_p;
2584 }
2585 if (ipa_node_params_sum)
2586 {
2587 int count = gimple_call_num_args (stmt);
2588 int i;
2589
2590 if (count)
2591 es->param.safe_grow_cleared (count);
2592 for (i = 0; i < count; i++)
2593 {
2594 int prob = param_change_prob (&fbi, stmt, i);
2595 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2596 es->param[i].change_prob = prob;
2597 }
2598 }
2599
2600 es->call_stmt_size = this_size;
2601 es->call_stmt_time = this_time;
2602 es->loop_depth = bb_loop_depth (bb);
2603 edge_set_predicate (edge, &bb_predicate);
2604 if (edge->speculative)
2605 {
2606 cgraph_edge *direct, *indirect;
2607 ipa_ref *ref;
2608 edge->speculative_call_info (direct, indirect, ref);
2609 gcc_assert (direct == edge);
2610 ipa_call_summary *es2
2611 = ipa_call_summaries->get_create (indirect);
2612 ipa_call_summaries->duplicate (edge, indirect,
2613 es, es2);
2614 }
2615 }
2616
2617 /* TODO: When a conditional jump or switch is known to be constant, but
2618 we did not translate it into the predicates, we could account for
2619 just the maximum of the possible paths. */
2620 if (fbi.info)
2621 will_be_nonconstant
2622 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2623 stmt, nonconstant_names);
2624 else
2625 will_be_nonconstant = true;
2626 if (this_time || this_size)
2627 {
2628 sreal final_time = (sreal)this_time * freq;
2629
2630 prob = eliminated_by_inlining_prob (&fbi, stmt);
2631 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2632 fprintf (dump_file,
2633 "\t\t50%% will be eliminated by inlining\n");
2634 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2635 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2636
2637 class predicate p = bb_predicate & will_be_nonconstant;
2638
2639 /* We can ignore a statement when we have proved it never
2640 executes, but we cannot do that for call statements
2641 because edges are accounted specially. */
2642
2643 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2644 {
2645 time += final_time;
2646 size += this_size;
2647 }
2648
2649 /* We account everything but the calls. Calls have their own
2650 size/time info attached to cgraph edges. This is necessary
2651 in order to make the cost disappear after inlining. */
2652 if (!is_gimple_call (stmt))
2653 {
2654 if (prob)
2655 {
2656 predicate ip = bb_predicate & predicate::not_inlined ();
2657 info->account_size_time (this_size * prob,
2658 (final_time * prob) / 2, ip,
2659 p);
2660 }
2661 if (prob != 2)
2662 info->account_size_time (this_size * (2 - prob),
2663 (final_time * (2 - prob) / 2),
2664 bb_predicate,
2665 p);
2666 }
2667
2668 if (!info->fp_expressions && fp_expression_p (stmt))
2669 {
2670 info->fp_expressions = true;
2671 if (dump_file)
2672 fprintf (dump_file, " fp_expression set\n");
2673 }
2674 }
2675
2676 /* Account cost of address calculations in the statements. */
2677 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2678 {
2679 for (tree op = gimple_op (stmt, i);
2680 op && handled_component_p (op);
2681 op = TREE_OPERAND (op, 0))
2682 if ((TREE_CODE (op) == ARRAY_REF
2683 || TREE_CODE (op) == ARRAY_RANGE_REF)
2684 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2685 {
2686 predicate p = bb_predicate;
2687 if (fbi.info)
2688 p = p & will_be_nonconstant_expr_predicate
2689 (&fbi, info, params_summary,
2690 TREE_OPERAND (op, 1),
2691 nonconstant_names);
2692 if (p != false)
2693 {
2694 time += freq;
2695 size += 1;
2696 if (dump_file)
2697 fprintf (dump_file,
2698 "\t\tAccounting address calculation.\n");
2699 info->account_size_time (ipa_fn_summary::size_scale,
2700 freq,
2701 bb_predicate,
2702 p);
2703 }
2704 }
2705 }
2706
2707 }
2708 }
2709 free (order);
2710
2711 if (nonconstant_names.exists () && !early)
2712 {
2713 class loop *loop;
2714 predicate loop_iterations = true;
2715 predicate loop_stride = true;
2716
2717 if (dump_file && (dump_flags & TDF_DETAILS))
2718 flow_loops_dump (dump_file, NULL, 0);
2719 scev_initialize ();
2720 FOR_EACH_LOOP (loop, 0)
2721 {
2722 vec<edge> exits;
2723 edge ex;
2724 unsigned int j;
2725 class tree_niter_desc niter_desc;
2726 bb_predicate = *(predicate *) loop->header->aux;
2727
2728 exits = get_loop_exit_edges (loop);
2729 FOR_EACH_VEC_ELT (exits, j, ex)
2730 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2731 && !is_gimple_min_invariant (niter_desc.niter))
2732 {
2733 predicate will_be_nonconstant
2734 = will_be_nonconstant_expr_predicate (&fbi, info,
2735 params_summary,
2736 niter_desc.niter,
2737 nonconstant_names);
2738 if (will_be_nonconstant != true)
2739 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2740 if (will_be_nonconstant != true
2741 && will_be_nonconstant != false)
2742 /* This is slightly imprecise. We may want to represent each
2743 loop with an independent predicate. */
2744 loop_iterations &= will_be_nonconstant;
2745 }
2746 exits.release ();
2747 }
2748
2749 /* To avoid quadratic behavior we analyze stride predicates only
2750 with respect to the containing loop. Thus we simply iterate
2751 over all defs in the outermost loop body. */
2752 for (loop = loops_for_fn (cfun)->tree_root->inner;
2753 loop != NULL; loop = loop->next)
2754 {
2755 basic_block *body = get_loop_body (loop);
2756 for (unsigned i = 0; i < loop->num_nodes; i++)
2757 {
2758 gimple_stmt_iterator gsi;
2759 bb_predicate = *(predicate *) body[i]->aux;
2760 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2761 gsi_next (&gsi))
2762 {
2763 gimple *stmt = gsi_stmt (gsi);
2764
2765 if (!is_gimple_assign (stmt))
2766 continue;
2767
2768 tree def = gimple_assign_lhs (stmt);
2769 if (TREE_CODE (def) != SSA_NAME)
2770 continue;
2771
2772 affine_iv iv;
2773 if (!simple_iv (loop_containing_stmt (stmt),
2774 loop_containing_stmt (stmt),
2775 def, &iv, true)
2776 || is_gimple_min_invariant (iv.step))
2777 continue;
2778
2779 predicate will_be_nonconstant
2780 = will_be_nonconstant_expr_predicate (&fbi, info,
2781 params_summary,
2782 iv.step,
2783 nonconstant_names);
2784 if (will_be_nonconstant != true)
2785 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2786 if (will_be_nonconstant != true
2787 && will_be_nonconstant != false)
2788 /* This is slightly imprecise. We may want to represent
2789 each loop with an independent predicate. */
2790 loop_stride = loop_stride & will_be_nonconstant;
2791 }
2792 }
2793 free (body);
2794 }
2795 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2796 set_hint_predicate (&s->loop_iterations, loop_iterations);
2797 set_hint_predicate (&s->loop_stride, loop_stride);
2798 scev_finalize ();
2799 }
2800 FOR_ALL_BB_FN (bb, my_function)
2801 {
2802 edge e;
2803 edge_iterator ei;
2804
2805 if (bb->aux)
2806 edge_predicate_pool.remove ((predicate *)bb->aux);
2807 bb->aux = NULL;
2808 FOR_EACH_EDGE (e, ei, bb->succs)
2809 {
2810 if (e->aux)
2811 edge_predicate_pool.remove ((predicate *) e->aux);
2812 e->aux = NULL;
2813 }
2814 }
2815 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2816 ipa_size_summary *ss = ipa_size_summaries->get (node);
2817 s->time = time;
2818 ss->self_size = size;
2819 nonconstant_names.release ();
2820 ipa_release_body_info (&fbi);
2821 if (opt_for_fn (node->decl, optimize))
2822 {
2823 if (!early)
2824 loop_optimizer_finalize ();
2825 else if (!ipa_edge_args_sum)
2826 ipa_free_all_node_params ();
2827 free_dominance_info (CDI_DOMINATORS);
2828 free_dominance_info (CDI_POST_DOMINATORS);
2829 }
2830 if (dump_file)
2831 {
2832 fprintf (dump_file, "\n");
2833 ipa_dump_fn_summary (dump_file, node);
2834 }
2835 }
2836
2837
2838 /* Compute function summary.
2839 EARLY is true when we compute parameters during early opts. */
2840
2841 void
2842 compute_fn_summary (struct cgraph_node *node, bool early)
2843 {
2844 HOST_WIDE_INT self_stack_size;
2845 struct cgraph_edge *e;
2846
2847 gcc_assert (!node->inlined_to);
2848
2849 if (!ipa_fn_summaries)
2850 ipa_fn_summary_alloc ();
2851
2852 /* Create a new ipa_fn_summary. */
2853 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2854 ipa_fn_summaries->remove (node);
2855 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2856 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
2857
2858 /* Estimate the stack size for the function if we're optimizing. */
2859 self_stack_size = optimize && !node->thunk.thunk_p
2860 ? estimated_stack_frame_size (node) : 0;
2861 size_info->estimated_self_stack_size = self_stack_size;
2862 info->estimated_stack_size = self_stack_size;
2863
2864 if (node->thunk.thunk_p)
2865 {
2866 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2867 predicate t = true;
2868
2869 node->can_change_signature = false;
2870 es->call_stmt_size = eni_size_weights.call_cost;
2871 es->call_stmt_time = eni_time_weights.call_cost;
2872 info->account_size_time (ipa_fn_summary::size_scale
2873 * opt_for_fn (node->decl,
2874 param_uninlined_function_thunk_insns),
2875 opt_for_fn (node->decl,
2876 param_uninlined_function_thunk_time), t, t);
2877 t = predicate::not_inlined ();
2878 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2879 ipa_update_overall_fn_summary (node);
2880 size_info->self_size = size_info->size;
2881 if (stdarg_p (TREE_TYPE (node->decl)))
2882 {
2883 info->inlinable = false;
2884 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2885 }
2886 else
2887 info->inlinable = true;
2888 }
2889 else
2890 {
2891 /* Even is_gimple_min_invariant relies on current_function_decl. */
2892 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2893
2894 /* Can this function be inlined at all? */
2895 if (!opt_for_fn (node->decl, optimize)
2896 && !lookup_attribute ("always_inline",
2897 DECL_ATTRIBUTES (node->decl)))
2898 info->inlinable = false;
2899 else
2900 info->inlinable = tree_inlinable_function_p (node->decl);
2901
2902 /* Type attributes can use parameter indices to describe them. */
2903 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2904 /* Likewise for #pragma omp declare simd functions or functions
2905 with simd attribute. */
2906 || lookup_attribute ("omp declare simd",
2907 DECL_ATTRIBUTES (node->decl)))
2908 node->can_change_signature = false;
2909 else
2910 {
2911 /* Otherwise, inlinable functions can always change their signature. */
2912 if (info->inlinable)
2913 node->can_change_signature = true;
2914 else
2915 {
2916 /* Functions calling builtin_apply cannot change signature. */
2917 for (e = node->callees; e; e = e->next_callee)
2918 {
2919 tree cdecl = e->callee->decl;
2920 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2921 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2922 break;
2923 }
2924 node->can_change_signature = !e;
2925 }
2926 }
2927 analyze_function_body (node, early);
2928 pop_cfun ();
2929 }
2930 for (e = node->callees; e; e = e->next_callee)
2931 if (e->callee->comdat_local_p ())
2932 break;
2933 node->calls_comdat_local = (e != NULL);
2934
2935 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2936 size_info->size = size_info->self_size;
2937 info->estimated_stack_size = size_info->estimated_self_stack_size;
2938
2939 /* The code above should compute exactly the same result as
2940 ipa_update_overall_fn_summary, but because the computation happens in
2941 a different order, roundoff errors result in slight changes. */
2942 ipa_update_overall_fn_summary (node);
2943 /* In LTO mode we may have speculative edges set. */
2944 gcc_assert (in_lto_p || size_info->size == size_info->self_size);
2945 }
2946
2947
2948 /* Compute parameters of functions used by inliner using
2949 current_function_decl. */
2950
2951 static unsigned int
2952 compute_fn_summary_for_current (void)
2953 {
2954 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2955 return 0;
2956 }
2957
2958 /* Estimate the benefit of devirtualizing indirect edge IE, given KNOWN_VALS,
2959 KNOWN_CONTEXTS and KNOWN_AGGS. */
2960
2961 static bool
2962 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2963 int *size, int *time,
2964 vec<tree> known_vals,
2965 vec<ipa_polymorphic_call_context> known_contexts,
2966 vec<ipa_agg_value_set> known_aggs)
2967 {
2968 tree target;
2969 struct cgraph_node *callee;
2970 class ipa_fn_summary *isummary;
2971 enum availability avail;
2972 bool speculative;
2973
2974 if (!known_vals.length () && !known_contexts.length ())
2975 return false;
2976 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2977 return false;
2978
2979 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2980 known_aggs, &speculative);
2981 if (!target || speculative)
2982 return false;
2983
2984 /* Account for difference in cost between indirect and direct calls. */
2985 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2986 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2987 gcc_checking_assert (*time >= 0);
2988 gcc_checking_assert (*size >= 0);
2989
2990 callee = cgraph_node::get (target);
2991 if (!callee || !callee->definition)
2992 return false;
2993 callee = callee->function_symbol (&avail);
2994 if (avail < AVAIL_AVAILABLE)
2995 return false;
2996 isummary = ipa_fn_summaries->get (callee);
2997 if (isummary == NULL)
2998 return false;
2999
3000 return isummary->inlinable;
3001 }
3002
3003 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for the size and time
3004 needed to handle edge E.
3005 Set HINTS if the edge may be devirtualized.
3006 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe the context of the call
3007 site. */
3008
3009 static inline void
3010 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3011 sreal *time,
3012 vec<tree> known_vals,
3013 vec<ipa_polymorphic_call_context> known_contexts,
3014 vec<ipa_agg_value_set> known_aggs,
3015 ipa_hints *hints)
3016 {
3017 class ipa_call_summary *es = ipa_call_summaries->get (e);
3018 int call_size = es->call_stmt_size;
3019 int call_time = es->call_stmt_time;
3020 int cur_size;
3021
3022 if (!e->callee && hints && e->maybe_hot_p ()
3023 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
3024 known_vals, known_contexts, known_aggs))
3025 *hints |= INLINE_HINT_indirect_call;
3026 cur_size = call_size * ipa_fn_summary::size_scale;
3027 *size += cur_size;
3028 if (min_size)
3029 *min_size += cur_size;
3030 if (time)
3031 *time += ((sreal)call_time) * e->sreal_frequency ();
3032 }
3033
3034
3035 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3036 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3037 describe context of the call site.
3038
3039 Helper for estimate_calls_size_and_time, which does the same but is
3040 (in most cases) faster thanks to its call_size_time_table fast path. */
3041
3042 static void
3043 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3044 int *min_size, sreal *time,
3045 ipa_hints *hints,
3046 clause_t possible_truths,
3047 vec<tree> known_vals,
3048 vec<ipa_polymorphic_call_context> known_contexts,
3049 vec<ipa_agg_value_set> known_aggs)
3050 {
3051 struct cgraph_edge *e;
3052 for (e = node->callees; e; e = e->next_callee)
3053 {
3054 if (!e->inline_failed)
3055 {
3056 gcc_checking_assert (!ipa_call_summaries->get (e));
3057 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3058 hints,
3059 possible_truths,
3060 known_vals, known_contexts,
3061 known_aggs);
3062 continue;
3063 }
3064 class ipa_call_summary *es = ipa_call_summaries->get (e);
3065
3066 /* Do not care about zero sized builtins. */
3067 if (!es->call_stmt_size)
3068 {
3069 gcc_checking_assert (!es->call_stmt_time);
3070 continue;
3071 }
3072 if (!es->predicate
3073 || es->predicate->evaluate (possible_truths))
3074 {
3075 /* Predicates of calls shall not use NOT_CHANGED codes,
3076 so we do not need to compute probabilities. */
3077 estimate_edge_size_and_time (e, size,
3078 es->predicate ? NULL : min_size,
3079 time,
3080 known_vals, known_contexts,
3081 known_aggs, hints);
3082 }
3083 }
3084 for (e = node->indirect_calls; e; e = e->next_callee)
3085 {
3086 class ipa_call_summary *es = ipa_call_summaries->get (e);
3087 if (!es->predicate
3088 || es->predicate->evaluate (possible_truths))
3089 estimate_edge_size_and_time (e, size,
3090 es->predicate ? NULL : min_size,
3091 time,
3092 known_vals, known_contexts, known_aggs,
3093 hints);
3094 }
3095 }
3096
3097 /* Populate sum->call_size_time_table for edges from NODE. */
3098
3099 static void
3100 summarize_calls_size_and_time (struct cgraph_node *node,
3101 ipa_fn_summary *sum)
3102 {
3103 struct cgraph_edge *e;
3104 for (e = node->callees; e; e = e->next_callee)
3105 {
3106 if (!e->inline_failed)
3107 {
3108 gcc_checking_assert (!ipa_call_summaries->get (e));
3109 summarize_calls_size_and_time (e->callee, sum);
3110 continue;
3111 }
3112 int size = 0;
3113 sreal time = 0;
3114
3115 estimate_edge_size_and_time (e, &size, NULL, &time,
3116 vNULL, vNULL, vNULL, NULL);
3117
3118 struct predicate pred = true;
3119 class ipa_call_summary *es = ipa_call_summaries->get (e);
3120
3121 if (es->predicate)
3122 pred = *es->predicate;
3123 sum->account_size_time (size, time, pred, pred, true);
3124 }
3125 for (e = node->indirect_calls; e; e = e->next_callee)
3126 {
3127 int size = 0;
3128 sreal time = 0;
3129
3130 estimate_edge_size_and_time (e, &size, NULL, &time,
3131 vNULL, vNULL, vNULL, NULL);
3132 struct predicate pred = true;
3133 class ipa_call_summary *es = ipa_call_summaries->get (e);
3134
3135 if (es->predicate)
3136 pred = *es->predicate;
3137 sum->account_size_time (size, time, pred, pred, true);
3138 }
3139 }
3140
3141 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3142 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3143 describe context of the call site. */
3144
3145 static void
3146 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3147 int *min_size, sreal *time,
3148 ipa_hints *hints,
3149 clause_t possible_truths,
3150 vec<tree> known_vals,
3151 vec<ipa_polymorphic_call_context> known_contexts,
3152 vec<ipa_agg_value_set> known_aggs)
3153 {
3154 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3155 bool use_table = true;
3156
3157 gcc_assert (node->callees || node->indirect_calls);
3158
3159 /* During early inlining we do not calculate info for very
3160 large functions and thus there is no need for producing
3161 summaries. */
3162 if (!ipa_node_params_sum)
3163 use_table = false;
3164 /* Do not calculate summaries for simple wrappers; it is a waste
3165 of memory. */
3166 else if (node->callees && node->indirect_calls
3167 && node->callees->inline_failed && !node->callees->next_callee)
3168 use_table = false;
3169 /* If there is an indirect edge that may be optimized, we need
3170 to go the slow way. */
3171 else if ((known_vals.length ()
3172 || known_contexts.length ()
3173 || known_aggs.length ()) && hints)
3174 {
3175 class ipa_node_params *params_summary = IPA_NODE_REF (node);
3176 unsigned int nargs = params_summary
3177 ? ipa_get_param_count (params_summary) : 0;
3178
3179 for (unsigned int i = 0; i < nargs && use_table; i++)
3180 {
3181 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3182 && ((known_vals.length () > i && known_vals[i])
3183 || (known_aggs.length () > i
3184 && known_aggs[i].items.length ())))
3185 use_table = false;
3186 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3187 && (known_contexts.length () > i
3188 && !known_contexts[i].useless_p ()))
3189 use_table = false;
3190 }
3191 }
3192
3193 /* Fast path is via the call size time table. */
3194 if (use_table)
3195 {
3196 /* Build summary if it is absent. */
3197 if (!sum->call_size_time_table)
3198 {
3199 predicate true_pred = true;
3200 sum->account_size_time (0, 0, true_pred, true_pred, true);
3201 summarize_calls_size_and_time (node, sum);
3202 }
3203
3204 int old_size = *size;
3205 sreal old_time = time ? *time : 0;
3206
3207 if (min_size)
3208 *min_size += (*sum->call_size_time_table)[0].size;
3209
3210 unsigned int i;
3211 size_time_entry *e;
3212
3213 /* Walk the table and account sizes and times. */
3214 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
3215 i++)
3216 if (e->exec_predicate.evaluate (possible_truths))
3217 {
3218 *size += e->size;
3219 if (time)
3220 *time += e->time;
3221 }
3222
3223 /* Be careful and see if both methods agree. */
3224 if ((flag_checking || dump_file)
3225 /* Do not try to sanity check when we know we lost some
3226 precision. */
3227 && sum->call_size_time_table->length ()
3228 < ipa_fn_summary::max_size_time_table_size)
3229 {
3230 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3231 possible_truths, known_vals,
3232 known_contexts, known_aggs);
3233 gcc_assert (*size == old_size);
3234 if (time && (*time - old_time > 1 || *time - old_time < -1)
3235 && dump_file)
3236 fprintf (dump_file, "Time mismatch in call summary %f!=%f",
3237 old_time.to_double (),
3238 time->to_double ());
3239 }
3240 }
3241 /* Slow path by walking all edges. */
3242 else
3243 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3244 possible_truths, known_vals, known_contexts,
3245 known_aggs);
3246 }
3247
3248 /* Default constructor for ipa call context.
3249 Memory allocation of the known_vals, known_contexts
3250 and known_aggs vectors is owned by the caller, but can
3251 be released by ipa_call_context::release.
3252
3253 inline_param_summary is owned by the caller. */
3254 ipa_call_context::ipa_call_context (cgraph_node *node,
3255 clause_t possible_truths,
3256 clause_t nonspec_possible_truths,
3257 vec<tree> known_vals,
3258 vec<ipa_polymorphic_call_context>
3259 known_contexts,
3260 vec<ipa_agg_value_set> known_aggs,
3261 vec<inline_param_summary>
3262 inline_param_summary)
3263 : m_node (node), m_possible_truths (possible_truths),
3264 m_nonspec_possible_truths (nonspec_possible_truths),
3265 m_inline_param_summary (inline_param_summary),
3266 m_known_vals (known_vals),
3267 m_known_contexts (known_contexts),
3268 m_known_aggs (known_aggs)
3269 {
3270 }
3271
3272 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3273
3274 void
3275 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3276 {
3277 m_node = ctx.m_node;
3278 m_possible_truths = ctx.m_possible_truths;
3279 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3280 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3281 unsigned int nargs = params_summary
3282 ? ipa_get_param_count (params_summary) : 0;
3283
3284 m_inline_param_summary = vNULL;
3285 /* Copy the info only if there is at least one useful entry. */
3286 if (ctx.m_inline_param_summary.exists ())
3287 {
3288 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3289
3290 for (unsigned int i = 0; i < n; i++)
3291 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3292 && !ctx.m_inline_param_summary[i].useless_p ())
3293 {
3294 m_inline_param_summary
3295 = ctx.m_inline_param_summary.copy ();
3296 break;
3297 }
3298 }
3299 m_known_vals = vNULL;
3300 if (ctx.m_known_vals.exists ())
3301 {
3302 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3303
3304 for (unsigned int i = 0; i < n; i++)
3305 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3306 && ctx.m_known_vals[i])
3307 {
3308 m_known_vals = ctx.m_known_vals.copy ();
3309 break;
3310 }
3311 }
3312
3313 m_known_contexts = vNULL;
3314 if (ctx.m_known_contexts.exists ())
3315 {
3316 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3317
3318 for (unsigned int i = 0; i < n; i++)
3319 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3320 && !ctx.m_known_contexts[i].useless_p ())
3321 {
3322 m_known_contexts = ctx.m_known_contexts.copy ();
3323 break;
3324 }
3325 }
3326
3327 m_known_aggs = vNULL;
3328 if (ctx.m_known_aggs.exists ())
3329 {
3330 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3331
3332 for (unsigned int i = 0; i < n; i++)
3333 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3334 && !ctx.m_known_aggs[i].is_empty ())
3335 {
3336 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3337 break;
3338 }
3339 }
3340 }
3341
3342 /* Release memory used by the known_vals/contexts/aggs vectors.
3343 If ALL is true, release inline_param_summary as well.
3344 This happens when the context was previously duplicated to be stored
3345 in a cache. */
3346
3347 void
3348 ipa_call_context::release (bool all)
3349 {
3350 /* See if the context was initialized in the first place. */
3351 if (!m_node)
3352 return;
3353 ipa_release_agg_values (m_known_aggs, all);
3354 if (all)
3355 {
3356 m_known_vals.release ();
3357 m_known_contexts.release ();
3358 m_inline_param_summary.release ();
3359 }
3360 }
3361
3362 /* Return true if CTX describes the same call context as THIS. */
3363
3364 bool
3365 ipa_call_context::equal_to (const ipa_call_context &ctx)
3366 {
3367 if (m_node != ctx.m_node
3368 || m_possible_truths != ctx.m_possible_truths
3369 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3370 return false;
3371
3372 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3373 unsigned int nargs = params_summary
3374 ? ipa_get_param_count (params_summary) : 0;
3375
3376 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3377 {
3378 for (unsigned int i = 0; i < nargs; i++)
3379 {
3380 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3381 continue;
3382 if (i >= m_inline_param_summary.length ()
3383 || m_inline_param_summary[i].useless_p ())
3384 {
3385 if (i < ctx.m_inline_param_summary.length ()
3386 && !ctx.m_inline_param_summary[i].useless_p ())
3387 return false;
3388 continue;
3389 }
3390 if (i >= ctx.m_inline_param_summary.length ()
3391 || ctx.m_inline_param_summary[i].useless_p ())
3392 {
3393 if (i < m_inline_param_summary.length ()
3394 && !m_inline_param_summary[i].useless_p ())
3395 return false;
3396 continue;
3397 }
3398 if (!m_inline_param_summary[i].equal_to
3399 (ctx.m_inline_param_summary[i]))
3400 return false;
3401 }
3402 }
3403 if (m_known_vals.exists () || ctx.m_known_vals.exists ())
3404 {
3405 for (unsigned int i = 0; i < nargs; i++)
3406 {
3407 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3408 continue;
3409 if (i >= m_known_vals.length () || !m_known_vals[i])
3410 {
3411 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
3412 return false;
3413 continue;
3414 }
3415 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
3416 {
3417 if (i < m_known_vals.length () && m_known_vals[i])
3418 return false;
3419 continue;
3420 }
3421 if (m_known_vals[i] != ctx.m_known_vals[i])
3422 return false;
3423 }
3424 }
3425 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
3426 {
3427 for (unsigned int i = 0; i < nargs; i++)
3428 {
3429 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3430 continue;
3431 if (i >= m_known_contexts.length ()
3432 || m_known_contexts[i].useless_p ())
3433 {
3434 if (i < ctx.m_known_contexts.length ()
3435 && !ctx.m_known_contexts[i].useless_p ())
3436 return false;
3437 continue;
3438 }
3439 if (i >= ctx.m_known_contexts.length ()
3440 || ctx.m_known_contexts[i].useless_p ())
3441 {
3442 if (i < m_known_contexts.length ()
3443 && !m_known_contexts[i].useless_p ())
3444 return false;
3445 continue;
3446 }
3447 if (!m_known_contexts[i].equal_to
3448 (ctx.m_known_contexts[i]))
3449 return false;
3450 }
3451 }
3452 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
3453 {
3454 for (unsigned int i = 0; i < nargs; i++)
3455 {
3456 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3457 continue;
3458 if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
3459 {
3460 if (i < ctx.m_known_aggs.length ()
3461 && !ctx.m_known_aggs[i].is_empty ())
3462 return false;
3463 continue;
3464 }
3465 if (i >= ctx.m_known_aggs.length ()
3466 || ctx.m_known_aggs[i].is_empty ())
3467 {
3468 if (i < m_known_aggs.length ()
3469 && !m_known_aggs[i].is_empty ())
3470 return false;
3471 continue;
3472 }
3473 if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
3474 return false;
3475 }
3476 }
3477 return true;
3478 }
3479
3480 /* Estimate the size and time needed to execute the call in the given
3481 context. Additionally determine the hints implied by the context.
3482 Finally compute the minimal size needed for the call that is independent
3483 of the call context and can be used for fast estimates. Return the
3484 values in RET_SIZE, RET_MIN_SIZE, RET_TIME and RET_HINTS. */
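/* For example, consider a hypothetical size_time_table entry guarded by
   the predicate "param 0 changed" with time T. It always contributes T
   to the nonspecialized time, but contributes to the specialized time
   only when the predicate can still be true in this context, scaled by
   its probability when inline parameter summaries are available
   (T * prob / REG_BR_PROB_BASE).  */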
3485
3486 void
3487 ipa_call_context::estimate_size_and_time (int *ret_size,
3488 int *ret_min_size,
3489 sreal *ret_time,
3490 sreal *ret_nonspecialized_time,
3491 ipa_hints *ret_hints)
3492 {
3493 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3494 size_time_entry *e;
3495 int size = 0;
3496 sreal time = 0;
3497 int min_size = 0;
3498 ipa_hints hints = 0;
3499 int i;
3500
3501 if (dump_file && (dump_flags & TDF_DETAILS))
3502 {
3503 bool found = false;
3504 fprintf (dump_file, " Estimating body: %s/%i\n"
3505 " Known to be false: ", m_node->name (),
3506 m_node->order);
3507
3508 for (i = predicate::not_inlined_condition;
3509 i < (predicate::first_dynamic_condition
3510 + (int) vec_safe_length (info->conds)); i++)
3511 if (!(m_possible_truths & (1 << i)))
3512 {
3513 if (found)
3514 fprintf (dump_file, ", ");
3515 found = true;
3516 dump_condition (dump_file, info->conds, i);
3517 }
3518 }
3519
3520 if (m_node->callees || m_node->indirect_calls)
3521 estimate_calls_size_and_time (m_node, &size, &min_size,
3522 ret_time ? &time : NULL,
3523 ret_hints ? &hints : NULL, m_possible_truths,
3524 m_known_vals, m_known_contexts, m_known_aggs);
3525
3526 sreal nonspecialized_time = time;
3527
3528 min_size += (*info->size_time_table)[0].size;
3529 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3530 {
3531 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3532
3533 /* Because predicates are conservative, it can happen that nonconst is 1
3534 but exec is 0. */
3535 if (exec)
3536 {
3537 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3538
3539 gcc_checking_assert (e->time >= 0);
3540 gcc_checking_assert (time >= 0);
3541
3542 /* We compute only the specialized size because the size of the
3543 nonspecialized copy is context independent.
3544
3545 The difference between nonspecialized and specialized execution is
3546 that the nonspecialized copy will not have optimized out the
3547 computations known to be constant in the specialized setting. */
3548 if (nonconst)
3549 size += e->size;
3550 if (!ret_time)
3551 continue;
3552 nonspecialized_time += e->time;
3553 if (!nonconst)
3554 ;
3555 else if (!m_inline_param_summary.exists ())
3556 {
3557 if (nonconst)
3558 time += e->time;
3559 }
3560 else
3561 {
3562 int prob = e->nonconst_predicate.probability
3563 (info->conds, m_possible_truths,
3564 m_inline_param_summary);
3565 gcc_checking_assert (prob >= 0);
3566 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3567 if (prob == REG_BR_PROB_BASE)
3568 time += e->time;
3569 else
3570 time += e->time * prob / REG_BR_PROB_BASE;
3571 }
3572 gcc_checking_assert (time >= 0);
3573 }
3574 }
3575 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3576 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3577 gcc_checking_assert (min_size >= 0);
3578 gcc_checking_assert (size >= 0);
3579 gcc_checking_assert (time >= 0);
3580 /* nonspecialized_time should always be at least the specialized time;
3581 roundoff issues, however, may get in the way. */
3582 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3583
3584 /* Roundoff issues may make specialized time bigger than nonspecialized
3585 time. We do not really want that to happen because some heuristics
3586 may get confused by seeing negative speedups. */
3587 if (time > nonspecialized_time)
3588 time = nonspecialized_time;
3589
3590 if (ret_hints)
3591 {
3592 if (info->loop_iterations
3593 && !info->loop_iterations->evaluate (m_possible_truths))
3594 hints |= INLINE_HINT_loop_iterations;
3595 if (info->loop_stride
3596 && !info->loop_stride->evaluate (m_possible_truths))
3597 hints |= INLINE_HINT_loop_stride;
3598 if (info->scc_no)
3599 hints |= INLINE_HINT_in_scc;
3600 if (DECL_DECLARED_INLINE_P (m_node->decl))
3601 hints |= INLINE_HINT_declared_inline;
3602 }
3603
3604 size = RDIV (size, ipa_fn_summary::size_scale);
3605 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3606
3607 if (dump_file && (dump_flags & TDF_DETAILS))
3608 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3609 time.to_double (), nonspecialized_time.to_double ());
3610 if (ret_time)
3611 *ret_time = time;
3612 if (ret_nonspecialized_time)
3613 *ret_nonspecialized_time = nonspecialized_time;
3614 if (ret_size)
3615 *ret_size = size;
3616 if (ret_min_size)
3617 *ret_min_size = min_size;
3618 if (ret_hints)
3619 *ret_hints = hints;
3620 return;
3621 }
3622
3623
3624 /* Estimate the size and time needed to execute NODE assuming that the
3625 parameters known to be constant at the call site are propagated.
3626 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3627 and types for the parameters. */
3628
3629 void
3630 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3631 vec<tree> known_vals,
3632 vec<ipa_polymorphic_call_context>
3633 known_contexts,
3634 vec<ipa_agg_value_set> known_aggs,
3635 int *ret_size, sreal *ret_time,
3636 sreal *ret_nonspec_time,
3637 ipa_hints *hints)
3638 {
3639 clause_t clause, nonspec_clause;
3640
3641 /* TODO: Also pass known value ranges. */
3642 evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3643 known_aggs, &clause, &nonspec_clause);
3644 ipa_call_context ctx (node, clause, nonspec_clause,
3645 known_vals, known_contexts,
3646 known_aggs, vNULL);
3647 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3648 ret_nonspec_time, hints);
3649 }
3650
3651 /* Return the stack frame offset where the frame of NODE is supposed to
3652 start inside the function it is inlined into.
3653 Return 0 for functions that are not inlined. */
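/* A hypothetical example: if C was inlined into B and B into A, the
   frame of C starts at

     estimated_self_stack_size (B) + estimated_self_stack_size (A)

   within A's frame, accumulated by walking the chain of inline callers
   below.  */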
3654
3655 HOST_WIDE_INT
3656 ipa_get_stack_frame_offset (struct cgraph_node *node)
3657 {
3658 HOST_WIDE_INT offset = 0;
3659 if (!node->inlined_to)
3660 return 0;
3661 node = node->callers->caller;
3662 while (true)
3663 {
3664 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3665 if (!node->inlined_to)
3666 return offset;
3667 node = node->callers->caller;
3668 }
3669 }
3670
3671
3672 /* Update summary information of inline clones after inlining.
3673 Compute peak stack usage. */
3674
3675 static void
3676 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3677 {
3678 struct cgraph_edge *e;
3679
3680 ipa_propagate_frequency (node);
3681 for (e = node->callees; e; e = e->next_callee)
3682 {
3683 if (!e->inline_failed)
3684 inline_update_callee_summaries (e->callee, depth);
3685 else
3686 ipa_call_summaries->get (e)->loop_depth += depth;
3687 }
3688 for (e = node->indirect_calls; e; e = e->next_callee)
3689 ipa_call_summaries->get (e)->loop_depth += depth;
3690 }
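
/* Effect sketch: if an edge at loop depth DEPTH == 2 is inlined, a call
   that survives inside the inlined body with loop_depth 1 ends up with
   loop_depth 3, reflecting its nesting within the caller's loops; the
   recursion applies the same shift across the whole inline tree.  */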
3691
3692 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3693 When function A is inlined into B, and A calls C with a parameter that
3694 changes with probability PROB1, and that parameter is known to be a
3695 passthrough of an argument of B that changes with probability PROB2,
3696 the probability of change is now PROB1*PROB2. */
3697
3698 static void
3699 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3700 struct cgraph_edge *edge)
3701 {
3702 if (ipa_node_params_sum)
3703 {
3704 int i;
3705 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3706 if (!args)
3707 return;
3708 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3709 class ipa_call_summary *inlined_es
3710 = ipa_call_summaries->get (inlined_edge);
3711
3712 if (es->param.length () == 0)
3713 return;
3714
3715 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3716 {
3717 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3718 if (jfunc->type == IPA_JF_PASS_THROUGH
3719 || jfunc->type == IPA_JF_ANCESTOR)
3720 {
3721 int id = jfunc->type == IPA_JF_PASS_THROUGH
3722 ? ipa_get_jf_pass_through_formal_id (jfunc)
3723 : ipa_get_jf_ancestor_formal_id (jfunc);
3724 if (id < (int) inlined_es->param.length ())
3725 {
3726 int prob1 = es->param[i].change_prob;
3727 int prob2 = inlined_es->param[id].change_prob;
3728 int prob = combine_probabilities (prob1, prob2);
3729
3730 if (prob1 && prob2 && !prob)
3731 prob = 1;
3732
3733 es->param[i].change_prob = prob;
3734 }
3735 }
3736 }
3737 }
3738 }
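
/* Worked example, assuming probabilities are scaled by REG_BR_PROB_BASE
   (10000): PROB1 == 5000 (50%) and PROB2 == 2000 (20%) combine to
   5000 * 2000 / 10000 == 1000, i.e. 10%.  The clamping to 1 above keeps
   two nonzero probabilities from rounding down to "never changes", which
   the heuristics would otherwise treat as a guarantee.  */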
3739
3740 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3741
3742 Remap predicates of callees of NODE. The remaining arguments match
3743 those of remap_predicate.
3744
3745 Also update change probabilities. */
3746
3747 static void
3748 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3749 struct cgraph_node *node,
3750 class ipa_fn_summary *info,
3751 class ipa_node_params *params_summary,
3752 class ipa_fn_summary *callee_info,
3753 vec<int> operand_map,
3754 vec<int> offset_map,
3755 clause_t possible_truths,
3756 predicate *toplev_predicate)
3757 {
3758 struct cgraph_edge *e, *next;
3759 for (e = node->callees; e; e = next)
3760 {
3761 predicate p;
3762 next = e->next_callee;
3763
3764 if (e->inline_failed)
3765 {
3766 class ipa_call_summary *es = ipa_call_summaries->get (e);
3767 remap_edge_change_prob (inlined_edge, e);
3768
3769 if (es->predicate)
3770 {
3771 p = es->predicate->remap_after_inlining
3772 (info, params_summary,
3773 callee_info, operand_map,
3774 offset_map, possible_truths,
3775 *toplev_predicate);
3776 edge_set_predicate (e, &p);
3777 }
3778 else
3779 edge_set_predicate (e, toplev_predicate);
3780 }
3781 else
3782 remap_edge_summaries (inlined_edge, e->callee, info,
3783 params_summary, callee_info,
3784 operand_map, offset_map, possible_truths,
3785 toplev_predicate);
3786 }
3787 for (e = node->indirect_calls; e; e = next)
3788 {
3789 class ipa_call_summary *es = ipa_call_summaries->get (e);
3790 predicate p;
3791 next = e->next_callee;
3792
3793 remap_edge_change_prob (inlined_edge, e);
3794 if (es->predicate)
3795 {
3796 p = es->predicate->remap_after_inlining
3797 (info, params_summary,
3798 callee_info, operand_map, offset_map,
3799 possible_truths, *toplev_predicate);
3800 edge_set_predicate (e, &p);
3801 }
3802 else
3803 edge_set_predicate (e, toplev_predicate);
3804 }
3805 }
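
/* Remapping sketch: suppose the callee's summary guards a call with the
   predicate "param 1 is constant" and OPERAND_MAP maps the callee's
   formal 1 to the caller's formal 3 (a NOP passthrough).  After
   remap_after_inlining the edge is guarded by "param 3 is constant" in
   the caller's summary; roughly speaking, conditions on formals with no
   mapping drop out and the result degrades to TOPLEV_PREDICATE.  */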
3806
3807 /* Same as remap_predicate, but sets the result into the hint *HINT. */
3808
3809 static void
3810 remap_hint_predicate (class ipa_fn_summary *info,
3811 class ipa_node_params *params_summary,
3812 class ipa_fn_summary *callee_info,
3813 predicate **hint,
3814 vec<int> operand_map,
3815 vec<int> offset_map,
3816 clause_t possible_truths,
3817 predicate *toplev_predicate)
3818 {
3819 predicate p;
3820
3821 if (!*hint)
3822 return;
3823 p = (*hint)->remap_after_inlining
3824 (info, params_summary, callee_info,
3825 operand_map, offset_map,
3826 possible_truths, *toplev_predicate);
3827 /* *HINT is known to be non-NULL here because of the early return
3828 above, so simply conjoin the remapped predicate into it. */
3829 if (p != false && p != true)
3830 **hint &= p;
3834 }
3835
3836 /* We inlined EDGE. Update summary of the function we inlined into. */
3837
3838 void
3839 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3840 {
3841 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3842 struct cgraph_node *to = (edge->caller->inlined_to
3843 ? edge->caller->inlined_to : edge->caller);
3844 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
3845 clause_t clause = 0; /* not_inline is known to be false. */
3846 size_time_entry *e;
3847 auto_vec<int, 8> operand_map;
3848 auto_vec<int, 8> offset_map;
3849 int i;
3850 predicate toplev_predicate;
3851 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3852 class ipa_node_params *params_summary = (ipa_node_params_sum
3853 ? IPA_NODE_REF (to) : NULL);
3854
3855 if (es->predicate)
3856 toplev_predicate = *es->predicate;
3857 else
3858 toplev_predicate = true;
3859
3860 info->fp_expressions |= callee_info->fp_expressions;
3861
3862 if (callee_info->conds)
3863 {
3864 auto_vec<tree, 32> known_vals;
3865 auto_vec<ipa_agg_value_set, 32> known_aggs;
3866 evaluate_properties_for_edge (edge, true, &clause, NULL,
3867 &known_vals, NULL, &known_aggs);
3868 }
3869 if (ipa_node_params_sum && callee_info->conds)
3870 {
3871 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3872 int count = args ? ipa_get_cs_argument_count (args) : 0;
3873 int i;
3874
3875 if (count)
3876 {
3877 operand_map.safe_grow_cleared (count);
3878 offset_map.safe_grow_cleared (count);
3879 }
3880 for (i = 0; i < count; i++)
3881 {
3882 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3883 int map = -1;
3884
3885 /* TODO: handle non-NOPs when merging. */
3886 if (jfunc->type == IPA_JF_PASS_THROUGH)
3887 {
3888 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3889 map = ipa_get_jf_pass_through_formal_id (jfunc);
3890 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3891 offset_map[i] = -1;
3892 }
3893 else if (jfunc->type == IPA_JF_ANCESTOR)
3894 {
3895 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3896 if (offset >= 0 && offset < INT_MAX)
3897 {
3898 map = ipa_get_jf_ancestor_formal_id (jfunc);
3899 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3900 offset = -1;
3901 offset_map[i] = offset;
3902 }
3903 }
3904 operand_map[i] = map;
3905 gcc_assert (map < ipa_get_param_count (params_summary));
3906 }
3907 }
3908 sreal freq = edge->sreal_frequency ();
3909 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3910 {
3911 predicate p;
3912 p = e->exec_predicate.remap_after_inlining
3913 (info, params_summary,
3914 callee_info, operand_map,
3915 offset_map, clause,
3916 toplev_predicate);
3917 predicate nonconstp;
3918 nonconstp = e->nonconst_predicate.remap_after_inlining
3919 (info, params_summary,
3920 callee_info, operand_map,
3921 offset_map, clause,
3922 toplev_predicate);
3923 if (p != false && nonconstp != false)
3924 {
3925 sreal add_time = ((sreal)e->time * freq);
3926 int prob = e->nonconst_predicate.probability (callee_info->conds,
3927 clause, es->param);
3928 if (prob != REG_BR_PROB_BASE)
3929 add_time = add_time * prob / REG_BR_PROB_BASE;
3930 if (prob != REG_BR_PROB_BASE
3931 && dump_file && (dump_flags & TDF_DETAILS))
3932 {
3933 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3934 (double) prob / REG_BR_PROB_BASE);
3935 }
3936 info->account_size_time (e->size, add_time, p, nonconstp);
3937 }
3938 }
3939 remap_edge_summaries (edge, edge->callee, info, params_summary,
3940 callee_info, operand_map,
3941 offset_map, clause, &toplev_predicate);
3942 remap_hint_predicate (info, params_summary, callee_info,
3943 &callee_info->loop_iterations,
3944 operand_map, offset_map, clause, &toplev_predicate);
3945 remap_hint_predicate (info, params_summary, callee_info,
3946 &callee_info->loop_stride,
3947 operand_map, offset_map, clause, &toplev_predicate);
3948
3949 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
3950 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
3951
3952 if (info->estimated_stack_size < peak)
3953 info->estimated_stack_size = peak;
3954
3955 inline_update_callee_summaries (edge->callee, es->loop_depth);
3956 if (info->call_size_time_table)
3957 {
3958 int edge_size = 0;
3959 sreal edge_time = 0;
3960
3961 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL,
3962 vNULL, vNULL, 0);
3963 /* Unaccount size and time of the optimized out call. */
3964 info->account_size_time (-edge_size, -edge_time,
3965 es->predicate ? *es->predicate : true,
3966 es->predicate ? *es->predicate : true,
3967 true);
3968 /* Account new calls. */
3969 summarize_calls_size_and_time (edge->callee, info);
3970 }
3971
3972 /* Free summaries that are not maintained for inline clones/edges. */
3973 ipa_call_summaries->remove (edge);
3974 ipa_fn_summaries->remove (edge->callee);
3975 ipa_remove_from_growth_caches (edge);
3976 }
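
/* A sketch of the intended call sequence (the real driver is the
   inliner's transform code): once the callgraph has been updated so that
   EDGE->callee is an inline clone with inlined_to set,

     struct cgraph_node *to = edge->caller->inlined_to
			      ? edge->caller->inlined_to : edge->caller;
     ipa_merge_fn_summary_after_inlining (edge);
     ipa_update_overall_fn_summary (to, true);

   the second call is needed because, as noted below, merging does not
   recompute the overall size and time.  */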
3977
3978 /* For performance reasons ipa_merge_fn_summary_after_inlining does not
3979 update overall size and time. Recompute them here.
3980 If RESET is true, also recompute call_size_time_table. */
3981
3982 void
3983 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
3984 {
3985 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
3986 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
3987 size_time_entry *e;
3988 int i;
3989
3990 size_info->size = 0;
3991 info->time = 0;
3992 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3993 {
3994 size_info->size += e->size;
3995 info->time += e->time;
3996 }
3997 info->min_size = (*info->size_time_table)[0].size;
3998 if (reset)
3999 vec_free (info->call_size_time_table);
4000 if (node->callees || node->indirect_calls)
4001 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4002 &info->time, NULL,
4003 ~(clause_t) (1 << predicate::false_condition),
4004 vNULL, vNULL, vNULL);
4005 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4006 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4007 }
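
/* Note on the scaling above: size_time_table entries are accumulated in
   units of ipa_fn_summary::size_scale so that per-statement costs can be
   fractional.  RDIV rounds to nearest when converting back; e.g. with a
   scale of 2, an accumulated size of 7 yields RDIV (7, 2) == 4, whereas
   plain truncating division would give 3.  */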
4008
4009
4010 /* This function performs intraprocedural analysis in NODE that is required to
4011 inline indirect calls. */
4012
4013 static void
4014 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4015 {
4016 ipa_analyze_node (node);
4017 if (dump_file && (dump_flags & TDF_DETAILS))
4018 {
4019 ipa_print_node_params (dump_file, node);
4020 ipa_print_node_jump_functions (dump_file, node);
4021 }
4022 }
4023
4024
4025 /* Analyze the body of NODE and compute its function summary. */
4026
4027 void
4028 inline_analyze_function (struct cgraph_node *node)
4029 {
4030 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4031
4032 if (dump_file)
4033 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
4034 node->name (), node->order);
4035 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
4036 inline_indirect_intraprocedural_analysis (node);
4037 compute_fn_summary (node, false);
4038 if (!optimize)
4039 {
4040 struct cgraph_edge *e;
4041 for (e = node->callees; e; e = e->next_callee)
4042 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4043 for (e = node->indirect_calls; e; e = e->next_callee)
4044 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4045 }
4046
4047 pop_cfun ();
4048 }
4049
4050
4051 /* Called when new function is inserted to callgraph late. */
4052
4053 void
4054 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4055 {
4056 inline_analyze_function (node);
4057 }
4058
4059 /* Compute function summaries for all functions in the call graph. */
4060
4061 static void
4062 ipa_fn_summary_generate (void)
4063 {
4064 struct cgraph_node *node;
4065
4066 FOR_EACH_DEFINED_FUNCTION (node)
4067 if (DECL_STRUCT_FUNCTION (node->decl))
4068 node->versionable = tree_versionable_function_p (node->decl);
4069
4070 ipa_fn_summary_alloc ();
4071
4072 ipa_fn_summaries->enable_insertion_hook ();
4073
4074 ipa_register_cgraph_hooks ();
4075
4076 FOR_EACH_DEFINED_FUNCTION (node)
4077 if (!node->alias
4078 && (flag_generate_lto || flag_generate_offload || flag_wpa
4079 || opt_for_fn (node->decl, optimize)))
4080 inline_analyze_function (node);
4081 }
4082
4083
4084 /* Read call summary of edge E from IB; attach it only if PREVAILS. */
4085
4086 static void
4087 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4088 bool prevails)
4089 {
4090 class ipa_call_summary *es = prevails
4091 ? ipa_call_summaries->get_create (e) : NULL;
4092 predicate p;
4093 int length, i;
4094
4095 int size = streamer_read_uhwi (ib);
4096 int time = streamer_read_uhwi (ib);
4097 int depth = streamer_read_uhwi (ib);
4098
4099 if (es)
4100 {
4101 es->call_stmt_size = size;
4102 es->call_stmt_time = time;
4103 es->loop_depth = depth;
4104 }
4105
4106 bitpack_d bp = streamer_read_bitpack (ib);
4107 if (es)
4108 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4109 else
4110 bp_unpack_value (&bp, 1);
4111
4112 p.stream_in (ib);
4113 if (es)
4114 edge_set_predicate (e, &p);
4115 length = streamer_read_uhwi (ib);
4116 if (length && es && e->possibly_call_in_translation_unit_p ())
4117 {
4118 es->param.safe_grow_cleared (length);
4119 for (i = 0; i < length; i++)
4120 es->param[i].change_prob = streamer_read_uhwi (ib);
4121 }
4122 else
4123 {
4124 for (i = 0; i < length; i++)
4125 streamer_read_uhwi (ib);
4126 }
4127 }
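
/* The record consumed above must stay in sync with write_ipa_call_summary
   below.  Its layout is:

     uhwi     call_stmt_size
     uhwi     call_stmt_time
     uhwi     loop_depth
     bitpack  is_return_callee_uncaptured (1 bit)
     predicate (streamed as 0 when absent)
     uhwi     parameter count, then one change_prob uhwi per parameter  */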
4128
4129
4130 /* Stream in inline summaries from the section. */
4131
4132 static void
4133 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4134 size_t len)
4135 {
4136 const struct lto_function_header *header =
4137 (const struct lto_function_header *) data;
4138 const int cfg_offset = sizeof (struct lto_function_header);
4139 const int main_offset = cfg_offset + header->cfg_size;
4140 const int string_offset = main_offset + header->main_size;
4141 class data_in *data_in;
4142 unsigned int i, count2, j;
4143 unsigned int f_count;
4144
4145 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4146 file_data->mode_table);
4147
4148 data_in =
4149 lto_data_in_create (file_data, (const char *) data + string_offset,
4150 header->string_size, vNULL);
4151 f_count = streamer_read_uhwi (&ib);
4152 for (i = 0; i < f_count; i++)
4153 {
4154 unsigned int index;
4155 struct cgraph_node *node;
4156 class ipa_fn_summary *info;
4157 class ipa_node_params *params_summary;
4158 class ipa_size_summary *size_info;
4159 lto_symtab_encoder_t encoder;
4160 struct bitpack_d bp;
4161 struct cgraph_edge *e;
4162 predicate p;
4163
4164 index = streamer_read_uhwi (&ib);
4165 encoder = file_data->symtab_node_encoder;
4166 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4167 index));
4168 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4169 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
4170 size_info = node->prevailing_p ()
4171 ? ipa_size_summaries->get_create (node) : NULL;
4172
4173 int stack_size = streamer_read_uhwi (&ib);
4174 int size = streamer_read_uhwi (&ib);
4175 sreal time = sreal::stream_in (&ib);
4176
4177 if (info)
4178 {
4179 info->estimated_stack_size
4180 = size_info->estimated_self_stack_size = stack_size;
4181 size_info->size = size_info->self_size = size;
4182 info->time = time;
4183 }
4184
4185 bp = streamer_read_bitpack (&ib);
4186 if (info)
4187 {
4188 info->inlinable = bp_unpack_value (&bp, 1);
4189 info->fp_expressions = bp_unpack_value (&bp, 1);
4190 }
4191 else
4192 {
4193 bp_unpack_value (&bp, 1);
4194 bp_unpack_value (&bp, 1);
4195 }
4196
4197 count2 = streamer_read_uhwi (&ib);
4198 gcc_assert (!info || !info->conds);
4199 if (info)
4200 vec_safe_reserve_exact (info->conds, count2);
4201 for (j = 0; j < count2; j++)
4202 {
4203 struct condition c;
4204 unsigned int k, count3;
4205 c.operand_num = streamer_read_uhwi (&ib);
4206 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4207 c.type = stream_read_tree (&ib, data_in);
4208 c.val = stream_read_tree (&ib, data_in);
4209 bp = streamer_read_bitpack (&ib);
4210 c.agg_contents = bp_unpack_value (&bp, 1);
4211 c.by_ref = bp_unpack_value (&bp, 1);
4212 if (c.agg_contents)
4213 c.offset = streamer_read_uhwi (&ib);
4214 count3 = streamer_read_uhwi (&ib);
4215 c.param_ops = NULL;
4216 if (info)
4217 vec_safe_reserve_exact (c.param_ops, count3);
4218 if (params_summary)
4219 ipa_set_param_used_by_ipa_predicates
4220 (params_summary, c.operand_num, true);
4221 for (k = 0; k < count3; k++)
4222 {
4223 struct expr_eval_op op;
4224 enum gimple_rhs_class rhs_class;
4225 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4226 op.type = stream_read_tree (&ib, data_in);
4227 switch (rhs_class = get_gimple_rhs_class (op.code))
4228 {
4229 case GIMPLE_UNARY_RHS:
4230 op.index = 0;
4231 op.val[0] = NULL_TREE;
4232 op.val[1] = NULL_TREE;
4233 break;
4234
4235 case GIMPLE_BINARY_RHS:
4236 case GIMPLE_TERNARY_RHS:
4237 bp = streamer_read_bitpack (&ib);
4238 op.index = bp_unpack_value (&bp, 2);
4239 op.val[0] = stream_read_tree (&ib, data_in);
4240 if (rhs_class == GIMPLE_BINARY_RHS)
4241 op.val[1] = NULL_TREE;
4242 else
4243 op.val[1] = stream_read_tree (&ib, data_in);
4244 break;
4245
4246 default:
4247 fatal_error (UNKNOWN_LOCATION,
4248 "invalid fnsummary in LTO stream");
4249 }
4250 if (info)
4251 c.param_ops->quick_push (op);
4252 }
4253 if (info)
4254 info->conds->quick_push (c);
4255 }
4256 count2 = streamer_read_uhwi (&ib);
4257 gcc_assert (!info || !info->size_time_table);
4258 if (info && count2)
4259 vec_safe_reserve_exact (info->size_time_table, count2);
4260 for (j = 0; j < count2; j++)
4261 {
4262 class size_time_entry e;
4263
4264 e.size = streamer_read_uhwi (&ib);
4265 e.time = sreal::stream_in (&ib);
4266 e.exec_predicate.stream_in (&ib);
4267 e.nonconst_predicate.stream_in (&ib);
4268
4269 if (info)
4270 info->size_time_table->quick_push (e);
4271 }
4272
4273 p.stream_in (&ib);
4274 if (info)
4275 set_hint_predicate (&info->loop_iterations, p);
4276 p.stream_in (&ib);
4277 if (info)
4278 set_hint_predicate (&info->loop_stride, p);
4279 for (e = node->callees; e; e = e->next_callee)
4280 read_ipa_call_summary (&ib, e, info != NULL);
4281 for (e = node->indirect_calls; e; e = e->next_callee)
4282 read_ipa_call_summary (&ib, e, info != NULL);
4283 }
4284
4285 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4286 len);
4287 lto_data_in_delete (data_in);
4288 }
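
/* Per-node record layout read above (mirrored by ipa_fn_summary_write):
   node index, self stack size, self size, time, a bitpack of flags,
   the vector of conditions, the size/time table, the loop_iterations and
   loop_stride hint predicates, and finally one call summary per direct
   and indirect edge.  */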
4289
4290
4291 /* Read inline summaries. Jump functions are shared between ipa-cp
4292 and the inliner, so when ipa-cp is active, we don't need to read them
4293 twice. */
4294
4295 static void
4296 ipa_fn_summary_read (void)
4297 {
4298 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4299 struct lto_file_decl_data *file_data;
4300 unsigned int j = 0;
4301
4302 ipa_fn_summary_alloc ();
4303
4304 while ((file_data = file_data_vec[j++]))
4305 {
4306 size_t len;
4307 const char *data
4308 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4309 &len);
4310 if (data)
4311 inline_read_section (file_data, data, len);
4312 else
4313 /* Fatal error here. We do not want to support compiling ltrans units
4314 with a different version of the compiler or different flags than the
4315 WPA unit, so this should never happen. */
4316 fatal_error (input_location,
4317 "ipa inline summary is missing in input file");
4318 }
4319 ipa_register_cgraph_hooks ();
4320 if (!flag_ipa_cp)
4321 ipa_prop_read_jump_functions ();
4322
4323 gcc_assert (ipa_fn_summaries);
4324 ipa_fn_summaries->enable_insertion_hook ();
4325 }
4326
4327
4328 /* Write call summary of edge E to OB. */
4329
4330 static void
4331 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4332 {
4333 class ipa_call_summary *es = ipa_call_summaries->get (e);
4334 int i;
4335
4336 streamer_write_uhwi (ob, es->call_stmt_size);
4337 streamer_write_uhwi (ob, es->call_stmt_time);
4338 streamer_write_uhwi (ob, es->loop_depth);
4339
4340 bitpack_d bp = bitpack_create (ob->main_stream);
4341 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4342 streamer_write_bitpack (&bp);
4343
4344 if (es->predicate)
4345 es->predicate->stream_out (ob);
4346 else
4347 streamer_write_uhwi (ob, 0);
4348 streamer_write_uhwi (ob, es->param.length ());
4349 for (i = 0; i < (int) es->param.length (); i++)
4350 streamer_write_uhwi (ob, es->param[i].change_prob);
4351 }
4352
4353
4354 /* Write inline summaries for all nodes in the symbol table encoder.
4355 Jump functions are shared between ipa-cp and the inliner, so when
4356 ipa-cp is active, we don't need to write them twice. */
4357
4358 static void
4359 ipa_fn_summary_write (void)
4360 {
4361 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4362 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4363 unsigned int count = 0;
4364 int i;
4365
4366 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4367 {
4368 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4369 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4370 if (cnode && cnode->definition && !cnode->alias)
4371 count++;
4372 }
4373 streamer_write_uhwi (ob, count);
4374
4375 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4376 {
4377 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4378 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4379 if (cnode && cnode->definition && !cnode->alias)
4380 {
4381 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4382 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4383 struct bitpack_d bp;
4384 struct cgraph_edge *edge;
4385 int i;
4386 size_time_entry *e;
4387 struct condition *c;
4388
4389 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4390 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4391 streamer_write_hwi (ob, size_info->self_size);
4392 info->time.stream_out (ob);
4393 bp = bitpack_create (ob->main_stream);
4394 bp_pack_value (&bp, info->inlinable, 1);
4395 /* This bitpack must match the two bits unpacked in inline_read_section. */
4396 bp_pack_value (&bp, info->fp_expressions, 1);
4397 streamer_write_bitpack (&bp);
4398 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4399 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4400 {
4401 int j;
4402 struct expr_eval_op *op;
4403
4404 streamer_write_uhwi (ob, c->operand_num);
4405 streamer_write_uhwi (ob, c->code);
4406 stream_write_tree (ob, c->type, true);
4407 stream_write_tree (ob, c->val, true);
4408 bp = bitpack_create (ob->main_stream);
4409 bp_pack_value (&bp, c->agg_contents, 1);
4410 bp_pack_value (&bp, c->by_ref, 1);
4411 streamer_write_bitpack (&bp);
4412 if (c->agg_contents)
4413 streamer_write_uhwi (ob, c->offset);
4414 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4415 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4416 {
4417 streamer_write_uhwi (ob, op->code);
4418 stream_write_tree (ob, op->type, true);
4419 if (op->val[0])
4420 {
4421 bp = bitpack_create (ob->main_stream);
4422 bp_pack_value (&bp, op->index, 2);
4423 streamer_write_bitpack (&bp);
4424 stream_write_tree (ob, op->val[0], true);
4425 if (op->val[1])
4426 stream_write_tree (ob, op->val[1], true);
4427 }
4428 }
4429 }
4430 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4431 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4432 {
4433 streamer_write_uhwi (ob, e->size);
4434 e->time.stream_out (ob);
4435 e->exec_predicate.stream_out (ob);
4436 e->nonconst_predicate.stream_out (ob);
4437 }
4438 if (info->loop_iterations)
4439 info->loop_iterations->stream_out (ob);
4440 else
4441 streamer_write_uhwi (ob, 0);
4442 if (info->loop_stride)
4443 info->loop_stride->stream_out (ob);
4444 else
4445 streamer_write_uhwi (ob, 0);
4446 for (edge = cnode->callees; edge; edge = edge->next_callee)
4447 write_ipa_call_summary (ob, edge);
4448 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4449 write_ipa_call_summary (ob, edge);
4450 }
4451 }
4452 streamer_write_char_stream (ob->main_stream, 0);
4453 produce_asm (ob, NULL);
4454 destroy_output_block (ob);
4455
4456 if (!flag_ipa_cp)
4457 ipa_prop_write_jump_functions ();
4458 }
4459
4460
4461 /* Release function summary. */
4462
4463 void
4464 ipa_free_fn_summary (void)
4465 {
4466 if (!ipa_call_summaries)
4467 return;
4468 ggc_delete (ipa_fn_summaries);
4469 ipa_fn_summaries = NULL;
4470 delete ipa_call_summaries;
4471 ipa_call_summaries = NULL;
4472 edge_predicate_pool.release ();
4473 /* During IPA this is one of the largest data structures to release. */
4474 if (flag_wpa)
4475 ggc_trim ();
4476 }
4477
4478 /* Release function size summary. */
4479
4480 void
4481 ipa_free_size_summary (void)
4482 {
4483 if (!ipa_size_summaries)
4484 return;
4485 delete ipa_size_summaries;
4486 ipa_size_summaries = NULL;
4487 }
4488
4489 namespace {
4490
4491 const pass_data pass_data_local_fn_summary =
4492 {
4493 GIMPLE_PASS, /* type */
4494 "local-fnsummary", /* name */
4495 OPTGROUP_INLINE, /* optinfo_flags */
4496 TV_INLINE_PARAMETERS, /* tv_id */
4497 0, /* properties_required */
4498 0, /* properties_provided */
4499 0, /* properties_destroyed */
4500 0, /* todo_flags_start */
4501 0, /* todo_flags_finish */
4502 };
4503
4504 class pass_local_fn_summary : public gimple_opt_pass
4505 {
4506 public:
4507 pass_local_fn_summary (gcc::context *ctxt)
4508 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4509 {}
4510
4511 /* opt_pass methods: */
4512 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4513 virtual unsigned int execute (function *)
4514 {
4515 return compute_fn_summary_for_current ();
4516 }
4517
4518 }; // class pass_local_fn_summary
4519
4520 } // anon namespace
4521
4522 gimple_opt_pass *
4523 make_pass_local_fn_summary (gcc::context *ctxt)
4524 {
4525 return new pass_local_fn_summary (ctxt);
4526 }
4527
4528
4529 /* Free inline summary. */
4530
4531 namespace {
4532
4533 const pass_data pass_data_ipa_free_fn_summary =
4534 {
4535 SIMPLE_IPA_PASS, /* type */
4536 "free-fnsummary", /* name */
4537 OPTGROUP_NONE, /* optinfo_flags */
4538 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4539 0, /* properties_required */
4540 0, /* properties_provided */
4541 0, /* properties_destroyed */
4542 0, /* todo_flags_start */
4543 0, /* todo_flags_finish */
4544 };
4545
4546 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4547 {
4548 public:
4549 pass_ipa_free_fn_summary (gcc::context *ctxt)
4550 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4551 small_p (false)
4552 {}
4553
4554 /* opt_pass methods: */
4555 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4556 void set_pass_param (unsigned int n, bool param)
4557 {
4558 gcc_assert (n == 0);
4559 small_p = param;
4560 }
4561 virtual bool gate (function *) { return true; }
4562 virtual unsigned int execute (function *)
4563 {
4564 ipa_free_fn_summary ();
4565 if (!flag_wpa)
4566 ipa_free_size_summary ();
4567 return 0;
4568 }
4569
4570 private:
4571 bool small_p;
4572 }; // class pass_ipa_free_fn_summary
4573
4574 } // anon namespace
4575
4576 simple_ipa_opt_pass *
4577 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4578 {
4579 return new pass_ipa_free_fn_summary (ctxt);
4580 }
4581
4582 namespace {
4583
4584 const pass_data pass_data_ipa_fn_summary =
4585 {
4586 IPA_PASS, /* type */
4587 "fnsummary", /* name */
4588 OPTGROUP_INLINE, /* optinfo_flags */
4589 TV_IPA_FNSUMMARY, /* tv_id */
4590 0, /* properties_required */
4591 0, /* properties_provided */
4592 0, /* properties_destroyed */
4593 0, /* todo_flags_start */
4594 ( TODO_dump_symtab ), /* todo_flags_finish */
4595 };
4596
4597 class pass_ipa_fn_summary : public ipa_opt_pass_d
4598 {
4599 public:
4600 pass_ipa_fn_summary (gcc::context *ctxt)
4601 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4602 ipa_fn_summary_generate, /* generate_summary */
4603 ipa_fn_summary_write, /* write_summary */
4604 ipa_fn_summary_read, /* read_summary */
4605 NULL, /* write_optimization_summary */
4606 NULL, /* read_optimization_summary */
4607 NULL, /* stmt_fixup */
4608 0, /* function_transform_todo_flags_start */
4609 NULL, /* function_transform */
4610 NULL) /* variable_transform */
4611 {}
4612
4613 /* opt_pass methods: */
4614 virtual unsigned int execute (function *) { return 0; }
4615
4616 }; // class pass_ipa_fn_summary
4617
4618 } // anon namespace
4619
4620 ipa_opt_pass_d *
4621 make_pass_ipa_fn_summary (gcc::context *ctxt)
4622 {
4623 return new pass_ipa_fn_summary (ctxt);
4624 }
4625
4626 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4627 within the same process. For use by toplev::finalize. */
4628
4629 void
4630 ipa_fnsummary_c_finalize (void)
4631 {
4632 ipa_free_fn_summary ();
4633 }