/* (Extraction artifact neutralized: git web-viewer header for
   thirdparty/gcc.git blob gcc/ipa-fnsummary.c, commit
   "Remove unused parameter PROB in ipa-fnsummary.c".)  */
1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
   estimate_edge_size_and_time can be used to query
   function size/time in the given context.
   ipa_merge_fn_summary_after_inlining merges properties of caller and
   callee after inlining.

   Finally pass_inline_parameters is exported.  This is used to drive
   computation of function parameters used by the early inliner.  The IPA
   inliner performs analysis via its analyze_function method.  */
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "cfganal.h"
71 #include "gimple-iterator.h"
72 #include "tree-cfg.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
76 #include "ipa-prop.h"
77 #include "ipa-fnsummary.h"
78 #include "cfgloop.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
82 #include "gimplify.h"
83 #include "stringpool.h"
84 #include "attribs.h"
85
/* Summaries.  */
/* Per-function summary; allocated in GC memory (va_gc) — presumably because
   summary entries reference GC-managed data; confirm against
   ipa-fnsummary.h.  */
fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
/* Per-function size summary, heap allocated.  */
fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
/* Per-call-edge summary, heap allocated.  */
fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;

/* Edge predicates go here.  Pool for predicates referenced from edge and
   hint summaries; released through edge_predicate_pool.remove.  */
static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_known_hot)
138 {
139 hints &= ~INLINE_HINT_known_hot;
140 fprintf (f, " known_hot");
141 }
142 gcc_assert (!hints);
143 }
144
145
/* Record SIZE and TIME to SUMMARY.
   The accounted code will be executed when EXEC_PRED is true.
   When NONCONST_PRED is false the code will evaluate to constant and
   will get optimized out in specialized clones of the function.  */

void
ipa_fn_summary::account_size_time (int size, sreal time,
				   const predicate &exec_pred,
				   const predicate &nonconst_pred_in)
{
  size_time_entry *e;
  bool found = false;
  int i;
  predicate nonconst_pred;

  /* Code that never executes contributes nothing.  */
  if (exec_pred == false)
    return;

  /* Code is only non-constant on paths where it also executes.  */
  nonconst_pred = nonconst_pred_in & exec_pred;

  if (nonconst_pred == false)
    return;

  /* We need to create initial empty unconditional clause, but otherwise
     we don't need to account empty times and sizes.  */
  if (!size && time == 0 && size_time_table)
    return;

  gcc_assert (time >= 0);

  /* Look for an existing entry with the same predicate pair so SIZE and
     TIME can be accumulated into it.  */
  for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
    if (e->exec_predicate == exec_pred
	&& e->nonconst_predicate == nonconst_pred)
      {
	found = true;
	break;
      }
  if (i == 256)
    {
      /* Cap the table at 256 entries; beyond that, give up tracking the
	 predicate and fold everything into the first (unconditional)
	 entry.  */
      i = 0;
      found = true;
      e = &(*size_time_table)[0];
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "\t\tReached limit on number of entries, "
		 "ignoring the predicate.");
    }
  if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
    {
      fprintf (dump_file,
	       "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
	       ((double) size) / ipa_fn_summary::size_scale,
	       (time.to_double ()), found ? "" : "new ");
      exec_pred.dump (dump_file, conds, 0);
      if (exec_pred != nonconst_pred)
	{
	  fprintf (dump_file, " nonconst:");
	  nonconst_pred.dump (dump_file, conds);
	}
      else
	fprintf (dump_file, "\n");
    }
  if (!found)
    {
      class size_time_entry new_entry;
      new_entry.size = size;
      new_entry.time = time;
      new_entry.exec_predicate = exec_pred;
      new_entry.nonconst_predicate = nonconst_pred;
      vec_safe_push (size_time_table, new_entry);
    }
  else
    {
      /* Accumulate into the matching (or fallback) entry.  */
      e->size += size;
      e->time += time;
    }
}
223
/* We proved E to be unreachable, redirect it to __builtin_unreachable.  */

static struct cgraph_edge *
redirect_to_unreachable (struct cgraph_edge *e)
{
  /* Remember the callee only when E is already inlined; it is removed
     (with its inline clones) at the end.  */
  struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
  struct cgraph_node *target = cgraph_node::get_create
		  (builtin_decl_implicit (BUILT_IN_UNREACHABLE));

  /* Speculative edges must be resolved first; an indirect edge is made
     direct, a direct edge simply redirected.  */
  if (e->speculative)
    e = e->resolve_speculation (target->decl);
  else if (!e->callee)
    e->make_direct (target);
  else
    e->redirect_callee (target);
  class ipa_call_summary *es = ipa_call_summaries->get (e);
  e->inline_failed = CIF_UNREACHABLE;
  e->count = profile_count::zero ();
  /* A call to __builtin_unreachable costs nothing.  */
  es->call_stmt_size = 0;
  es->call_stmt_time = 0;
  if (callee)
    callee->remove_symbol_and_inline_clones ();
  return e;
}
248
249 /* Set predicate for edge E. */
250
251 static void
252 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
253 {
254 /* If the edge is determined to be never executed, redirect it
255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
256 be optimized out. */
257 if (predicate && *predicate == false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e->speculative || e->callee))
262 e = redirect_to_unreachable (e);
263
264 class ipa_call_summary *es = ipa_call_summaries->get (e);
265 if (predicate && *predicate != true)
266 {
267 if (!es->predicate)
268 es->predicate = edge_predicate_pool.allocate ();
269 *es->predicate = *predicate;
270 }
271 else
272 {
273 if (es->predicate)
274 edge_predicate_pool.remove (es->predicate);
275 es->predicate = NULL;
276 }
277 }
278
279 /* Set predicate for hint *P. */
280
281 static void
282 set_hint_predicate (predicate **p, predicate new_predicate)
283 {
284 if (new_predicate == false || new_predicate == true)
285 {
286 if (*p)
287 edge_predicate_pool.remove (*p);
288 *p = NULL;
289 }
290 else
291 {
292 if (!*p)
293 *p = edge_predicate_pool.allocate ();
294 **p = new_predicate;
295 }
296 }
297
298
/* Compute what conditions may or may not hold given information about
   parameters.  RET_CLAUSE returns truths that may hold in a specialized copy,
   while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
   copy when called in a given context.  Each is a bitmask of conditions; a
   bit value of 0 means the condition is known to be false, while 1 means the
   condition may or may not be true.  These differ — for example the
   NOT_INLINED condition is always false in the specialized copy, and
   builtin_constant_p tests cannot use the fact that a parameter is indeed
   a constant.

   KNOWN_VALS is partial mapping of parameters of NODE to constant values.
   KNOWN_AGGS is a vector of aggregate known offset/value set for each
   parameter.  Return clause of possible truths.  When INLINE_P is true,
   assume that we are inlining.

   ERROR_MARK in KNOWN_VALS means compile time invariant.  */

static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
				    bool inline_p,
				    vec<tree> known_vals,
				    vec<value_range> known_value_ranges,
				    vec<ipa_agg_value_set> known_aggs,
				    clause_t *ret_clause,
				    clause_t *ret_nonspec_clause)
{
  /* Start from the NOT_INLINED truth: false when inlining, possible
     otherwise; always possible in the nonspecialized copy.  */
  clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
  int i;
  struct condition *c;

  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
    {
      tree val;
      tree res;
      int j;
      struct expr_eval_op *op;

      /* We allow call stmt to have fewer arguments than the callee function
	 (especially for K&R style programs).  So bound check here (we assume
	 known_aggs vector, if non-NULL, has the same length as
	 known_vals).  */
      gcc_checking_assert (!known_aggs.exists ()
			   || (known_vals.length () == known_aggs.length ()));
      if (c->operand_num >= (int) known_vals.length ())
	{
	  /* No information about this operand: the condition may be true
	     in both copies.  */
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->agg_contents)
	{
	  struct ipa_agg_value_set *agg;

	  /* A by-value aggregate whose SSA value never changes cannot
	     satisfy a "changed" condition.  */
	  if (c->code == predicate::changed
	      && !c->by_ref
	      && (known_vals[c->operand_num] == error_mark_node))
	    continue;

	  if (known_aggs.exists ())
	    {
	      agg = &known_aggs[c->operand_num];
	      val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
						c->offset, c->by_ref);
	    }
	  else
	    val = NULL_TREE;
	}
      else
	{
	  val = known_vals[c->operand_num];
	  /* ERROR_MARK means "invariant but value unknown" — usable only
	     for the "changed" test.  */
	  if (val == error_mark_node && c->code != predicate::changed)
	    val = NULL_TREE;
	}

      if (!val
	  && (c->code == predicate::changed
	      || c->code == predicate::is_not_constant))
	{
	  clause |= 1 << (i + predicate::first_dynamic_condition);
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}
      /* "changed" and "is_not_constant" conditions are false in the
	 specialized copy (we know the value) but may still hold in the
	 nonspecialized one.  */
      if (c->code == predicate::changed)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      if (c->code == predicate::is_not_constant)
	{
	  nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	  continue;
	}

      /* Try to fold the condition to a constant using the known value.  */
      if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
	{
	  if (c->type != TREE_TYPE (val))
	    val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
	  /* Apply the recorded chain of operations; OP->INDEX selects which
	     operand position VAL occupies.  */
	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (!val)
		break;
	      if (!op->val[0])
		val = fold_unary (op->code, op->type, val);
	      else if (!op->val[1])
		val = fold_binary (op->code, op->type,
				   op->index ? op->val[0] : val,
				   op->index ? val : op->val[0]);
	      else if (op->index == 0)
		val = fold_ternary (op->code, op->type,
				    val, op->val[0], op->val[1]);
	      else if (op->index == 1)
		val = fold_ternary (op->code, op->type,
				    op->val[0], val, op->val[1]);
	      else if (op->index == 2)
		val = fold_ternary (op->code, op->type,
				    op->val[0], op->val[1], val);
	      else
		val = NULL_TREE;
	    }

	  res = val
	    ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
	    : NULL;

	  /* Folded to false: the condition bit stays clear in both
	     clauses.  */
	  if (res && integer_zerop (res))
	    continue;
	  if (res && integer_onep (res))
	    {
	      clause |= 1 << (i + predicate::first_dynamic_condition);
	      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
	      continue;
	    }
	}
      /* Constant folding failed; try to disprove the condition using the
	 known value range of the operand.  */
      if (c->operand_num < (int) known_value_ranges.length ()
	  && !c->agg_contents
	  && !known_value_ranges[c->operand_num].undefined_p ()
	  && !known_value_ranges[c->operand_num].varying_p ()
	  && TYPE_SIZE (c->type)
	     == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
	  && (!val || TREE_CODE (val) != INTEGER_CST))
	{
	  value_range vr = known_value_ranges[c->operand_num];
	  if (!useless_type_conversion_p (c->type, vr.type ()))
	    {
	      value_range res;
	      range_fold_unary_expr (&res, NOP_EXPR,
				     c->type, &vr, vr.type ());
	      vr = res;
	    }
	  tree type = c->type;

	  /* Propagate the range through the recorded operation chain.  */
	  for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
	    {
	      if (vr.varying_p () || vr.undefined_p ())
		break;

	      value_range res;
	      if (!op->val[0])
		range_fold_unary_expr (&res, op->code, op->type, &vr, type);
	      else if (!op->val[1])
		{
		  value_range op0 (op->val[0], op->val[0]);
		  range_fold_binary_expr (&res, op->code, op->type,
					  op->index ? &op0 : &vr,
					  op->index ? &vr : &op0);
		}
	      else
		gcc_unreachable ();
	      type = op->type;
	      vr = res;
	    }
	  if (!vr.varying_p () && !vr.undefined_p ())
	    {
	      value_range res;
	      value_range val_vr (c->val, c->val);
	      range_fold_binary_expr (&res, c->code, boolean_type_node,
				      &vr,
				      &val_vr);
	      /* The condition provably never holds.  */
	      if (res.zero_p ())
		continue;
	    }
	}

      /* Could not disprove the condition: record it as possibly true.  */
      clause |= 1 << (i + predicate::first_dynamic_condition);
      nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
    }
  *ret_clause = clause;
  if (ret_nonspec_clause)
    *ret_nonspec_clause = nonspec_clause;
}
492
493
/* Work out what conditions might be true at invocation of E.
   INLINE_P is true when we evaluate the context of E being inlined.

   Each output pointer may be NULL when the caller does not need that
   result.  Vectors returned through KNOWN_VALS_PTR and KNOWN_AGGS_PTR are
   owned by the caller (released here otherwise).  */

void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
			      clause_t *clause_ptr,
			      clause_t *nonspec_clause_ptr,
			      vec<tree> *known_vals_ptr,
			      vec<ipa_polymorphic_call_context>
			      *known_contexts_ptr,
			      vec<ipa_agg_value_set> *known_aggs_ptr)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
  vec<tree> known_vals = vNULL;
  auto_vec<value_range, 32> known_value_ranges;
  vec<ipa_agg_value_set> known_aggs = vNULL;
  class ipa_edge_args *args;

  if (clause_ptr)
    *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
  if (known_vals_ptr)
    known_vals_ptr->create (0);
  if (known_contexts_ptr)
    known_contexts_ptr->create (0);

  /* Preferred path: jump function information is available for E.  */
  if (ipa_node_params_sum
      && !e->call_stmt_cannot_inline_p
      && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)
      && (args = IPA_EDGE_REF (e)) != NULL)
    {
      struct cgraph_node *caller;
      class ipa_node_params *caller_parms_info, *callee_pi;
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      int i, count = ipa_get_cs_argument_count (args);

      /* Parameter info lives on the inline root of the caller.  */
      if (e->caller->inlined_to)
	caller = e->caller->inlined_to;
      else
	caller = e->caller;
      caller_parms_info = IPA_NODE_REF (caller);
      callee_pi = IPA_NODE_REF (callee);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      if (count && info->conds)
	known_value_ranges.safe_grow_cleared (count);
      if (count && (info->conds || known_aggs_ptr))
	known_aggs.safe_grow_cleared (count);
      if (count && known_contexts_ptr)
	known_contexts_ptr->safe_grow_cleared (count);

      if (callee_pi)
	for (i = 0; i < count; i++)
	  {
	    struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
	    tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
					     ipa_get_type (callee_pi, i));

	    /* Fall back to reading the constant directly off the call
	       statement.  */
	    if (!cst && e->call_stmt
		&& i < (int)gimple_call_num_args (e->call_stmt))
	      {
		cst = gimple_call_arg (e->call_stmt, i);
		if (!is_gimple_min_invariant (cst))
		  cst = NULL;
	      }
	    if (cst)
	      {
		gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
		if (known_vals.exists ())
		  known_vals[i] = cst;
	      }
	    /* ERROR_MARK marks a parameter that is known not to change
	       (zero change probability) even though its value is unknown.  */
	    else if (inline_p && !es->param[i].change_prob)
	      known_vals[i] = error_mark_node;

	    if (known_contexts_ptr)
	      (*known_contexts_ptr)[i]
		= ipa_context_from_jfunc (caller_parms_info, e, i, jf);

	    known_aggs[i] = ipa_agg_value_set_from_jfunc (caller_parms_info,
							  caller, &jf->agg);
	    if (info->conds)
	      known_value_ranges[i]
		= ipa_value_range_from_jfunc (caller_parms_info, e, jf,
					      ipa_get_type (callee_pi, i));
	  }
      else
	gcc_assert (callee->thunk.thunk_p);
    }
  /* No jump functions: scrape invariant arguments from the call stmt.  */
  else if (e->call_stmt && !e->call_stmt_cannot_inline_p
	   && ((clause_ptr && info->conds) || known_vals_ptr))
    {
      int i, count = (int)gimple_call_num_args (e->call_stmt);

      if (count && (info->conds || known_vals_ptr))
	known_vals.safe_grow_cleared (count);
      for (i = 0; i < count; i++)
	{
	  tree cst = gimple_call_arg (e->call_stmt, i);
	  if (!is_gimple_min_invariant (cst))
	    cst = NULL;
	  if (cst)
	    known_vals[i] = cst;
	}
    }

  evaluate_conditions_for_known_args (callee, inline_p,
				      known_vals,
				      known_value_ranges,
				      known_aggs, clause_ptr,
				      nonspec_clause_ptr);

  /* Hand the vectors to the caller, or release them here.  */
  if (known_vals_ptr)
    *known_vals_ptr = known_vals;
  else
    known_vals.release ();

  if (known_aggs_ptr)
    *known_aggs_ptr = known_aggs;
  else
    ipa_release_agg_values (known_aggs);
}
615
616
617 /* Allocate the function summary. */
618
619 static void
620 ipa_fn_summary_alloc (void)
621 {
622 gcc_checking_assert (!ipa_fn_summaries);
623 ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
624 (symtab);
625 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
626 ipa_call_summaries = new ipa_call_summary_t (symtab);
627 }
628
629 ipa_call_summary::~ipa_call_summary ()
630 {
631 if (predicate)
632 edge_predicate_pool.remove (predicate);
633
634 param.release ();
635 }
636
637 ipa_fn_summary::~ipa_fn_summary ()
638 {
639 if (loop_iterations)
640 edge_predicate_pool.remove (loop_iterations);
641 if (loop_stride)
642 edge_predicate_pool.remove (loop_stride);
643 vec_free (conds);
644 vec_free (size_time_table);
645 }
646
647 void
648 ipa_fn_summary_t::remove_callees (cgraph_node *node)
649 {
650 cgraph_edge *e;
651 for (e = node->callees; e; e = e->next_callee)
652 ipa_call_summaries->remove (e);
653 for (e = node->indirect_calls; e; e = e->next_callee)
654 ipa_call_summaries->remove (e);
655 }
656
657 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
658 Additionally care about allocating new memory slot for updated predicate
659 and set it to NULL when it becomes true or false (and thus uninteresting).
660 */
661
662 static void
663 remap_hint_predicate_after_duplication (predicate **p,
664 clause_t possible_truths)
665 {
666 predicate new_predicate;
667
668 if (!*p)
669 return;
670
671 new_predicate = (*p)->remap_after_duplication (possible_truths);
672 /* We do not want to free previous predicate; it is used by node origin. */
673 *p = NULL;
674 set_hint_predicate (p, new_predicate);
675 }
676
677
/* Hook that is called by cgraph.c when a node is duplicated.  */
void
ipa_fn_summary_t::duplicate (cgraph_node *src,
			     cgraph_node *dst,
			     ipa_fn_summary *,
			     ipa_fn_summary *info)
{
  /* Placement-copy the source summary into the pre-allocated slot.  */
  new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
  /* TODO: as an optimization, we may avoid copying conditions
     that are known to be false or true.  */
  info->conds = vec_safe_copy (info->conds);

  /* When there are any replacements in the function body, see if we can figure
     out that something was optimized out.  */
  if (ipa_node_params_sum && dst->clone.tree_map)
    {
      vec<size_time_entry, va_gc> *entry = info->size_time_table;
      /* Use SRC parm info since it may not be copied yet.  */
      class ipa_node_params *parms_info = IPA_NODE_REF (src);
      vec<tree> known_vals = vNULL;
      int count = ipa_get_param_count (parms_info);
      int i, j;
      clause_t possible_truths;
      predicate true_pred = true;
      size_time_entry *e;
      int optimized_out_size = 0;
      bool inlined_to_p = false;
      struct cgraph_edge *edge, *next;

      info->size_time_table = 0;
      known_vals.safe_grow_cleared (count);
      /* Collect the replacement constant, if any, for each parameter.  */
      for (i = 0; i < count; i++)
	{
	  struct ipa_replace_map *r;

	  for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
	    {
	      if (r->parm_num == i)
		{
		  known_vals[i] = r->new_tree;
		  break;
		}
	    }
	}
      evaluate_conditions_for_known_args (dst, false,
					  known_vals,
					  vNULL,
					  vNULL,
					  &possible_truths,
					  /* We are going to specialize,
					     so ignore nonspec truths.  */
					  NULL);
      known_vals.release ();

      /* Re-seed the table with the initial unconditional entry.  */
      info->account_size_time (0, 0, true_pred, true_pred);

      /* Remap size_time vectors.
	 Simplify the predicate by pruning out alternatives that are known
	 to be false.
	 TODO: as an optimization, we can also eliminate conditions known
	 to be true.  */
      for (i = 0; vec_safe_iterate (entry, i, &e); i++)
	{
	  predicate new_exec_pred;
	  predicate new_nonconst_pred;
	  new_exec_pred = e->exec_predicate.remap_after_duplication
	    (possible_truths);
	  new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
	    (possible_truths);
	  if (new_exec_pred == false || new_nonconst_pred == false)
	    optimized_out_size += e->size;
	  else
	    info->account_size_time (e->size, e->time, new_exec_pred,
				     new_nonconst_pred);
	}

      /* Remap edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->callees; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  /* edge_set_predicate may redirect EDGE, so fetch NEXT first.  */
	  next = edge->next_callee;

	  if (!edge->inline_failed)
	    inlined_to_p = true;
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}

      /* Remap indirect edge predicates with the same simplification as above.
	 Also copy constantness arrays.  */
      for (edge = dst->indirect_calls; edge; edge = next)
	{
	  predicate new_predicate;
	  class ipa_call_summary *es = ipa_call_summaries->get (edge);
	  next = edge->next_callee;

	  gcc_checking_assert (edge->inline_failed);
	  if (!es->predicate)
	    continue;
	  new_predicate = es->predicate->remap_after_duplication
	    (possible_truths);
	  if (new_predicate == false && *es->predicate != false)
	    optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
	  edge_set_predicate (edge, &new_predicate);
	}
      remap_hint_predicate_after_duplication (&info->loop_iterations,
					      possible_truths);
      remap_hint_predicate_after_duplication (&info->loop_stride,
					      possible_truths);

      /* If inliner or someone after inliner will ever start producing
	 non-trivial clones, we will get trouble with lack of information
	 about updating self sizes, because size vectors already contains
	 sizes of the callees.  */
      gcc_assert (!inlined_to_p || !optimized_out_size);
    }
  else
    {
      /* No replacements: take a plain copy of the size/time table and
	 re-register the hint predicates in freshly allocated slots.  */
      info->size_time_table = vec_safe_copy (info->size_time_table);
      if (info->loop_iterations)
	{
	  predicate p = *info->loop_iterations;
	  info->loop_iterations = NULL;
	  set_hint_predicate (&info->loop_iterations, p);
	}
      if (info->loop_stride)
	{
	  predicate p = *info->loop_stride;
	  info->loop_stride = NULL;
	  set_hint_predicate (&info->loop_stride, p);
	}
    }
  /* NOTE(review): inline clones appear to be accounted via their inlined_to
     root, so only recompute the aggregate summary for standalone nodes —
     confirm.  */
  if (!dst->inlined_to)
    ipa_update_overall_fn_summary (dst);
}
820
821
/* Hook that is called by cgraph.c when an edge is duplicated.  */

void
ipa_call_summary_t::duplicate (struct cgraph_edge *src,
			       struct cgraph_edge *dst,
			       class ipa_call_summary *srcinfo,
			       class ipa_call_summary *info)
{
  /* Placement-copy the source summary into the pre-allocated slot.  */
  new (info) ipa_call_summary (*srcinfo);
  /* Clear the copied predicate pointer first so edge_set_predicate
     allocates a fresh one instead of sharing SRCINFO's slot.  */
  info->predicate = NULL;
  edge_set_predicate (dst, srcinfo->predicate);
  info->param = srcinfo->param.copy ();
  if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
    {
      /* The edge turned from indirect to direct: discount the difference
	 between indirect and direct call costs.  */
      info->call_stmt_size -= (eni_size_weights.indirect_call_cost
			       - eni_size_weights.call_cost);
      info->call_stmt_time -= (eni_time_weights.indirect_call_cost
			       - eni_time_weights.call_cost);
    }
}
842
/* Dump edge summaries associated to NODE and recursively to all clones.
   Indent by INDENT.  F is the output stream; INFO supplies the condition
   vector used when dumping predicates.  */

static void
dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
		       class ipa_fn_summary *info)
{
  struct cgraph_edge *edge;
  for (edge = node->callees; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
      int i;

      fprintf (f,
	       "%*s%s/%i %s\n%*s  freq:%4.2f",
	       indent, "", callee->name (), callee->order,
	       !edge->inline_failed
	       ? "inlined" : cgraph_inline_failed_string (edge->inline_failed),
	       indent, "", edge->sreal_frequency ().to_double ());

      if (es)
	fprintf (f, " loop depth:%2i size:%2i time: %2i",
		 es->loop_depth, es->call_stmt_size, es->call_stmt_time);

      ipa_fn_summary *s = ipa_fn_summaries->get (callee);
      ipa_size_summary *ss = ipa_size_summaries->get (callee);
      if (s != NULL)
	fprintf (f, " callee size:%2i stack:%2i",
		 (int) (ss->size / ipa_fn_summary::size_scale),
		 (int) s->estimated_stack_size);

      if (es && es->predicate)
	{
	  fprintf (f, " predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
      /* Per-argument change probabilities, when recorded.  */
      if (es && es->param.exists ())
	for (i = 0; i < (int) es->param.length (); i++)
	  {
	    int prob = es->param[i].change_prob;

	    if (!prob)
	      fprintf (f, "%*s op%i is compile time invariant\n",
		       indent + 2, "", i);
	    else if (prob != REG_BR_PROB_BASE)
	      fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
		       prob * 100.0 / REG_BR_PROB_BASE);
	  }
      /* Recurse into inlined callees.  */
      if (!edge->inline_failed)
	{
	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
		   indent + 2, "",
		   (int) ipa_get_stack_frame_offset (callee),
		   (int) ss->estimated_self_stack_size);
	  dump_ipa_call_summary (f, indent + 2, callee, info);
	}
    }
  for (edge = node->indirect_calls; edge; edge = edge->next_callee)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (edge);
      fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
	       " time: %2i",
	       indent, "",
	       es->loop_depth,
	       edge->sreal_frequency ().to_double (), es->call_stmt_size,
	       es->call_stmt_time);
      if (es->predicate)
	{
	  fprintf (f, "predicate: ");
	  es->predicate->dump (f, info->conds);
	}
      else
	fprintf (f, "\n");
    }
}
922
923
/* Dump the function summary of NODE to F, including its size/time table,
   hint predicates and (recursively) edge summaries.  */

void
ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
{
  if (node->definition)
    {
      class ipa_fn_summary *s = ipa_fn_summaries->get (node);
      class ipa_size_summary *ss = ipa_size_summaries->get (node);
      if (s != NULL)
	{
	  size_time_entry *e;
	  int i;
	  fprintf (f, "IPA function summary for %s", node->dump_name ());
	  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
	    fprintf (f, " always_inline");
	  if (s->inlinable)
	    fprintf (f, " inlinable");
	  if (s->fp_expressions)
	    fprintf (f, " fp_expression");
	  fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
	  fprintf (f, "  self size:       %i\n", ss->self_size);
	  fprintf (f, "  global size:     %i\n", ss->size);
	  fprintf (f, "  min size:       %i\n", s->min_size);
	  fprintf (f, "  self stack:      %i\n",
		   (int) ss->estimated_self_stack_size);
	  fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
	  if (s->growth)
	    fprintf (f, "  estimated growth:%i\n", (int) s->growth);
	  if (s->scc_no)
	    fprintf (f, "  In SCC:          %i\n", (int) s->scc_no);
	  /* One line per size/time entry with its predicates.  */
	  for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
	    {
	      fprintf (f, "    size:%f, time:%f",
		       (double) e->size / ipa_fn_summary::size_scale,
		       e->time.to_double ());
	      if (e->exec_predicate != true)
		{
		  fprintf (f, ",  executed if:");
		  e->exec_predicate.dump (f, s->conds, 0);
		}
	      if (e->exec_predicate != e->nonconst_predicate)
		{
		  fprintf (f, ",  nonconst if:");
		  e->nonconst_predicate.dump (f, s->conds, 0);
		}
	      fprintf (f, "\n");
	    }
	  if (s->loop_iterations)
	    {
	      fprintf (f, "  loop iterations:");
	      s->loop_iterations->dump (f, s->conds);
	    }
	  if (s->loop_stride)
	    {
	      fprintf (f, "  loop stride:");
	      s->loop_stride->dump (f, s->conds);
	    }
	  fprintf (f, "  calls:\n");
	  dump_ipa_call_summary (f, 4, node, s);
	  fprintf (f, "\n");
	}
      else
	fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
    }
}
988
/* Dump function summary of NODE to stderr; convenience wrapper for use
   from the debugger.  */

DEBUG_FUNCTION void
ipa_debug_fn_summary (struct cgraph_node *node)
{
  ipa_dump_fn_summary (stderr, node);
}
994
/* Dump summaries of all defined functions to F.  Inlined nodes are skipped
   here; dump_ipa_call_summary reaches them recursively from their inline
   root.  */

void
ipa_dump_fn_summaries (FILE *f)
{
  struct cgraph_node *node;

  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->inlined_to)
      ipa_dump_fn_summary (f, node);
}
1004
1005 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1006 boolean variable pointed to by DATA. */
1007
1008 static bool
1009 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1010 void *data)
1011 {
1012 bool *b = (bool *) data;
1013 *b = true;
1014 return true;
1015 }
1016
/* If OP refers to value of function parameter, return the corresponding
   parameter.  If non-NULL, the size of the memory load (or the SSA_NAME of the
   PARM_DECL) will be stored to *SIZE_P in that case too.  */

static tree
unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
		   poly_int64 *size_p)
{
  /* SSA_NAME referring to parm default def?  */
  if (TREE_CODE (op) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (op)
      && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
    {
      if (size_p)
	*size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
      return SSA_NAME_VAR (op);
    }
  /* Non-SSA parm reference?  */
  if (TREE_CODE (op) == PARM_DECL)
    {
      bool modified = false;

      ao_ref refd;
      ao_ref_init (&refd, op);
      /* Walk aliasing vdefs to check whether any store may modify the
	 parameter before STMT.  NOTE(review): the +1 on aa_walk_budget is
	 presumably so a zero budget still permits a minimal walk — confirm
	 against walk_aliased_vdefs' limit semantics.  */
      int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
				       mark_modified, &modified, NULL, NULL,
				       fbi->aa_walk_budget + 1);
      if (walked < 0)
	{
	  /* Walk budget exhausted; disable further walks for this body and
	     conservatively give up.  */
	  fbi->aa_walk_budget = 0;
	  return NULL_TREE;
	}
      if (!modified)
	{
	  if (size_p)
	    *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
	  return op;
	}
    }
  return NULL_TREE;
}
1058
1059 /* If OP refers to value of function parameter, return the corresponding
1060 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1061 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1062 stored to *SIZE_P in that case too. */
1063
1064 static tree
1065 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1066 poly_int64 *size_p)
1067 {
1068 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1069 if (res)
1070 return res;
1071
1072 if (TREE_CODE (op) == SSA_NAME
1073 && !SSA_NAME_IS_DEFAULT_DEF (op)
1074 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1075 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1076 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1077 size_p);
1078 return NULL_TREE;
1079 }
1080
1081 /* If OP refers to a value of a function parameter or value loaded from an
1082 aggregate passed to a parameter (either by value or reference), return TRUE
1083 and store the number of the parameter to *INDEX_P, the access size into
1084 *SIZE_P, and information whether and how it has been loaded from an
1085 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1086 statement in which OP is used or loaded. */
1087
static bool
unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
				  gimple *stmt, tree op, int *index_p,
				  poly_int64 *size_p,
				  struct agg_position_info *aggpos)
{
  /* First try the simple case: OP is directly an unmodified parameter.  */
  tree res = unmodified_parm_1 (fbi, stmt, op, size_p);

  gcc_checking_assert (aggpos);
  if (res)
    {
      *index_p = ipa_get_param_decl_index (fbi->info, res);
      if (*index_p < 0)
	return false;
      aggpos->agg_contents = false;
      aggpos->by_ref = false;
      return true;
    }

  /* Otherwise look through a single-rhs SSA definition; if its RHS is not
     a memory reference, recurse on it as a plain value.  */
  if (TREE_CODE (op) == SSA_NAME)
    {
      if (SSA_NAME_IS_DEFAULT_DEF (op)
	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
	return false;
      stmt = SSA_NAME_DEF_STMT (op);
      op = gimple_assign_rhs1 (stmt);
      if (!REFERENCE_CLASS_P (op))
	return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
						 aggpos);
    }

  /* OP is a memory reference: see whether it loads from an aggregate
     passed to a parameter (by value or by reference).  */
  aggpos->agg_contents = true;
  return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
				 stmt, op, index_p, &aggpos->offset,
				 size_p, &aggpos->by_ref);
}
1124
1125 /* See if statement might disappear after inlining.
1126 0 - means not eliminated
1127 1 - half of statements goes away
1128 2 - for sure it is eliminated.
1129 We are not terribly sophisticated, basically looking for simple abstraction
1130 penalty wrappers. */
1131
static int
eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
{
  enum gimple_code code = gimple_code (stmt);
  enum tree_code rhs_code;

  if (!optimize)
    return 0;

  switch (code)
    {
    case GIMPLE_RETURN:
      /* Return statements disappear when the body is inlined.  */
      return 2;
    case GIMPLE_ASSIGN:
      if (gimple_num_ops (stmt) != 2)
	return 0;

      rhs_code = gimple_assign_rhs_code (stmt);

      /* Casts of parameters, loads from parameters passed by reference
	 and stores to return value or parameters are often free after
	 inlining due to SRA and further combining.
	 Assume that half of statements goes away.  */
      if (CONVERT_EXPR_CODE_P (rhs_code)
	  || rhs_code == VIEW_CONVERT_EXPR
	  || rhs_code == ADDR_EXPR
	  || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  tree lhs = gimple_assign_lhs (stmt);
	  tree inner_rhs = get_base_address (rhs);
	  tree inner_lhs = get_base_address (lhs);
	  bool rhs_free = false;
	  bool lhs_free = false;

	  if (!inner_rhs)
	    inner_rhs = rhs;
	  if (!inner_lhs)
	    inner_lhs = lhs;

	  /* Reads of parameter are expected to be free.  */
	  if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
	    rhs_free = true;
	  /* Match expressions of form &this->field.  Those will most likely
	     combine with something upstream after inlining.  */
	  else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
	    {
	      tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
	      if (TREE_CODE (op) == PARM_DECL)
		rhs_free = true;
	      else if (TREE_CODE (op) == MEM_REF
		       && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
					   NULL))
		rhs_free = true;
	    }

	  /* When parameter is not SSA register because its address is taken
	     and it is just copied into one, the statement will be completely
	     free after inlining (we will copy propagate backward).  */
	  if (rhs_free && is_gimple_reg (lhs))
	    return 2;

	  /* Reads of parameters passed by reference
	     expected to be free (i.e. optimized out after inlining).  */
	  if (TREE_CODE (inner_rhs) == MEM_REF
	      && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
	    rhs_free = true;

	  /* Copying parameter passed by reference into gimple register is
	     probably also going to copy propagate, but we can't be quite
	     sure.  */
	  if (rhs_free && is_gimple_reg (lhs))
	    lhs_free = true;

	  /* Writes to parameters, parameters passed by value and return value
	     (either directly or passed via invisible reference) are free.

	     TODO: We ought to handle testcase like
	     struct a {int a,b;};
	     struct a
	     returnstruct (void)
	     {
	     struct a a ={1,2};
	     return a;
	     }

	     This translate into:

	     returnstruct ()
	     {
	     int a$b;
	     int a$a;
	     struct a a;
	     struct a D.2739;

	     <bb 2>:
	     D.2739.a = 1;
	     D.2739.b = 2;
	     return D.2739;

	     }
	     For that we either need to copy ipa-split logic detecting writes
	     to return value.  */
	  if (TREE_CODE (inner_lhs) == PARM_DECL
	      || TREE_CODE (inner_lhs) == RESULT_DECL
	      || (TREE_CODE (inner_lhs) == MEM_REF
		  && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
				       NULL)
		      || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
			  && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
			  && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
						      (inner_lhs,
						       0))) == RESULT_DECL))))
	    lhs_free = true;
	  if (lhs_free
	      && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
	    rhs_free = true;
	  if (lhs_free && rhs_free)
	    return 1;
	}
      return 0;
    default:
      return 0;
    }
}
1257
1258 /* Analyze EXPR if it represents a series of simple operations performed on
1259 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1260 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1261 Type of the parameter or load from an aggregate via the parameter is
1262 stored in *TYPE_P. Operations on the parameter are recorded to
1263 PARAM_OPS_P if it is not NULL. */
1264
static bool
decompose_param_expr (struct ipa_func_body_info *fbi,
		      gimple *stmt, tree expr,
		      int *index_p, tree *type_p,
		      struct agg_position_info *aggpos,
		      expr_eval_ops *param_ops_p = NULL)
{
  /* Cap on the number of operations we are willing to record; beyond this
     the expression is considered too complex to be useful.  */
  int op_limit = param_ipa_max_param_expr_ops;
  int op_count = 0;

  if (param_ops_p)
    *param_ops_p = NULL;

  while (true)
    {
      expr_eval_op eval_op;
      unsigned rhs_count;
      unsigned cst_count = 0;

      /* Success case: EXPR bottoms out in a parameter or a load from an
	 aggregate passed to a parameter.  */
      if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
					    aggpos))
	{
	  tree type = TREE_TYPE (expr);

	  if (aggpos->agg_contents)
	    {
	      /* Stop if containing bit-field.  */
	      if (TREE_CODE (expr) == BIT_FIELD_REF
		  || contains_bitfld_component_ref_p (expr))
		break;
	    }

	  *type_p = type;
	  return true;
	}

      if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
	break;

      if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
	break;

      switch (gimple_assign_rhs_class (stmt))
	{
	case GIMPLE_SINGLE_RHS:
	  /* A plain copy: follow it without recording an operation.  */
	  expr = gimple_assign_rhs1 (stmt);
	  continue;

	case GIMPLE_UNARY_RHS:
	  rhs_count = 1;
	  break;

	case GIMPLE_BINARY_RHS:
	  rhs_count = 2;
	  break;

	case GIMPLE_TERNARY_RHS:
	  rhs_count = 3;
	  break;

	default:
	  goto fail;
	}

      /* Stop if expression is too complex.  */
      if (op_count++ == op_limit)
	break;

      if (param_ops_p)
	{
	  eval_op.code = gimple_assign_rhs_code (stmt);
	  eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
	  eval_op.val[0] = NULL_TREE;
	  eval_op.val[1] = NULL_TREE;
	}

      /* Exactly one operand may be non-constant; it becomes the new EXPR
	 to chase, while the constants are recorded in EVAL_OP.  */
      expr = NULL_TREE;
      for (unsigned i = 0; i < rhs_count; i++)
	{
	  tree op = gimple_op (stmt, i + 1);

	  gcc_assert (op && !TYPE_P (op));
	  if (is_gimple_ip_invariant (op))
	    {
	      if (++cst_count == rhs_count)
		goto fail;

	      eval_op.val[cst_count - 1] = op;
	    }
	  else if (!expr)
	    {
	      /* Found a non-constant operand, and record its index in rhs
		 operands.  */
	      eval_op.index = i;
	      expr = op;
	    }
	  else
	    {
	      /* Found more than one non-constant operands.  */
	      goto fail;
	    }
	}

      /* Prepend, so that the operations end up in evaluation order.  */
      if (param_ops_p)
	vec_safe_insert (*param_ops_p, 0, eval_op);
    }

  /* Failed to decompose, free resource and return.  */
 fail:
  if (param_ops_p)
    vec_free (*param_ops_p);

  return false;
}
1379
1380 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1381 predicates to the CFG edges. */
1382
static void
set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				   class ipa_fn_summary *summary,
				   class ipa_node_params *params_summary,
				   basic_block bb)
{
  gimple *last;
  tree op, op2;
  int index;
  struct agg_position_info aggpos;
  enum tree_code code, inverted_code;
  edge e;
  edge_iterator ei;
  gimple *set_stmt;
  tree param_type;
  expr_eval_ops param_ops;

  last = last_stmt (bb);
  if (!last || gimple_code (last) != GIMPLE_COND)
    return;
  /* Only conditions comparing against an IP-invariant RHS can be turned
     into predicates.  */
  if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
    return;
  op = gimple_cond_lhs (last);

  if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			    &param_ops))
    {
      code = gimple_cond_code (last);
      inverted_code = invert_tree_comparison (code, HONOR_NANS (op));

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
				      ? code : inverted_code);
	  /* invert_tree_comparison will return ERROR_MARK on FP
	     comparisons that are not EQ/NE instead of returning proper
	     unordered one.  Be sure it is not confused with NON_CONSTANT.

	     And if the edge's target is the final block of diamond CFG graph
	     of this conditional statement, we do not need to compute
	     predicate for the edge because the final block's predicate must
	     be at least as that of the first block of the statement.  */
	  if (this_code != ERROR_MARK
	      && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	    {
	      predicate p
		= add_condition (summary, params_summary, index,
				 param_type, &aggpos,
				 this_code, gimple_cond_rhs (last), param_ops);
	      e->aux = edge_predicate_pool.allocate ();
	      *(predicate *) e->aux = p;
	    }
	}
      vec_free (param_ops);
    }

  if (TREE_CODE (op) != SSA_NAME)
    return;
  /* Special case
     if (builtin_constant_p (op))
     constant_code
     else
     nonconstant_code.
     Here we can predicate nonconstant_code.  We can't
     really handle constant_code since we have no predicate
     for this and also the constant code is not known to be
     optimized away when inliner doesn't see operand is constant.
     Other optimizers might think otherwise.  */
  if (gimple_cond_code (last) != NE_EXPR
      || !integer_zerop (gimple_cond_rhs (last)))
    return;
  set_stmt = SSA_NAME_DEF_STMT (op);
  if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
      || gimple_call_num_args (set_stmt) != 1)
    return;
  op2 = gimple_call_arg (set_stmt, 0);
  if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
    return;
  /* The else (false) edge is the one taken when the operand is not a
     compile-time constant.  */
  FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
    {
      predicate p = add_condition (summary, params_summary, index,
				   param_type, &aggpos,
				   predicate::is_not_constant, NULL_TREE);
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = p;
    }
}
1470
1471
1472 /* If BB ends by a switch we can turn into predicates, attach corresponding
1473 predicates to the CFG edges. */
1474
static void
set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
				     class ipa_fn_summary *summary,
				     class ipa_node_params *params_summary,
				     basic_block bb)
{
  gimple *lastg;
  tree op;
  int index;
  struct agg_position_info aggpos;
  edge e;
  edge_iterator ei;
  size_t n;
  size_t case_idx;
  tree param_type;
  expr_eval_ops param_ops;

  lastg = last_stmt (bb);
  if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
    return;
  gswitch *last = as_a <gswitch *> (lastg);
  op = gimple_switch_index (last);
  if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
			     &param_ops))
    return;

  /* RANGES collects the merged, ascending case ranges seen so far; it is
     used at the end to build the default-case predicate.  */
  auto_vec<std::pair<tree, tree> > ranges;
  tree type = TREE_TYPE (op);
  int bound_limit = param_ipa_max_switch_predicate_bounds;
  int bound_count = 0;
  wide_int vr_wmin, vr_wmax;
  value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);

  /* Start all outgoing edges with the "never executed" predicate and OR
     each matching case into it below.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      e->aux = edge_predicate_pool.allocate ();
      *(predicate *) e->aux = false;
    }

  e = gimple_switch_edge (cfun, last, 0);
  /* Set BOUND_COUNT to maximum count to bypass computing predicate for
     default case if its target basic block is in convergence point of all
     switch cases, which can be determined by checking whether it
     post-dominates the switch statement.  */
  if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
    bound_count = INT_MAX;

  n = gimple_switch_num_labels (last);
  for (case_idx = 1; case_idx < n; ++case_idx)
    {
      tree cl = gimple_switch_label (last, case_idx);
      tree min = CASE_LOW (cl);
      tree max = CASE_HIGH (cl);
      predicate p;

      e = gimple_switch_edge (cfun, last, case_idx);

      /* The case value might not have same type as switch expression,
	 extend the value based on the expression type.  */
      if (TREE_TYPE (min) != type)
	min = wide_int_to_tree (type, wi::to_wide (min));

      if (!max)
	max = min;
      else if (TREE_TYPE (max) != type)
	max = wide_int_to_tree (type, wi::to_wide (max));

      /* The case's target basic block is in convergence point of all switch
	 cases, its predicate should be at least as that of the switch
	 statement.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
	p = true;
      else if (min == max)
	p = add_condition (summary, params_summary, index, param_type,
			   &aggpos, EQ_EXPR, min, param_ops);
      else
	{
	  /* A case range [min, max] becomes op >= min && op <= max.  */
	  predicate p1, p2;
	  p1 = add_condition (summary, params_summary, index, param_type,
			      &aggpos, GE_EXPR, min, param_ops);
	  p2 = add_condition (summary, params_summary,index, param_type,
			      &aggpos, LE_EXPR, max, param_ops);
	  p = p1 & p2;
	}
      *(class predicate *) e->aux
	= p.or_with (summary->conds, *(class predicate *) e->aux);

      /* If there are too many disjoint case ranges, predicate for default
	 case might become too complicated.  So add a limit here.  */
      if (bound_count > bound_limit)
	continue;

      bool new_range = true;

      if (!ranges.is_empty ())
	{
	  wide_int curr_wmin = wi::to_wide (min);
	  wide_int last_wmax = wi::to_wide (ranges.last ().second);

	  /* Merge case ranges if they are continuous.  */
	  if (curr_wmin == last_wmax + 1)
	    new_range = false;
	  else if (vr_type == VR_ANTI_RANGE)
	    {
	      /* If two disjoint case ranges can be connected by anti-range
		 of switch index, combine them to one range.  */
	      if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
		vr_type = VR_UNDEFINED;
	      else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
		new_range = false;
	    }
	}

      /* Create/extend a case range.  And we count endpoints of range set,
	 this number nearly equals to number of conditions that we will create
	 for predicate of default case.  */
      if (new_range)
	{
	  bound_count += (min == max) ? 1 : 2;
	  ranges.safe_push (std::make_pair (min, max));
	}
      else
	{
	  bound_count += (ranges.last ().first == ranges.last ().second);
	  ranges.last ().second = max;
	}
    }

  e = gimple_switch_edge (cfun, last, 0);
  if (bound_count > bound_limit)
    {
      /* Too many bounds: fall back to an always-true predicate for the
	 default edge.  */
      *(class predicate *) e->aux = true;
      vec_free (param_ops);
      return;
    }

  predicate p_seg = true;
  predicate p_all = false;

  if (vr_type != VR_RANGE)
    {
      vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
      vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
    }

  /* Construct predicate to represent default range set that is negation of
     all case ranges.  Case range is classified as containing single/non-single
     values.  Suppose a piece of case ranges in the following.

                [D1...D2]  [S1] ... [Sn]  [D3...D4]

     To represent default case's range sets between two non-single value
     case ranges (From D2 to D3), we construct predicate as:

              D2 < x < D3 && x != S1 && ... && x != Sn
   */
  for (size_t i = 0; i < ranges.length (); i++)
    {
      tree min = ranges[i].first;
      tree max = ranges[i].second;

      if (min == max)
	p_seg &= add_condition (summary, params_summary, index,
				param_type, &aggpos, NE_EXPR,
				min, param_ops);
      else
	{
	  /* Do not create sub-predicate for range that is beyond low bound
	     of switch index.  */
	  if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
	    {
	      p_seg &= add_condition (summary, params_summary, index,
				      param_type, &aggpos,
				      LT_EXPR, min, param_ops);
	      p_all = p_all.or_with (summary->conds, p_seg);
	    }

	  /* Do not create sub-predicate for range that is beyond up bound
	     of switch index.  */
	  if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
	    {
	      p_seg = false;
	      break;
	    }

	  p_seg = add_condition (summary, params_summary, index,
				 param_type, &aggpos, GT_EXPR,
				 max, param_ops);
	}
    }

  p_all = p_all.or_with (summary->conds, p_seg);
  *(class predicate *) e->aux
    = p_all.or_with (summary->conds, *(class predicate *) e->aux);

  vec_free (param_ops);
}
1672
1673
1674 /* For each BB in NODE attach to its AUX pointer predicate under
1675 which it is executable. */
1676
static void
compute_bb_predicates (struct ipa_func_body_info *fbi,
		       struct cgraph_node *node,
		       class ipa_fn_summary *summary,
		       class ipa_node_params *params_summary)
{
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  bool done = false;
  basic_block bb;

  /* Seed edge predicates from conditional and switch statements.  */
  FOR_EACH_BB_FN (bb, my_function)
    {
      set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
      set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
    }

  /* Entry block is always executable.  */
  ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
    = edge_predicate_pool.allocate ();
  *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;

  /* A simple dataflow propagation of predicates forward in the CFG.
     TODO: work in reverse postorder.  */
  while (!done)
    {
      done = true;
      FOR_EACH_BB_FN (bb, my_function)
	{
	  /* BB's predicate is the OR over its predecessors of the
	     predecessor's predicate ANDed with the incoming edge's
	     predicate (if any).  */
	  predicate p = false;
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      if (e->src->aux)
		{
		  predicate this_bb_predicate
		    = *(predicate *) e->src->aux;
		  if (e->aux)
		    this_bb_predicate &= (*(class predicate *) e->aux);
		  p = p.or_with (summary->conds, this_bb_predicate);
		  /* Once the predicate is true it cannot grow further.  */
		  if (p == true)
		    break;
		}
	    }
	  if (p != false)
	    {
	      basic_block pdom_bb;

	      if (!bb->aux)
		{
		  done = false;
		  bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) bb->aux) = p;
		}
	      else if (p != *(predicate *) bb->aux)
		{
		  /* This OR operation is needed to ensure monotonous data flow
		     in the case we hit the limit on number of clauses and the
		     and/or operations above give approximate answers.  */
		  p = p.or_with (summary->conds, *(predicate *)bb->aux);
		  if (p != *(predicate *) bb->aux)
		    {
		      done = false;
		      *((predicate *) bb->aux) = p;
		    }
		}

	      /* For switch/if statement, we can OR-combine predicates of all
		 its cases/branches to get predicate for basic block in their
		 convergence point, but sometimes this will generate very
		 complicated predicate.  Actually, we can get simplified
		 predicate in another way by using the fact that predicate
		 for a basic block must also hold true for its post dominators.
		 To be specific, basic block in convergence point of
		 conditional statement should include predicate of the
		 statement.  */
	      pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	      if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
		;
	      else if (!pdom_bb->aux)
		{
		  done = false;
		  pdom_bb->aux = edge_predicate_pool.allocate ();
		  *((predicate *) pdom_bb->aux) = p;
		}
	      else if (p != *(predicate *) pdom_bb->aux)
		{
		  p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
		  if (p != *(predicate *) pdom_bb->aux)
		    {
		      done = false;
		      *((predicate *) pdom_bb->aux) = p;
		    }
		}
	    }
	}
    }
}
1775
1776
1777 /* Return predicate specifying when the STMT might have result that is not
1778 a compile time constant. */
1779
static predicate
will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
				    class ipa_fn_summary *summary,
				    class ipa_node_params *params_summary,
				    tree expr,
				    vec<predicate> nonconstant_names)
{
  tree parm;
  int index;

  /* Unary operations do not affect constancy; look through them.  */
  while (UNARY_CLASS_P (expr))
    expr = TREE_OPERAND (expr, 0);

  /* An unmodified parameter is non-constant exactly when that parameter
     is not known to be a compile-time constant.  */
  parm = unmodified_parm (fbi, NULL, expr, NULL);
  if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
    return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
			  predicate::changed, NULL_TREE);
  if (is_gimple_min_invariant (expr))
    return false;
  if (TREE_CODE (expr) == SSA_NAME)
    return nonconstant_names[SSA_NAME_VERSION (expr)];
  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
    {
      /* The result is non-constant if either operand is.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      return p1.or_with (summary->conds, p2);
    }
  else if (TREE_CODE (expr) == COND_EXPR)
    {
      /* Combine the condition with both arms; short-circuit as soon as
	 any sub-predicate becomes always-true.  */
      predicate p1
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 0),
					      nonconstant_names);
      if (p1 == true)
	return p1;

      predicate p2
	= will_be_nonconstant_expr_predicate (fbi, summary,
					      params_summary,
					      TREE_OPERAND (expr, 1),
					      nonconstant_names);
      if (p2 == true)
	return p2;
      p1 = p1.or_with (summary->conds, p2);
      p2 = will_be_nonconstant_expr_predicate (fbi, summary,
					       params_summary,
					       TREE_OPERAND (expr, 2),
					       nonconstant_names);
      return p2.or_with (summary->conds, p1);
    }
  else if (TREE_CODE (expr) == CALL_EXPR)
    return true;
  else
    {
      debug_tree (expr);
      gcc_unreachable ();
    }
  return false;
}
1851
1852
1853 /* Return predicate specifying when the STMT might have result that is not
1854 a compile time constant. */
1855
static predicate
will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
			       class ipa_fn_summary *summary,
			       class ipa_node_params *params_summary,
			       gimple *stmt,
			       vec<predicate> nonconstant_names)
{
  predicate p = true;
  ssa_op_iter iter;
  tree use;
  tree param_type = NULL_TREE;
  predicate op_non_const;
  bool is_load;
  int base_index;
  struct agg_position_info aggpos;

  /* What statements might be optimized away
     when their arguments are constant.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      && gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_SWITCH
      && (gimple_code (stmt) != GIMPLE_CALL
	  || !(gimple_call_flags (stmt) & ECF_CONST)))
    return p;

  /* Stores will stay anyway.  */
  if (gimple_store_p (stmt))
    return p;

  is_load = gimple_assign_load_p (stmt);

  /* Loads can be optimized when the value is known.  */
  if (is_load)
    {
      tree op = gimple_assign_rhs1 (stmt);
      if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
				 &aggpos))
	return p;
    }
  else
    base_index = -1;

  /* See if we understand all operands before we start
     adding conditionals.  */
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      /* For arguments we can build a condition.  */
      if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
	continue;
      if (TREE_CODE (use) != SSA_NAME)
	return p;
      /* If we know when operand is constant,
	 we still can say something useful.  */
      if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
	continue;
      return p;
    }

  /* Second pass: OR together the non-constancy predicates of all
     operands (and of the loaded location for loads).  */
  if (is_load)
    op_non_const =
      add_condition (summary, params_summary,
		     base_index, param_type, &aggpos,
		     predicate::changed, NULL_TREE);
  else
    op_non_const = false;
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
    {
      tree parm = unmodified_parm (fbi, stmt, use, NULL);
      int index;

      if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
	{
	  if (index != base_index)
	    p = add_condition (summary, params_summary, index,
			       TREE_TYPE (parm), NULL,
			       predicate::changed, NULL_TREE);
	  else
	    continue;
	}
      else
	p = nonconstant_names[SSA_NAME_VERSION (use)];
      op_non_const = p.or_with (summary->conds, op_non_const);
    }
  /* Record the resulting predicate for the SSA name this statement
     defines so later statements can reuse it.  */
  if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
      && gimple_op (stmt, 0)
      && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
    nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
      = op_non_const;
  return op_non_const;
}
1947
struct record_modified_bb_info
{
  /* The operand (parameter value) whose modifications are tracked;
     used only for dump output.  */
  tree op;
  /* Set of indices of basic blocks where the value may be set.  */
  bitmap bb_set;
  /* The using statement itself; definitions coming from it are ignored.  */
  gimple *stmt;
};
1954
/* Value is initialized in INIT_BB and used in USE_BB.  We want to compute
   the probability that it changes between executions of USE_BB.
   INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
   is in a different loop nest, we can do better.
   This is all just an estimate.  In theory we look for a minimal cut
   separating INIT_BB and USE_BB, but we only want to anticipate loop
   invariant motion anyway.  */
1962
1963 static basic_block
1964 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1965 {
1966 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1967 if (l && l->header->count < init_bb->count)
1968 return l->header;
1969 return init_bb;
1970 }
1971
1972 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1973 set except for info->stmt. */
1974
static bool
record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
{
  struct record_modified_bb_info *info =
    (struct record_modified_bb_info *) data;
  /* Ignore the use statement itself.  */
  if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
    return false;
  /* Clobbers (end-of-scope markers) do not modify the value.  */
  if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
    return false;
  /* Record the block of the definition; default defs count as being set
     in the entry block, and other defs are mapped to the cheapest
     enclosing-loop header via get_minimal_bb.  */
  bitmap_set_bit (info->bb_set,
		  SSA_NAME_IS_DEFAULT_DEF (vdef)
		  ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
		  : get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
  if (dump_file)
    {
      fprintf (dump_file, "     Param ");
      print_generic_expr (dump_file, info->op, TDF_SLIM);
      fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
	       gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
	       get_minimal_bb
			 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
			  gimple_bb (info->stmt))->index);
      print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
    }
  /* Return false to continue the walk over remaining vdefs.  */
  return false;
}
2003
2004 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2005 will change since last invocation of STMT.
2006
2007 Value 0 is reserved for compile time invariants.
2008 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2009 ought to be REG_BR_PROB_BASE / estimated_iters. */
2010
static int
param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
{
  tree op = gimple_call_arg (stmt, i);
  basic_block bb = gimple_bb (stmt);

  if (TREE_CODE (op) == WITH_SIZE_EXPR)
    op = TREE_OPERAND (op, 0);

  tree base = get_base_address (op);

  /* Global invariants never change.  */
  if (is_gimple_min_invariant (base))
    return 0;

  /* We would have to do non-trivial analysis to really work out what
     is the probability of value to change (i.e. when init statement
     is in a sibling loop of the call).

     We do a conservative estimate: when call is executed N times more often
     than the statement defining value, we take the frequency 1/N.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      profile_count init_count;

      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;

      if (SSA_NAME_IS_DEFAULT_DEF (base))
	init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      else
	init_count = get_minimal_bb
		      (gimple_bb (SSA_NAME_DEF_STMT (base)),
		       gimple_bb (stmt))->count;

      if (init_count < bb->count)
	return MAX ((init_count.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
  else
    {
      /* BASE is a memory reference: collect all blocks where it may be
	 modified and scale by the most frequent one.  */
      ao_ref refd;
      profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
      struct record_modified_bb_info info;
      tree init = ctor_for_folding (base);

      if (init != error_mark_node)
	return 0;
      if (!bb->count.nonzero_p ())
	return REG_BR_PROB_BASE;
      if (dump_file)
	{
	  fprintf (dump_file, "     Analyzing param change probability of ");
	  print_generic_expr (dump_file, op, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
      ao_ref_init (&refd, op);
      info.op = op;
      info.stmt = stmt;
      info.bb_set = BITMAP_ALLOC (NULL);
      int walked
	= walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
			      NULL, NULL, fbi->aa_walk_budget);
      if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
	{
	  /* Either the AA walk was too expensive, or the value may be set
	     in the very block using it: assume it always changes.  */
	  if (dump_file)
	    {
	      if (walked < 0)
		fprintf (dump_file, "     Ran out of AA walking budget.\n");
	      else
		fprintf (dump_file, "     Set in same BB as used.\n");
	    }
	  BITMAP_FREE (info.bb_set);
	  return REG_BR_PROB_BASE;
	}

      bitmap_iterator bi;
      unsigned index;
      /* Lookup the most frequent update of the value and believe that
	 it dominates all the other; precise analysis here is difficult.  */
      EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
	max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
      if (dump_file)
	{
	  fprintf (dump_file, "     Set with count ");
	  max.dump (dump_file);
	  fprintf (dump_file, " and used with count ");
	  bb->count.dump (dump_file);
	  fprintf (dump_file, " freq %f\n",
		   max.to_sreal_scale (bb->count).to_double ());
	}

      BITMAP_FREE (info.bb_set);
      if (max < bb->count)
	return MAX ((max.to_sreal_scale (bb->count)
		     * REG_BR_PROB_BASE).to_int (), 1);
      return REG_BR_PROB_BASE;
    }
}
2111
2112 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2113 sub-graph and if the predicate the condition depends on is known. If so,
2114 return true and store the pointer the predicate in *P. */
2115
2116 static bool
2117 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2118 ipa_fn_summary *summary,
2119 class ipa_node_params *params_summary,
2120 basic_block bb,
2121 predicate *p,
2122 vec<predicate> nonconstant_names)
2123 {
2124 edge e;
2125 edge_iterator ei;
2126 basic_block first_bb = NULL;
2127 gimple *stmt;
2128
2129 if (single_pred_p (bb))
2130 {
2131 *p = false;
2132 return true;
2133 }
2134
2135 FOR_EACH_EDGE (e, ei, bb->preds)
2136 {
2137 if (single_succ_p (e->src))
2138 {
2139 if (!single_pred_p (e->src))
2140 return false;
2141 if (!first_bb)
2142 first_bb = single_pred (e->src);
2143 else if (single_pred (e->src) != first_bb)
2144 return false;
2145 }
2146 else
2147 {
2148 if (!first_bb)
2149 first_bb = e->src;
2150 else if (e->src != first_bb)
2151 return false;
2152 }
2153 }
2154
2155 if (!first_bb)
2156 return false;
2157
2158 stmt = last_stmt (first_bb);
2159 if (!stmt
2160 || gimple_code (stmt) != GIMPLE_COND
2161 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2162 return false;
2163
2164 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2165 gimple_cond_lhs (stmt),
2166 nonconstant_names);
2167 if (*p == true)
2168 return false;
2169 else
2170 return true;
2171 }
2172
2173 /* Given a PHI statement in a function described by inline properties SUMMARY
2174 and *P being the predicate describing whether the selected PHI argument is
2175 known, store a predicate for the result of the PHI statement into
2176 NONCONSTANT_NAMES, if possible. */
2177
2178 static void
2179 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2180 predicate *p,
2181 vec<predicate> nonconstant_names)
2182 {
2183 unsigned i;
2184
2185 for (i = 0; i < gimple_phi_num_args (phi); i++)
2186 {
2187 tree arg = gimple_phi_arg (phi, i)->def;
2188 if (!is_gimple_min_invariant (arg))
2189 {
2190 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2191 *p = p->or_with (summary->conds,
2192 nonconstant_names[SSA_NAME_VERSION (arg)]);
2193 if (*p == true)
2194 return;
2195 }
2196 }
2197
2198 if (dump_file && (dump_flags & TDF_DETAILS))
2199 {
2200 fprintf (dump_file, "\t\tphi predicate: ");
2201 p->dump (dump_file, summary->conds);
2202 }
2203 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2204 }
2205
2206 /* For a typical usage of __builtin_expect (a<b, 1), we
2207 may introduce an extra relation stmt:
2208 With the builtin, we have
2209 t1 = a <= b;
2210 t2 = (long int) t1;
2211 t3 = __builtin_expect (t2, 1);
2212 if (t3 != 0)
2213 goto ...
2214 Without the builtin, we have
2215 if (a<=b)
2216 goto...
2217 This affects the size/time estimation and may have
2218 an impact on the earlier inlining.
2219 Here find this pattern and fix it up later. */
2220
2221 static gimple *
2222 find_foldable_builtin_expect (basic_block bb)
2223 {
2224 gimple_stmt_iterator bsi;
2225
2226 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2227 {
2228 gimple *stmt = gsi_stmt (bsi);
2229 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2230 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2231 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2232 {
2233 tree var = gimple_call_lhs (stmt);
2234 tree arg = gimple_call_arg (stmt, 0);
2235 use_operand_p use_p;
2236 gimple *use_stmt;
2237 bool match = false;
2238 bool done = false;
2239
2240 if (!var || !arg)
2241 continue;
2242 gcc_assert (TREE_CODE (var) == SSA_NAME);
2243
2244 while (TREE_CODE (arg) == SSA_NAME)
2245 {
2246 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2247 if (!is_gimple_assign (stmt_tmp))
2248 break;
2249 switch (gimple_assign_rhs_code (stmt_tmp))
2250 {
2251 case LT_EXPR:
2252 case LE_EXPR:
2253 case GT_EXPR:
2254 case GE_EXPR:
2255 case EQ_EXPR:
2256 case NE_EXPR:
2257 match = true;
2258 done = true;
2259 break;
2260 CASE_CONVERT:
2261 break;
2262 default:
2263 done = true;
2264 break;
2265 }
2266 if (done)
2267 break;
2268 arg = gimple_assign_rhs1 (stmt_tmp);
2269 }
2270
2271 if (match && single_imm_use (var, &use_p, &use_stmt)
2272 && gimple_code (use_stmt) == GIMPLE_COND)
2273 return use_stmt;
2274 }
2275 }
2276 return NULL;
2277 }
2278
2279 /* Return true when the basic blocks contains only clobbers followed by RESX.
2280 Such BBs are kept around to make removal of dead stores possible with
2281 presence of EH and will be optimized out by optimize_clobbers later in the
2282 game.
2283
2284 NEED_EH is used to recurse in case the clobber has non-EH predecestors
2285 that can be clobber only, too.. When it is false, the RESX is not necessary
2286 on the end of basic block. */
2287
2288 static bool
2289 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2290 {
2291 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2292 edge_iterator ei;
2293 edge e;
2294
2295 if (need_eh)
2296 {
2297 if (gsi_end_p (gsi))
2298 return false;
2299 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2300 return false;
2301 gsi_prev (&gsi);
2302 }
2303 else if (!single_succ_p (bb))
2304 return false;
2305
2306 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2307 {
2308 gimple *stmt = gsi_stmt (gsi);
2309 if (is_gimple_debug (stmt))
2310 continue;
2311 if (gimple_clobber_p (stmt))
2312 continue;
2313 if (gimple_code (stmt) == GIMPLE_LABEL)
2314 break;
2315 return false;
2316 }
2317
2318 /* See if all predecestors are either throws or clobber only BBs. */
2319 FOR_EACH_EDGE (e, ei, bb->preds)
2320 if (!(e->flags & EDGE_EH)
2321 && !clobber_only_eh_bb_p (e->src, false))
2322 return false;
2323
2324 return true;
2325 }
2326
2327 /* Return true if STMT compute a floating point expression that may be affected
2328 by -ffast-math and similar flags. */
2329
2330 static bool
2331 fp_expression_p (gimple *stmt)
2332 {
2333 ssa_op_iter i;
2334 tree op;
2335
2336 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2337 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2338 return true;
2339 return false;
2340 }
2341
/* Analyze function body for NODE.
   EARLY indicates run from early optimization pipeline.

   Walk the CFG of NODE computing size/time entries of the function
   summary guarded by predicates, per-call summaries for every call
   statement, and (when not EARLY) loop iteration/stride hint
   predicates.  The result is stored into the ipa_fn_summary and
   ipa_size_summary of NODE.  */

static void
analyze_function_body (struct cgraph_node *node, bool early)
{
  sreal time = param_uninlined_function_time;
  /* Estimate static overhead for function prologue/epilogue and alignment. */
  int size = param_uninlined_function_insns;
  /* Benefits are scaled by probability of elimination that is in range
     <0,2>.  */
  basic_block bb;
  struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
  sreal freq;
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
  predicate bb_predicate;
  struct ipa_func_body_info fbi;
  vec<predicate> nonconstant_names = vNULL;
  int nblocks, n;
  int *order;
  gimple *fix_builtin_expect_stmt;

  gcc_assert (my_function && my_function->cfg);
  gcc_assert (cfun == my_function);

  memset(&fbi, 0, sizeof(fbi));
  /* Discard any stale summary data; the tables are rebuilt below.  */
  vec_free (info->conds);
  info->conds = NULL;
  vec_free (info->size_time_table);
  info->size_time_table = NULL;

  /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
     so we can produce proper inline hints.

     When optimizing and analyzing for early inliner, initialize node params
     so we can produce correct BB predicates.  */

  if (opt_for_fn (node->decl, optimize))
    {
      calculate_dominance_info (CDI_DOMINATORS);
      calculate_dominance_info (CDI_POST_DOMINATORS);
      if (!early)
	loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
      else
	{
	  ipa_check_create_node_params ();
	  ipa_initialize_node_params (node);
	}

      if (ipa_node_params_sum)
	{
	  fbi.node = node;
	  fbi.info = IPA_NODE_REF (node);
	  fbi.bb_infos = vNULL;
	  fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
	  fbi.param_count = count_formal_params (node->decl);
	  fbi.aa_walk_budget = param_ipa_max_aa_steps;

	  nonconstant_names.safe_grow_cleared
	    (SSANAMES (my_function)->length ());
	}
    }

  if (dump_file)
    fprintf (dump_file, "\nAnalyzing function body size: %s\n",
	     node->name ());

  /* When we run into maximal number of entries, we assign everything to the
     constant truth case.  Be sure to have it in list. */
  bb_predicate = true;
  info->account_size_time (0, 0, bb_predicate, bb_predicate);

  /* Account the out-of-line overhead that disappears when the function
     is inlined.  */
  bb_predicate = predicate::not_inlined ();
  info->account_size_time (param_uninlined_function_insns
			   * ipa_fn_summary::size_scale,
			   param_uninlined_function_time,
			   bb_predicate,
			   bb_predicate);

  if (fbi.info)
    compute_bb_predicates (&fbi, node, info, params_summary);
  /* Walk basic blocks in reverse post-order so definitions are seen
     before uses.  */
  order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  nblocks = pre_and_rev_post_order_compute (NULL, order, false);
  for (n = 0; n < nblocks; n++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
      freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
      if (clobber_only_eh_bb_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "\n Ignoring BB %i;"
		     " it will be optimized away by cleanup_clobbers\n",
		     bb->index);
	  continue;
	}

      /* TODO: Obviously predicates can be propagated down across CFG.  */
      if (fbi.info)
	{
	  if (bb->aux)
	    bb_predicate = *(predicate *) bb->aux;
	  else
	    bb_predicate = false;
	}
      else
	bb_predicate = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "\n BB %i predicate:", bb->index);
	  bb_predicate.dump (dump_file, info->conds);
	}

      /* Compute predicates for PHI results.  Only the first PHI needs the
	 (half) diamond check; the remaining PHIs in the same block share
	 the same controlling condition.  */
      if (fbi.info && nonconstant_names.exists ())
	{
	  predicate phi_predicate;
	  bool first_phi = true;

	  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      if (first_phi
		  && !phi_result_unknown_predicate (&fbi, info,
						    params_summary,
						    bb,
						    &phi_predicate,
						    nonconstant_names))
		break;
	      first_phi = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  ");
		  print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
		}
	      predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
					nonconstant_names);
	    }
	}

      fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);

      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
	  int this_time = estimate_num_insns (stmt, &eni_time_weights);
	  int prob;
	  predicate will_be_nonconstant;

	  /* This relation stmt should be folded after we remove
	     builtin_expect call.  Adjust the cost here.  */
	  if (stmt == fix_builtin_expect_stmt)
	    {
	      this_size--;
	      this_time--;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  ");
	      print_gimple_stmt (dump_file, stmt, 0);
	      fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
		       freq.to_double (), this_size,
		       this_time);
	    }

	  if (is_gimple_call (stmt)
	      && !gimple_call_internal_p (stmt))
	    {
	      struct cgraph_edge *edge = node->get_edge (stmt);
	      ipa_call_summary *es = ipa_call_summaries->get_create (edge);

	      /* Special case: results of BUILT_IN_CONSTANT_P will be always
		 resolved as constant.  We however don't want to optimize
		 out the cgraph edges.  */
	      if (nonconstant_names.exists ()
		  && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
		  && gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
		{
		  predicate false_p = false;
		  nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
		    = false_p;
		}
	      /* Record how likely each actual argument is to change
		 between invocations of the call.  */
	      if (ipa_node_params_sum)
		{
		  int count = gimple_call_num_args (stmt);
		  int i;

		  if (count)
		    es->param.safe_grow_cleared (count);
		  for (i = 0; i < count; i++)
		    {
		      int prob = param_change_prob (&fbi, stmt, i);
		      gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
		      es->param[i].change_prob = prob;
		    }
		}

	      es->call_stmt_size = this_size;
	      es->call_stmt_time = this_time;
	      es->loop_depth = bb_loop_depth (bb);
	      edge_set_predicate (edge, &bb_predicate);
	      /* Keep the summary of the speculative indirect edge in sync
		 with the direct one.  */
	      if (edge->speculative)
		{
		  cgraph_edge *direct, *indirect;
		  ipa_ref *ref;
		  edge->speculative_call_info (direct, indirect, ref);
		  gcc_assert (direct == edge);
		  ipa_call_summary *es2
			 = ipa_call_summaries->get_create (indirect);
		  ipa_call_summaries->duplicate (edge, indirect,
						 es, es2);
		}
	    }

	  /* TODO: When conditional jump or switch is known to be constant, but
	     we did not translate it into the predicates, we really can account
	     just maximum of the possible paths.  */
	  if (fbi.info)
	    will_be_nonconstant
	      = will_be_nonconstant_predicate (&fbi, info, params_summary,
					       stmt, nonconstant_names);
	  else
	    will_be_nonconstant = true;
	  if (this_time || this_size)
	    {
	      sreal final_time = (sreal)this_time * freq;

	      prob = eliminated_by_inlining_prob (&fbi, stmt);
	      if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "\t\t50%% will be eliminated by inlining\n");
	      if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "\t\tWill be eliminated by inlining\n");

	      class predicate p = bb_predicate & will_be_nonconstant;

	      /* We can ignore statement when we proved it is never going
		 to happen, but we cannot do that for call statements
		 because edges are accounted specially.  */

	      if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
		{
		  time += final_time;
		  size += this_size;
		}

	      /* We account everything but the calls.  Calls have their own
		 size/time info attached to cgraph edges.  This is necessary
		 in order to make the cost disappear after inlining.  */
	      if (!is_gimple_call (stmt))
		{
		  /* PROB is in range <0,2>: 0 = never eliminated by
		     inlining, 1 = eliminated half the time, 2 = always
		     eliminated.  Split the cost between the not-inlined
		     and unconditional entries accordingly.  */
		  if (prob)
		    {
		      predicate ip = bb_predicate & predicate::not_inlined ();
		      info->account_size_time (this_size * prob,
					       (final_time * prob) / 2, ip,
					       p);
		    }
		  if (prob != 2)
		    info->account_size_time (this_size * (2 - prob),
					     (final_time * (2 - prob) / 2),
					     bb_predicate,
					     p);
		}

	      if (!info->fp_expressions && fp_expression_p (stmt))
		{
		  info->fp_expressions = true;
		  if (dump_file)
		    fprintf (dump_file, "   fp_expression set\n");
		}
	    }

	  /* Account cost of address calculations in the statements.  */
	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
	    {
	      for (tree op = gimple_op (stmt, i);
		   op && handled_component_p (op);
		   op = TREE_OPERAND (op, 0))
		if ((TREE_CODE (op) == ARRAY_REF
		     || TREE_CODE (op) == ARRAY_RANGE_REF)
		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
		  {
		    predicate p = bb_predicate;
		    if (fbi.info)
		      p = p & will_be_nonconstant_expr_predicate
				 (&fbi, info, params_summary,
				  TREE_OPERAND (op, 1),
				  nonconstant_names);
		    if (p != false)
		      {
			time += freq;
			size += 1;
			if (dump_file)
			  fprintf (dump_file,
				   "\t\tAccounting address calculation.\n");
			info->account_size_time (ipa_fn_summary::size_scale,
						 freq,
						 bb_predicate,
						 p);
		      }
		  }
	    }

	}
    }
  free (order);

  /* Compute loop hint predicates; requires SCEV, so this is done only
     in the late (IPA) analysis where the loop optimizer is live.  */
  if (nonconstant_names.exists () && !early)
    {
      class loop *loop;
      predicate loop_iterations = true;
      predicate loop_stride = true;

      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loops_dump (dump_file, NULL, 0);
      scev_initialize ();
      FOR_EACH_LOOP (loop, 0)
	{
	  vec<edge> exits;
	  edge ex;
	  unsigned int j;
	  class tree_niter_desc niter_desc;
	  bb_predicate = *(predicate *) loop->header->aux;

	  exits = get_loop_exit_edges (loop);
	  FOR_EACH_VEC_ELT (exits, j, ex)
	    if (number_of_iterations_exit (loop, ex, &niter_desc, false)
		&& !is_gimple_min_invariant (niter_desc.niter))
	    {
	      predicate will_be_nonconstant
		= will_be_nonconstant_expr_predicate (&fbi, info,
						      params_summary,
						      niter_desc.niter,
						      nonconstant_names);
	      if (will_be_nonconstant != true)
		will_be_nonconstant = bb_predicate & will_be_nonconstant;
	      if (will_be_nonconstant != true
		  && will_be_nonconstant != false)
		/* This is slightly imprecise.  We may want to represent each
		   loop with independent predicate.  */
		loop_iterations &= will_be_nonconstant;
	    }
	  exits.release ();
	}

      /* To avoid quadratic behavior we analyze stride predicates only
	 with respect to the containing loop.  Thus we simply iterate
	 over all defs in the outermost loop body.  */
      for (loop = loops_for_fn (cfun)->tree_root->inner;
	   loop != NULL; loop = loop->next)
	{
	  basic_block *body = get_loop_body (loop);
	  for (unsigned i = 0; i < loop->num_nodes; i++)
	    {
	      gimple_stmt_iterator gsi;
	      bb_predicate = *(predicate *) body[i]->aux;
	      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
		   gsi_next (&gsi))
		{
		  gimple *stmt = gsi_stmt (gsi);

		  if (!is_gimple_assign (stmt))
		    continue;

		  tree def = gimple_assign_lhs (stmt);
		  if (TREE_CODE (def) != SSA_NAME)
		    continue;

		  /* Only non-invariant strides are interesting; an
		     invariant one cannot become constant by cloning.  */
		  affine_iv iv;
		  if (!simple_iv (loop_containing_stmt (stmt),
				  loop_containing_stmt (stmt),
				  def, &iv, true)
		      || is_gimple_min_invariant (iv.step))
		    continue;

		  predicate will_be_nonconstant
		    = will_be_nonconstant_expr_predicate (&fbi, info,
							  params_summary,
							  iv.step,
							  nonconstant_names);
		  if (will_be_nonconstant != true)
		    will_be_nonconstant = bb_predicate & will_be_nonconstant;
		  if (will_be_nonconstant != true
		      && will_be_nonconstant != false)
		    /* This is slightly imprecise.  We may want to represent
		       each loop with independent predicate.  */
		    loop_stride = loop_stride & will_be_nonconstant;
		}
	    }
	  free (body);
	}
      ipa_fn_summary *s = ipa_fn_summaries->get (node);
      set_hint_predicate (&s->loop_iterations, loop_iterations);
      set_hint_predicate (&s->loop_stride, loop_stride);
      scev_finalize ();
    }
  /* Release the predicates stored in BB and edge aux fields by
     compute_bb_predicates.  */
  FOR_ALL_BB_FN (bb, my_function)
    {
      edge e;
      edge_iterator ei;

      if (bb->aux)
	edge_predicate_pool.remove ((predicate *)bb->aux);
      bb->aux = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux)
	    edge_predicate_pool.remove ((predicate *) e->aux);
	  e->aux = NULL;
	}
    }
  ipa_fn_summary *s = ipa_fn_summaries->get (node);
  ipa_size_summary *ss = ipa_size_summaries->get (node);
  s->time = time;
  ss->self_size = size;
  nonconstant_names.release ();
  ipa_release_body_info (&fbi);
  /* Tear down the infrastructure initialized at the top, mirroring the
     early/late split there.  */
  if (opt_for_fn (node->decl, optimize))
    {
      if (!early)
	loop_optimizer_finalize ();
      else if (!ipa_edge_args_sum)
	ipa_free_all_node_params ();
      free_dominance_info (CDI_DOMINATORS);
      free_dominance_info (CDI_POST_DOMINATORS);
    }
  if (dump_file)
    {
      fprintf (dump_file, "\n");
      ipa_dump_fn_summary (dump_file, node);
    }
}
2779
2780
/* Compute function summary.
   EARLY is true when we compute parameters during early opts.

   Rebuilds the ipa_fn_summary and ipa_size_summary of NODE from
   scratch: stack size estimate, inlinability, signature-change
   permission, and (via analyze_function_body) the size/time tables.
   Thunks get a synthesized summary instead of a body walk.  */

void
compute_fn_summary (struct cgraph_node *node, bool early)
{
  HOST_WIDE_INT self_stack_size;
  struct cgraph_edge *e;

  /* Summaries are attached to the master node only, never to inline
     clones.  */
  gcc_assert (!node->inlined_to);

  if (!ipa_fn_summaries)
    ipa_fn_summary_alloc ();

  /* Create a new ipa_fn_summary.  */
  ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
  ipa_fn_summaries->remove (node);
  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);

  /* Estimate the stack size for the function if we're optimizing.  */
  self_stack_size = optimize && !node->thunk.thunk_p
		    ? estimated_stack_frame_size (node) : 0;
  size_info->estimated_self_stack_size = self_stack_size;
  info->estimated_stack_size = self_stack_size;

  if (node->thunk.thunk_p)
    {
      /* Thunks have no body to analyze; synthesize a summary that
	 models the adjustment code plus the tail call.  */
      ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
      predicate t = true;

      node->can_change_signature = false;
      es->call_stmt_size = eni_size_weights.call_cost;
      es->call_stmt_time = eni_time_weights.call_cost;
      info->account_size_time (ipa_fn_summary::size_scale
			       * param_uninlined_function_thunk_insns,
			       param_uninlined_function_thunk_time, t, t);
      t = predicate::not_inlined ();
      info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
      ipa_update_overall_fn_summary (node);
      size_info->self_size = size_info->size;
      /* Variadic thunks cannot be inlined since there is no way to
	 forward the stdarg arguments.  */
      if (stdarg_p (TREE_TYPE (node->decl)))
	{
	  info->inlinable = false;
	  node->callees->inline_failed = CIF_VARIADIC_THUNK;
	}
      else
	info->inlinable = true;
    }
  else
    {
      /* Even is_gimple_min_invariant rely on current_function_decl.  */
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));

      /* Can this function be inlined at all?  */
      if (!opt_for_fn (node->decl, optimize)
	  && !lookup_attribute ("always_inline",
				DECL_ATTRIBUTES (node->decl)))
	info->inlinable = false;
      else
	info->inlinable = tree_inlinable_function_p (node->decl);

      /* Type attributes can use parameter indices to describe them.  */
      if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
	  /* Likewise for #pragma omp declare simd functions or functions
	     with simd attribute.  */
	  || lookup_attribute ("omp declare simd",
			       DECL_ATTRIBUTES (node->decl)))
	node->can_change_signature = false;
      else
	{
	  /* Otherwise, inlinable functions always can change signature.  */
	  if (info->inlinable)
	    node->can_change_signature = true;
	  else
	    {
	      /* Functions calling builtin_apply cannot change signature.  */
	      for (e = node->callees; e; e = e->next_callee)
		{
		  tree cdecl = e->callee->decl;
		  if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
		      || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
		    break;
		}
	      node->can_change_signature = !e;
	    }
	}
      analyze_function_body (node, early);
      pop_cfun ();
    }
  /* Record whether any callee is comdat-local; such calls constrain
     how the node may be partitioned.  */
  for (e = node->callees; e; e = e->next_callee)
    if (e->callee->comdat_local_p ())
      break;
  node->calls_comdat_local = (e != NULL);

  /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
  size_info->size = size_info->self_size;
  info->estimated_stack_size = size_info->estimated_self_stack_size;

  /* Code above should compute exactly the same result as
     ipa_update_overall_fn_summary but because computation happens in
     different order the roundoff errors result in slight changes.  */
  ipa_update_overall_fn_summary (node);
  /* In LTO mode we may have speculative edges set.  */
  gcc_assert (in_lto_p || size_info->size == size_info->self_size);
}
2887
2888
2889 /* Compute parameters of functions used by inliner using
2890 current_function_decl. */
2891
2892 static unsigned int
2893 compute_fn_summary_for_current (void)
2894 {
2895 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2896 return 0;
2897 }
2898
2899 /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2900 KNOWN_CONTEXTS and KNOWN_AGGS. */
2901
2902 static bool
2903 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2904 int *size, int *time,
2905 vec<tree> known_vals,
2906 vec<ipa_polymorphic_call_context> known_contexts,
2907 vec<ipa_agg_value_set> known_aggs)
2908 {
2909 tree target;
2910 struct cgraph_node *callee;
2911 class ipa_fn_summary *isummary;
2912 enum availability avail;
2913 bool speculative;
2914
2915 if (!known_vals.exists () && !known_contexts.exists ())
2916 return false;
2917 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2918 return false;
2919
2920 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2921 known_aggs, &speculative);
2922 if (!target || speculative)
2923 return false;
2924
2925 /* Account for difference in cost between indirect and direct calls. */
2926 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2927 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2928 gcc_checking_assert (*time >= 0);
2929 gcc_checking_assert (*size >= 0);
2930
2931 callee = cgraph_node::get (target);
2932 if (!callee || !callee->definition)
2933 return false;
2934 callee = callee->function_symbol (&avail);
2935 if (avail < AVAIL_AVAILABLE)
2936 return false;
2937 isummary = ipa_fn_summaries->get (callee);
2938 if (isummary == NULL)
2939 return false;
2940
2941 return isummary->inlinable;
2942 }
2943
2944 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2945 handle edge E with probability PROB.
2946 Set HINTS if edge may be devirtualized.
2947 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2948 site. */
2949
2950 static inline void
2951 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2952 sreal *time,
2953 vec<tree> known_vals,
2954 vec<ipa_polymorphic_call_context> known_contexts,
2955 vec<ipa_agg_value_set> known_aggs,
2956 ipa_hints *hints)
2957 {
2958 class ipa_call_summary *es = ipa_call_summaries->get (e);
2959 int call_size = es->call_stmt_size;
2960 int call_time = es->call_stmt_time;
2961 int cur_size;
2962
2963 if (!e->callee && hints && e->maybe_hot_p ()
2964 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2965 known_vals, known_contexts, known_aggs))
2966 *hints |= INLINE_HINT_indirect_call;
2967 cur_size = call_size * ipa_fn_summary::size_scale;
2968 *size += cur_size;
2969 if (min_size)
2970 *min_size += cur_size;
2971 if (time)
2972 *time += ((sreal)call_time) * e->sreal_frequency ();
2973 }
2974
2975
2976
2977 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2978 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2979 describe context of the call site. */
2980
2981 static void
2982 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2983 int *min_size, sreal *time,
2984 ipa_hints *hints,
2985 clause_t possible_truths,
2986 vec<tree> known_vals,
2987 vec<ipa_polymorphic_call_context> known_contexts,
2988 vec<ipa_agg_value_set> known_aggs)
2989 {
2990 struct cgraph_edge *e;
2991 for (e = node->callees; e; e = e->next_callee)
2992 {
2993 if (!e->inline_failed)
2994 {
2995 gcc_checking_assert (!ipa_call_summaries->get (e));
2996 estimate_calls_size_and_time (e->callee, size, min_size, time,
2997 hints,
2998 possible_truths,
2999 known_vals, known_contexts,
3000 known_aggs);
3001 continue;
3002 }
3003 class ipa_call_summary *es = ipa_call_summaries->get (e);
3004
3005 /* Do not care about zero sized builtins. */
3006 if (!es->call_stmt_size)
3007 {
3008 gcc_checking_assert (!es->call_stmt_time);
3009 continue;
3010 }
3011 if (!es->predicate
3012 || es->predicate->evaluate (possible_truths))
3013 {
3014 /* Predicates of calls shall not use NOT_CHANGED codes,
3015 sowe do not need to compute probabilities. */
3016 estimate_edge_size_and_time (e, size,
3017 es->predicate ? NULL : min_size,
3018 time,
3019 known_vals, known_contexts,
3020 known_aggs, hints);
3021 }
3022 }
3023 for (e = node->indirect_calls; e; e = e->next_callee)
3024 {
3025 class ipa_call_summary *es = ipa_call_summaries->get (e);
3026 if (!es->predicate
3027 || es->predicate->evaluate (possible_truths))
3028 estimate_edge_size_and_time (e, size,
3029 es->predicate ? NULL : min_size,
3030 time,
3031 known_vals, known_contexts, known_aggs,
3032 hints);
3033 }
3034 }
3035
/* Default constructor for ipa call context.
   Memory allocation of known_vals, known_contexts
   and known_aggs vectors is owned by the caller, but can
   be released by ipa_call_context::release.

   inline_param_summary is owned by the caller.

   NODE is the function being evaluated; POSSIBLE_TRUTHS and
   NONSPEC_POSSIBLE_TRUTHS are clauses of known predicate conditions
   (the latter ignoring speculation).  All members are stored by value;
   no copies of the vectors are made here.  */
ipa_call_context::ipa_call_context (cgraph_node *node,
				    clause_t possible_truths,
				    clause_t nonspec_possible_truths,
				    vec<tree> known_vals,
				    vec<ipa_polymorphic_call_context>
				    known_contexts,
				    vec<ipa_agg_value_set> known_aggs,
				    vec<inline_param_summary>
				    inline_param_summary)
: m_node (node), m_possible_truths (possible_truths),
  m_nonspec_possible_truths (nonspec_possible_truths),
  m_inline_param_summary (inline_param_summary),
  m_known_vals (known_vals),
  m_known_contexts (known_contexts),
  m_known_aggs (known_aggs)
{
}
3059
3060 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3061
3062 void
3063 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3064 {
3065 m_node = ctx.m_node;
3066 m_possible_truths = ctx.m_possible_truths;
3067 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3068 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3069 unsigned int nargs = params_summary
3070 ? ipa_get_param_count (params_summary) : 0;
3071
3072 m_inline_param_summary = vNULL;
3073 /* Copy the info only if there is at least one useful entry. */
3074 if (ctx.m_inline_param_summary.exists ())
3075 {
3076 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3077
3078 for (unsigned int i = 0; i < n; i++)
3079 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3080 && !ctx.m_inline_param_summary[i].useless_p ())
3081 {
3082 m_inline_param_summary
3083 = ctx.m_inline_param_summary.copy ();
3084 break;
3085 }
3086 }
3087 m_known_vals = vNULL;
3088 if (ctx.m_known_vals.exists ())
3089 {
3090 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3091
3092 for (unsigned int i = 0; i < n; i++)
3093 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3094 && ctx.m_known_vals[i])
3095 {
3096 m_known_vals = ctx.m_known_vals.copy ();
3097 break;
3098 }
3099 }
3100
3101 m_known_contexts = vNULL;
3102 if (ctx.m_known_contexts.exists ())
3103 {
3104 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3105
3106 for (unsigned int i = 0; i < n; i++)
3107 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3108 && !ctx.m_known_contexts[i].useless_p ())
3109 {
3110 m_known_contexts = ctx.m_known_contexts.copy ();
3111 break;
3112 }
3113 }
3114
3115 m_known_aggs = vNULL;
3116 if (ctx.m_known_aggs.exists ())
3117 {
3118 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3119
3120 for (unsigned int i = 0; i < n; i++)
3121 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3122 && !ctx.m_known_aggs[i].is_empty ())
3123 {
3124 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3125 break;
3126 }
3127 }
3128 }
3129
/* Release memory used by known_vals/contexts/aggs vectors.
   If ALL is true release also inline_param_summary.
   This happens when context was previously duplicated to be stored
   into cache.  */

void
ipa_call_context::release (bool all)
{
  /* See if context is initialized at first place.  */
  if (!m_node)
    return;
  m_known_vals.release ();
  m_known_contexts.release ();
  /* Aggregate values own their inner vectors, so they need a deep
     release.  */
  ipa_release_agg_values (m_known_aggs);
  if (all)
    m_inline_param_summary.release ();
}
3147
/* Return true if CTX describes the same call context as THIS.
   Used by the call-context cache to avoid storing duplicates.  */

bool
ipa_call_context::equal_to (const ipa_call_context &ctx)
{
  /* Contexts for different nodes or with different evaluated predicate
     clauses can never be equal.  */
  if (m_node != ctx.m_node
      || m_possible_truths != ctx.m_possible_truths
      || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
    return false;

  class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
  unsigned int nargs = params_summary
		       ? ipa_get_param_count (params_summary) : 0;

  /* Each per-parameter vector below may be missing entirely or shorter
     than NARGS; a missing or out-of-range entry is equivalent to an
     explicitly "useless"/unknown one.  Parameters that cannot influence
     the summary (not used by predicates or calls) are skipped so that
     irrelevant differences do not defeat cache hits.  */

  if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
	    continue;
	  /* THIS has no useful entry at I: CTX must not have one either.  */
	  if (i >= m_inline_param_summary.length ()
	      || m_inline_param_summary[i].useless_p ())
	    {
	      if (i < ctx.m_inline_param_summary.length ()
		  && !ctx.m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  /* CTX has no useful entry at I: THIS must not have one either.  */
	  if (i >= ctx.m_inline_param_summary.length ()
	      || ctx.m_inline_param_summary[i].useless_p ())
	    {
	      if (i < m_inline_param_summary.length ()
		  && !m_inline_param_summary[i].useless_p ())
		return false;
	      continue;
	    }
	  /* Both entries exist and are useful; compare contents.  */
	  if (!m_inline_param_summary[i].equal_to
	     	 (ctx.m_inline_param_summary[i]))
	    return false;
	}
    }
  if (m_known_vals.exists () || ctx.m_known_vals.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  /* NULL tree means "value not known"; treat missing entries the
	     same way.  */
	  if (i >= m_known_vals.length () || !m_known_vals[i])
	    {
	      if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
	    {
	      if (i < m_known_vals.length () && m_known_vals[i])
		return false;
	      continue;
	    }
	  /* Pointer identity is used to compare trees here — assumes known
	     constants are shared nodes; TODO confirm for all callers.  */
	  if (m_known_vals[i] != ctx.m_known_vals[i])
	    return false;
	}
    }
  if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
	    continue;
	  if (i >= m_known_contexts.length ()
	      || m_known_contexts[i].useless_p ())
	    {
	      if (i < ctx.m_known_contexts.length ()
		  && !ctx.m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_contexts.length ()
	      || ctx.m_known_contexts[i].useless_p ())
	    {
	      if (i < m_known_contexts.length ()
		  && !m_known_contexts[i].useless_p ())
		return false;
	      continue;
	    }
	  if (!m_known_contexts[i].equal_to
	     	 (ctx.m_known_contexts[i]))
	    return false;
	}
    }
  if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
    {
      for (unsigned int i = 0; i < nargs; i++)
	{
	  if (!ipa_is_param_used_by_indirect_call (params_summary, i))
	    continue;
	  if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
	    {
	      if (i < ctx.m_known_aggs.length ()
		  && !ctx.m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (i >= ctx.m_known_aggs.length ()
	      || ctx.m_known_aggs[i].is_empty ())
	    {
	      if (i < m_known_aggs.length ()
		  && !m_known_aggs[i].is_empty ())
		return false;
	      continue;
	    }
	  if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
	    return false;
	}
    }
  /* All relevant entries matched.  */
  return true;
}
3265
3266 /* Estimate size and time needed to execute call in the given context.
3267 Additionally detemine hints determined by the context. Finally compute
3268 minimal size needed for the call that is independent on the call context and
3269 can be used for fast estimates. Return the values in RET_SIZE,
3270 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
3271
3272 void
3273 ipa_call_context::estimate_size_and_time (int *ret_size,
3274 int *ret_min_size,
3275 sreal *ret_time,
3276 sreal *ret_nonspecialized_time,
3277 ipa_hints *ret_hints)
3278 {
3279 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3280 size_time_entry *e;
3281 int size = 0;
3282 sreal time = 0;
3283 int min_size = 0;
3284 ipa_hints hints = 0;
3285 int i;
3286
3287 if (dump_file && (dump_flags & TDF_DETAILS))
3288 {
3289 bool found = false;
3290 fprintf (dump_file, " Estimating body: %s/%i\n"
3291 " Known to be false: ", m_node->name (),
3292 m_node->order);
3293
3294 for (i = predicate::not_inlined_condition;
3295 i < (predicate::first_dynamic_condition
3296 + (int) vec_safe_length (info->conds)); i++)
3297 if (!(m_possible_truths & (1 << i)))
3298 {
3299 if (found)
3300 fprintf (dump_file, ", ");
3301 found = true;
3302 dump_condition (dump_file, info->conds, i);
3303 }
3304 }
3305
3306 estimate_calls_size_and_time (m_node, &size, &min_size,
3307 ret_time ? &time : NULL,
3308 ret_hints ? &hints : NULL, m_possible_truths,
3309 m_known_vals, m_known_contexts, m_known_aggs);
3310
3311 sreal nonspecialized_time = time;
3312
3313 min_size += (*info->size_time_table)[0].size;
3314 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3315 {
3316 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3317
3318 /* Because predicates are conservative, it can happen that nonconst is 1
3319 but exec is 0. */
3320 if (exec)
3321 {
3322 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3323
3324 gcc_checking_assert (e->time >= 0);
3325 gcc_checking_assert (time >= 0);
3326
3327 /* We compute specialized size only because size of nonspecialized
3328 copy is context independent.
3329
3330 The difference between nonspecialized execution and specialized is
3331 that nonspecialized is not going to have optimized out computations
3332 known to be constant in a specialized setting. */
3333 if (nonconst)
3334 size += e->size;
3335 if (!ret_time)
3336 continue;
3337 nonspecialized_time += e->time;
3338 if (!nonconst)
3339 ;
3340 else if (!m_inline_param_summary.exists ())
3341 {
3342 if (nonconst)
3343 time += e->time;
3344 }
3345 else
3346 {
3347 int prob = e->nonconst_predicate.probability
3348 (info->conds, m_possible_truths,
3349 m_inline_param_summary);
3350 gcc_checking_assert (prob >= 0);
3351 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3352 if (prob == REG_BR_PROB_BASE)
3353 time += e->time;
3354 else
3355 time += e->time * prob / REG_BR_PROB_BASE;
3356 }
3357 gcc_checking_assert (time >= 0);
3358 }
3359 }
3360 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3361 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3362 gcc_checking_assert (min_size >= 0);
3363 gcc_checking_assert (size >= 0);
3364 gcc_checking_assert (time >= 0);
3365 /* nonspecialized_time should be always bigger than specialized time.
3366 Roundoff issues however may get into the way. */
3367 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3368
3369 /* Roundoff issues may make specialized time bigger than nonspecialized
3370 time. We do not really want that to happen because some heurstics
3371 may get confused by seeing negative speedups. */
3372 if (time > nonspecialized_time)
3373 time = nonspecialized_time;
3374
3375 if (ret_hints)
3376 {
3377 if (info->loop_iterations
3378 && !info->loop_iterations->evaluate (m_possible_truths))
3379 hints |= INLINE_HINT_loop_iterations;
3380 if (info->loop_stride
3381 && !info->loop_stride->evaluate (m_possible_truths))
3382 hints |= INLINE_HINT_loop_stride;
3383 if (info->scc_no)
3384 hints |= INLINE_HINT_in_scc;
3385 if (DECL_DECLARED_INLINE_P (m_node->decl))
3386 hints |= INLINE_HINT_declared_inline;
3387 }
3388
3389 size = RDIV (size, ipa_fn_summary::size_scale);
3390 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3391
3392 if (dump_file && (dump_flags & TDF_DETAILS))
3393 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3394 time.to_double (), nonspecialized_time.to_double ());
3395 if (ret_time)
3396 *ret_time = time;
3397 if (ret_nonspecialized_time)
3398 *ret_nonspecialized_time = nonspecialized_time;
3399 if (ret_size)
3400 *ret_size = size;
3401 if (ret_min_size)
3402 *ret_min_size = min_size;
3403 if (ret_hints)
3404 *ret_hints = hints;
3405 return;
3406 }
3407
3408
3409 /* Estimate size and time needed to execute callee of EDGE assuming that
3410 parameters known to be constant at caller of EDGE are propagated.
3411 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3412 and types for parameters. */
3413
3414 void
3415 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3416 vec<tree> known_vals,
3417 vec<ipa_polymorphic_call_context>
3418 known_contexts,
3419 vec<ipa_agg_value_set> known_aggs,
3420 int *ret_size, sreal *ret_time,
3421 sreal *ret_nonspec_time,
3422 ipa_hints *hints)
3423 {
3424 clause_t clause, nonspec_clause;
3425
3426 /* TODO: Also pass known value ranges. */
3427 evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3428 known_aggs, &clause, &nonspec_clause);
3429 ipa_call_context ctx (node, clause, nonspec_clause,
3430 known_vals, known_contexts,
3431 known_aggs, vNULL);
3432 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3433 ret_nonspec_time, hints);
3434 }
3435
3436 /* Return stack frame offset where frame of NODE is supposed to start inside
3437 of the function it is inlined to.
3438 Return 0 for functions that are not inlined. */
3439
3440 HOST_WIDE_INT
3441 ipa_get_stack_frame_offset (struct cgraph_node *node)
3442 {
3443 HOST_WIDE_INT offset = 0;
3444 if (!node->inlined_to)
3445 return 0;
3446 node = node->callers->caller;
3447 while (true)
3448 {
3449 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3450 if (!node->inlined_to)
3451 return offset;
3452 node = node->callers->caller;
3453 }
3454 }
3455
3456
3457 /* Update summary information of inline clones after inlining.
3458 Compute peak stack usage. */
3459
3460 static void
3461 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3462 {
3463 struct cgraph_edge *e;
3464
3465 ipa_propagate_frequency (node);
3466 for (e = node->callees; e; e = e->next_callee)
3467 {
3468 if (!e->inline_failed)
3469 inline_update_callee_summaries (e->callee, depth);
3470 else
3471 ipa_call_summaries->get (e)->loop_depth += depth;
3472 }
3473 for (e = node->indirect_calls; e; e = e->next_callee)
3474 ipa_call_summaries->get (e)->loop_depth += depth;
3475 }
3476
3477 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3478 When function A is inlined in B and A calls C with parameter that
3479 changes with probability PROB1 and C is known to be passthroug
3480 of argument if B that change with probability PROB2, the probability
3481 of change is now PROB1*PROB2. */
3482
3483 static void
3484 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3485 struct cgraph_edge *edge)
3486 {
3487 if (ipa_node_params_sum)
3488 {
3489 int i;
3490 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3491 if (!args)
3492 return;
3493 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3494 class ipa_call_summary *inlined_es
3495 = ipa_call_summaries->get (inlined_edge);
3496
3497 if (es->param.length () == 0)
3498 return;
3499
3500 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3501 {
3502 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3503 if (jfunc->type == IPA_JF_PASS_THROUGH
3504 || jfunc->type == IPA_JF_ANCESTOR)
3505 {
3506 int id = jfunc->type == IPA_JF_PASS_THROUGH
3507 ? ipa_get_jf_pass_through_formal_id (jfunc)
3508 : ipa_get_jf_ancestor_formal_id (jfunc);
3509 if (id < (int) inlined_es->param.length ())
3510 {
3511 int prob1 = es->param[i].change_prob;
3512 int prob2 = inlined_es->param[id].change_prob;
3513 int prob = combine_probabilities (prob1, prob2);
3514
3515 if (prob1 && prob2 && !prob)
3516 prob = 1;
3517
3518 es->param[i].change_prob = prob;
3519 }
3520 }
3521 }
3522 }
3523 }
3524
/* Update edge summaries of NODE after INLINED_EDGE has been inlined.

   Remap predicates of callees of NODE.  Rest of arguments match
   remap_predicate.

   Also update change probabilities.  */

static void
remap_edge_summaries (struct cgraph_edge *inlined_edge,
		      struct cgraph_node *node,
		      class ipa_fn_summary *info,
		      class ipa_node_params *params_summary,
		      class ipa_fn_summary *callee_info,
		      vec<int> operand_map,
		      vec<int> offset_map,
		      clause_t possible_truths,
		      predicate *toplev_predicate)
{
  struct cgraph_edge *e, *next;
  for (e = node->callees; e; e = next)
    {
      predicate p;
      /* Cache NEXT up front — updating E's predicate may presumably
	 remove the edge from the list; verify against
	 edge_set_predicate.  */
      next = e->next_callee;

      if (e->inline_failed)
	{
	  /* Edge survives as a real call: remap its predicate into the
	     caller's parameter space and update change probabilities.  */
	  class ipa_call_summary *es = ipa_call_summaries->get (e);
	  remap_edge_change_prob (inlined_edge, e);

	  if (es->predicate)
	    {
	      p = es->predicate->remap_after_inlining
				     (info, params_summary,
				      callee_info, operand_map,
				      offset_map, possible_truths,
				      *toplev_predicate);
	      edge_set_predicate (e, &p);
	    }
	  else
	    /* Edge had no predicate of its own; it is guarded only by
	       the predicate of the inlined call site.  */
	    edge_set_predicate (e, toplev_predicate);
	}
      else
	/* E was inlined too; recurse into its callee's edges.  */
	remap_edge_summaries (inlined_edge, e->callee, info,
			      params_summary, callee_info,
			      operand_map, offset_map, possible_truths,
			      toplev_predicate);
    }
  /* Indirect edges are never inlined, so no recursion is needed here.  */
  for (e = node->indirect_calls; e; e = next)
    {
      class ipa_call_summary *es = ipa_call_summaries->get (e);
      predicate p;
      next = e->next_callee;

      remap_edge_change_prob (inlined_edge, e);
      if (es->predicate)
	{
	  p = es->predicate->remap_after_inlining
				 (info, params_summary,
				  callee_info, operand_map, offset_map,
				  possible_truths, *toplev_predicate);
	  edge_set_predicate (e, &p);
	}
      else
	edge_set_predicate (e, toplev_predicate);
    }
}
3591
3592 /* Same as remap_predicate, but set result into hint *HINT. */
3593
3594 static void
3595 remap_hint_predicate (class ipa_fn_summary *info,
3596 class ipa_node_params *params_summary,
3597 class ipa_fn_summary *callee_info,
3598 predicate **hint,
3599 vec<int> operand_map,
3600 vec<int> offset_map,
3601 clause_t possible_truths,
3602 predicate *toplev_predicate)
3603 {
3604 predicate p;
3605
3606 if (!*hint)
3607 return;
3608 p = (*hint)->remap_after_inlining
3609 (info, params_summary, callee_info,
3610 operand_map, offset_map,
3611 possible_truths, *toplev_predicate);
3612 if (p != false && p != true)
3613 {
3614 if (!*hint)
3615 set_hint_predicate (hint, p);
3616 else
3617 **hint &= p;
3618 }
3619 }
3620
/* We inlined EDGE.  Update summary of the function we inlined into.
   This folds the callee's size/time table, edge predicates and hint
   predicates into the root of the inline clone tree, then drops the
   summaries that are no longer maintained for inline clones.  */

void
ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
{
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
  /* Merge into the root of the inline tree, not the immediate caller.  */
  struct cgraph_node *to = (edge->caller->inlined_to
			    ? edge->caller->inlined_to : edge->caller);
  class ipa_fn_summary *info = ipa_fn_summaries->get (to);
  clause_t clause = 0;	/* not_inline is known to be false.  */
  size_time_entry *e;
  auto_vec<int, 8> operand_map;
  auto_vec<int, 8> offset_map;
  int i;
  predicate toplev_predicate;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  class ipa_node_params *params_summary = (ipa_node_params_sum
					   ? IPA_NODE_REF (to) : NULL);

  /* Everything merged from the callee is additionally guarded by the
     predicate of the call site itself (if any).  */
  if (es->predicate)
    toplev_predicate = *es->predicate;
  else
    toplev_predicate = true;

  info->fp_expressions |= callee_info->fp_expressions;

  /* Compute which of the callee's conditions are known at this call.  */
  if (callee_info->conds)
    evaluate_properties_for_edge (edge, true, &clause,
				  NULL, NULL, NULL, NULL);
  if (ipa_node_params_sum && callee_info->conds)
    {
      /* Build maps translating callee parameter indices (and aggregate
	 offsets) into the caller's parameter space, so the callee's
	 predicates can be re-expressed in terms of TO's parameters.  */
      class ipa_edge_args *args = IPA_EDGE_REF (edge);
      int count = args ? ipa_get_cs_argument_count (args) : 0;
      int i;

      if (count)
	{
	  operand_map.safe_grow_cleared (count);
	  offset_map.safe_grow_cleared (count);
	}
      for (i = 0; i < count; i++)
	{
	  struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  int map = -1;

	  /* TODO: handle non-NOPs when merging.  */
	  if (jfunc->type == IPA_JF_PASS_THROUGH)
	    {
	      if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
		map = ipa_get_jf_pass_through_formal_id (jfunc);
	      if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
		offset_map[i] = -1;
	    }
	  else if (jfunc->type == IPA_JF_ANCESTOR)
	    {
	      HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
	      if (offset >= 0 && offset < INT_MAX)
		{
		  map = ipa_get_jf_ancestor_formal_id (jfunc);
		  if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
		    offset = -1;
		  offset_map[i] = offset;
		}
	    }
	  operand_map[i] = map;
	  gcc_assert (map < ipa_get_param_count (params_summary));
	}
    }
  /* Accumulate the callee's size/time entries, scaled by call frequency
     and the probability that each entry's predicate holds.  */
  sreal freq = edge->sreal_frequency ();
  for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
    {
      predicate p;
      p = e->exec_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      predicate nonconstp;
      nonconstp = e->nonconst_predicate.remap_after_inlining
			     (info, params_summary,
			      callee_info, operand_map,
			      offset_map, clause,
			      toplev_predicate);
      if (p != false && nonconstp != false)
	{
	  sreal add_time = ((sreal)e->time * freq);
	  int prob = e->nonconst_predicate.probability (callee_info->conds,
							clause, es->param);
	  if (prob != REG_BR_PROB_BASE)
	    add_time = add_time * prob / REG_BR_PROB_BASE;
	  if (prob != REG_BR_PROB_BASE
	      && dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "\t\tScaling time by probability:%f\n",
		       (double) prob / REG_BR_PROB_BASE);
	    }
	  info->account_size_time (e->size, add_time, p, nonconstp);
	}
    }
  /* Remap predicates on the surviving edges and on hint predicates.  */
  remap_edge_summaries (edge, edge->callee, info, params_summary,
			callee_info, operand_map,
			offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_iterations,
			operand_map, offset_map, clause, &toplev_predicate);
  remap_hint_predicate (info, params_summary, callee_info,
			&callee_info->loop_stride,
			operand_map, offset_map, clause, &toplev_predicate);

  /* Update the estimated peak stack size of the merged function.  */
  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;

  if (info->estimated_stack_size < peak)
    info->estimated_stack_size = peak;

  inline_update_callee_summaries (edge->callee, es->loop_depth);

  /* Free summaries that are not maintained for inline clones/edges.  */
  ipa_call_summaries->remove (edge);
  ipa_fn_summaries->remove (edge->callee);
  ipa_remove_from_growth_caches (edge);
}
3743
3744 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
3745 overall size and time. Recompute it. */
3746
3747 void
3748 ipa_update_overall_fn_summary (struct cgraph_node *node)
3749 {
3750 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
3751 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
3752 size_time_entry *e;
3753 int i;
3754
3755 size_info->size = 0;
3756 info->time = 0;
3757 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3758 {
3759 size_info->size += e->size;
3760 info->time += e->time;
3761 }
3762 info->min_size = (*info->size_time_table)[0].size;
3763 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
3764 &info->time, NULL,
3765 ~(clause_t) (1 << predicate::false_condition),
3766 vNULL, vNULL, vNULL);
3767 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
3768 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
3769 }
3770
3771
3772 /* This function performs intraprocedural analysis in NODE that is required to
3773 inline indirect calls. */
3774
3775 static void
3776 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3777 {
3778 ipa_analyze_node (node);
3779 if (dump_file && (dump_flags & TDF_DETAILS))
3780 {
3781 ipa_print_node_params (dump_file, node);
3782 ipa_print_node_jump_functions (dump_file, node);
3783 }
3784 }
3785
3786
3787 /* Note function body size. */
3788
3789 void
3790 inline_analyze_function (struct cgraph_node *node)
3791 {
3792 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3793
3794 if (dump_file)
3795 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3796 node->name (), node->order);
3797 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3798 inline_indirect_intraprocedural_analysis (node);
3799 compute_fn_summary (node, false);
3800 if (!optimize)
3801 {
3802 struct cgraph_edge *e;
3803 for (e = node->callees; e; e = e->next_callee)
3804 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3805 for (e = node->indirect_calls; e; e = e->next_callee)
3806 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3807 }
3808
3809 pop_cfun ();
3810 }
3811
3812
/* Called when new function is inserted to callgraph late.
   Compute its summary immediately; the ipa_fn_summary argument supplied
   by the hook is unused (the summary is created by the analysis).  */

void
ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
{
  inline_analyze_function (node);
}
3820
3821 /* Note function body size. */
3822
3823 static void
3824 ipa_fn_summary_generate (void)
3825 {
3826 struct cgraph_node *node;
3827
3828 FOR_EACH_DEFINED_FUNCTION (node)
3829 if (DECL_STRUCT_FUNCTION (node->decl))
3830 node->versionable = tree_versionable_function_p (node->decl);
3831
3832 ipa_fn_summary_alloc ();
3833
3834 ipa_fn_summaries->enable_insertion_hook ();
3835
3836 ipa_register_cgraph_hooks ();
3837
3838 FOR_EACH_DEFINED_FUNCTION (node)
3839 if (!node->alias
3840 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3841 || opt_for_fn (node->decl, optimize)))
3842 inline_analyze_function (node);
3843 }
3844
3845
/* Read call summary for edge E from IB.  (The previous header comment
   incorrectly said "Write inline summary for edge E to OB".)
   When PREVAILS is false the data is consumed to keep the stream in sync
   but discarded; the read order must mirror write_ipa_call_summary.  */

static void
read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
		       bool prevails)
{
  /* Only prevailing edges get a summary to fill in.  */
  class ipa_call_summary *es = prevails
				? ipa_call_summaries->get_create (e) : NULL;
  predicate p;
  int length, i;

  int size = streamer_read_uhwi (ib);
  int time = streamer_read_uhwi (ib);
  int depth = streamer_read_uhwi (ib);

  if (es)
    {
      es->call_stmt_size = size;
      es->call_stmt_time = time;
      es->loop_depth = depth;
    }

  bitpack_d bp = streamer_read_bitpack (ib);
  if (es)
    es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
  else
    /* Discard the bit to stay in sync with the stream.  */
    bp_unpack_value (&bp, 1);

  p.stream_in (ib);
  if (es)
    edge_set_predicate (e, &p);
  length = streamer_read_uhwi (ib);
  /* Per-parameter change probabilities are only useful when the call can
     be resolved in this translation unit.  */
  if (length && es && e->possibly_call_in_translation_unit_p ())
    {
      es->param.safe_grow_cleared (length);
      for (i = 0; i < length; i++)
	es->param[i].change_prob = streamer_read_uhwi (ib);
    }
  else
    {
      /* Skip the streamed probabilities.  */
      for (i = 0; i < length; i++)
	streamer_read_uhwi (ib);
    }
}
3890
3891
/* Stream in inline summaries from the section.  DATA/LEN describe the
   LTO section belonging to FILE_DATA.  The read order must exactly mirror
   ipa_fn_summary_write; for non-prevailing nodes everything is read and
   discarded to keep the stream position in sync.  */

static void
inline_read_section (struct lto_file_decl_data *file_data, const char *data,
		     size_t len)
{
  /* The section layout is: function header, CFG part, main part,
     string table.  */
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  class data_in *data_in;
  unsigned int i, count2, j;
  unsigned int f_count;

  lto_input_block ib ((const char *) data + main_offset, header->main_size,
		      file_data->mode_table);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  f_count = streamer_read_uhwi (&ib);
  for (i = 0; i < f_count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      class ipa_fn_summary *info;
      class ipa_node_params *params_summary;
      class ipa_size_summary *size_info;
      lto_symtab_encoder_t encoder;
      struct bitpack_d bp;
      struct cgraph_edge *e;
      predicate p;

      index = streamer_read_uhwi (&ib);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
								index));
      /* Summaries are only created for the prevailing copy of the node;
	 otherwise INFO/SIZE_INFO stay NULL and the data is discarded.  */
      info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
      params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
      size_info = node->prevailing_p ()
		  ? ipa_size_summaries->get_create (node) : NULL;

      int stack_size = streamer_read_uhwi (&ib);
      int size = streamer_read_uhwi (&ib);
      sreal time = sreal::stream_in (&ib);

      if (info)
	{
	  info->estimated_stack_size
	    = size_info->estimated_self_stack_size = stack_size;
	  size_info->size = size_info->self_size = size;
	  info->time = time;
	}

      bp = streamer_read_bitpack (&ib);
      if (info)
	{
	  info->inlinable = bp_unpack_value (&bp, 1);
	  info->fp_expressions = bp_unpack_value (&bp, 1);
	}
      else
	{
	  /* Discard the bits to stay in sync.  */
	  bp_unpack_value (&bp, 1);
	  bp_unpack_value (&bp, 1);
	}

      /* Read the conditions referenced by the predicates below.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->conds);
      if (info)
	vec_safe_reserve_exact (info->conds, count2);
      for (j = 0; j < count2; j++)
	{
	  struct condition c;
	  unsigned int k, count3;
	  c.operand_num = streamer_read_uhwi (&ib);
	  c.code = (enum tree_code) streamer_read_uhwi (&ib);
	  c.type = stream_read_tree (&ib, data_in);
	  c.val = stream_read_tree (&ib, data_in);
	  bp = streamer_read_bitpack (&ib);
	  c.agg_contents = bp_unpack_value (&bp, 1);
	  c.by_ref = bp_unpack_value (&bp, 1);
	  if (c.agg_contents)
	    c.offset = streamer_read_uhwi (&ib);
	  count3 = streamer_read_uhwi (&ib);
	  c.param_ops = NULL;
	  if (info)
	    vec_safe_reserve_exact (c.param_ops, count3);
	  if (params_summary)
	    ipa_set_param_used_by_ipa_predicates
		    (params_summary, c.operand_num, true);
	  /* Read the chain of operations applied to the parameter before
	     the comparison.  */
	  for (k = 0; k < count3; k++)
	    {
	      struct expr_eval_op op;
	      enum gimple_rhs_class rhs_class;
	      op.code = (enum tree_code) streamer_read_uhwi (&ib);
	      op.type = stream_read_tree (&ib, data_in);
	      switch (rhs_class = get_gimple_rhs_class (op.code))
		{
		case GIMPLE_UNARY_RHS:
		  op.index = 0;
		  op.val[0] = NULL_TREE;
		  op.val[1] = NULL_TREE;
		  break;

		case GIMPLE_BINARY_RHS:
		case GIMPLE_TERNARY_RHS:
		  bp = streamer_read_bitpack (&ib);
		  op.index = bp_unpack_value (&bp, 2);
		  op.val[0] = stream_read_tree (&ib, data_in);
		  if (rhs_class == GIMPLE_BINARY_RHS)
		    op.val[1] = NULL_TREE;
		  else
		    op.val[1] = stream_read_tree (&ib, data_in);
		  break;

		default:
		  fatal_error (UNKNOWN_LOCATION,
			       "invalid fnsummary in LTO stream");
		}
	      if (info)
		c.param_ops->quick_push (op);
	    }
	  if (info)
	    info->conds->quick_push (c);
	}
      /* Read the size/time table.  */
      count2 = streamer_read_uhwi (&ib);
      gcc_assert (!info || !info->size_time_table);
      if (info && count2)
	vec_safe_reserve_exact (info->size_time_table, count2);
      for (j = 0; j < count2; j++)
	{
	  class size_time_entry e;

	  e.size = streamer_read_uhwi (&ib);
	  e.time = sreal::stream_in (&ib);
	  e.exec_predicate.stream_in (&ib);
	  e.nonconst_predicate.stream_in (&ib);

	  if (info)
	    info->size_time_table->quick_push (e);
	}

      /* Hint predicates.  */
      p.stream_in (&ib);
      if (info)
	set_hint_predicate (&info->loop_iterations, p);
      p.stream_in (&ib);
      if (info)
	set_hint_predicate (&info->loop_stride, p);
      /* Edge summaries, in the same order the writer emitted them.  */
      for (e = node->callees; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
      for (e = node->indirect_calls; e; e = e->next_callee)
	read_ipa_call_summary (&ib, e, info != NULL);
    }

  lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}
4051
4052
4053 /* Read inline summary. Jump functions are shared among ipa-cp
4054 and inliner, so when ipa-cp is active, we don't need to write them
4055 twice. */
4056
4057 static void
4058 ipa_fn_summary_read (void)
4059 {
4060 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4061 struct lto_file_decl_data *file_data;
4062 unsigned int j = 0;
4063
4064 ipa_fn_summary_alloc ();
4065
4066 while ((file_data = file_data_vec[j++]))
4067 {
4068 size_t len;
4069 const char *data
4070 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4071 &len);
4072 if (data)
4073 inline_read_section (file_data, data, len);
4074 else
4075 /* Fatal error here. We do not want to support compiling ltrans units
4076 with different version of compiler or different flags than the WPA
4077 unit, so this should never happen. */
4078 fatal_error (input_location,
4079 "ipa inline summary is missing in input file");
4080 }
4081 ipa_register_cgraph_hooks ();
4082 if (!flag_ipa_cp)
4083 ipa_prop_read_jump_functions ();
4084
4085 gcc_assert (ipa_fn_summaries);
4086 ipa_fn_summaries->enable_insertion_hook ();
4087 }
4088
4089
4090 /* Write inline summary for edge E to OB. */
4091
4092 static void
4093 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4094 {
4095 class ipa_call_summary *es = ipa_call_summaries->get (e);
4096 int i;
4097
4098 streamer_write_uhwi (ob, es->call_stmt_size);
4099 streamer_write_uhwi (ob, es->call_stmt_time);
4100 streamer_write_uhwi (ob, es->loop_depth);
4101
4102 bitpack_d bp = bitpack_create (ob->main_stream);
4103 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4104 streamer_write_bitpack (&bp);
4105
4106 if (es->predicate)
4107 es->predicate->stream_out (ob);
4108 else
4109 streamer_write_uhwi (ob, 0);
4110 streamer_write_uhwi (ob, es->param.length ());
4111 for (i = 0; i < (int) es->param.length (); i++)
4112 streamer_write_uhwi (ob, es->param[i].change_prob);
4113 }
4114
4115
/* Write the function summary of every function definition encoded in OB's
   symbol table to the LTO_section_ipa_fn_summary section.
   The layout written here must stay in exact sync with the reader side
   (ipa_fn_summary_read); do not reorder any of the stream operations below.
   Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
   active, we don't need to write them twice.  */

static void
ipa_fn_summary_write (void)
{
  struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  unsigned int count = 0;
  int i;

  /* First pass over the encoder: count real function bodies (definitions
     that are not aliases) so the reader knows how many records follow.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Second pass: emit one record per function definition.  */
  for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
    {
      symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
      cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
      if (cnode && cnode->definition && !cnode->alias)
	{
	  class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
	  class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
	  struct bitpack_d bp;
	  struct cgraph_edge *edge;
	  int i;
	  size_time_entry *e;
	  struct condition *c;

	  /* Node reference, stack/size estimates and time.  */
	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
	  streamer_write_hwi (ob, size_info->estimated_self_stack_size);
	  streamer_write_hwi (ob, size_info->self_size);
	  info->time.stream_out (ob);
	  bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, info->inlinable, 1);
	  /* Unused bit, kept so the bitpack layout stays compatible with
	     the reader.  NOTE(review): presumably the slot of a removed
	     flag -- confirm against ipa_fn_summary_read before reusing.  */
	  bp_pack_value (&bp, false, 1);
	  bp_pack_value (&bp, info->fp_expressions, 1);
	  streamer_write_bitpack (&bp);
	  /* Stream the condition vector that the predicates below
	     index into.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->conds));
	  for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
	    {
	      int j;
	      struct expr_eval_op *op;

	      streamer_write_uhwi (ob, c->operand_num);
	      streamer_write_uhwi (ob, c->code);
	      stream_write_tree (ob, c->type, true);
	      stream_write_tree (ob, c->val, true);
	      bp = bitpack_create (ob->main_stream);
	      bp_pack_value (&bp, c->agg_contents, 1);
	      bp_pack_value (&bp, c->by_ref, 1);
	      streamer_write_bitpack (&bp);
	      /* The offset is only meaningful for aggregate contents.  */
	      if (c->agg_contents)
		streamer_write_uhwi (ob, c->offset);
	      /* Optional chain of operations applied to the operand
		 before the condition is evaluated.  */
	      streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
	      for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
		{
		  streamer_write_uhwi (ob, op->code);
		  stream_write_tree (ob, op->type, true);
		  if (op->val[0])
		    {
		      bp = bitpack_create (ob->main_stream);
		      bp_pack_value (&bp, op->index, 2);
		      streamer_write_bitpack (&bp);
		      stream_write_tree (ob, op->val[0], true);
		      if (op->val[1])
			stream_write_tree (ob, op->val[1], true);
		    }
		}
	    }
	  /* Size/time table: each entry carries a size, a time, and the
	     predicates guarding when it applies.  */
	  streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
	  for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
	    {
	      streamer_write_uhwi (ob, e->size);
	      e->time.stream_out (ob);
	      e->exec_predicate.stream_out (ob);
	      e->nonconst_predicate.stream_out (ob);
	    }
	  /* A zero uhwi stands for a NULL predicate.  */
	  if (info->loop_iterations)
	    info->loop_iterations->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  if (info->loop_stride)
	    info->loop_stride->stream_out (ob);
	  else
	    streamer_write_uhwi (ob, 0);
	  /* Call summaries of all outgoing edges: direct calls first,
	     then indirect ones.  */
	  for (edge = cnode->callees; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
	    write_ipa_call_summary (ob, edge);
	}
    }
  /* Terminate the section.  */
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);

  /* When ipa-cp is disabled it does not stream the jump functions
     itself, so emit them here.  */
  if (!flag_ipa_cp)
    ipa_prop_write_jump_functions ();
}
4221
4222
4223 /* Release function summary. */
4224
4225 void
4226 ipa_free_fn_summary (void)
4227 {
4228 if (!ipa_call_summaries)
4229 return;
4230 ggc_delete (ipa_fn_summaries);
4231 ipa_fn_summaries = NULL;
4232 delete ipa_call_summaries;
4233 ipa_call_summaries = NULL;
4234 edge_predicate_pool.release ();
4235 /* During IPA this is one of largest datastructures to release. */
4236 if (flag_wpa)
4237 ggc_trim ();
4238 }
4239
4240 /* Release function summary. */
4241
4242 void
4243 ipa_free_size_summary (void)
4244 {
4245 if (!ipa_size_summaries)
4246 return;
4247 delete ipa_size_summaries;
4248 ipa_size_summaries = NULL;
4249 }
4250
namespace {

/* Pass descriptor for the local (per-function) summary computation
   pass.  Fields are positional per struct pass_data.  */

const pass_data pass_data_local_fn_summary =
{
  GIMPLE_PASS, /* type */
  "local-fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_INLINE_PARAMETERS, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* GIMPLE pass that computes the function summary for the current
   function; the actual work is done by compute_fn_summary_for_current.  */

class pass_local_fn_summary : public gimple_opt_pass
{
public:
  pass_local_fn_summary (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
  virtual unsigned int execute (function *)
    {
      return compute_fn_summary_for_current ();
    }

}; // class pass_local_fn_summary

} // anon namespace
4283
/* Create an instance of the local function summary pass.  */

gimple_opt_pass *
make_pass_local_fn_summary (gcc::context *ctxt)
{
  return new pass_local_fn_summary (ctxt);
}
4289
4290
/* Free inline summary.  */

namespace {

/* Pass descriptor for the simple IPA pass that releases the function
   summaries once they are no longer needed.  */

const pass_data pass_data_ipa_free_fn_summary =
{
  SIMPLE_IPA_PASS, /* type */
  "free-fnsummary", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
{
public:
  pass_ipa_free_fn_summary (gcc::context *ctxt)
    : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
      small_p (false)
  {}

  /* opt_pass methods: */
  opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
  /* Record the boolean pass parameter.
     NOTE(review): small_p is stored here but never read by execute ()
     below; presumably a leftover distinguishing an early/late instance
     of this pass -- confirm against passes.def before removing.  */
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      small_p = param;
    }
  virtual bool gate (function *) { return true; }
  virtual unsigned int execute (function *)
    {
      ipa_free_fn_summary ();
      /* Keep the size summaries around at WPA time; release them
	 otherwise.  */
      if (!flag_wpa)
	ipa_free_size_summary ();
      return 0;
    }

private:
  bool small_p;
}; // class pass_ipa_free_fn_summary

} // anon namespace
4337
/* Create an instance of the pass releasing function summaries.  */

simple_ipa_opt_pass *
make_pass_ipa_free_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_free_fn_summary (ctxt);
}
4343
namespace {

/* Pass descriptor for the IPA function summary pass.  */

const pass_data pass_data_ipa_fn_summary =
{
  IPA_PASS, /* type */
  "fnsummary", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  TV_IPA_FNSUMMARY, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

/* IPA pass wiring summary generation and LTO streaming together:
   summaries are built by ipa_fn_summary_generate, written by
   ipa_fn_summary_write and read back by ipa_fn_summary_read.
   The pass itself performs no per-function transformation.  */

class pass_ipa_fn_summary : public ipa_opt_pass_d
{
public:
  pass_ipa_fn_summary (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
		      ipa_fn_summary_generate, /* generate_summary */
		      ipa_fn_summary_write, /* write_summary */
		      ipa_fn_summary_read, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *) { return 0; }

}; // class pass_ipa_fn_summary

} // anon namespace
4381
/* Create an instance of the IPA function summary pass.  */

ipa_opt_pass_d *
make_pass_ipa_fn_summary (gcc::context *ctxt)
{
  return new pass_ipa_fn_summary (ctxt);
}
4387
/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  Releases the
   function summaries (and with them the call summaries and predicate
   pool) via ipa_free_fn_summary.  */

void
ipa_fnsummary_c_finalize (void)
{
  ipa_free_fn_summary ();
}