]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ipa-fnsummary.c
generalized IPA predicate on parameter
[thirdparty/gcc.git] / gcc / ipa-fnsummary.c
CommitLineData
b9a58fc5 1/* Function summary pass.
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
b9a58fc5 3 Contributed by Jan Hubicka
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
1297cbcd 30 ipa_fn_summary data structures store above information locally (i.e.
b9a58fc5 31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
1297cbcd 35 We provide access to the ipa_fn_summary data structure and
b9a58fc5 36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
1297cbcd 47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
b9a58fc5 48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
53
54#include "config.h"
55#include "system.h"
56#include "coretypes.h"
57#include "backend.h"
58#include "tree.h"
59#include "gimple.h"
60#include "alloc-pool.h"
61#include "tree-pass.h"
62#include "ssa.h"
63#include "tree-streamer.h"
64#include "cgraph.h"
65#include "diagnostic.h"
66#include "fold-const.h"
67#include "print-tree.h"
68#include "tree-inline.h"
69#include "gimple-pretty-print.h"
70#include "params.h"
71#include "cfganal.h"
72#include "gimple-iterator.h"
73#include "tree-cfg.h"
74#include "tree-ssa-loop-niter.h"
75#include "tree-ssa-loop.h"
76#include "symbol-summary.h"
77#include "ipa-prop.h"
78#include "ipa-fnsummary.h"
79#include "cfgloop.h"
80#include "tree-scalar-evolution.h"
81#include "ipa-utils.h"
b9a58fc5 82#include "cfgexpand.h"
83#include "gimplify.h"
30a86690 84#include "stringpool.h"
85#include "attribs.h"
b9a58fc5 86
87/* Summaries. */
1add72d5 88fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
89fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
b9a58fc5 90
91/* Edge predicates goes here. */
92static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
1297cbcd 95/* Dump IPA hints. */
b9a58fc5 96void
1297cbcd 97ipa_dump_hints (FILE *f, ipa_hints hints)
b9a58fc5 98{
99 if (!hints)
100 return;
1297cbcd 101 fprintf (f, "IPA hints:");
b9a58fc5 102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
b9a58fc5 137 if (hints & INLINE_HINT_known_hot)
138 {
139 hints &= ~INLINE_HINT_known_hot;
140 fprintf (f, " known_hot");
141 }
142 gcc_assert (!hints);
143}
144
145
146/* Record SIZE and TIME to SUMMARY.
147 The accounted code will be executed when EXEC_PRED is true.
148 When NONCONST_PRED is false the code will evaulate to constant and
149 will get optimized out in specialized clones of the function. */
150
151void
1297cbcd 152ipa_fn_summary::account_size_time (int size, sreal time,
b9a58fc5 153 const predicate &exec_pred,
154 const predicate &nonconst_pred_in)
155{
156 size_time_entry *e;
157 bool found = false;
158 int i;
159 predicate nonconst_pred;
160
161 if (exec_pred == false)
162 return;
163
164 nonconst_pred = nonconst_pred_in & exec_pred;
165
166 if (nonconst_pred == false)
167 return;
168
169 /* We need to create initial empty unconitional clause, but otherwie
170 we don't need to account empty times and sizes. */
171 if (!size && time == 0 && size_time_table)
172 return;
173
174 gcc_assert (time >= 0);
175
176 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
177 if (e->exec_predicate == exec_pred
178 && e->nonconst_predicate == nonconst_pred)
179 {
180 found = true;
181 break;
182 }
183 if (i == 256)
184 {
185 i = 0;
186 found = true;
187 e = &(*size_time_table)[0];
188 if (dump_file && (dump_flags & TDF_DETAILS))
189 fprintf (dump_file,
190 "\t\tReached limit on number of entries, "
191 "ignoring the predicate.");
192 }
193 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
194 {
195 fprintf (dump_file,
196 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
1297cbcd 197 ((double) size) / ipa_fn_summary::size_scale,
b9a58fc5 198 (time.to_double ()), found ? "" : "new ");
199 exec_pred.dump (dump_file, conds, 0);
200 if (exec_pred != nonconst_pred)
201 {
202 fprintf (dump_file, " nonconst:");
203 nonconst_pred.dump (dump_file, conds);
204 }
205 else
206 fprintf (dump_file, "\n");
207 }
208 if (!found)
209 {
2e966e2a 210 class size_time_entry new_entry;
b9a58fc5 211 new_entry.size = size;
212 new_entry.time = time;
213 new_entry.exec_predicate = exec_pred;
214 new_entry.nonconst_predicate = nonconst_pred;
215 vec_safe_push (size_time_table, new_entry);
216 }
217 else
218 {
219 e->size += size;
220 e->time += time;
221 }
222}
223
224/* We proved E to be unreachable, redirect it to __bultin_unreachable. */
225
226static struct cgraph_edge *
227redirect_to_unreachable (struct cgraph_edge *e)
228{
229 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
230 struct cgraph_node *target = cgraph_node::get_create
231 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
232
233 if (e->speculative)
234 e = e->resolve_speculation (target->decl);
235 else if (!e->callee)
236 e->make_direct (target);
237 else
238 e->redirect_callee (target);
2e966e2a 239 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 240 e->inline_failed = CIF_UNREACHABLE;
db9cef39 241 e->count = profile_count::zero ();
b9a58fc5 242 es->call_stmt_size = 0;
243 es->call_stmt_time = 0;
244 if (callee)
245 callee->remove_symbol_and_inline_clones ();
246 return e;
247}
248
249/* Set predicate for edge E. */
250
251static void
252edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
253{
254 /* If the edge is determined to be never executed, redirect it
1297cbcd 255 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
256 be optimized out. */
b9a58fc5 257 if (predicate && *predicate == false
258 /* When handling speculative edges, we need to do the redirection
259 just once. Do it always on the direct edge, so we do not
260 attempt to resolve speculation while duplicating the edge. */
261 && (!e->speculative || e->callee))
262 e = redirect_to_unreachable (e);
263
2e966e2a 264 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 265 if (predicate && *predicate != true)
266 {
267 if (!es->predicate)
268 es->predicate = edge_predicate_pool.allocate ();
269 *es->predicate = *predicate;
270 }
271 else
272 {
273 if (es->predicate)
274 edge_predicate_pool.remove (es->predicate);
275 es->predicate = NULL;
276 }
277}
278
279/* Set predicate for hint *P. */
280
281static void
282set_hint_predicate (predicate **p, predicate new_predicate)
283{
284 if (new_predicate == false || new_predicate == true)
285 {
286 if (*p)
287 edge_predicate_pool.remove (*p);
288 *p = NULL;
289 }
290 else
291 {
292 if (!*p)
293 *p = edge_predicate_pool.allocate ();
294 **p = new_predicate;
295 }
296}
297
298
299/* Compute what conditions may or may not hold given invormation about
300 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
301 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
302 copy when called in a given context. It is a bitmask of conditions. Bit
303 0 means that condition is known to be false, while bit 1 means that condition
304 may or may not be true. These differs - for example NOT_INLINED condition
f4d3c071 305 is always false in the second and also builtin_constant_p tests cannot use
b9a58fc5 306 the fact that parameter is indeed a constant.
307
308 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
309 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
310 Return clause of possible truths. When INLINE_P is true, assume that we are
311 inlining.
312
313 ERROR_MARK means compile time invariant. */
314
315static void
316evaluate_conditions_for_known_args (struct cgraph_node *node,
317 bool inline_p,
318 vec<tree> known_vals,
319 vec<ipa_agg_jump_function_p>
320 known_aggs,
321 clause_t *ret_clause,
322 clause_t *ret_nonspec_clause)
323{
324 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
325 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
2e966e2a 326 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
b9a58fc5 327 int i;
328 struct condition *c;
329
330 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
331 {
332 tree val;
333 tree res;
b3d74655 334 int j;
335 struct expr_eval_op *op;
b9a58fc5 336
337 /* We allow call stmt to have fewer arguments than the callee function
338 (especially for K&R style programs). So bound check here (we assume
339 known_aggs vector, if non-NULL, has the same length as
340 known_vals). */
341 gcc_checking_assert (!known_aggs.exists ()
342 || (known_vals.length () == known_aggs.length ()));
343 if (c->operand_num >= (int) known_vals.length ())
344 {
345 clause |= 1 << (i + predicate::first_dynamic_condition);
346 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
347 continue;
348 }
349
350 if (c->agg_contents)
351 {
352 struct ipa_agg_jump_function *agg;
353
354 if (c->code == predicate::changed
355 && !c->by_ref
356 && (known_vals[c->operand_num] == error_mark_node))
357 continue;
358
359 if (known_aggs.exists ())
360 {
361 agg = known_aggs[c->operand_num];
362 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
363 c->offset, c->by_ref);
364 }
365 else
366 val = NULL_TREE;
367 }
368 else
369 {
370 val = known_vals[c->operand_num];
371 if (val == error_mark_node && c->code != predicate::changed)
372 val = NULL_TREE;
373 }
374
375 if (!val)
376 {
377 clause |= 1 << (i + predicate::first_dynamic_condition);
378 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
379 continue;
380 }
381 if (c->code == predicate::changed)
382 {
383 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
384 continue;
385 }
386
b3d74655 387 if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val)))
b9a58fc5 388 {
389 clause |= 1 << (i + predicate::first_dynamic_condition);
390 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
391 continue;
392 }
393 if (c->code == predicate::is_not_constant)
394 {
395 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
396 continue;
397 }
398
b3d74655 399 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
400 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
401 {
402 if (!val)
403 break;
404 if (!op->val[0])
405 val = fold_unary (op->code, op->type, val);
406 else if (!op->val[1])
407 val = fold_binary (op->code, op->type,
408 op->index ? op->val[0] : val,
409 op->index ? val : op->val[0]);
410 else if (op->index == 0)
411 val = fold_ternary (op->code, op->type,
412 val, op->val[0], op->val[1]);
413 else if (op->index == 1)
414 val = fold_ternary (op->code, op->type,
415 op->val[0], val, op->val[1]);
416 else if (op->index == 2)
417 val = fold_ternary (op->code, op->type,
418 op->val[0], op->val[1], val);
419 else
420 val = NULL_TREE;
421 }
422
b9a58fc5 423 res = val
424 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
425 : NULL;
426
427 if (res && integer_zerop (res))
428 continue;
429
430 clause |= 1 << (i + predicate::first_dynamic_condition);
431 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
432 }
433 *ret_clause = clause;
434 if (ret_nonspec_clause)
435 *ret_nonspec_clause = nonspec_clause;
436}
437
438
439/* Work out what conditions might be true at invocation of E. */
440
441void
442evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
443 clause_t *clause_ptr,
444 clause_t *nonspec_clause_ptr,
445 vec<tree> *known_vals_ptr,
446 vec<ipa_polymorphic_call_context>
447 *known_contexts_ptr,
448 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
449{
450 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2e966e2a 451 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
b9a58fc5 452 vec<tree> known_vals = vNULL;
453 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
454
455 if (clause_ptr)
456 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
457 if (known_vals_ptr)
458 known_vals_ptr->create (0);
459 if (known_contexts_ptr)
460 known_contexts_ptr->create (0);
461
462 if (ipa_node_params_sum
463 && !e->call_stmt_cannot_inline_p
464 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
465 {
2e966e2a 466 class ipa_node_params *caller_parms_info, *callee_pi;
467 class ipa_edge_args *args = IPA_EDGE_REF (e);
468 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 469 int i, count = ipa_get_cs_argument_count (args);
470
471 if (e->caller->global.inlined_to)
944ee40d 472 caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
b9a58fc5 473 else
944ee40d 474 caller_parms_info = IPA_NODE_REF (e->caller);
475 callee_pi = IPA_NODE_REF (e->callee);
b9a58fc5 476
477 if (count && (info->conds || known_vals_ptr))
478 known_vals.safe_grow_cleared (count);
479 if (count && (info->conds || known_aggs_ptr))
480 known_aggs.safe_grow_cleared (count);
481 if (count && known_contexts_ptr)
482 known_contexts_ptr->safe_grow_cleared (count);
483
484 for (i = 0; i < count; i++)
485 {
486 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
944ee40d 487 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
488 ipa_get_type (callee_pi, i));
b9a58fc5 489
490 if (!cst && e->call_stmt
491 && i < (int)gimple_call_num_args (e->call_stmt))
492 {
493 cst = gimple_call_arg (e->call_stmt, i);
494 if (!is_gimple_min_invariant (cst))
495 cst = NULL;
496 }
497 if (cst)
498 {
499 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
500 if (known_vals.exists ())
501 known_vals[i] = cst;
502 }
503 else if (inline_p && !es->param[i].change_prob)
504 known_vals[i] = error_mark_node;
505
506 if (known_contexts_ptr)
944ee40d 507 (*known_contexts_ptr)[i]
508 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
b9a58fc5 509 /* TODO: When IPA-CP starts propagating and merging aggregate jump
510 functions, use its knowledge of the caller too, just like the
511 scalar case above. */
512 known_aggs[i] = &jf->agg;
513 }
514 }
515 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
516 && ((clause_ptr && info->conds) || known_vals_ptr))
517 {
518 int i, count = (int)gimple_call_num_args (e->call_stmt);
519
520 if (count && (info->conds || known_vals_ptr))
521 known_vals.safe_grow_cleared (count);
522 for (i = 0; i < count; i++)
523 {
524 tree cst = gimple_call_arg (e->call_stmt, i);
525 if (!is_gimple_min_invariant (cst))
526 cst = NULL;
527 if (cst)
528 known_vals[i] = cst;
529 }
530 }
531
532 evaluate_conditions_for_known_args (callee, inline_p,
533 known_vals, known_aggs, clause_ptr,
534 nonspec_clause_ptr);
535
536 if (known_vals_ptr)
537 *known_vals_ptr = known_vals;
538 else
539 known_vals.release ();
540
541 if (known_aggs_ptr)
542 *known_aggs_ptr = known_aggs;
543 else
544 known_aggs.release ();
545}
546
547
1297cbcd 548/* Allocate the function summary. */
b9a58fc5 549
550static void
1297cbcd 551ipa_fn_summary_alloc (void)
b9a58fc5 552{
1297cbcd 553 gcc_checking_assert (!ipa_fn_summaries);
554 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
1add72d5 555 ipa_call_summaries = new ipa_call_summary_t (symtab);
b9a58fc5 556}
557
d2c2513e 558ipa_call_summary::~ipa_call_summary ()
b9a58fc5 559{
b9a58fc5 560 if (predicate)
561 edge_predicate_pool.remove (predicate);
d2c2513e 562
b9a58fc5 563 param.release ();
564}
565
d2c2513e 566ipa_fn_summary::~ipa_fn_summary ()
b9a58fc5 567{
b9a58fc5 568 if (loop_iterations)
d2c2513e 569 edge_predicate_pool.remove (loop_iterations);
b9a58fc5 570 if (loop_stride)
d2c2513e 571 edge_predicate_pool.remove (loop_stride);
b9a58fc5 572 vec_free (conds);
573 vec_free (size_time_table);
b9a58fc5 574}
575
b9a58fc5 576void
d2c2513e 577ipa_fn_summary_t::remove_callees (cgraph_node *node)
b9a58fc5 578{
d2c2513e 579 cgraph_edge *e;
580 for (e = node->callees; e; e = e->next_callee)
581 ipa_call_summaries->remove (e);
582 for (e = node->indirect_calls; e; e = e->next_callee)
583 ipa_call_summaries->remove (e);
b9a58fc5 584}
585
586/* Same as remap_predicate_after_duplication but handle hint predicate *P.
587 Additionally care about allocating new memory slot for updated predicate
588 and set it to NULL when it becomes true or false (and thus uninteresting).
589 */
590
591static void
592remap_hint_predicate_after_duplication (predicate **p,
593 clause_t possible_truths)
594{
595 predicate new_predicate;
596
597 if (!*p)
598 return;
599
600 new_predicate = (*p)->remap_after_duplication (possible_truths);
601 /* We do not want to free previous predicate; it is used by node origin. */
602 *p = NULL;
603 set_hint_predicate (p, new_predicate);
604}
605
606
607/* Hook that is called by cgraph.c when a node is duplicated. */
608void
1297cbcd 609ipa_fn_summary_t::duplicate (cgraph_node *src,
b9a58fc5 610 cgraph_node *dst,
1297cbcd 611 ipa_fn_summary *,
612 ipa_fn_summary *info)
b9a58fc5 613{
d2c2513e 614 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
b9a58fc5 615 /* TODO: as an optimization, we may avoid copying conditions
616 that are known to be false or true. */
617 info->conds = vec_safe_copy (info->conds);
618
619 /* When there are any replacements in the function body, see if we can figure
620 out that something was optimized out. */
621 if (ipa_node_params_sum && dst->clone.tree_map)
622 {
623 vec<size_time_entry, va_gc> *entry = info->size_time_table;
624 /* Use SRC parm info since it may not be copied yet. */
2e966e2a 625 class ipa_node_params *parms_info = IPA_NODE_REF (src);
b9a58fc5 626 vec<tree> known_vals = vNULL;
627 int count = ipa_get_param_count (parms_info);
628 int i, j;
629 clause_t possible_truths;
630 predicate true_pred = true;
631 size_time_entry *e;
632 int optimized_out_size = 0;
633 bool inlined_to_p = false;
634 struct cgraph_edge *edge, *next;
635
636 info->size_time_table = 0;
637 known_vals.safe_grow_cleared (count);
638 for (i = 0; i < count; i++)
639 {
640 struct ipa_replace_map *r;
641
642 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
643 {
40d8e161 644 if (r->parm_num == i)
b9a58fc5 645 {
646 known_vals[i] = r->new_tree;
647 break;
648 }
649 }
650 }
651 evaluate_conditions_for_known_args (dst, false,
652 known_vals,
653 vNULL,
654 &possible_truths,
655 /* We are going to specialize,
656 so ignore nonspec truths. */
657 NULL);
658 known_vals.release ();
659
660 info->account_size_time (0, 0, true_pred, true_pred);
661
662 /* Remap size_time vectors.
663 Simplify the predicate by prunning out alternatives that are known
664 to be false.
665 TODO: as on optimization, we can also eliminate conditions known
666 to be true. */
667 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
668 {
669 predicate new_exec_pred;
670 predicate new_nonconst_pred;
671 new_exec_pred = e->exec_predicate.remap_after_duplication
672 (possible_truths);
673 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
674 (possible_truths);
675 if (new_exec_pred == false || new_nonconst_pred == false)
676 optimized_out_size += e->size;
677 else
678 info->account_size_time (e->size, e->time, new_exec_pred,
679 new_nonconst_pred);
680 }
681
682 /* Remap edge predicates with the same simplification as above.
683 Also copy constantness arrays. */
684 for (edge = dst->callees; edge; edge = next)
685 {
686 predicate new_predicate;
2e966e2a 687 class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
b9a58fc5 688 next = edge->next_callee;
689
690 if (!edge->inline_failed)
691 inlined_to_p = true;
692 if (!es->predicate)
693 continue;
694 new_predicate = es->predicate->remap_after_duplication
695 (possible_truths);
696 if (new_predicate == false && *es->predicate != false)
1297cbcd 697 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
b9a58fc5 698 edge_set_predicate (edge, &new_predicate);
699 }
700
701 /* Remap indirect edge predicates with the same simplificaiton as above.
702 Also copy constantness arrays. */
703 for (edge = dst->indirect_calls; edge; edge = next)
704 {
705 predicate new_predicate;
2e966e2a 706 class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
b9a58fc5 707 next = edge->next_callee;
708
709 gcc_checking_assert (edge->inline_failed);
710 if (!es->predicate)
711 continue;
712 new_predicate = es->predicate->remap_after_duplication
713 (possible_truths);
714 if (new_predicate == false && *es->predicate != false)
1297cbcd 715 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
b9a58fc5 716 edge_set_predicate (edge, &new_predicate);
717 }
718 remap_hint_predicate_after_duplication (&info->loop_iterations,
719 possible_truths);
720 remap_hint_predicate_after_duplication (&info->loop_stride,
721 possible_truths);
b9a58fc5 722
723 /* If inliner or someone after inliner will ever start producing
724 non-trivial clones, we will get trouble with lack of information
725 about updating self sizes, because size vectors already contains
726 sizes of the calees. */
727 gcc_assert (!inlined_to_p || !optimized_out_size);
728 }
729 else
730 {
731 info->size_time_table = vec_safe_copy (info->size_time_table);
732 if (info->loop_iterations)
733 {
734 predicate p = *info->loop_iterations;
735 info->loop_iterations = NULL;
736 set_hint_predicate (&info->loop_iterations, p);
737 }
738 if (info->loop_stride)
739 {
740 predicate p = *info->loop_stride;
741 info->loop_stride = NULL;
742 set_hint_predicate (&info->loop_stride, p);
743 }
b9a58fc5 744 }
745 if (!dst->global.inlined_to)
1297cbcd 746 ipa_update_overall_fn_summary (dst);
b9a58fc5 747}
748
749
750/* Hook that is called by cgraph.c when a node is duplicated. */
751
752void
753ipa_call_summary_t::duplicate (struct cgraph_edge *src,
754 struct cgraph_edge *dst,
2e966e2a 755 class ipa_call_summary *srcinfo,
756 class ipa_call_summary *info)
b9a58fc5 757{
d2c2513e 758 new (info) ipa_call_summary (*srcinfo);
b9a58fc5 759 info->predicate = NULL;
760 edge_set_predicate (dst, srcinfo->predicate);
761 info->param = srcinfo->param.copy ();
762 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
763 {
764 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
765 - eni_size_weights.call_cost);
766 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
767 - eni_time_weights.call_cost);
768 }
769}
770
b9a58fc5 771/* Dump edge summaries associated to NODE and recursively to all clones.
772 Indent by INDENT. */
773
774static void
775dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
2e966e2a 776 class ipa_fn_summary *info)
b9a58fc5 777{
778 struct cgraph_edge *edge;
779 for (edge = node->callees; edge; edge = edge->next_callee)
780 {
2e966e2a 781 class ipa_call_summary *es = ipa_call_summaries->get (edge);
b9a58fc5 782 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
783 int i;
784
785 fprintf (f,
d2c2513e 786 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i time: %2i",
b9a58fc5 787 indent, "", callee->name (), callee->order,
788 !edge->inline_failed
789 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
7c343235 790 indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
d2c2513e 791 es->call_stmt_size, es->call_stmt_time);
792
793 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
794 if (s != NULL)
795 fprintf (f, "callee size:%2i stack:%2i",
796 (int) (s->size / ipa_fn_summary::size_scale),
797 (int) s->estimated_stack_size);
b9a58fc5 798
799 if (es->predicate)
800 {
801 fprintf (f, " predicate: ");
802 es->predicate->dump (f, info->conds);
803 }
804 else
805 fprintf (f, "\n");
806 if (es->param.exists ())
807 for (i = 0; i < (int) es->param.length (); i++)
808 {
809 int prob = es->param[i].change_prob;
810
811 if (!prob)
812 fprintf (f, "%*s op%i is compile time invariant\n",
813 indent + 2, "", i);
814 else if (prob != REG_BR_PROB_BASE)
815 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
816 prob * 100.0 / REG_BR_PROB_BASE);
817 }
818 if (!edge->inline_failed)
819 {
fbc9c62d 820 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
b9a58fc5 821 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
822 " callee size %i\n",
823 indent + 2, "",
b53d4f56 824 (int) s->stack_frame_offset,
825 (int) s->estimated_self_stack_size,
826 (int) s->estimated_stack_size);
b9a58fc5 827 dump_ipa_call_summary (f, indent + 2, callee, info);
828 }
829 }
830 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
831 {
2e966e2a 832 class ipa_call_summary *es = ipa_call_summaries->get (edge);
7c343235 833 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
b9a58fc5 834 " time: %2i",
835 indent, "",
836 es->loop_depth,
7c343235 837 edge->sreal_frequency ().to_double (), es->call_stmt_size,
838 es->call_stmt_time);
b9a58fc5 839 if (es->predicate)
840 {
841 fprintf (f, "predicate: ");
842 es->predicate->dump (f, info->conds);
843 }
844 else
845 fprintf (f, "\n");
846 }
847}
848
849
850void
1297cbcd 851ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
b9a58fc5 852{
853 if (node->definition)
854 {
2e966e2a 855 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
d2c2513e 856 if (s != NULL)
b9a58fc5 857 {
d2c2513e 858 size_time_entry *e;
859 int i;
860 fprintf (f, "IPA function summary for %s", node->dump_name ());
861 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
862 fprintf (f, " always_inline");
863 if (s->inlinable)
864 fprintf (f, " inlinable");
865 if (s->fp_expressions)
866 fprintf (f, " fp_expression");
867 fprintf (f, "\n global time: %f\n", s->time.to_double ());
868 fprintf (f, " self size: %i\n", s->self_size);
869 fprintf (f, " global size: %i\n", s->size);
870 fprintf (f, " min size: %i\n", s->min_size);
871 fprintf (f, " self stack: %i\n",
872 (int) s->estimated_self_stack_size);
873 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
874 if (s->growth)
875 fprintf (f, " estimated growth:%i\n", (int) s->growth);
876 if (s->scc_no)
877 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
878 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
879 {
880 fprintf (f, " size:%f, time:%f",
881 (double) e->size / ipa_fn_summary::size_scale,
882 e->time.to_double ());
883 if (e->exec_predicate != true)
884 {
885 fprintf (f, ", executed if:");
886 e->exec_predicate.dump (f, s->conds, 0);
887 }
888 if (e->exec_predicate != e->nonconst_predicate)
889 {
890 fprintf (f, ", nonconst if:");
891 e->nonconst_predicate.dump (f, s->conds, 0);
892 }
893 fprintf (f, "\n");
894 }
895 if (s->loop_iterations)
b9a58fc5 896 {
d2c2513e 897 fprintf (f, " loop iterations:");
898 s->loop_iterations->dump (f, s->conds);
b9a58fc5 899 }
d2c2513e 900 if (s->loop_stride)
b9a58fc5 901 {
d2c2513e 902 fprintf (f, " loop stride:");
903 s->loop_stride->dump (f, s->conds);
b9a58fc5 904 }
d2c2513e 905 fprintf (f, " calls:\n");
906 dump_ipa_call_summary (f, 4, node, s);
b9a58fc5 907 fprintf (f, "\n");
908 }
d2c2513e 909 else
910 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
b9a58fc5 911 }
912}
913
914DEBUG_FUNCTION void
1297cbcd 915ipa_debug_fn_summary (struct cgraph_node *node)
b9a58fc5 916{
1297cbcd 917 ipa_dump_fn_summary (stderr, node);
b9a58fc5 918}
919
920void
1297cbcd 921ipa_dump_fn_summaries (FILE *f)
b9a58fc5 922{
923 struct cgraph_node *node;
924
925 FOR_EACH_DEFINED_FUNCTION (node)
926 if (!node->global.inlined_to)
1297cbcd 927 ipa_dump_fn_summary (f, node);
b9a58fc5 928}
929
930/* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
931 boolean variable pointed to by DATA. */
932
933static bool
934mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
935 void *data)
936{
937 bool *b = (bool *) data;
938 *b = true;
939 return true;
940}
941
942/* If OP refers to value of function parameter, return the corresponding
943 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
944 PARM_DECL) will be stored to *SIZE_P in that case too. */
945
946static tree
915df3d8 947unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
c47e61e1 948 poly_int64 *size_p)
b9a58fc5 949{
950 /* SSA_NAME referring to parm default def? */
951 if (TREE_CODE (op) == SSA_NAME
952 && SSA_NAME_IS_DEFAULT_DEF (op)
953 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
954 {
955 if (size_p)
c47e61e1 956 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
b9a58fc5 957 return SSA_NAME_VAR (op);
958 }
959 /* Non-SSA parm reference? */
960 if (TREE_CODE (op) == PARM_DECL)
961 {
962 bool modified = false;
963
964 ao_ref refd;
965 ao_ref_init (&refd, op);
915df3d8 966 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
967 mark_modified, &modified, NULL, NULL,
968 fbi->aa_walk_budget + 1);
969 if (walked < 0)
970 {
971 fbi->aa_walk_budget = 0;
972 return NULL_TREE;
973 }
b9a58fc5 974 if (!modified)
975 {
976 if (size_p)
c47e61e1 977 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
b9a58fc5 978 return op;
979 }
980 }
981 return NULL_TREE;
982}
983
984/* If OP refers to value of function parameter, return the corresponding
985 parameter. Also traverse chains of SSA register assignments. If non-NULL,
986 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
987 stored to *SIZE_P in that case too. */
988
989static tree
915df3d8 990unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
c47e61e1 991 poly_int64 *size_p)
b9a58fc5 992{
915df3d8 993 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
b9a58fc5 994 if (res)
995 return res;
996
997 if (TREE_CODE (op) == SSA_NAME
998 && !SSA_NAME_IS_DEFAULT_DEF (op)
999 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
915df3d8 1000 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
b9a58fc5 1001 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1002 size_p);
1003 return NULL_TREE;
1004}
1005
1006/* If OP refers to a value of a function parameter or value loaded from an
1007 aggregate passed to a parameter (either by value or reference), return TRUE
1008 and store the number of the parameter to *INDEX_P, the access size into
1009 *SIZE_P, and information whether and how it has been loaded from an
1010 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1011 statement in which OP is used or loaded. */
1012
1013static bool
1014unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1015 gimple *stmt, tree op, int *index_p,
c47e61e1 1016 poly_int64 *size_p,
b9a58fc5 1017 struct agg_position_info *aggpos)
1018{
915df3d8 1019 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
b9a58fc5 1020
1021 gcc_checking_assert (aggpos);
1022 if (res)
1023 {
1024 *index_p = ipa_get_param_decl_index (fbi->info, res);
1025 if (*index_p < 0)
1026 return false;
1027 aggpos->agg_contents = false;
1028 aggpos->by_ref = false;
1029 return true;
1030 }
1031
1032 if (TREE_CODE (op) == SSA_NAME)
1033 {
1034 if (SSA_NAME_IS_DEFAULT_DEF (op)
1035 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1036 return false;
1037 stmt = SSA_NAME_DEF_STMT (op);
1038 op = gimple_assign_rhs1 (stmt);
1039 if (!REFERENCE_CLASS_P (op))
1040 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1041 aggpos);
1042 }
1043
1044 aggpos->agg_contents = true;
1045 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1046 stmt, op, index_p, &aggpos->offset,
1047 size_p, &aggpos->by_ref);
1048}
1049
1050/* See if statement might disappear after inlining.
1051 0 - means not eliminated
1052 1 - half of statements goes away
1053 2 - for sure it is eliminated.
1054 We are not terribly sophisticated, basically looking for simple abstraction
1055 penalty wrappers. */
1056
1057static int
915df3d8 1058eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
b9a58fc5 1059{
1060 enum gimple_code code = gimple_code (stmt);
1061 enum tree_code rhs_code;
1062
1063 if (!optimize)
1064 return 0;
1065
1066 switch (code)
1067 {
1068 case GIMPLE_RETURN:
1069 return 2;
1070 case GIMPLE_ASSIGN:
1071 if (gimple_num_ops (stmt) != 2)
1072 return 0;
1073
1074 rhs_code = gimple_assign_rhs_code (stmt);
1075
1076 /* Casts of parameters, loads from parameters passed by reference
1077 and stores to return value or parameters are often free after
1078 inlining dua to SRA and further combining.
1079 Assume that half of statements goes away. */
1080 if (CONVERT_EXPR_CODE_P (rhs_code)
1081 || rhs_code == VIEW_CONVERT_EXPR
1082 || rhs_code == ADDR_EXPR
1083 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1084 {
1085 tree rhs = gimple_assign_rhs1 (stmt);
1086 tree lhs = gimple_assign_lhs (stmt);
1087 tree inner_rhs = get_base_address (rhs);
1088 tree inner_lhs = get_base_address (lhs);
1089 bool rhs_free = false;
1090 bool lhs_free = false;
1091
1092 if (!inner_rhs)
1093 inner_rhs = rhs;
1094 if (!inner_lhs)
1095 inner_lhs = lhs;
1096
1097 /* Reads of parameter are expected to be free. */
915df3d8 1098 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
b9a58fc5 1099 rhs_free = true;
1100 /* Match expressions of form &this->field. Those will most likely
1101 combine with something upstream after inlining. */
1102 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1103 {
1104 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1105 if (TREE_CODE (op) == PARM_DECL)
1106 rhs_free = true;
1107 else if (TREE_CODE (op) == MEM_REF
915df3d8 1108 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1109 NULL))
b9a58fc5 1110 rhs_free = true;
1111 }
1112
1113 /* When parameter is not SSA register because its address is taken
1114 and it is just copied into one, the statement will be completely
1115 free after inlining (we will copy propagate backward). */
1116 if (rhs_free && is_gimple_reg (lhs))
1117 return 2;
1118
1119 /* Reads of parameters passed by reference
1120 expected to be free (i.e. optimized out after inlining). */
1121 if (TREE_CODE (inner_rhs) == MEM_REF
915df3d8 1122 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
b9a58fc5 1123 rhs_free = true;
1124
1125 /* Copying parameter passed by reference into gimple register is
1126 probably also going to copy propagate, but we can't be quite
1127 sure. */
1128 if (rhs_free && is_gimple_reg (lhs))
1129 lhs_free = true;
1130
1131 /* Writes to parameters, parameters passed by value and return value
1132 (either dirrectly or passed via invisible reference) are free.
1133
1134 TODO: We ought to handle testcase like
1135 struct a {int a,b;};
1136 struct a
1137 retrurnsturct (void)
1138 {
1139 struct a a ={1,2};
1140 return a;
1141 }
1142
1143 This translate into:
1144
1145 retrurnsturct ()
1146 {
1147 int a$b;
1148 int a$a;
1149 struct a a;
1150 struct a D.2739;
1151
1152 <bb 2>:
1153 D.2739.a = 1;
1154 D.2739.b = 2;
1155 return D.2739;
1156
1157 }
1158 For that we either need to copy ipa-split logic detecting writes
1159 to return value. */
1160 if (TREE_CODE (inner_lhs) == PARM_DECL
1161 || TREE_CODE (inner_lhs) == RESULT_DECL
1162 || (TREE_CODE (inner_lhs) == MEM_REF
915df3d8 1163 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1164 NULL)
b9a58fc5 1165 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1166 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1167 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1168 (inner_lhs,
1169 0))) == RESULT_DECL))))
1170 lhs_free = true;
1171 if (lhs_free
1172 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1173 rhs_free = true;
1174 if (lhs_free && rhs_free)
1175 return 1;
1176 }
1177 return 0;
1178 default:
1179 return 0;
1180 }
1181}
1182
b3d74655 1183/* Analyze EXPR if it represents a series of simple operations performed on
1184 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1185 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1186 Type of the parameter or load from an aggregate via the parameter is
1187 stored in *TYPE_P. Operations on the parameter are recorded to
1188 PARAM_OPS_P if it is not NULL. */
1189
1190static bool
1191decompose_param_expr (struct ipa_func_body_info *fbi,
1192 gimple *stmt, tree expr,
1193 int *index_p, tree *type_p,
1194 struct agg_position_info *aggpos,
1195 expr_eval_ops *param_ops_p = NULL)
1196{
1197 int op_limit = PARAM_VALUE (PARAM_IPA_MAX_PARAM_EXPR_OPS);
1198 int op_count = 0;
1199
1200 if (param_ops_p)
1201 *param_ops_p = NULL;
1202
1203 while (true)
1204 {
1205 expr_eval_op eval_op;
1206 unsigned rhs_count;
1207 unsigned cst_count = 0;
1208
1209 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1210 aggpos))
1211 {
1212 tree type = TREE_TYPE (expr);
1213
1214 if (aggpos->agg_contents)
1215 {
1216 /* Stop if containing bit-field. */
1217 if (TREE_CODE (expr) == BIT_FIELD_REF
1218 || contains_bitfld_component_ref_p (expr))
1219 break;
1220 }
1221
1222 *type_p = type;
1223 return true;
1224 }
1225
1226 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1227 break;
1228
1229 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1230 break;
1231
1232 switch (gimple_assign_rhs_class (stmt))
1233 {
1234 case GIMPLE_SINGLE_RHS:
1235 expr = gimple_assign_rhs1 (stmt);
1236 continue;
1237
1238 case GIMPLE_UNARY_RHS:
1239 rhs_count = 1;
1240 break;
1241
1242 case GIMPLE_BINARY_RHS:
1243 rhs_count = 2;
1244 break;
1245
1246 case GIMPLE_TERNARY_RHS:
1247 rhs_count = 3;
1248 break;
1249
1250 default:
1251 goto fail;
1252 }
1253
1254 /* Stop if expression is too complex. */
1255 if (op_count++ == op_limit)
1256 break;
1257
1258 if (param_ops_p)
1259 {
1260 eval_op.code = gimple_assign_rhs_code (stmt);
1261 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1262 eval_op.val[0] = NULL_TREE;
1263 eval_op.val[1] = NULL_TREE;
1264 }
1265
1266 expr = NULL_TREE;
1267 for (unsigned i = 0; i < rhs_count; i++)
1268 {
1269 tree op = gimple_op (stmt, i + 1);
1270
1271 gcc_assert (op && !TYPE_P (op));
1272 if (is_gimple_ip_invariant (op))
1273 {
1274 if (++cst_count == rhs_count)
1275 goto fail;
1276
1277 eval_op.val[cst_count - 1] = op;
1278 }
1279 else if (!expr)
1280 {
1281 /* Found a non-constant operand, and record its index in rhs
1282 operands. */
1283 eval_op.index = i;
1284 expr = op;
1285 }
1286 else
1287 {
1288 /* Found more than one non-constant operands. */
1289 goto fail;
1290 }
1291 }
1292
1293 if (param_ops_p)
1294 vec_safe_insert (*param_ops_p, 0, eval_op);
1295 }
1296
1297 /* Failed to decompose, free resource and return. */
1298fail:
1299 if (param_ops_p)
1300 vec_free (*param_ops_p);
1301
1302 return false;
1303}
b9a58fc5 1304
1305/* If BB ends by a conditional we can turn into predicates, attach corresponding
1306 predicates to the CFG edges. */
1307
1308static void
1309set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
2e966e2a 1310 class ipa_fn_summary *summary,
b9a58fc5 1311 basic_block bb)
1312{
1313 gimple *last;
b3d74655 1314 tree op, op2;
b9a58fc5 1315 int index;
b9a58fc5 1316 struct agg_position_info aggpos;
1317 enum tree_code code, inverted_code;
1318 edge e;
1319 edge_iterator ei;
1320 gimple *set_stmt;
b3d74655 1321 tree param_type;
1322 expr_eval_ops param_ops;
b9a58fc5 1323
1324 last = last_stmt (bb);
1325 if (!last || gimple_code (last) != GIMPLE_COND)
1326 return;
1327 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1328 return;
1329 op = gimple_cond_lhs (last);
b3d74655 1330
1331 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1332 &param_ops))
b9a58fc5 1333 {
1334 code = gimple_cond_code (last);
1335 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1336
1337 FOR_EACH_EDGE (e, ei, bb->succs)
1338 {
1339 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1340 ? code : inverted_code);
1341 /* invert_tree_comparison will return ERROR_MARK on FP
1342 comparsions that are not EQ/NE instead of returning proper
c23b675c 1343 unordered one. Be sure it is not confused with NON_CONSTANT.
1344
1345 And if the edge's target is the final block of diamond CFG graph
1346 of this conditional statement, we do not need to compute
1347 predicate for the edge because the final block's predicate must
1348 be at least as that of the first block of the statement. */
1349 if (this_code != ERROR_MARK
1350 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
b9a58fc5 1351 {
1352 predicate p
b3d74655 1353 = add_condition (summary, index, param_type, &aggpos,
1354 this_code, gimple_cond_rhs (last), param_ops);
b9a58fc5 1355 e->aux = edge_predicate_pool.allocate ();
1356 *(predicate *) e->aux = p;
1357 }
1358 }
b3d74655 1359 vec_free (param_ops);
b9a58fc5 1360 }
1361
1362 if (TREE_CODE (op) != SSA_NAME)
1363 return;
1364 /* Special case
1365 if (builtin_constant_p (op))
1366 constant_code
1367 else
1368 nonconstant_code.
1369 Here we can predicate nonconstant_code. We can't
1370 really handle constant_code since we have no predicate
1371 for this and also the constant code is not known to be
1372 optimized away when inliner doen't see operand is constant.
1373 Other optimizers might think otherwise. */
1374 if (gimple_cond_code (last) != NE_EXPR
1375 || !integer_zerop (gimple_cond_rhs (last)))
1376 return;
1377 set_stmt = SSA_NAME_DEF_STMT (op);
1378 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1379 || gimple_call_num_args (set_stmt) != 1)
1380 return;
1381 op2 = gimple_call_arg (set_stmt, 0);
b3d74655 1382 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
b9a58fc5 1383 return;
1384 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1385 {
b3d74655 1386 predicate p = add_condition (summary, index, param_type, &aggpos,
b9a58fc5 1387 predicate::is_not_constant, NULL_TREE);
1388 e->aux = edge_predicate_pool.allocate ();
1389 *(predicate *) e->aux = p;
1390 }
1391}
1392
1393
1394/* If BB ends by a switch we can turn into predicates, attach corresponding
1395 predicates to the CFG edges. */
1396
1397static void
1398set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
2e966e2a 1399 class ipa_fn_summary *summary,
b9a58fc5 1400 basic_block bb)
1401{
1402 gimple *lastg;
1403 tree op;
1404 int index;
b9a58fc5 1405 struct agg_position_info aggpos;
1406 edge e;
1407 edge_iterator ei;
1408 size_t n;
1409 size_t case_idx;
b3d74655 1410 tree param_type;
1411 expr_eval_ops param_ops;
b9a58fc5 1412
1413 lastg = last_stmt (bb);
1414 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1415 return;
1416 gswitch *last = as_a <gswitch *> (lastg);
1417 op = gimple_switch_index (last);
b3d74655 1418 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1419 &param_ops))
b9a58fc5 1420 return;
1421
0baccb2f 1422 auto_vec<std::pair<tree, tree> > ranges;
1423 tree type = TREE_TYPE (op);
1424 int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS);
1425 int bound_count = 0;
1426 wide_int vr_wmin, vr_wmax;
1427 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
1428
b9a58fc5 1429 FOR_EACH_EDGE (e, ei, bb->succs)
1430 {
1431 e->aux = edge_predicate_pool.allocate ();
1432 *(predicate *) e->aux = false;
1433 }
0baccb2f 1434
c23b675c 1435 e = gimple_switch_edge (cfun, last, 0);
1436 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1437 default case if its target basic block is in convergence point of all
1438 switch cases, which can be determined by checking whether it
1439 post-dominates the switch statement. */
1440 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1441 bound_count = INT_MAX;
1442
b9a58fc5 1443 n = gimple_switch_num_labels (last);
0baccb2f 1444 for (case_idx = 1; case_idx < n; ++case_idx)
b9a58fc5 1445 {
1446 tree cl = gimple_switch_label (last, case_idx);
c23b675c 1447 tree min = CASE_LOW (cl);
1448 tree max = CASE_HIGH (cl);
b9a58fc5 1449 predicate p;
1450
b3d74655 1451 e = gimple_switch_edge (cfun, last, case_idx);
1452
c23b675c 1453 /* The case value might not have same type as switch expression,
1454 extend the value based on the expression type. */
1455 if (TREE_TYPE (min) != type)
1456 min = wide_int_to_tree (type, wi::to_wide (min));
b9a58fc5 1457
0baccb2f 1458 if (!max)
c23b675c 1459 max = min;
1460 else if (TREE_TYPE (max) != type)
1461 max = wide_int_to_tree (type, wi::to_wide (max));
1462
1463 /* The case's target basic block is in convergence point of all switch
1464 cases, its predicate should be at least as that of the switch
1465 statement. */
1466 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1467 p = true;
1468 else if (min == max)
b3d74655 1469 p = add_condition (summary, index, param_type, &aggpos, EQ_EXPR,
1470 min, param_ops);
b9a58fc5 1471 else
1472 {
1473 predicate p1, p2;
b3d74655 1474 p1 = add_condition (summary, index, param_type, &aggpos, GE_EXPR,
1475 min, param_ops);
1476 p2 = add_condition (summary, index, param_type, &aggpos, LE_EXPR,
1477 max, param_ops);
b9a58fc5 1478 p = p1 & p2;
1479 }
2e966e2a 1480 *(class predicate *) e->aux
1481 = p.or_with (summary->conds, *(class predicate *) e->aux);
0baccb2f 1482
1483 /* If there are too many disjoint case ranges, predicate for default
1484 case might become too complicated. So add a limit here. */
1485 if (bound_count > bound_limit)
1486 continue;
1487
1488 bool new_range = true;
1489
1490 if (!ranges.is_empty ())
1491 {
1492 wide_int curr_wmin = wi::to_wide (min);
1493 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1494
1495 /* Merge case ranges if they are continuous. */
1496 if (curr_wmin == last_wmax + 1)
1497 new_range = false;
1498 else if (vr_type == VR_ANTI_RANGE)
1499 {
1500 /* If two disjoint case ranges can be connected by anti-range
1501 of switch index, combine them to one range. */
1502 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1503 vr_type = VR_UNDEFINED;
1504 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1505 new_range = false;
1506 }
1507 }
1508
0baccb2f 1509 /* Create/extend a case range. And we count endpoints of range set,
1510 this number nearly equals to number of conditions that we will create
1511 for predicate of default case. */
1512 if (new_range)
1513 {
1514 bound_count += (min == max) ? 1 : 2;
1515 ranges.safe_push (std::make_pair (min, max));
1516 }
1517 else
1518 {
1519 bound_count += (ranges.last ().first == ranges.last ().second);
1520 ranges.last ().second = max;
1521 }
1522 }
1523
1524 e = gimple_switch_edge (cfun, last, 0);
1525 if (bound_count > bound_limit)
1526 {
1527 *(class predicate *) e->aux = true;
b3d74655 1528 vec_free (param_ops);
0baccb2f 1529 return;
b9a58fc5 1530 }
0baccb2f 1531
1532 predicate p_seg = true;
1533 predicate p_all = false;
1534
1535 if (vr_type != VR_RANGE)
1536 {
1537 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1538 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1539 }
1540
1541 /* Construct predicate to represent default range set that is negation of
1542 all case ranges. Case range is classified as containing single/non-single
1543 values. Suppose a piece of case ranges in the following.
1544
1545 [D1...D2] [S1] ... [Sn] [D3...D4]
1546
1547 To represent default case's range sets between two non-single value
1548 case ranges (From D2 to D3), we construct predicate as:
1549
1550 D2 < x < D3 && x != S1 && ... && x != Sn
1551 */
1552 for (size_t i = 0; i < ranges.length (); i++)
1553 {
1554 tree min = ranges[i].first;
1555 tree max = ranges[i].second;
1556
1557 if (min == max)
b3d74655 1558 p_seg &= add_condition (summary, index, param_type, &aggpos, NE_EXPR,
1559 min, param_ops);
0baccb2f 1560 else
1561 {
1562 /* Do not create sub-predicate for range that is beyond low bound
1563 of switch index. */
1564 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1565 {
b3d74655 1566 p_seg &= add_condition (summary, index, param_type, &aggpos,
1567 LT_EXPR, min, param_ops);
0baccb2f 1568 p_all = p_all.or_with (summary->conds, p_seg);
1569 }
1570
1571 /* Do not create sub-predicate for range that is beyond up bound
1572 of switch index. */
1573 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1574 {
1575 p_seg = false;
1576 break;
1577 }
1578
b3d74655 1579 p_seg = add_condition (summary, index, param_type, &aggpos, GT_EXPR,
1580 max, param_ops);
0baccb2f 1581 }
1582 }
1583
1584 p_all = p_all.or_with (summary->conds, p_seg);
1585 *(class predicate *) e->aux
1586 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
b3d74655 1587
1588 vec_free (param_ops);
b9a58fc5 1589}
1590
1591
1592/* For each BB in NODE attach to its AUX pointer predicate under
1593 which it is executable. */
1594
1595static void
1596compute_bb_predicates (struct ipa_func_body_info *fbi,
1597 struct cgraph_node *node,
2e966e2a 1598 class ipa_fn_summary *summary)
b9a58fc5 1599{
1600 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1601 bool done = false;
1602 basic_block bb;
1603
1604 FOR_EACH_BB_FN (bb, my_function)
1605 {
1606 set_cond_stmt_execution_predicate (fbi, summary, bb);
1607 set_switch_stmt_execution_predicate (fbi, summary, bb);
1608 }
1609
1610 /* Entry block is always executable. */
1611 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1612 = edge_predicate_pool.allocate ();
1613 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1614
1615 /* A simple dataflow propagation of predicates forward in the CFG.
1616 TODO: work in reverse postorder. */
1617 while (!done)
1618 {
1619 done = true;
1620 FOR_EACH_BB_FN (bb, my_function)
1621 {
1622 predicate p = false;
1623 edge e;
1624 edge_iterator ei;
1625 FOR_EACH_EDGE (e, ei, bb->preds)
1626 {
1627 if (e->src->aux)
1628 {
1629 predicate this_bb_predicate
1630 = *(predicate *) e->src->aux;
1631 if (e->aux)
2e966e2a 1632 this_bb_predicate &= (*(class predicate *) e->aux);
b9a58fc5 1633 p = p.or_with (summary->conds, this_bb_predicate);
1634 if (p == true)
1635 break;
1636 }
1637 }
c23b675c 1638 if (p != false)
b9a58fc5 1639 {
c23b675c 1640 basic_block pdom_bb;
1641
b9a58fc5 1642 if (!bb->aux)
1643 {
1644 done = false;
1645 bb->aux = edge_predicate_pool.allocate ();
1646 *((predicate *) bb->aux) = p;
1647 }
1648 else if (p != *(predicate *) bb->aux)
1649 {
1650 /* This OR operation is needed to ensure monotonous data flow
1651 in the case we hit the limit on number of clauses and the
1652 and/or operations above give approximate answers. */
1653 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1654 if (p != *(predicate *) bb->aux)
1655 {
1656 done = false;
1657 *((predicate *) bb->aux) = p;
1658 }
1659 }
c23b675c 1660
1661 /* For switch/if statement, we can OR-combine predicates of all
1662 its cases/branches to get predicate for basic block in their
1663 convergence point, but sometimes this will generate very
1664 complicated predicate. Actually, we can get simplified
1665 predicate in another way by using the fact that predicate
1666 for a basic block must also hold true for its post dominators.
1667 To be specific, basic block in convergence point of
1668 conditional statement should include predicate of the
1669 statement. */
1670 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1671 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1672 ;
1673 else if (!pdom_bb->aux)
1674 {
1675 done = false;
1676 pdom_bb->aux = edge_predicate_pool.allocate ();
1677 *((predicate *) pdom_bb->aux) = p;
1678 }
1679 else if (p != *(predicate *) pdom_bb->aux)
1680 {
1681 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1682 if (p != *(predicate *) pdom_bb->aux)
1683 {
1684 done = false;
1685 *((predicate *) pdom_bb->aux) = p;
1686 }
1687 }
b9a58fc5 1688 }
1689 }
1690 }
1691}
1692
1693
1694/* Return predicate specifying when the STMT might have result that is not
1695 a compile time constant. */
1696
1697static predicate
915df3d8 1698will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
2e966e2a 1699 class ipa_fn_summary *summary,
b9a58fc5 1700 tree expr,
1701 vec<predicate> nonconstant_names)
1702{
1703 tree parm;
1704 int index;
b9a58fc5 1705
1706 while (UNARY_CLASS_P (expr))
1707 expr = TREE_OPERAND (expr, 0);
1708
b3d74655 1709 parm = unmodified_parm (fbi, NULL, expr, NULL);
915df3d8 1710 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
b3d74655 1711 return add_condition (summary, index, TREE_TYPE (parm), NULL,
1712 predicate::changed, NULL_TREE);
b9a58fc5 1713 if (is_gimple_min_invariant (expr))
1714 return false;
1715 if (TREE_CODE (expr) == SSA_NAME)
1716 return nonconstant_names[SSA_NAME_VERSION (expr)];
1717 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1718 {
915df3d8 1719 predicate p1
1720 = will_be_nonconstant_expr_predicate (fbi, summary,
1721 TREE_OPERAND (expr, 0),
1722 nonconstant_names);
b9a58fc5 1723 if (p1 == true)
1724 return p1;
1725
915df3d8 1726 predicate p2
1727 = will_be_nonconstant_expr_predicate (fbi, summary,
1728 TREE_OPERAND (expr, 1),
1729 nonconstant_names);
b9a58fc5 1730 return p1.or_with (summary->conds, p2);
1731 }
1732 else if (TREE_CODE (expr) == COND_EXPR)
1733 {
915df3d8 1734 predicate p1
1735 = will_be_nonconstant_expr_predicate (fbi, summary,
1736 TREE_OPERAND (expr, 0),
1737 nonconstant_names);
b9a58fc5 1738 if (p1 == true)
1739 return p1;
1740
915df3d8 1741 predicate p2
1742 = will_be_nonconstant_expr_predicate (fbi, summary,
1743 TREE_OPERAND (expr, 1),
1744 nonconstant_names);
b9a58fc5 1745 if (p2 == true)
1746 return p2;
1747 p1 = p1.or_with (summary->conds, p2);
915df3d8 1748 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
b9a58fc5 1749 TREE_OPERAND (expr, 2),
1750 nonconstant_names);
1751 return p2.or_with (summary->conds, p1);
1752 }
c71993fc 1753 else if (TREE_CODE (expr) == CALL_EXPR)
1754 return true;
b9a58fc5 1755 else
1756 {
1757 debug_tree (expr);
1758 gcc_unreachable ();
1759 }
1760 return false;
1761}
1762
1763
1764/* Return predicate specifying when the STMT might have result that is not
1765 a compile time constant. */
1766
1767static predicate
1768will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2e966e2a 1769 class ipa_fn_summary *summary,
b9a58fc5 1770 gimple *stmt,
1771 vec<predicate> nonconstant_names)
1772{
1773 predicate p = true;
1774 ssa_op_iter iter;
1775 tree use;
b3d74655 1776 tree param_type = NULL_TREE;
b9a58fc5 1777 predicate op_non_const;
1778 bool is_load;
1779 int base_index;
b9a58fc5 1780 struct agg_position_info aggpos;
1781
1782 /* What statments might be optimized away
1783 when their arguments are constant. */
1784 if (gimple_code (stmt) != GIMPLE_ASSIGN
1785 && gimple_code (stmt) != GIMPLE_COND
1786 && gimple_code (stmt) != GIMPLE_SWITCH
1787 && (gimple_code (stmt) != GIMPLE_CALL
1788 || !(gimple_call_flags (stmt) & ECF_CONST)))
1789 return p;
1790
1791 /* Stores will stay anyway. */
1792 if (gimple_store_p (stmt))
1793 return p;
1794
1795 is_load = gimple_assign_load_p (stmt);
1796
1797 /* Loads can be optimized when the value is known. */
1798 if (is_load)
1799 {
b3d74655 1800 tree op = gimple_assign_rhs1 (stmt);
1801 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
1802 &aggpos))
b9a58fc5 1803 return p;
1804 }
1805 else
1806 base_index = -1;
1807
1808 /* See if we understand all operands before we start
1809 adding conditionals. */
1810 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1811 {
915df3d8 1812 tree parm = unmodified_parm (fbi, stmt, use, NULL);
b9a58fc5 1813 /* For arguments we can build a condition. */
1814 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1815 continue;
1816 if (TREE_CODE (use) != SSA_NAME)
1817 return p;
1818 /* If we know when operand is constant,
1819 we still can say something useful. */
1820 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1821 continue;
1822 return p;
1823 }
1824
1825 if (is_load)
1826 op_non_const =
b3d74655 1827 add_condition (summary, base_index, param_type, &aggpos,
1828 predicate::changed, NULL_TREE);
b9a58fc5 1829 else
1830 op_non_const = false;
1831 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1832 {
b3d74655 1833 tree parm = unmodified_parm (fbi, stmt, use, NULL);
b9a58fc5 1834 int index;
1835
1836 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1837 {
1838 if (index != base_index)
b3d74655 1839 p = add_condition (summary, index, TREE_TYPE (parm), NULL,
1840 predicate::changed, NULL_TREE);
b9a58fc5 1841 else
1842 continue;
1843 }
1844 else
1845 p = nonconstant_names[SSA_NAME_VERSION (use)];
1846 op_non_const = p.or_with (summary->conds, op_non_const);
1847 }
1848 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1849 && gimple_op (stmt, 0)
1850 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1851 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1852 = op_non_const;
1853 return op_non_const;
1854}
1855
1856struct record_modified_bb_info
1857{
bc972b3e 1858 tree op;
b9a58fc5 1859 bitmap bb_set;
1860 gimple *stmt;
1861};
1862
1863/* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1864 probability how often it changes between USE_BB.
bc972b3e 1865 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
b9a58fc5 1866 is in different loop nest, we can do better.
1867 This is all just estimate. In theory we look for minimal cut separating
1868 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1869 anyway. */
1870
1871static basic_block
1872get_minimal_bb (basic_block init_bb, basic_block use_bb)
1873{
2e966e2a 1874 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
205ce1aa 1875 if (l && l->header->count < init_bb->count)
b9a58fc5 1876 return l->header;
1877 return init_bb;
1878}
1879
1880/* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1881 set except for info->stmt. */
1882
1883static bool
1884record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1885{
1886 struct record_modified_bb_info *info =
1887 (struct record_modified_bb_info *) data;
1888 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1889 return false;
bc972b3e 1890 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
1891 return false;
b9a58fc5 1892 bitmap_set_bit (info->bb_set,
1893 SSA_NAME_IS_DEFAULT_DEF (vdef)
1894 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1895 : get_minimal_bb
1896 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1897 gimple_bb (info->stmt))->index);
bc972b3e 1898 if (dump_file)
1899 {
1900 fprintf (dump_file, " Param ");
1901 print_generic_expr (dump_file, info->op, TDF_SLIM);
1902 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
1903 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
1904 get_minimal_bb
1905 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1906 gimple_bb (info->stmt))->index);
1907 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
1908 }
b9a58fc5 1909 return false;
1910}
1911
1912/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1913 will change since last invocation of STMT.
1914
1915 Value 0 is reserved for compile time invariants.
1916 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1917 ought to be REG_BR_PROB_BASE / estimated_iters. */
1918
1919static int
915df3d8 1920param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
b9a58fc5 1921{
1922 tree op = gimple_call_arg (stmt, i);
1923 basic_block bb = gimple_bb (stmt);
1924
1925 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1926 op = TREE_OPERAND (op, 0);
1927
1928 tree base = get_base_address (op);
1929
1930 /* Global invariants never change. */
1931 if (is_gimple_min_invariant (base))
1932 return 0;
1933
1934 /* We would have to do non-trivial analysis to really work out what
1935 is the probability of value to change (i.e. when init statement
1936 is in a sibling loop of the call).
1937
1938 We do an conservative estimate: when call is executed N times more often
1939 than the statement defining value, we take the frequency 1/N. */
1940 if (TREE_CODE (base) == SSA_NAME)
1941 {
bc972b3e 1942 profile_count init_count;
b9a58fc5 1943
bc972b3e 1944 if (!bb->count.nonzero_p ())
b9a58fc5 1945 return REG_BR_PROB_BASE;
1946
1947 if (SSA_NAME_IS_DEFAULT_DEF (base))
bc972b3e 1948 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
b9a58fc5 1949 else
bc972b3e 1950 init_count = get_minimal_bb
b9a58fc5 1951 (gimple_bb (SSA_NAME_DEF_STMT (base)),
bc972b3e 1952 gimple_bb (stmt))->count;
b9a58fc5 1953
bc972b3e 1954 if (init_count < bb->count)
1955 return MAX ((init_count.to_sreal_scale (bb->count)
1956 * REG_BR_PROB_BASE).to_int (), 1);
1957 return REG_BR_PROB_BASE;
b9a58fc5 1958 }
1959 else
1960 {
1961 ao_ref refd;
bc972b3e 1962 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
b9a58fc5 1963 struct record_modified_bb_info info;
b9a58fc5 1964 tree init = ctor_for_folding (base);
1965
1966 if (init != error_mark_node)
1967 return 0;
bc972b3e 1968 if (!bb->count.nonzero_p ())
b9a58fc5 1969 return REG_BR_PROB_BASE;
bc972b3e 1970 if (dump_file)
1971 {
b3d74655 1972 fprintf (dump_file, " Analyzing param change probability of ");
bc972b3e 1973 print_generic_expr (dump_file, op, TDF_SLIM);
1974 fprintf (dump_file, "\n");
1975 }
b9a58fc5 1976 ao_ref_init (&refd, op);
bc972b3e 1977 info.op = op;
b9a58fc5 1978 info.stmt = stmt;
1979 info.bb_set = BITMAP_ALLOC (NULL);
915df3d8 1980 int walked
1981 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1982 NULL, NULL, fbi->aa_walk_budget);
1983 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
b9a58fc5 1984 {
bc972b3e 1985 if (dump_file)
915df3d8 1986 {
1987 if (walked < 0)
1988 fprintf (dump_file, " Ran out of AA walking budget.\n");
1989 else
1990 fprintf (dump_file, " Set in same BB as used.\n");
1991 }
b9a58fc5 1992 BITMAP_FREE (info.bb_set);
1993 return REG_BR_PROB_BASE;
1994 }
1995
bc972b3e 1996 bitmap_iterator bi;
1997 unsigned index;
1998 /* Lookup the most frequent update of the value and believe that
1999 it dominates all the other; precise analysis here is difficult. */
b9a58fc5 2000 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
bc972b3e 2001 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2002 if (dump_file)
2003 {
2004 fprintf (dump_file, " Set with count ");
2005 max.dump (dump_file);
2006 fprintf (dump_file, " and used with count ");
2007 bb->count.dump (dump_file);
2008 fprintf (dump_file, " freq %f\n",
2009 max.to_sreal_scale (bb->count).to_double ());
2010 }
b9a58fc5 2011
2012 BITMAP_FREE (info.bb_set);
bc972b3e 2013 if (max < bb->count)
2014 return MAX ((max.to_sreal_scale (bb->count)
2015 * REG_BR_PROB_BASE).to_int (), 1);
2016 return REG_BR_PROB_BASE;
b9a58fc5 2017 }
2018}
2019
2020/* Find whether a basic block BB is the final block of a (half) diamond CFG
2021 sub-graph and if the predicate the condition depends on is known. If so,
2022 return true and store the pointer the predicate in *P. */
2023
2024static bool
915df3d8 2025phi_result_unknown_predicate (ipa_func_body_info *fbi,
1297cbcd 2026 ipa_fn_summary *summary, basic_block bb,
b9a58fc5 2027 predicate *p,
2028 vec<predicate> nonconstant_names)
2029{
2030 edge e;
2031 edge_iterator ei;
2032 basic_block first_bb = NULL;
2033 gimple *stmt;
2034
2035 if (single_pred_p (bb))
2036 {
2037 *p = false;
2038 return true;
2039 }
2040
2041 FOR_EACH_EDGE (e, ei, bb->preds)
2042 {
2043 if (single_succ_p (e->src))
2044 {
2045 if (!single_pred_p (e->src))
2046 return false;
2047 if (!first_bb)
2048 first_bb = single_pred (e->src);
2049 else if (single_pred (e->src) != first_bb)
2050 return false;
2051 }
2052 else
2053 {
2054 if (!first_bb)
2055 first_bb = e->src;
2056 else if (e->src != first_bb)
2057 return false;
2058 }
2059 }
2060
2061 if (!first_bb)
2062 return false;
2063
2064 stmt = last_stmt (first_bb);
2065 if (!stmt
2066 || gimple_code (stmt) != GIMPLE_COND
2067 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2068 return false;
2069
915df3d8 2070 *p = will_be_nonconstant_expr_predicate (fbi, summary,
b9a58fc5 2071 gimple_cond_lhs (stmt),
2072 nonconstant_names);
2073 if (*p == true)
2074 return false;
2075 else
2076 return true;
2077}
2078
2079/* Given a PHI statement in a function described by inline properties SUMMARY
2080 and *P being the predicate describing whether the selected PHI argument is
2081 known, store a predicate for the result of the PHI statement into
2082 NONCONSTANT_NAMES, if possible. */
2083
2084static void
2e966e2a 2085predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
b9a58fc5 2086 predicate *p,
2087 vec<predicate> nonconstant_names)
2088{
2089 unsigned i;
2090
2091 for (i = 0; i < gimple_phi_num_args (phi); i++)
2092 {
2093 tree arg = gimple_phi_arg (phi, i)->def;
2094 if (!is_gimple_min_invariant (arg))
2095 {
2096 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2097 *p = p->or_with (summary->conds,
2098 nonconstant_names[SSA_NAME_VERSION (arg)]);
2099 if (*p == true)
2100 return;
2101 }
2102 }
2103
2104 if (dump_file && (dump_flags & TDF_DETAILS))
2105 {
2106 fprintf (dump_file, "\t\tphi predicate: ");
2107 p->dump (dump_file, summary->conds);
2108 }
2109 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2110}
2111
b9a58fc5 2112/* For a typical usage of __builtin_expect (a<b, 1), we
2113 may introduce an extra relation stmt:
2114 With the builtin, we have
2115 t1 = a <= b;
2116 t2 = (long int) t1;
2117 t3 = __builtin_expect (t2, 1);
2118 if (t3 != 0)
2119 goto ...
2120 Without the builtin, we have
2121 if (a<=b)
2122 goto...
2123 This affects the size/time estimation and may have
2124 an impact on the earlier inlining.
2125 Here find this pattern and fix it up later. */
2126
2127static gimple *
2128find_foldable_builtin_expect (basic_block bb)
2129{
2130 gimple_stmt_iterator bsi;
2131
2132 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2133 {
2134 gimple *stmt = gsi_stmt (bsi);
2135 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
01107f42 2136 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
b9a58fc5 2137 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2138 {
2139 tree var = gimple_call_lhs (stmt);
2140 tree arg = gimple_call_arg (stmt, 0);
2141 use_operand_p use_p;
2142 gimple *use_stmt;
2143 bool match = false;
2144 bool done = false;
2145
2146 if (!var || !arg)
2147 continue;
2148 gcc_assert (TREE_CODE (var) == SSA_NAME);
2149
2150 while (TREE_CODE (arg) == SSA_NAME)
2151 {
2152 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2153 if (!is_gimple_assign (stmt_tmp))
2154 break;
2155 switch (gimple_assign_rhs_code (stmt_tmp))
2156 {
2157 case LT_EXPR:
2158 case LE_EXPR:
2159 case GT_EXPR:
2160 case GE_EXPR:
2161 case EQ_EXPR:
2162 case NE_EXPR:
2163 match = true;
2164 done = true;
2165 break;
2166 CASE_CONVERT:
2167 break;
2168 default:
2169 done = true;
2170 break;
2171 }
2172 if (done)
2173 break;
2174 arg = gimple_assign_rhs1 (stmt_tmp);
2175 }
2176
2177 if (match && single_imm_use (var, &use_p, &use_stmt)
2178 && gimple_code (use_stmt) == GIMPLE_COND)
2179 return use_stmt;
2180 }
2181 }
2182 return NULL;
2183}
2184
2185/* Return true when the basic blocks contains only clobbers followed by RESX.
2186 Such BBs are kept around to make removal of dead stores possible with
2187 presence of EH and will be optimized out by optimize_clobbers later in the
2188 game.
2189
2190 NEED_EH is used to recurse in case the clobber has non-EH predecestors
2191 that can be clobber only, too.. When it is false, the RESX is not necessary
2192 on the end of basic block. */
2193
2194static bool
2195clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2196{
2197 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2198 edge_iterator ei;
2199 edge e;
2200
2201 if (need_eh)
2202 {
2203 if (gsi_end_p (gsi))
2204 return false;
2205 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2206 return false;
2207 gsi_prev (&gsi);
2208 }
2209 else if (!single_succ_p (bb))
2210 return false;
2211
2212 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2213 {
2214 gimple *stmt = gsi_stmt (gsi);
2215 if (is_gimple_debug (stmt))
2216 continue;
2217 if (gimple_clobber_p (stmt))
2218 continue;
2219 if (gimple_code (stmt) == GIMPLE_LABEL)
2220 break;
2221 return false;
2222 }
2223
2224 /* See if all predecestors are either throws or clobber only BBs. */
2225 FOR_EACH_EDGE (e, ei, bb->preds)
2226 if (!(e->flags & EDGE_EH)
2227 && !clobber_only_eh_bb_p (e->src, false))
2228 return false;
2229
2230 return true;
2231}
2232
2233/* Return true if STMT compute a floating point expression that may be affected
2234 by -ffast-math and similar flags. */
2235
2236static bool
2237fp_expression_p (gimple *stmt)
2238{
2239 ssa_op_iter i;
2240 tree op;
2241
2242 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2243 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2244 return true;
2245 return false;
2246}
2247
1297cbcd 2248/* Analyze function body for NODE.
2249 EARLY indicates run from early optimization pipeline. */
b9a58fc5 2250
2251static void
1297cbcd 2252analyze_function_body (struct cgraph_node *node, bool early)
b9a58fc5 2253{
2b7caeda 2254 sreal time = PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME);
b9a58fc5 2255 /* Estimate static overhead for function prologue/epilogue and alignment. */
2b7caeda 2256 int size = PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS);
b9a58fc5 2257 /* Benefits are scaled by probability of elimination that is in range
2258 <0,2>. */
2259 basic_block bb;
2260 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
cffe9a83 2261 sreal freq;
2e966e2a 2262 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
b9a58fc5 2263 predicate bb_predicate;
2264 struct ipa_func_body_info fbi;
2265 vec<predicate> nonconstant_names = vNULL;
2266 int nblocks, n;
2267 int *order;
b9a58fc5 2268 gimple *fix_builtin_expect_stmt;
2269
2270 gcc_assert (my_function && my_function->cfg);
2271 gcc_assert (cfun == my_function);
2272
2273 memset(&fbi, 0, sizeof(fbi));
1e0f11d2 2274 vec_free (info->conds);
b9a58fc5 2275 info->conds = NULL;
1e0f11d2 2276 vec_free (info->size_time_table);
b9a58fc5 2277 info->size_time_table = NULL;
2278
2279 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2280 so we can produce proper inline hints.
2281
2282 When optimizing and analyzing for early inliner, initialize node params
2283 so we can produce correct BB predicates. */
2284
2285 if (opt_for_fn (node->decl, optimize))
2286 {
2287 calculate_dominance_info (CDI_DOMINATORS);
c23b675c 2288 calculate_dominance_info (CDI_POST_DOMINATORS);
b9a58fc5 2289 if (!early)
2290 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2291 else
2292 {
2293 ipa_check_create_node_params ();
2294 ipa_initialize_node_params (node);
2295 }
2296
2297 if (ipa_node_params_sum)
2298 {
2299 fbi.node = node;
2300 fbi.info = IPA_NODE_REF (node);
2301 fbi.bb_infos = vNULL;
2302 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
915df3d8 2303 fbi.param_count = count_formal_params (node->decl);
2304 fbi.aa_walk_budget = PARAM_VALUE (PARAM_IPA_MAX_AA_STEPS);
2305
b9a58fc5 2306 nonconstant_names.safe_grow_cleared
2307 (SSANAMES (my_function)->length ());
2308 }
2309 }
2310
2311 if (dump_file)
2312 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2313 node->name ());
2314
2315 /* When we run into maximal number of entries, we assign everything to the
2316 constant truth case. Be sure to have it in list. */
2317 bb_predicate = true;
2318 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2319
2320 bb_predicate = predicate::not_inlined ();
07309989 2321 info->account_size_time (PARAM_VALUE (PARAM_UNINLINED_FUNCTION_INSNS)
2322 * ipa_fn_summary::size_scale,
2323 PARAM_VALUE (PARAM_UNINLINED_FUNCTION_TIME),
2324 bb_predicate,
b9a58fc5 2325 bb_predicate);
2326
2327 if (fbi.info)
2328 compute_bb_predicates (&fbi, node, info);
2329 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2330 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2331 for (n = 0; n < nblocks; n++)
2332 {
2333 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
cffe9a83 2334 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
b9a58fc5 2335 if (clobber_only_eh_bb_p (bb))
2336 {
2337 if (dump_file && (dump_flags & TDF_DETAILS))
2338 fprintf (dump_file, "\n Ignoring BB %i;"
2339 " it will be optimized away by cleanup_clobbers\n",
2340 bb->index);
2341 continue;
2342 }
2343
2344 /* TODO: Obviously predicates can be propagated down across CFG. */
2345 if (fbi.info)
2346 {
2347 if (bb->aux)
2348 bb_predicate = *(predicate *) bb->aux;
2349 else
2350 bb_predicate = false;
2351 }
2352 else
2353 bb_predicate = true;
2354
2355 if (dump_file && (dump_flags & TDF_DETAILS))
2356 {
2357 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2358 bb_predicate.dump (dump_file, info->conds);
2359 }
2360
2361 if (fbi.info && nonconstant_names.exists ())
2362 {
2363 predicate phi_predicate;
2364 bool first_phi = true;
2365
2366 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2367 gsi_next (&bsi))
2368 {
2369 if (first_phi
915df3d8 2370 && !phi_result_unknown_predicate (&fbi, info, bb,
b9a58fc5 2371 &phi_predicate,
2372 nonconstant_names))
2373 break;
2374 first_phi = false;
2375 if (dump_file && (dump_flags & TDF_DETAILS))
2376 {
2377 fprintf (dump_file, " ");
2378 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2379 }
2380 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2381 nonconstant_names);
2382 }
2383 }
2384
2385 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2386
772269dd 2387 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2388 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
b9a58fc5 2389 {
2390 gimple *stmt = gsi_stmt (bsi);
2391 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2392 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2393 int prob;
2394 predicate will_be_nonconstant;
2395
2396 /* This relation stmt should be folded after we remove
2397 buildin_expect call. Adjust the cost here. */
2398 if (stmt == fix_builtin_expect_stmt)
2399 {
2400 this_size--;
2401 this_time--;
2402 }
2403
2404 if (dump_file && (dump_flags & TDF_DETAILS))
2405 {
2406 fprintf (dump_file, " ");
2407 print_gimple_stmt (dump_file, stmt, 0);
2408 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
cffe9a83 2409 freq.to_double (), this_size,
b9a58fc5 2410 this_time);
2411 }
2412
b9a58fc5 2413 if (is_gimple_call (stmt)
2414 && !gimple_call_internal_p (stmt))
2415 {
2416 struct cgraph_edge *edge = node->get_edge (stmt);
b53d4f56 2417 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
b9a58fc5 2418
2419 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2420 resolved as constant. We however don't want to optimize
2421 out the cgraph edges. */
2422 if (nonconstant_names.exists ()
2423 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2424 && gimple_call_lhs (stmt)
2425 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2426 {
2427 predicate false_p = false;
2428 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2429 = false_p;
2430 }
2431 if (ipa_node_params_sum)
2432 {
2433 int count = gimple_call_num_args (stmt);
2434 int i;
2435
2436 if (count)
2437 es->param.safe_grow_cleared (count);
2438 for (i = 0; i < count; i++)
2439 {
915df3d8 2440 int prob = param_change_prob (&fbi, stmt, i);
b9a58fc5 2441 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2442 es->param[i].change_prob = prob;
2443 }
2444 }
2445
2446 es->call_stmt_size = this_size;
2447 es->call_stmt_time = this_time;
2448 es->loop_depth = bb_loop_depth (bb);
2449 edge_set_predicate (edge, &bb_predicate);
b5e4c6ae 2450 if (edge->speculative)
2451 {
2452 cgraph_edge *direct, *indirect;
2453 ipa_ref *ref;
2454 edge->speculative_call_info (direct, indirect, ref);
2455 gcc_assert (direct == edge);
2456 ipa_call_summary *es2
2457 = ipa_call_summaries->get_create (indirect);
2458 ipa_call_summaries->duplicate (edge, indirect,
2459 es, es2);
2460 }
b9a58fc5 2461 }
2462
2463 /* TODO: When conditional jump or swithc is known to be constant, but
2464 we did not translate it into the predicates, we really can account
2465 just maximum of the possible paths. */
2466 if (fbi.info)
2467 will_be_nonconstant
2468 = will_be_nonconstant_predicate (&fbi, info,
2469 stmt, nonconstant_names);
2470 else
2471 will_be_nonconstant = true;
2472 if (this_time || this_size)
2473 {
cffe9a83 2474 sreal final_time = (sreal)this_time * freq;
b9a58fc5 2475
915df3d8 2476 prob = eliminated_by_inlining_prob (&fbi, stmt);
b9a58fc5 2477 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file,
2479 "\t\t50%% will be eliminated by inlining\n");
2480 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2481 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2482
2e966e2a 2483 class predicate p = bb_predicate & will_be_nonconstant;
b9a58fc5 2484
2485 /* We can ignore statement when we proved it is never going
f4d3c071 2486 to happen, but we cannot do that for call statements
b9a58fc5 2487 because edges are accounted specially. */
2488
2489 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2490 {
cffe9a83 2491 time += final_time;
b9a58fc5 2492 size += this_size;
2493 }
2494
2495 /* We account everything but the calls. Calls have their own
2496 size/time info attached to cgraph edges. This is necessary
2497 in order to make the cost disappear after inlining. */
2498 if (!is_gimple_call (stmt))
2499 {
2500 if (prob)
2501 {
2502 predicate ip = bb_predicate & predicate::not_inlined ();
2503 info->account_size_time (this_size * prob,
fe462538 2504 (final_time * prob) / 2, ip,
b9a58fc5 2505 p);
2506 }
2507 if (prob != 2)
2508 info->account_size_time (this_size * (2 - prob),
fe462538 2509 (final_time * (2 - prob) / 2),
b9a58fc5 2510 bb_predicate,
2511 p);
2512 }
2513
2514 if (!info->fp_expressions && fp_expression_p (stmt))
2515 {
2516 info->fp_expressions = true;
2517 if (dump_file)
2518 fprintf (dump_file, " fp_expression set\n");
2519 }
3b04dbff 2520 }
b9a58fc5 2521
3b04dbff 2522 /* Account cost of address calculations in the statements. */
2523 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2524 {
2525 for (tree op = gimple_op (stmt, i);
2526 op && handled_component_p (op);
2527 op = TREE_OPERAND (op, 0))
2528 if ((TREE_CODE (op) == ARRAY_REF
2529 || TREE_CODE (op) == ARRAY_RANGE_REF)
2530 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2531 {
2532 predicate p = bb_predicate;
2533 if (fbi.info)
2534 p = p & will_be_nonconstant_expr_predicate
2535 (&fbi, info, TREE_OPERAND (op, 1),
2536 nonconstant_names);
2537 if (p != false)
2538 {
2539 time += freq;
2540 size += 1;
2541 if (dump_file)
2542 fprintf (dump_file,
2543 "\t\tAccounting address calculation.\n");
2544 info->account_size_time (ipa_fn_summary::size_scale,
2545 freq,
2546 bb_predicate,
2547 p);
2548 }
2549 }
b9a58fc5 2550 }
3b04dbff 2551
b9a58fc5 2552 }
2553 }
b9a58fc5 2554 free (order);
2555
2556 if (nonconstant_names.exists () && !early)
2557 {
2e966e2a 2558 class loop *loop;
b9a58fc5 2559 predicate loop_iterations = true;
2560 predicate loop_stride = true;
2561
2562 if (dump_file && (dump_flags & TDF_DETAILS))
2563 flow_loops_dump (dump_file, NULL, 0);
2564 scev_initialize ();
2565 FOR_EACH_LOOP (loop, 0)
2566 {
2567 vec<edge> exits;
2568 edge ex;
2569 unsigned int j;
2e966e2a 2570 class tree_niter_desc niter_desc;
b9a58fc5 2571 bb_predicate = *(predicate *) loop->header->aux;
2572
2573 exits = get_loop_exit_edges (loop);
2574 FOR_EACH_VEC_ELT (exits, j, ex)
2575 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2576 && !is_gimple_min_invariant (niter_desc.niter))
2577 {
2578 predicate will_be_nonconstant
915df3d8 2579 = will_be_nonconstant_expr_predicate (&fbi, info,
b9a58fc5 2580 niter_desc.niter,
2581 nonconstant_names);
2582 if (will_be_nonconstant != true)
2583 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2584 if (will_be_nonconstant != true
2585 && will_be_nonconstant != false)
2586 /* This is slightly inprecise. We may want to represent each
2587 loop with independent predicate. */
2588 loop_iterations &= will_be_nonconstant;
2589 }
2590 exits.release ();
2591 }
2592
2593 /* To avoid quadratic behavior we analyze stride predicates only
2594 with respect to the containing loop. Thus we simply iterate
2595 over all defs in the outermost loop body. */
2596 for (loop = loops_for_fn (cfun)->tree_root->inner;
2597 loop != NULL; loop = loop->next)
2598 {
2599 basic_block *body = get_loop_body (loop);
2600 for (unsigned i = 0; i < loop->num_nodes; i++)
2601 {
2602 gimple_stmt_iterator gsi;
2603 bb_predicate = *(predicate *) body[i]->aux;
2604 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2605 gsi_next (&gsi))
2606 {
2607 gimple *stmt = gsi_stmt (gsi);
2608
2609 if (!is_gimple_assign (stmt))
2610 continue;
2611
2612 tree def = gimple_assign_lhs (stmt);
2613 if (TREE_CODE (def) != SSA_NAME)
2614 continue;
2615
2616 affine_iv iv;
2617 if (!simple_iv (loop_containing_stmt (stmt),
2618 loop_containing_stmt (stmt),
2619 def, &iv, true)
2620 || is_gimple_min_invariant (iv.step))
2621 continue;
2622
2623 predicate will_be_nonconstant
915df3d8 2624 = will_be_nonconstant_expr_predicate (&fbi, info, iv.step,
b9a58fc5 2625 nonconstant_names);
2626 if (will_be_nonconstant != true)
2627 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2628 if (will_be_nonconstant != true
2629 && will_be_nonconstant != false)
2630 /* This is slightly inprecise. We may want to represent
2631 each loop with independent predicate. */
2632 loop_stride = loop_stride & will_be_nonconstant;
2633 }
2634 }
2635 free (body);
2636 }
d2c2513e 2637 ipa_fn_summary *s = ipa_fn_summaries->get (node);
fbc9c62d 2638 set_hint_predicate (&s->loop_iterations, loop_iterations);
2639 set_hint_predicate (&s->loop_stride, loop_stride);
b9a58fc5 2640 scev_finalize ();
2641 }
2642 FOR_ALL_BB_FN (bb, my_function)
2643 {
2644 edge e;
2645 edge_iterator ei;
2646
2647 if (bb->aux)
2648 edge_predicate_pool.remove ((predicate *)bb->aux);
2649 bb->aux = NULL;
2650 FOR_EACH_EDGE (e, ei, bb->succs)
2651 {
2652 if (e->aux)
2653 edge_predicate_pool.remove ((predicate *) e->aux);
2654 e->aux = NULL;
2655 }
2656 }
d2c2513e 2657 ipa_fn_summary *s = ipa_fn_summaries->get (node);
fbc9c62d 2658 s->time = time;
2659 s->self_size = size;
b9a58fc5 2660 nonconstant_names.release ();
2661 ipa_release_body_info (&fbi);
2662 if (opt_for_fn (node->decl, optimize))
2663 {
2664 if (!early)
2665 loop_optimizer_finalize ();
2666 else if (!ipa_edge_args_sum)
2667 ipa_free_all_node_params ();
2668 free_dominance_info (CDI_DOMINATORS);
c23b675c 2669 free_dominance_info (CDI_POST_DOMINATORS);
b9a58fc5 2670 }
2671 if (dump_file)
2672 {
2673 fprintf (dump_file, "\n");
1297cbcd 2674 ipa_dump_fn_summary (dump_file, node);
b9a58fc5 2675 }
2676}
2677
2678
1297cbcd 2679/* Compute function summary.
2680 EARLY is true when we compute parameters during early opts. */
b9a58fc5 2681
2682void
1297cbcd 2683compute_fn_summary (struct cgraph_node *node, bool early)
b9a58fc5 2684{
2685 HOST_WIDE_INT self_stack_size;
2686 struct cgraph_edge *e;
2e966e2a 2687 class ipa_fn_summary *info;
b9a58fc5 2688
2689 gcc_assert (!node->global.inlined_to);
2690
1297cbcd 2691 if (!ipa_fn_summaries)
2692 ipa_fn_summary_alloc ();
b9a58fc5 2693
d2c2513e 2694 /* Create a new ipa_fn_summary. */
2695 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2696 ipa_fn_summaries->remove (node);
b53d4f56 2697 info = ipa_fn_summaries->get_create (node);
b9a58fc5 2698
2699 /* Estimate the stack size for the function if we're optimizing. */
2700 self_stack_size = optimize && !node->thunk.thunk_p
2701 ? estimated_stack_frame_size (node) : 0;
2702 info->estimated_self_stack_size = self_stack_size;
2703 info->estimated_stack_size = self_stack_size;
2704 info->stack_frame_offset = 0;
2705
2706 if (node->thunk.thunk_p)
2707 {
b53d4f56 2708 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
b9a58fc5 2709 predicate t = true;
2710
2711 node->local.can_change_signature = false;
2712 es->call_stmt_size = eni_size_weights.call_cost;
2713 es->call_stmt_time = eni_time_weights.call_cost;
07309989 2714 info->account_size_time (ipa_fn_summary::size_scale
2715 * PARAM_VALUE
2716 (PARAM_UNINLINED_FUNCTION_THUNK_INSNS),
2717 PARAM_VALUE
2718 (PARAM_UNINLINED_FUNCTION_THUNK_TIME), t, t);
b9a58fc5 2719 t = predicate::not_inlined ();
1297cbcd 2720 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2721 ipa_update_overall_fn_summary (node);
b9a58fc5 2722 info->self_size = info->size;
bb78a1de 2723 if (stdarg_p (TREE_TYPE (node->decl)))
f7491b34 2724 {
2725 info->inlinable = false;
2726 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2727 }
b9a58fc5 2728 else
2729 info->inlinable = true;
2730 }
2731 else
2732 {
2733 /* Even is_gimple_min_invariant rely on current_function_decl. */
2734 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2735
2736 /* Can this function be inlined at all? */
2737 if (!opt_for_fn (node->decl, optimize)
2738 && !lookup_attribute ("always_inline",
2739 DECL_ATTRIBUTES (node->decl)))
2740 info->inlinable = false;
2741 else
2742 info->inlinable = tree_inlinable_function_p (node->decl);
2743
b9a58fc5 2744 /* Type attributes can use parameter indices to describe them. */
85912956 2745 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2746 /* Likewise for #pragma omp declare simd functions or functions
2747 with simd attribute. */
2748 || lookup_attribute ("omp declare simd",
2749 DECL_ATTRIBUTES (node->decl)))
b9a58fc5 2750 node->local.can_change_signature = false;
2751 else
2752 {
2753 /* Otherwise, inlinable functions always can change signature. */
2754 if (info->inlinable)
2755 node->local.can_change_signature = true;
2756 else
2757 {
f4d3c071 2758 /* Functions calling builtin_apply cannot change signature. */
b9a58fc5 2759 for (e = node->callees; e; e = e->next_callee)
2760 {
2761 tree cdecl = e->callee->decl;
a0e9bfbb 2762 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2763 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
b9a58fc5 2764 break;
2765 }
2766 node->local.can_change_signature = !e;
2767 }
2768 }
1297cbcd 2769 analyze_function_body (node, early);
b9a58fc5 2770 pop_cfun ();
2771 }
2772 for (e = node->callees; e; e = e->next_callee)
2773 if (e->callee->comdat_local_p ())
2774 break;
2775 node->calls_comdat_local = (e != NULL);
2776
2777 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2778 info->size = info->self_size;
2779 info->stack_frame_offset = 0;
2780 info->estimated_stack_size = info->estimated_self_stack_size;
2781
2782 /* Code above should compute exactly the same result as
1297cbcd 2783 ipa_update_overall_fn_summary but because computation happens in
b9a58fc5 2784 different order the roundoff errors result in slight changes. */
1297cbcd 2785 ipa_update_overall_fn_summary (node);
b5e4c6ae 2786 /* In LTO mode we may have speculative edges set. */
2787 gcc_assert (in_lto_p || info->size == info->self_size);
b9a58fc5 2788}
2789
2790
2791/* Compute parameters of functions used by inliner using
2792 current_function_decl. */
2793
2794static unsigned int
1297cbcd 2795compute_fn_summary_for_current (void)
b9a58fc5 2796{
1297cbcd 2797 compute_fn_summary (cgraph_node::get (current_function_decl), true);
b9a58fc5 2798 return 0;
2799}
2800
b9a58fc5 2801/* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2802 KNOWN_CONTEXTS and KNOWN_AGGS. */
2803
2804static bool
2805estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2806 int *size, int *time,
2807 vec<tree> known_vals,
2808 vec<ipa_polymorphic_call_context> known_contexts,
2809 vec<ipa_agg_jump_function_p> known_aggs)
2810{
2811 tree target;
2812 struct cgraph_node *callee;
2e966e2a 2813 class ipa_fn_summary *isummary;
b9a58fc5 2814 enum availability avail;
2815 bool speculative;
2816
2817 if (!known_vals.exists () && !known_contexts.exists ())
2818 return false;
2819 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2820 return false;
2821
2822 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2823 known_aggs, &speculative);
2824 if (!target || speculative)
2825 return false;
2826
2827 /* Account for difference in cost between indirect and direct calls. */
2828 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2829 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2830 gcc_checking_assert (*time >= 0);
2831 gcc_checking_assert (*size >= 0);
2832
2833 callee = cgraph_node::get (target);
2834 if (!callee || !callee->definition)
2835 return false;
2836 callee = callee->function_symbol (&avail);
2837 if (avail < AVAIL_AVAILABLE)
2838 return false;
d2c2513e 2839 isummary = ipa_fn_summaries->get (callee);
d92348e0 2840 if (isummary == NULL)
2841 return false;
2842
b9a58fc5 2843 return isummary->inlinable;
2844}
2845
2846/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2847 handle edge E with probability PROB.
2848 Set HINTS if edge may be devirtualized.
2849 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2850 site. */
2851
2852static inline void
2853estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2854 sreal *time,
2855 int prob,
2856 vec<tree> known_vals,
2857 vec<ipa_polymorphic_call_context> known_contexts,
2858 vec<ipa_agg_jump_function_p> known_aggs,
1297cbcd 2859 ipa_hints *hints)
b9a58fc5 2860{
2e966e2a 2861 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 2862 int call_size = es->call_stmt_size;
2863 int call_time = es->call_stmt_time;
2864 int cur_size;
2865 if (!e->callee
2866 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2867 known_vals, known_contexts, known_aggs)
2868 && hints && e->maybe_hot_p ())
2869 *hints |= INLINE_HINT_indirect_call;
1297cbcd 2870 cur_size = call_size * ipa_fn_summary::size_scale;
b9a58fc5 2871 *size += cur_size;
2872 if (min_size)
2873 *min_size += cur_size;
2874 if (prob == REG_BR_PROB_BASE)
7c343235 2875 *time += ((sreal)call_time) * e->sreal_frequency ();
b9a58fc5 2876 else
453addb7 2877 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
b9a58fc5 2878}
2879
2880
2881
2882/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2883 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2884 describe context of the call site. */
2885
2886static void
2887estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2888 int *min_size, sreal *time,
1297cbcd 2889 ipa_hints *hints,
b9a58fc5 2890 clause_t possible_truths,
2891 vec<tree> known_vals,
2892 vec<ipa_polymorphic_call_context> known_contexts,
2893 vec<ipa_agg_jump_function_p> known_aggs)
2894{
2895 struct cgraph_edge *e;
2896 for (e = node->callees; e; e = e->next_callee)
2897 {
2e966e2a 2898 class ipa_call_summary *es = ipa_call_summaries->get_create (e);
b9a58fc5 2899
2900 /* Do not care about zero sized builtins. */
2901 if (e->inline_failed && !es->call_stmt_size)
2902 {
2903 gcc_checking_assert (!es->call_stmt_time);
2904 continue;
2905 }
2906 if (!es->predicate
2907 || es->predicate->evaluate (possible_truths))
2908 {
2909 if (e->inline_failed)
2910 {
2911 /* Predicates of calls shall not use NOT_CHANGED codes,
2912 sowe do not need to compute probabilities. */
2913 estimate_edge_size_and_time (e, size,
2914 es->predicate ? NULL : min_size,
2915 time, REG_BR_PROB_BASE,
2916 known_vals, known_contexts,
2917 known_aggs, hints);
2918 }
2919 else
2920 estimate_calls_size_and_time (e->callee, size, min_size, time,
2921 hints,
2922 possible_truths,
2923 known_vals, known_contexts,
2924 known_aggs);
2925 }
2926 }
2927 for (e = node->indirect_calls; e; e = e->next_callee)
2928 {
2e966e2a 2929 class ipa_call_summary *es = ipa_call_summaries->get_create (e);
b9a58fc5 2930 if (!es->predicate
2931 || es->predicate->evaluate (possible_truths))
2932 estimate_edge_size_and_time (e, size,
2933 es->predicate ? NULL : min_size,
2934 time, REG_BR_PROB_BASE,
2935 known_vals, known_contexts, known_aggs,
2936 hints);
2937 }
2938}
2939
2940
2941/* Estimate size and time needed to execute NODE assuming
2942 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2943 information about NODE's arguments. If non-NULL use also probability
2944 information present in INLINE_PARAM_SUMMARY vector.
2945 Additionally detemine hints determined by the context. Finally compute
2946 minimal size needed for the call that is independent on the call context and
2947 can be used for fast estimates. Return the values in RET_SIZE,
2948 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2949
2950void
2951estimate_node_size_and_time (struct cgraph_node *node,
2952 clause_t possible_truths,
2953 clause_t nonspec_possible_truths,
2954 vec<tree> known_vals,
2955 vec<ipa_polymorphic_call_context> known_contexts,
2956 vec<ipa_agg_jump_function_p> known_aggs,
2957 int *ret_size, int *ret_min_size,
2958 sreal *ret_time,
2959 sreal *ret_nonspecialized_time,
1297cbcd 2960 ipa_hints *ret_hints,
b9a58fc5 2961 vec<inline_param_summary>
2962 inline_param_summary)
2963{
2e966e2a 2964 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
b9a58fc5 2965 size_time_entry *e;
2966 int size = 0;
2967 sreal time = 0;
2968 int min_size = 0;
1297cbcd 2969 ipa_hints hints = 0;
b9a58fc5 2970 int i;
2971
2972 if (dump_file && (dump_flags & TDF_DETAILS))
2973 {
2974 bool found = false;
2975 fprintf (dump_file, " Estimating body: %s/%i\n"
2976 " Known to be false: ", node->name (),
2977 node->order);
2978
2979 for (i = predicate::not_inlined_condition;
2980 i < (predicate::first_dynamic_condition
2981 + (int) vec_safe_length (info->conds)); i++)
2982 if (!(possible_truths & (1 << i)))
2983 {
2984 if (found)
2985 fprintf (dump_file, ", ");
2986 found = true;
2987 dump_condition (dump_file, info->conds, i);
2988 }
2989 }
2990
2991 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2992 known_vals, known_contexts, known_aggs);
2993 sreal nonspecialized_time = time;
2994
2995 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2996 {
b9a58fc5 2997 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
03211402 2998
2999 /* Because predicates are conservative, it can happen that nonconst is 1
3000 but exec is 0. */
b9a58fc5 3001 if (exec)
3002 {
03211402 3003 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
3004
b9a58fc5 3005 gcc_checking_assert (e->time >= 0);
3006 gcc_checking_assert (time >= 0);
3007
3008 /* We compute specialized size only because size of nonspecialized
3009 copy is context independent.
3010
3011 The difference between nonspecialized execution and specialized is
3012 that nonspecialized is not going to have optimized out computations
3013 known to be constant in a specialized setting. */
3014 if (nonconst)
3015 size += e->size;
3016 nonspecialized_time += e->time;
3017 if (!nonconst)
3018 ;
3019 else if (!inline_param_summary.exists ())
3020 {
3021 if (nonconst)
3022 time += e->time;
3023 }
3024 else
3025 {
3026 int prob = e->nonconst_predicate.probability
3027 (info->conds, possible_truths,
3028 inline_param_summary);
3029 gcc_checking_assert (prob >= 0);
3030 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3031 time += e->time * prob / REG_BR_PROB_BASE;
3032 }
3033 gcc_checking_assert (time >= 0);
3034 }
3035 }
3036 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3037 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3038 min_size = (*info->size_time_table)[0].size;
3039 gcc_checking_assert (size >= 0);
3040 gcc_checking_assert (time >= 0);
3041 /* nonspecialized_time should be always bigger than specialized time.
3042 Roundoff issues however may get into the way. */
f903ba0b 3043 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
b9a58fc5 3044
3045 /* Roundoff issues may make specialized time bigger than nonspecialized
3046 time. We do not really want that to happen because some heurstics
3047 may get confused by seeing negative speedups. */
3048 if (time > nonspecialized_time)
3049 time = nonspecialized_time;
3050
3051 if (info->loop_iterations
3052 && !info->loop_iterations->evaluate (possible_truths))
3053 hints |= INLINE_HINT_loop_iterations;
3054 if (info->loop_stride
3055 && !info->loop_stride->evaluate (possible_truths))
3056 hints |= INLINE_HINT_loop_stride;
b9a58fc5 3057 if (info->scc_no)
3058 hints |= INLINE_HINT_in_scc;
3059 if (DECL_DECLARED_INLINE_P (node->decl))
3060 hints |= INLINE_HINT_declared_inline;
3061
1297cbcd 3062 size = RDIV (size, ipa_fn_summary::size_scale);
3063 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
b9a58fc5 3064
3065 if (dump_file && (dump_flags & TDF_DETAILS))
3066 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3067 time.to_double (), nonspecialized_time.to_double ());
3068 if (ret_time)
3069 *ret_time = time;
3070 if (ret_nonspecialized_time)
3071 *ret_nonspecialized_time = nonspecialized_time;
3072 if (ret_size)
3073 *ret_size = size;
3074 if (ret_min_size)
3075 *ret_min_size = min_size;
3076 if (ret_hints)
3077 *ret_hints = hints;
3078 return;
3079}
3080
3081
3082/* Estimate size and time needed to execute callee of EDGE assuming that
3083 parameters known to be constant at caller of EDGE are propagated.
3084 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3085 and types for parameters. */
3086
3087void
3088estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3089 vec<tree> known_vals,
3090 vec<ipa_polymorphic_call_context>
3091 known_contexts,
3092 vec<ipa_agg_jump_function_p> known_aggs,
3093 int *ret_size, sreal *ret_time,
3094 sreal *ret_nonspec_time,
1297cbcd 3095 ipa_hints *hints)
b9a58fc5 3096{
3097 clause_t clause, nonspec_clause;
3098
3099 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
3100 &clause, &nonspec_clause);
3101 estimate_node_size_and_time (node, clause, nonspec_clause,
3102 known_vals, known_contexts,
3103 known_aggs, ret_size, NULL, ret_time,
3104 ret_nonspec_time, hints, vNULL);
3105}
3106
3107
3108/* Update summary information of inline clones after inlining.
3109 Compute peak stack usage. */
3110
3111static void
3112inline_update_callee_summaries (struct cgraph_node *node, int depth)
3113{
3114 struct cgraph_edge *e;
d2c2513e 3115 ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
3116 ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
b9a58fc5 3117 HOST_WIDE_INT peak;
3118
3119 callee_info->stack_frame_offset
3120 = caller_info->stack_frame_offset
3121 + caller_info->estimated_self_stack_size;
3122 peak = callee_info->stack_frame_offset
3123 + callee_info->estimated_self_stack_size;
b53d4f56 3124
d2c2513e 3125 ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to);
b53d4f56 3126 if (s->estimated_stack_size < peak)
3127 s->estimated_stack_size = peak;
b9a58fc5 3128 ipa_propagate_frequency (node);
3129 for (e = node->callees; e; e = e->next_callee)
3130 {
3131 if (!e->inline_failed)
3132 inline_update_callee_summaries (e->callee, depth);
d2c2513e 3133 ipa_call_summaries->get (e)->loop_depth += depth;
b9a58fc5 3134 }
3135 for (e = node->indirect_calls; e; e = e->next_callee)
d2c2513e 3136 ipa_call_summaries->get (e)->loop_depth += depth;
b9a58fc5 3137}
3138
3139/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3140 When functoin A is inlined in B and A calls C with parameter that
3141 changes with probability PROB1 and C is known to be passthroug
3142 of argument if B that change with probability PROB2, the probability
3143 of change is now PROB1*PROB2. */
3144
3145static void
3146remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3147 struct cgraph_edge *edge)
3148{
3149 if (ipa_node_params_sum)
3150 {
3151 int i;
2e966e2a 3152 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3153 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3154 class ipa_call_summary *inlined_es
d2c2513e 3155 = ipa_call_summaries->get (inlined_edge);
b9a58fc5 3156
2e2c4b6f 3157 if (es->param.length () == 0)
3158 return;
3159
b9a58fc5 3160 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3161 {
3162 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3163 if (jfunc->type == IPA_JF_PASS_THROUGH
3164 || jfunc->type == IPA_JF_ANCESTOR)
3165 {
3166 int id = jfunc->type == IPA_JF_PASS_THROUGH
3167 ? ipa_get_jf_pass_through_formal_id (jfunc)
3168 : ipa_get_jf_ancestor_formal_id (jfunc);
3169 if (id < (int) inlined_es->param.length ())
3170 {
3171 int prob1 = es->param[i].change_prob;
3172 int prob2 = inlined_es->param[id].change_prob;
3173 int prob = combine_probabilities (prob1, prob2);
3174
3175 if (prob1 && prob2 && !prob)
3176 prob = 1;
3177
3178 es->param[i].change_prob = prob;
3179 }
3180 }
3181 }
3182 }
3183}
3184
3185/* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3186
3187 Remap predicates of callees of NODE. Rest of arguments match
3188 remap_predicate.
3189
3190 Also update change probabilities. */
3191
3192static void
3193remap_edge_summaries (struct cgraph_edge *inlined_edge,
3194 struct cgraph_node *node,
2e966e2a 3195 class ipa_fn_summary *info,
3196 class ipa_fn_summary *callee_info,
b9a58fc5 3197 vec<int> operand_map,
3198 vec<int> offset_map,
3199 clause_t possible_truths,
3200 predicate *toplev_predicate)
3201{
3202 struct cgraph_edge *e, *next;
3203 for (e = node->callees; e; e = next)
3204 {
2e966e2a 3205 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 3206 predicate p;
3207 next = e->next_callee;
3208
3209 if (e->inline_failed)
3210 {
3211 remap_edge_change_prob (inlined_edge, e);
3212
3213 if (es->predicate)
3214 {
3215 p = es->predicate->remap_after_inlining
3216 (info, callee_info, operand_map,
3217 offset_map, possible_truths,
3218 *toplev_predicate);
3219 edge_set_predicate (e, &p);
3220 }
3221 else
3222 edge_set_predicate (e, toplev_predicate);
3223 }
3224 else
3225 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
3226 operand_map, offset_map, possible_truths,
3227 toplev_predicate);
3228 }
3229 for (e = node->indirect_calls; e; e = next)
3230 {
2e966e2a 3231 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 3232 predicate p;
3233 next = e->next_callee;
3234
3235 remap_edge_change_prob (inlined_edge, e);
3236 if (es->predicate)
3237 {
3238 p = es->predicate->remap_after_inlining
3239 (info, callee_info, operand_map, offset_map,
3240 possible_truths, *toplev_predicate);
3241 edge_set_predicate (e, &p);
3242 }
3243 else
3244 edge_set_predicate (e, toplev_predicate);
3245 }
3246}
3247
3248/* Same as remap_predicate, but set result into hint *HINT. */
3249
3250static void
2e966e2a 3251remap_hint_predicate (class ipa_fn_summary *info,
3252 class ipa_fn_summary *callee_info,
b9a58fc5 3253 predicate **hint,
3254 vec<int> operand_map,
3255 vec<int> offset_map,
3256 clause_t possible_truths,
3257 predicate *toplev_predicate)
3258{
3259 predicate p;
3260
3261 if (!*hint)
3262 return;
3263 p = (*hint)->remap_after_inlining
3264 (info, callee_info,
3265 operand_map, offset_map,
3266 possible_truths, *toplev_predicate);
3267 if (p != false && p != true)
3268 {
3269 if (!*hint)
3270 set_hint_predicate (hint, p);
3271 else
3272 **hint &= p;
3273 }
3274}
3275
3276/* We inlined EDGE. Update summary of the function we inlined into. */
3277
3278void
1297cbcd 3279ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
b9a58fc5 3280{
d2c2513e 3281 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
b9a58fc5 3282 struct cgraph_node *to = (edge->caller->global.inlined_to
3283 ? edge->caller->global.inlined_to : edge->caller);
2e966e2a 3284 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
b9a58fc5 3285 clause_t clause = 0; /* not_inline is known to be false. */
3286 size_time_entry *e;
3287 vec<int> operand_map = vNULL;
3288 vec<int> offset_map = vNULL;
3289 int i;
3290 predicate toplev_predicate;
3291 predicate true_p = true;
2e966e2a 3292 class ipa_call_summary *es = ipa_call_summaries->get (edge);
b9a58fc5 3293
3294 if (es->predicate)
3295 toplev_predicate = *es->predicate;
3296 else
3297 toplev_predicate = true;
3298
3299 info->fp_expressions |= callee_info->fp_expressions;
3300
3301 if (callee_info->conds)
3302 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3303 if (ipa_node_params_sum && callee_info->conds)
3304 {
2e966e2a 3305 class ipa_edge_args *args = IPA_EDGE_REF (edge);
b9a58fc5 3306 int count = ipa_get_cs_argument_count (args);
3307 int i;
3308
3309 if (count)
3310 {
3311 operand_map.safe_grow_cleared (count);
3312 offset_map.safe_grow_cleared (count);
3313 }
3314 for (i = 0; i < count; i++)
3315 {
3316 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3317 int map = -1;
3318
3319 /* TODO: handle non-NOPs when merging. */
3320 if (jfunc->type == IPA_JF_PASS_THROUGH)
3321 {
3322 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3323 map = ipa_get_jf_pass_through_formal_id (jfunc);
3324 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3325 offset_map[i] = -1;
3326 }
3327 else if (jfunc->type == IPA_JF_ANCESTOR)
3328 {
3329 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3330 if (offset >= 0 && offset < INT_MAX)
3331 {
3332 map = ipa_get_jf_ancestor_formal_id (jfunc);
3333 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3334 offset = -1;
3335 offset_map[i] = offset;
3336 }
3337 }
3338 operand_map[i] = map;
3339 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3340 }
3341 }
3342 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3343 {
3344 predicate p;
3345 p = e->exec_predicate.remap_after_inlining
3346 (info, callee_info, operand_map,
3347 offset_map, clause,
3348 toplev_predicate);
3349 predicate nonconstp;
3350 nonconstp = e->nonconst_predicate.remap_after_inlining
3351 (info, callee_info, operand_map,
3352 offset_map, clause,
3353 toplev_predicate);
3354 if (p != false && nonconstp != false)
3355 {
7c343235 3356 sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
b9a58fc5 3357 int prob = e->nonconst_predicate.probability (callee_info->conds,
3358 clause, es->param);
3359 add_time = add_time * prob / REG_BR_PROB_BASE;
3360 if (prob != REG_BR_PROB_BASE
3361 && dump_file && (dump_flags & TDF_DETAILS))
3362 {
3363 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3364 (double) prob / REG_BR_PROB_BASE);
3365 }
3366 info->account_size_time (e->size, add_time, p, nonconstp);
3367 }
3368 }
3369 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3370 offset_map, clause, &toplev_predicate);
3371 remap_hint_predicate (info, callee_info,
3372 &callee_info->loop_iterations,
3373 operand_map, offset_map, clause, &toplev_predicate);
3374 remap_hint_predicate (info, callee_info,
3375 &callee_info->loop_stride,
3376 operand_map, offset_map, clause, &toplev_predicate);
b9a58fc5 3377
d2c2513e 3378 ipa_call_summary *s = ipa_call_summaries->get (edge);
b53d4f56 3379 inline_update_callee_summaries (edge->callee, s->loop_depth);
b9a58fc5 3380
3381 /* We do not maintain predicates of inlined edges, free it. */
3382 edge_set_predicate (edge, &true_p);
3383 /* Similarly remove param summaries. */
3384 es->param.release ();
3385 operand_map.release ();
3386 offset_map.release ();
3387}
3388
1297cbcd 3389/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
b9a58fc5 3390 and time. Recompute it. */
3391
3392void
1297cbcd 3393ipa_update_overall_fn_summary (struct cgraph_node *node)
b9a58fc5 3394{
2e966e2a 3395 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
b9a58fc5 3396 size_time_entry *e;
3397 int i;
3398
3399 info->size = 0;
3400 info->time = 0;
3401 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3402 {
3403 info->size += e->size;
3404 info->time += e->time;
3405 }
3406 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3407 &info->time, NULL,
3408 ~(clause_t) (1 << predicate::false_condition),
3409 vNULL, vNULL, vNULL);
1297cbcd 3410 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
b9a58fc5 3411}
3412
3413
3414/* This function performs intraprocedural analysis in NODE that is required to
3415 inline indirect calls. */
3416
3417static void
3418inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3419{
3420 ipa_analyze_node (node);
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3422 {
3423 ipa_print_node_params (dump_file, node);
3424 ipa_print_node_jump_functions (dump_file, node);
3425 }
3426}
3427
3428
3429/* Note function body size. */
3430
3431void
3432inline_analyze_function (struct cgraph_node *node)
3433{
3434 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3435
3436 if (dump_file)
3437 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3438 node->name (), node->order);
3439 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3440 inline_indirect_intraprocedural_analysis (node);
1297cbcd 3441 compute_fn_summary (node, false);
b9a58fc5 3442 if (!optimize)
3443 {
3444 struct cgraph_edge *e;
3445 for (e = node->callees; e; e = e->next_callee)
3446 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3447 for (e = node->indirect_calls; e; e = e->next_callee)
3448 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3449 }
3450
3451 pop_cfun ();
3452}
3453
3454
3455/* Called when new function is inserted to callgraph late. */
3456
3457void
1297cbcd 3458ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
b9a58fc5 3459{
3460 inline_analyze_function (node);
3461}
3462
3463/* Note function body size. */
3464
a2da7d8a 3465static void
3466ipa_fn_summary_generate (void)
b9a58fc5 3467{
3468 struct cgraph_node *node;
3469
3470 FOR_EACH_DEFINED_FUNCTION (node)
3471 if (DECL_STRUCT_FUNCTION (node->decl))
3eed1ac0 3472 node->local.versionable = tree_versionable_function_p (node->decl);
b9a58fc5 3473
1297cbcd 3474 ipa_fn_summary_alloc ();
b9a58fc5 3475
1297cbcd 3476 ipa_fn_summaries->enable_insertion_hook ();
b9a58fc5 3477
3478 ipa_register_cgraph_hooks ();
b9a58fc5 3479
3480 FOR_EACH_DEFINED_FUNCTION (node)
5f4e4f36 3481 if (!node->alias
3482 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3483 || opt_for_fn (node->decl, optimize)))
b9a58fc5 3484 inline_analyze_function (node);
3485}
3486
3487
3488/* Write inline summary for edge E to OB. */
3489
3490static void
2e966e2a 3491read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
1e0f11d2 3492 bool prevails)
b9a58fc5 3493{
2e966e2a 3494 class ipa_call_summary *es = prevails
1e0f11d2 3495 ? ipa_call_summaries->get_create (e) : NULL;
b9a58fc5 3496 predicate p;
3497 int length, i;
3498
1e0f11d2 3499 int size = streamer_read_uhwi (ib);
3500 int time = streamer_read_uhwi (ib);
3501 int depth = streamer_read_uhwi (ib);
3502
3503 if (es)
3504 {
3505 es->call_stmt_size = size;
3506 es->call_stmt_time = time;
3507 es->loop_depth = depth;
3508 }
bd5ef087 3509
3510 bitpack_d bp = streamer_read_bitpack (ib);
1e0f11d2 3511 if (es)
3512 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3513 else
3514 bp_unpack_value (&bp, 1);
bd5ef087 3515
b9a58fc5 3516 p.stream_in (ib);
1e0f11d2 3517 if (es)
3518 edge_set_predicate (e, &p);
b9a58fc5 3519 length = streamer_read_uhwi (ib);
1e0f11d2 3520 if (length && es && e->possibly_call_in_translation_unit_p ())
b9a58fc5 3521 {
3522 es->param.safe_grow_cleared (length);
3523 for (i = 0; i < length; i++)
3524 es->param[i].change_prob = streamer_read_uhwi (ib);
3525 }
1e0f11d2 3526 else
3527 {
3528 for (i = 0; i < length; i++)
3529 streamer_read_uhwi (ib);
3530 }
b9a58fc5 3531}
3532
3533
3534/* Stream in inline summaries from the section. */
3535
3536static void
3537inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3538 size_t len)
3539{
3540 const struct lto_function_header *header =
3541 (const struct lto_function_header *) data;
3542 const int cfg_offset = sizeof (struct lto_function_header);
3543 const int main_offset = cfg_offset + header->cfg_size;
3544 const int string_offset = main_offset + header->main_size;
2e966e2a 3545 class data_in *data_in;
b9a58fc5 3546 unsigned int i, count2, j;
3547 unsigned int f_count;
3548
3549 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3550 file_data->mode_table);
3551
3552 data_in =
3553 lto_data_in_create (file_data, (const char *) data + string_offset,
3554 header->string_size, vNULL);
3555 f_count = streamer_read_uhwi (&ib);
3556 for (i = 0; i < f_count; i++)
3557 {
3558 unsigned int index;
3559 struct cgraph_node *node;
2e966e2a 3560 class ipa_fn_summary *info;
b9a58fc5 3561 lto_symtab_encoder_t encoder;
3562 struct bitpack_d bp;
3563 struct cgraph_edge *e;
3564 predicate p;
3565
3566 index = streamer_read_uhwi (&ib);
3567 encoder = file_data->symtab_node_encoder;
3568 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3569 index));
1e0f11d2 3570 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
b9a58fc5 3571
1e0f11d2 3572 int stack_size = streamer_read_uhwi (&ib);
3573 int size = streamer_read_uhwi (&ib);
3574 sreal time = sreal::stream_in (&ib);
3575
3576 if (info)
3577 {
3578 info->estimated_stack_size
3579 = info->estimated_self_stack_size = stack_size;
3580 info->size = info->self_size = size;
3581 info->time = time;
3582 }
b9a58fc5 3583
3584 bp = streamer_read_bitpack (&ib);
1e0f11d2 3585 if (info)
3586 {
3587 info->inlinable = bp_unpack_value (&bp, 1);
3588 info->fp_expressions = bp_unpack_value (&bp, 1);
3589 }
3590 else
3591 {
3592 bp_unpack_value (&bp, 1);
3593 bp_unpack_value (&bp, 1);
3594 }
b9a58fc5 3595
3596 count2 = streamer_read_uhwi (&ib);
1e0f11d2 3597 gcc_assert (!info || !info->conds);
b9a58fc5 3598 for (j = 0; j < count2; j++)
3599 {
3600 struct condition c;
b3d74655 3601 unsigned int k, count3;
b9a58fc5 3602 c.operand_num = streamer_read_uhwi (&ib);
b9a58fc5 3603 c.code = (enum tree_code) streamer_read_uhwi (&ib);
b3d74655 3604 c.type = stream_read_tree (&ib, data_in);
b9a58fc5 3605 c.val = stream_read_tree (&ib, data_in);
3606 bp = streamer_read_bitpack (&ib);
3607 c.agg_contents = bp_unpack_value (&bp, 1);
3608 c.by_ref = bp_unpack_value (&bp, 1);
3609 if (c.agg_contents)
3610 c.offset = streamer_read_uhwi (&ib);
b3d74655 3611 c.param_ops = NULL;
3612 count3 = streamer_read_uhwi (&ib);
3613 for (k = 0; k < count3; k++)
3614 {
3615 struct expr_eval_op op;
3616 enum gimple_rhs_class rhs_class;
3617 op.code = (enum tree_code) streamer_read_uhwi (&ib);
3618 op.type = stream_read_tree (&ib, data_in);
3619 switch (rhs_class = get_gimple_rhs_class (op.code))
3620 {
3621 case GIMPLE_UNARY_RHS:
3622 op.index = 0;
3623 op.val[0] = NULL_TREE;
3624 op.val[1] = NULL_TREE;
3625 break;
3626
3627 case GIMPLE_BINARY_RHS:
3628 case GIMPLE_TERNARY_RHS:
3629 bp = streamer_read_bitpack (&ib);
3630 op.index = bp_unpack_value (&bp, 2);
3631 op.val[0] = stream_read_tree (&ib, data_in);
3632 if (rhs_class == GIMPLE_BINARY_RHS)
3633 op.val[1] = NULL_TREE;
3634 else
3635 op.val[1] = stream_read_tree (&ib, data_in);
3636 break;
3637
3638 default:
3639 fatal_error (UNKNOWN_LOCATION,
3640 "invalid fnsummary in LTO stream");
3641 }
3642 vec_safe_push (c.param_ops, op);
3643 }
1e0f11d2 3644 if (info)
3645 vec_safe_push (info->conds, c);
b9a58fc5 3646 }
3647 count2 = streamer_read_uhwi (&ib);
1e0f11d2 3648 gcc_assert (!info || !info->size_time_table);
b9a58fc5 3649 for (j = 0; j < count2; j++)
3650 {
2e966e2a 3651 class size_time_entry e;
b9a58fc5 3652
3653 e.size = streamer_read_uhwi (&ib);
3654 e.time = sreal::stream_in (&ib);
3655 e.exec_predicate.stream_in (&ib);
3656 e.nonconst_predicate.stream_in (&ib);
3657
1e0f11d2 3658 if (info)
3659 vec_safe_push (info->size_time_table, e);
b9a58fc5 3660 }
3661
3662 p.stream_in (&ib);
1e0f11d2 3663 if (info)
3664 set_hint_predicate (&info->loop_iterations, p);
b9a58fc5 3665 p.stream_in (&ib);
1e0f11d2 3666 if (info)
3667 set_hint_predicate (&info->loop_stride, p);
b9a58fc5 3668 for (e = node->callees; e; e = e->next_callee)
1e0f11d2 3669 read_ipa_call_summary (&ib, e, info != NULL);
b9a58fc5 3670 for (e = node->indirect_calls; e; e = e->next_callee)
1e0f11d2 3671 read_ipa_call_summary (&ib, e, info != NULL);
b9a58fc5 3672 }
3673
1297cbcd 3674 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
b9a58fc5 3675 len);
3676 lto_data_in_delete (data_in);
3677}
3678
3679
3680/* Read inline summary. Jump functions are shared among ipa-cp
3681 and inliner, so when ipa-cp is active, we don't need to write them
3682 twice. */
3683
a2da7d8a 3684static void
3685ipa_fn_summary_read (void)
b9a58fc5 3686{
3687 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3688 struct lto_file_decl_data *file_data;
3689 unsigned int j = 0;
3690
1297cbcd 3691 ipa_fn_summary_alloc ();
b9a58fc5 3692
3693 while ((file_data = file_data_vec[j++]))
3694 {
3695 size_t len;
3696 const char *data = lto_get_section_data (file_data,
1297cbcd 3697 LTO_section_ipa_fn_summary,
b9a58fc5 3698 NULL, &len);
3699 if (data)
3700 inline_read_section (file_data, data, len);
3701 else
3702 /* Fatal error here. We do not want to support compiling ltrans units
3703 with different version of compiler or different flags than the WPA
3704 unit, so this should never happen. */
3705 fatal_error (input_location,
3706 "ipa inline summary is missing in input file");
3707 }
5f4e4f36 3708 ipa_register_cgraph_hooks ();
3709 if (!flag_ipa_cp)
3710 ipa_prop_read_jump_functions ();
b9a58fc5 3711
1297cbcd 3712 gcc_assert (ipa_fn_summaries);
3713 ipa_fn_summaries->enable_insertion_hook ();
b9a58fc5 3714}
3715
3716
3717/* Write inline summary for edge E to OB. */
3718
3719static void
3720write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3721{
2e966e2a 3722 class ipa_call_summary *es = ipa_call_summaries->get (e);
b9a58fc5 3723 int i;
3724
3725 streamer_write_uhwi (ob, es->call_stmt_size);
3726 streamer_write_uhwi (ob, es->call_stmt_time);
3727 streamer_write_uhwi (ob, es->loop_depth);
bd5ef087 3728
3729 bitpack_d bp = bitpack_create (ob->main_stream);
3730 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
3731 streamer_write_bitpack (&bp);
3732
b9a58fc5 3733 if (es->predicate)
3734 es->predicate->stream_out (ob);
3735 else
3736 streamer_write_uhwi (ob, 0);
3737 streamer_write_uhwi (ob, es->param.length ());
3738 for (i = 0; i < (int) es->param.length (); i++)
3739 streamer_write_uhwi (ob, es->param[i].change_prob);
3740}
3741
3742
3743/* Write inline summary for node in SET.
3744 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3745 active, we don't need to write them twice. */
3746
a2da7d8a 3747static void
3748ipa_fn_summary_write (void)
b9a58fc5 3749{
1297cbcd 3750 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
b9a58fc5 3751 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3752 unsigned int count = 0;
3753 int i;
3754
3755 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3756 {
3757 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3758 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3759 if (cnode && cnode->definition && !cnode->alias)
3760 count++;
3761 }
3762 streamer_write_uhwi (ob, count);
3763
3764 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3765 {
3766 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3767 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3768 if (cnode && cnode->definition && !cnode->alias)
3769 {
2e966e2a 3770 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
b9a58fc5 3771 struct bitpack_d bp;
3772 struct cgraph_edge *edge;
3773 int i;
3774 size_time_entry *e;
3775 struct condition *c;
3776
3777 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3778 streamer_write_hwi (ob, info->estimated_self_stack_size);
3779 streamer_write_hwi (ob, info->self_size);
3780 info->time.stream_out (ob);
3781 bp = bitpack_create (ob->main_stream);
3782 bp_pack_value (&bp, info->inlinable, 1);
efa02472 3783 bp_pack_value (&bp, false, 1);
b9a58fc5 3784 bp_pack_value (&bp, info->fp_expressions, 1);
3785 streamer_write_bitpack (&bp);
3786 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3787 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3788 {
b3d74655 3789 int j;
3790 struct expr_eval_op *op;
3791
b9a58fc5 3792 streamer_write_uhwi (ob, c->operand_num);
b9a58fc5 3793 streamer_write_uhwi (ob, c->code);
b3d74655 3794 stream_write_tree (ob, c->type, true);
b9a58fc5 3795 stream_write_tree (ob, c->val, true);
3796 bp = bitpack_create (ob->main_stream);
3797 bp_pack_value (&bp, c->agg_contents, 1);
3798 bp_pack_value (&bp, c->by_ref, 1);
3799 streamer_write_bitpack (&bp);
3800 if (c->agg_contents)
3801 streamer_write_uhwi (ob, c->offset);
b3d74655 3802 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
3803 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
3804 {
3805 streamer_write_uhwi (ob, op->code);
3806 stream_write_tree (ob, op->type, true);
3807 if (op->val[0])
3808 {
3809 bp = bitpack_create (ob->main_stream);
3810 bp_pack_value (&bp, op->index, 2);
3811 streamer_write_bitpack (&bp);
3812 stream_write_tree (ob, op->val[0], true);
3813 if (op->val[1])
3814 stream_write_tree (ob, op->val[1], true);
3815 }
3816 }
b9a58fc5 3817 }
3818 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3819 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3820 {
3821 streamer_write_uhwi (ob, e->size);
3822 e->time.stream_out (ob);
3823 e->exec_predicate.stream_out (ob);
3824 e->nonconst_predicate.stream_out (ob);
3825 }
3826 if (info->loop_iterations)
3827 info->loop_iterations->stream_out (ob);
3828 else
3829 streamer_write_uhwi (ob, 0);
3830 if (info->loop_stride)
3831 info->loop_stride->stream_out (ob);
3832 else
3833 streamer_write_uhwi (ob, 0);
b9a58fc5 3834 for (edge = cnode->callees; edge; edge = edge->next_callee)
3835 write_ipa_call_summary (ob, edge);
3836 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3837 write_ipa_call_summary (ob, edge);
3838 }
3839 }
3840 streamer_write_char_stream (ob->main_stream, 0);
3841 produce_asm (ob, NULL);
3842 destroy_output_block (ob);
3843
5f4e4f36 3844 if (!flag_ipa_cp)
b9a58fc5 3845 ipa_prop_write_jump_functions ();
3846}
3847
3848
3849/* Release inline summary. */
3850
3851void
a2da7d8a 3852ipa_free_fn_summary (void)
b9a58fc5 3853{
3854 struct cgraph_node *node;
3855 if (!ipa_call_summaries)
3856 return;
3857 FOR_EACH_DEFINED_FUNCTION (node)
3858 if (!node->alias)
d2c2513e 3859 ipa_fn_summaries->remove (node);
1297cbcd 3860 ipa_fn_summaries->release ();
3861 ipa_fn_summaries = NULL;
b9a58fc5 3862 ipa_call_summaries->release ();
3863 delete ipa_call_summaries;
3864 ipa_call_summaries = NULL;
3865 edge_predicate_pool.release ();
3866}
a2da7d8a 3867
3868namespace {
3869
3870const pass_data pass_data_local_fn_summary =
3871{
3872 GIMPLE_PASS, /* type */
3873 "local-fnsummary", /* name */
3874 OPTGROUP_INLINE, /* optinfo_flags */
3875 TV_INLINE_PARAMETERS, /* tv_id */
3876 0, /* properties_required */
3877 0, /* properties_provided */
3878 0, /* properties_destroyed */
3879 0, /* todo_flags_start */
3880 0, /* todo_flags_finish */
3881};
3882
3883class pass_local_fn_summary : public gimple_opt_pass
3884{
3885public:
3886 pass_local_fn_summary (gcc::context *ctxt)
3887 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3888 {}
3889
3890 /* opt_pass methods: */
3891 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3892 virtual unsigned int execute (function *)
3893 {
3894 return compute_fn_summary_for_current ();
3895 }
3896
3897}; // class pass_local_fn_summary
3898
3899} // anon namespace
3900
3901gimple_opt_pass *
3902make_pass_local_fn_summary (gcc::context *ctxt)
3903{
3904 return new pass_local_fn_summary (ctxt);
3905}
3906
3907
3908/* Free inline summary. */
3909
3910namespace {
3911
3912const pass_data pass_data_ipa_free_fn_summary =
3913{
3914 SIMPLE_IPA_PASS, /* type */
3915 "free-fnsummary", /* name */
3916 OPTGROUP_NONE, /* optinfo_flags */
3917 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3918 0, /* properties_required */
3919 0, /* properties_provided */
3920 0, /* properties_destroyed */
3921 0, /* todo_flags_start */
369c9a01 3922 0, /* todo_flags_finish */
a2da7d8a 3923};
3924
3925class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3926{
3927public:
3928 pass_ipa_free_fn_summary (gcc::context *ctxt)
369c9a01 3929 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
3930 small_p (false)
a2da7d8a 3931 {}
3932
3933 /* opt_pass methods: */
369c9a01 3934 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
3935 void set_pass_param (unsigned int n, bool param)
3936 {
3937 gcc_assert (n == 0);
3938 small_p = param;
3939 }
3940 virtual bool gate (function *) { return small_p || !flag_wpa; }
a2da7d8a 3941 virtual unsigned int execute (function *)
3942 {
3943 ipa_free_fn_summary ();
9da914af 3944 return 0;
a2da7d8a 3945 }
3946
369c9a01 3947private:
3948 bool small_p;
a2da7d8a 3949}; // class pass_ipa_free_fn_summary
3950
3951} // anon namespace
3952
3953simple_ipa_opt_pass *
3954make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3955{
3956 return new pass_ipa_free_fn_summary (ctxt);
3957}
3958
3959namespace {
3960
3961const pass_data pass_data_ipa_fn_summary =
3962{
3963 IPA_PASS, /* type */
3964 "fnsummary", /* name */
3965 OPTGROUP_INLINE, /* optinfo_flags */
fcf94ac8 3966 TV_IPA_FNSUMMARY, /* tv_id */
a2da7d8a 3967 0, /* properties_required */
3968 0, /* properties_provided */
3969 0, /* properties_destroyed */
3970 0, /* todo_flags_start */
3971 ( TODO_dump_symtab ), /* todo_flags_finish */
3972};
3973
3974class pass_ipa_fn_summary : public ipa_opt_pass_d
3975{
3976public:
3977 pass_ipa_fn_summary (gcc::context *ctxt)
3978 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3979 ipa_fn_summary_generate, /* generate_summary */
3980 ipa_fn_summary_write, /* write_summary */
3981 ipa_fn_summary_read, /* read_summary */
3982 NULL, /* write_optimization_summary */
3983 NULL, /* read_optimization_summary */
3984 NULL, /* stmt_fixup */
3985 0, /* function_transform_todo_flags_start */
3986 NULL, /* function_transform */
3987 NULL) /* variable_transform */
3988 {}
3989
3990 /* opt_pass methods: */
3991 virtual unsigned int execute (function *) { return 0; }
3992
3993}; // class pass_ipa_fn_summary
3994
3995} // anon namespace
3996
3997ipa_opt_pass_d *
3998make_pass_ipa_fn_summary (gcc::context *ctxt)
3999{
4000 return new pass_ipa_fn_summary (ctxt);
4001}
a450b652 4002
4003/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4004 within the same process. For use by toplev::finalize. */
4005
4006void
4007ipa_fnsummary_c_finalize (void)
4008{
4009 ipa_free_fn_summary ();
4010}