1 /* Function summary pass.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store the above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether the function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depend on context (i.e. code that is known to be optimized away when
43 the context is known, either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. The IPA
52 inliner performs analysis via its analyze_function method. */
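
/* As an illustration (a hedged sketch, not actual dump output), consider:

     int f (int x)
     {
       if (x > 0)
         expensive ();
       return x + 1;
     }

   The size and time of the call to expensive () are accounted under the
   execution predicate "x > 0", while the unconditional "x + 1" is
   accounted under the always-true predicate.  A clone specialized by
   IPA-CP for x == 0, or an inline copy where x is known to be zero, can
   therefore subtract the conditional part from its size/time estimate.  */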
53
54 #include "config.h"
55 #include "system.h"
56 #include "coretypes.h"
57 #include "backend.h"
58 #include "tree.h"
59 #include "gimple.h"
60 #include "alloc-pool.h"
61 #include "tree-pass.h"
62 #include "ssa.h"
63 #include "tree-streamer.h"
64 #include "cgraph.h"
65 #include "diagnostic.h"
66 #include "fold-const.h"
67 #include "print-tree.h"
68 #include "tree-inline.h"
69 #include "gimple-pretty-print.h"
70 #include "cfganal.h"
71 #include "gimple-iterator.h"
72 #include "tree-cfg.h"
73 #include "tree-ssa-loop-niter.h"
74 #include "tree-ssa-loop.h"
75 #include "symbol-summary.h"
76 #include "ipa-prop.h"
77 #include "ipa-fnsummary.h"
78 #include "cfgloop.h"
79 #include "tree-scalar-evolution.h"
80 #include "ipa-utils.h"
81 #include "cfgexpand.h"
82 #include "gimplify.h"
83 #include "stringpool.h"
84 #include "attribs.h"
85
86 /* Summaries. */
87 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
88 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
89 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
90
91 /* Edge predicates go here. */
92 static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95 /* Dump IPA hints. */
96 void
97 ipa_dump_hints (FILE *f, ipa_hints hints)
98 {
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_known_hot)
138 {
139 hints &= ~INLINE_HINT_known_hot;
140 fprintf (f, " known_hot");
141 }
142 gcc_assert (!hints);
143 }
144
145
146 /* Record SIZE and TIME to SUMMARY.
147 The accounted code will be executed when EXEC_PRED is true.
148 When NONCONST_PRED is false the code will evaluate to a constant and
149 will get optimized out in specialized clones of the function.
150 If CALL is true account to call_size_time_table rather than
151 size_time_table. */
152
153 void
154 ipa_fn_summary::account_size_time (int size, sreal time,
155 const predicate &exec_pred,
156 const predicate &nonconst_pred_in,
157 bool call)
158 {
159 size_time_entry *e;
160 bool found = false;
161 int i;
162 predicate nonconst_pred;
163 vec<size_time_entry, va_gc> *table = call
164 ? call_size_time_table : size_time_table;
165
166 if (exec_pred == false)
167 return;
168
169 nonconst_pred = nonconst_pred_in & exec_pred;
170
171 if (nonconst_pred == false)
172 return;
173
174 /* We need to create an initial empty unconditional clause, but otherwise
175 we don't need to account empty times and sizes. */
176 if (!size && time == 0 && table)
177 return;
178
179 gcc_assert (time >= 0);
180
181 for (i = 0; vec_safe_iterate (table, i, &e); i++)
182 if (e->exec_predicate == exec_pred
183 && e->nonconst_predicate == nonconst_pred)
184 {
185 found = true;
186 break;
187 }
188 if (i == max_size_time_table_size)
189 {
190 i = 0;
191 found = true;
192 e = &(*table)[0];
193 if (dump_file && (dump_flags & TDF_DETAILS))
194 fprintf (dump_file,
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
197 }
198 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
199 {
200 fprintf (dump_file,
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size) / ipa_fn_summary::size_scale,
203 (time.to_double ()), found ? "" : "new ");
204 exec_pred.dump (dump_file, conds, 0);
205 if (exec_pred != nonconst_pred)
206 {
207 fprintf (dump_file, " nonconst:");
208 nonconst_pred.dump (dump_file, conds);
209 }
210 else
211 fprintf (dump_file, "\n");
212 }
213 if (!found)
214 {
215 class size_time_entry new_entry;
216 new_entry.size = size;
217 new_entry.time = time;
218 new_entry.exec_predicate = exec_pred;
219 new_entry.nonconst_predicate = nonconst_pred;
220 if (call)
221 vec_safe_push (call_size_time_table, new_entry);
222 else
223 vec_safe_push (size_time_table, new_entry);
224 }
225 else
226 {
227 e->size += size;
228 e->time += time;
229 }
230 }
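
/* A small usage sketch of the merging behavior above (sizes are in
   size_scale units and the predicate P is arbitrary):

     s->account_size_time (4, 2, p, p);
     s->account_size_time (6, 1, p, p);

   Both calls share the same (exec, nonconst) predicate pair, so they are
   accumulated into a single size_time_entry with size 10 and time 3.  A
   distinct predicate pair allocates a new entry instead, up to
   max_size_time_table_size entries; past that limit everything is added
   to entry 0 and the predicate is ignored.  */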
231
232 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
233
234 static struct cgraph_edge *
235 redirect_to_unreachable (struct cgraph_edge *e)
236 {
237 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
238 struct cgraph_node *target = cgraph_node::get_create
239 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
240
241 if (e->speculative)
242 e = e->resolve_speculation (target->decl);
243 else if (!e->callee)
244 e->make_direct (target);
245 else
246 e->redirect_callee (target);
247 class ipa_call_summary *es = ipa_call_summaries->get (e);
248 e->inline_failed = CIF_UNREACHABLE;
249 e->count = profile_count::zero ();
250 es->call_stmt_size = 0;
251 es->call_stmt_time = 0;
252 if (callee)
253 callee->remove_symbol_and_inline_clones ();
254 return e;
255 }
256
257 /* Set predicate for edge E. */
258
259 static void
260 edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
261 {
262 /* If the edge is determined to be never executed, redirect it
263 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
264 be optimized out. */
265 if (predicate && *predicate == false
266 /* When handling speculative edges, we need to do the redirection
267 just once. Do it always on the direct edge, so we do not
268 attempt to resolve speculation while duplicating the edge. */
269 && (!e->speculative || e->callee))
270 e = redirect_to_unreachable (e);
271
272 class ipa_call_summary *es = ipa_call_summaries->get (e);
273 if (predicate && *predicate != true)
274 {
275 if (!es->predicate)
276 es->predicate = edge_predicate_pool.allocate ();
277 *es->predicate = *predicate;
278 }
279 else
280 {
281 if (es->predicate)
282 edge_predicate_pool.remove (es->predicate);
283 es->predicate = NULL;
284 }
285 }
286
287 /* Set predicate for hint *P. */
288
289 static void
290 set_hint_predicate (predicate **p, predicate new_predicate)
291 {
292 if (new_predicate == false || new_predicate == true)
293 {
294 if (*p)
295 edge_predicate_pool.remove (*p);
296 *p = NULL;
297 }
298 else
299 {
300 if (!*p)
301 *p = edge_predicate_pool.allocate ();
302 **p = new_predicate;
303 }
304 }
305
306
307 /* Compute what conditions may or may not hold given information about
308 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
309 while RET_NONSPEC_CLAUSE returns truths that may hold in a nonspecialized
310 copy when called in a given context. Each is a bitmask of conditions. A
311 zero bit means the condition is known to be false, while a set bit means
312 the condition may or may not be true. The two differ - for example the
313 NOT_INLINED condition is always false in the second, and builtin_constant_p
314 tests cannot use the fact that a parameter is indeed a constant there.
315 
316 KNOWN_VALS is a partial mapping of parameters of NODE to constant values.
317 KNOWN_AGGS is a vector of known aggregate offset/value sets for each
318 parameter. Return the clause of possible truths. When INLINE_P is true,
319 assume that we are inlining.
320
321 ERROR_MARK means compile time invariant. */
322
323 static void
324 evaluate_conditions_for_known_args (struct cgraph_node *node,
325 bool inline_p,
326 vec<tree> known_vals,
327 vec<value_range> known_value_ranges,
328 vec<ipa_agg_value_set> known_aggs,
329 clause_t *ret_clause,
330 clause_t *ret_nonspec_clause)
331 {
332 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
333 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
334 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
335 int i;
336 struct condition *c;
337
338 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
339 {
340 tree val;
341 tree res;
342 int j;
343 struct expr_eval_op *op;
344
345 /* We allow call stmt to have fewer arguments than the callee function
346 (especially for K&R style programs). So check the bounds here (we assume
347 known_aggs vector, if non-NULL, has the same length as
348 known_vals). */
349 gcc_checking_assert (!known_aggs.exists ()
350 || (known_vals.length () == known_aggs.length ()));
351 if (c->operand_num >= (int) known_vals.length ())
352 {
353 clause |= 1 << (i + predicate::first_dynamic_condition);
354 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
355 continue;
356 }
357
358 if (c->agg_contents)
359 {
360 struct ipa_agg_value_set *agg;
361
362 if (c->code == predicate::changed
363 && !c->by_ref
364 && (known_vals[c->operand_num] == error_mark_node))
365 continue;
366
367 if (known_aggs.exists ())
368 {
369 agg = &known_aggs[c->operand_num];
370 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
371 c->offset, c->by_ref);
372 }
373 else
374 val = NULL_TREE;
375 }
376 else
377 {
378 val = known_vals[c->operand_num];
379 if (val == error_mark_node && c->code != predicate::changed)
380 val = NULL_TREE;
381 }
382
383 if (!val
384 && (c->code == predicate::changed
385 || c->code == predicate::is_not_constant))
386 {
387 clause |= 1 << (i + predicate::first_dynamic_condition);
388 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
389 continue;
390 }
391 if (c->code == predicate::changed)
392 {
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
394 continue;
395 }
396
397 if (c->code == predicate::is_not_constant)
398 {
399 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
400 continue;
401 }
402
403 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
404 {
405 if (c->type != TREE_TYPE (val))
406 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
407 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
408 {
409 if (!val)
410 break;
411 if (!op->val[0])
412 val = fold_unary (op->code, op->type, val);
413 else if (!op->val[1])
414 val = fold_binary (op->code, op->type,
415 op->index ? op->val[0] : val,
416 op->index ? val : op->val[0]);
417 else if (op->index == 0)
418 val = fold_ternary (op->code, op->type,
419 val, op->val[0], op->val[1]);
420 else if (op->index == 1)
421 val = fold_ternary (op->code, op->type,
422 op->val[0], val, op->val[1]);
423 else if (op->index == 2)
424 val = fold_ternary (op->code, op->type,
425 op->val[0], op->val[1], val);
426 else
427 val = NULL_TREE;
428 }
429
430 res = val
431 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
432 : NULL;
433
434 if (res && integer_zerop (res))
435 continue;
436 if (res && integer_onep (res))
437 {
438 clause |= 1 << (i + predicate::first_dynamic_condition);
439 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
440 continue;
441 }
442 }
443 if (c->operand_num < (int) known_value_ranges.length ()
444 && !c->agg_contents
445 && !known_value_ranges[c->operand_num].undefined_p ()
446 && !known_value_ranges[c->operand_num].varying_p ()
447 && TYPE_SIZE (c->type)
448 == TYPE_SIZE (known_value_ranges[c->operand_num].type ())
449 && (!val || TREE_CODE (val) != INTEGER_CST))
450 {
451 value_range vr = known_value_ranges[c->operand_num];
452 if (!useless_type_conversion_p (c->type, vr.type ()))
453 {
454 value_range res;
455 range_fold_unary_expr (&res, NOP_EXPR,
456 c->type, &vr, vr.type ());
457 vr = res;
458 }
459 tree type = c->type;
460
461 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
462 {
463 if (vr.varying_p () || vr.undefined_p ())
464 break;
465
466 value_range res;
467 if (!op->val[0])
468 range_fold_unary_expr (&res, op->code, op->type, &vr, type);
469 else if (!op->val[1])
470 {
471 value_range op0 (op->val[0], op->val[0]);
472 range_fold_binary_expr (&res, op->code, op->type,
473 op->index ? &op0 : &vr,
474 op->index ? &vr : &op0);
475 }
476 else
477 gcc_unreachable ();
478 type = op->type;
479 vr = res;
480 }
481 if (!vr.varying_p () && !vr.undefined_p ())
482 {
483 value_range res;
484 value_range val_vr (c->val, c->val);
485 range_fold_binary_expr (&res, c->code, boolean_type_node,
486 &vr,
487 &val_vr);
488 if (res.zero_p ())
489 continue;
490 }
491 }
492
493 clause |= 1 << (i + predicate::first_dynamic_condition);
494 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
495 }
496 *ret_clause = clause;
497 if (ret_nonspec_clause)
498 *ret_nonspec_clause = nonspec_clause;
499 }
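
/* Clause encoding sketch (a hypothetical two-condition setup for
   concreteness): let cond 0 be "param 0 > 10" and cond 1 be "param 0
   changed".  For a caller passing the constant 5, "5 > 10" folds to
   false, so bit (0 + predicate::first_dynamic_condition) is clear in
   both output clauses.  The "changed" condition is false only when the
   callee is specialized for the constant, so its bit is clear in
   *RET_CLAUSE but remains set in *RET_NONSPEC_CLAUSE.  Finally, the
   predicate::not_inlined_condition bit is set in *RET_CLAUSE unless
   INLINE_P, and is always set in the nonspec clause.  */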
500
501
502 /* Work out what conditions might be true at invocation of E. */
503
504 void
505 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
506 clause_t *clause_ptr,
507 clause_t *nonspec_clause_ptr,
508 vec<tree> *known_vals_ptr,
509 vec<ipa_polymorphic_call_context>
510 *known_contexts_ptr,
511 vec<ipa_agg_value_set> *known_aggs_ptr)
512 {
513 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
514 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
515 vec<tree> known_vals = vNULL;
516 auto_vec<value_range, 32> known_value_ranges;
517 vec<ipa_agg_value_set> known_aggs = vNULL;
518 class ipa_edge_args *args;
519
520 if (clause_ptr)
521 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
522 if (known_vals_ptr)
523 known_vals_ptr->create (0);
524 if (known_contexts_ptr)
525 known_contexts_ptr->create (0);
526
527 if (ipa_node_params_sum
528 && !e->call_stmt_cannot_inline_p
529 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)
530 && (args = IPA_EDGE_REF (e)) != NULL)
531 {
532 struct cgraph_node *caller;
533 class ipa_node_params *caller_parms_info, *callee_pi;
534 class ipa_call_summary *es = ipa_call_summaries->get (e);
535 int i, count = ipa_get_cs_argument_count (args);
536
537 if (e->caller->inlined_to)
538 caller = e->caller->inlined_to;
539 else
540 caller = e->caller;
541 caller_parms_info = IPA_NODE_REF (caller);
542 callee_pi = IPA_NODE_REF (callee);
543
544 if (count && (info->conds || known_vals_ptr))
545 known_vals.safe_grow_cleared (count);
546 if (count && info->conds)
547 known_value_ranges.safe_grow_cleared (count);
548 if (count && (info->conds || known_aggs_ptr))
549 known_aggs.safe_grow_cleared (count);
550 if (count && known_contexts_ptr)
551 known_contexts_ptr->safe_grow_cleared (count);
552
553 if (callee_pi)
554 for (i = 0; i < count; i++)
555 {
556 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
557 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
558 ipa_get_type (callee_pi, i));
559
560 if (!cst && e->call_stmt
561 && i < (int)gimple_call_num_args (e->call_stmt))
562 {
563 cst = gimple_call_arg (e->call_stmt, i);
564 if (!is_gimple_min_invariant (cst))
565 cst = NULL;
566 }
567 if (cst)
568 {
569 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
570 if (known_vals.exists ())
571 known_vals[i] = cst;
572 }
573 else if (inline_p && !es->param[i].change_prob)
574 known_vals[i] = error_mark_node;
575
576 if (known_contexts_ptr)
577 (*known_contexts_ptr)[i]
578 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
579
580 known_aggs[i] = ipa_agg_value_set_from_jfunc (caller_parms_info,
581 caller, &jf->agg);
582 if (info->conds)
583 known_value_ranges[i]
584 = ipa_value_range_from_jfunc (caller_parms_info, e, jf,
585 ipa_get_type (callee_pi, i));
586 }
587 else
588 gcc_assert (callee->thunk.thunk_p);
589 }
590 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
591 && ((clause_ptr && info->conds) || known_vals_ptr))
592 {
593 int i, count = (int)gimple_call_num_args (e->call_stmt);
594
595 if (count && (info->conds || known_vals_ptr))
596 known_vals.safe_grow_cleared (count);
597 for (i = 0; i < count; i++)
598 {
599 tree cst = gimple_call_arg (e->call_stmt, i);
600 if (!is_gimple_min_invariant (cst))
601 cst = NULL;
602 if (cst)
603 known_vals[i] = cst;
604 }
605 }
606
607 evaluate_conditions_for_known_args (callee, inline_p,
608 known_vals,
609 known_value_ranges,
610 known_aggs, clause_ptr,
611 nonspec_clause_ptr);
612
613 if (known_vals_ptr)
614 *known_vals_ptr = known_vals;
615 else
616 known_vals.release ();
617
618 if (known_aggs_ptr)
619 *known_aggs_ptr = known_aggs;
620 else
621 ipa_release_agg_values (known_aggs);
622 }
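
/* Usage sketch: for an edge representing a call "foo (5, y)" (foo and y
   are hypothetical names used only for illustration) where the jump
   functions prove the first argument is the constant 5 and nothing is
   known about the second, this fills known_vals[0] with the constant 5,
   leaves known_vals[1] NULL, and derives the clauses via
   evaluate_conditions_for_known_args; the results feed size/time
   estimation such as estimate_edge_size_and_time.  */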
623
624
625 /* Allocate the function summary. */
626
627 static void
628 ipa_fn_summary_alloc (void)
629 {
630 gcc_checking_assert (!ipa_fn_summaries);
631 ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
632 (symtab);
633 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
634 ipa_call_summaries = new ipa_call_summary_t (symtab);
635 }
636
637 ipa_call_summary::~ipa_call_summary ()
638 {
639 if (predicate)
640 edge_predicate_pool.remove (predicate);
641
642 param.release ();
643 }
644
645 ipa_fn_summary::~ipa_fn_summary ()
646 {
647 if (loop_iterations)
648 edge_predicate_pool.remove (loop_iterations);
649 if (loop_stride)
650 edge_predicate_pool.remove (loop_stride);
651 vec_free (conds);
652 vec_free (size_time_table);
653 vec_free (call_size_time_table);
654 }
655
656 void
657 ipa_fn_summary_t::remove_callees (cgraph_node *node)
658 {
659 cgraph_edge *e;
660 for (e = node->callees; e; e = e->next_callee)
661 ipa_call_summaries->remove (e);
662 for (e = node->indirect_calls; e; e = e->next_callee)
663 ipa_call_summaries->remove (e);
664 }
665
666 /* Same as remap_predicate_after_duplication but handle hint predicate *P.
667 Additionally, take care of allocating a new memory slot for the updated
668 predicate and set it to NULL when it becomes true or false (and thus
669 uninteresting). */
670
671 static void
672 remap_hint_predicate_after_duplication (predicate **p,
673 clause_t possible_truths)
674 {
675 predicate new_predicate;
676
677 if (!*p)
678 return;
679
680 new_predicate = (*p)->remap_after_duplication (possible_truths);
681 /* We do not want to free previous predicate; it is used by node origin. */
682 *p = NULL;
683 set_hint_predicate (p, new_predicate);
684 }
685
686
687 /* Hook that is called by cgraph.c when a node is duplicated. */
688 void
689 ipa_fn_summary_t::duplicate (cgraph_node *src,
690 cgraph_node *dst,
691 ipa_fn_summary *,
692 ipa_fn_summary *info)
693 {
694 new (info) ipa_fn_summary (*ipa_fn_summaries->get (src));
695 /* TODO: as an optimization, we may avoid copying conditions
696 that are known to be false or true. */
697 info->conds = vec_safe_copy (info->conds);
698
699 /* When there are any replacements in the function body, see if we can figure
700 out that something was optimized out. */
701 if (ipa_node_params_sum && dst->clone.tree_map)
702 {
703 vec<size_time_entry, va_gc> *entry = info->size_time_table;
704 /* Use SRC parm info since it may not be copied yet. */
705 class ipa_node_params *parms_info = IPA_NODE_REF (src);
706 vec<tree> known_vals = vNULL;
707 int count = ipa_get_param_count (parms_info);
708 int i, j;
709 clause_t possible_truths;
710 predicate true_pred = true;
711 size_time_entry *e;
712 int optimized_out_size = 0;
713 bool inlined_to_p = false;
714 struct cgraph_edge *edge, *next;
715
716 info->size_time_table = 0;
717 known_vals.safe_grow_cleared (count);
718 for (i = 0; i < count; i++)
719 {
720 struct ipa_replace_map *r;
721
722 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
723 {
724 if (r->parm_num == i)
725 {
726 known_vals[i] = r->new_tree;
727 break;
728 }
729 }
730 }
731 evaluate_conditions_for_known_args (dst, false,
732 known_vals,
733 vNULL,
734 vNULL,
735 &possible_truths,
736 /* We are going to specialize,
737 so ignore nonspec truths. */
738 NULL);
739 known_vals.release ();
740
741 info->account_size_time (0, 0, true_pred, true_pred);
742
743 /* Remap size_time vectors.
744 Simplify the predicate by pruning out alternatives that are known
745 to be false.
746 TODO: as an optimization, we can also eliminate conditions known
747 to be true. */
748 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
749 {
750 predicate new_exec_pred;
751 predicate new_nonconst_pred;
752 new_exec_pred = e->exec_predicate.remap_after_duplication
753 (possible_truths);
754 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
755 (possible_truths);
756 if (new_exec_pred == false || new_nonconst_pred == false)
757 optimized_out_size += e->size;
758 else
759 info->account_size_time (e->size, e->time, new_exec_pred,
760 new_nonconst_pred);
761 }
762
763 /* Remap edge predicates with the same simplification as above.
764 Also copy constantness arrays. */
765 for (edge = dst->callees; edge; edge = next)
766 {
767 predicate new_predicate;
768 class ipa_call_summary *es = ipa_call_summaries->get (edge);
769 next = edge->next_callee;
770
771 if (!edge->inline_failed)
772 inlined_to_p = true;
773 if (!es->predicate)
774 continue;
775 new_predicate = es->predicate->remap_after_duplication
776 (possible_truths);
777 if (new_predicate == false && *es->predicate != false)
778 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
779 edge_set_predicate (edge, &new_predicate);
780 }
781
782 /* Remap indirect edge predicates with the same simplification as above.
783 Also copy constantness arrays. */
784 for (edge = dst->indirect_calls; edge; edge = next)
785 {
786 predicate new_predicate;
787 class ipa_call_summary *es = ipa_call_summaries->get (edge);
788 next = edge->next_callee;
789
790 gcc_checking_assert (edge->inline_failed);
791 if (!es->predicate)
792 continue;
793 new_predicate = es->predicate->remap_after_duplication
794 (possible_truths);
795 if (new_predicate == false && *es->predicate != false)
796 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
797 edge_set_predicate (edge, &new_predicate);
798 }
799 remap_hint_predicate_after_duplication (&info->loop_iterations,
800 possible_truths);
801 remap_hint_predicate_after_duplication (&info->loop_stride,
802 possible_truths);
803
804 /* If the inliner or someone after it ever starts producing
805 non-trivial clones, we will get into trouble with lack of information
806 about updating self sizes, because the size vectors already contain
807 sizes of the callees. */
808 gcc_assert (!inlined_to_p || !optimized_out_size);
809 }
810 else
811 {
812 info->size_time_table = vec_safe_copy (info->size_time_table);
813 if (info->loop_iterations)
814 {
815 predicate p = *info->loop_iterations;
816 info->loop_iterations = NULL;
817 set_hint_predicate (&info->loop_iterations, p);
818 }
819 if (info->loop_stride)
820 {
821 predicate p = *info->loop_stride;
822 info->loop_stride = NULL;
823 set_hint_predicate (&info->loop_stride, p);
824 }
825 }
826 if (!dst->inlined_to)
827 ipa_update_overall_fn_summary (dst);
828 }
829
830
831 /* Hook that is called by cgraph.c when an edge is duplicated. */
832
833 void
834 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
835 struct cgraph_edge *dst,
836 class ipa_call_summary *srcinfo,
837 class ipa_call_summary *info)
838 {
839 new (info) ipa_call_summary (*srcinfo);
840 info->predicate = NULL;
841 edge_set_predicate (dst, srcinfo->predicate);
842 info->param = srcinfo->param.copy ();
843 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
844 {
845 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
846 - eni_size_weights.call_cost);
847 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
848 - eni_time_weights.call_cost);
849 }
850 }
851
852 /* Dump edge summaries associated with NODE and recursively with all
853 of its inline clones. Indent by INDENT. */
854
855 static void
856 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
857 class ipa_fn_summary *info)
858 {
859 struct cgraph_edge *edge;
860 for (edge = node->callees; edge; edge = edge->next_callee)
861 {
862 class ipa_call_summary *es = ipa_call_summaries->get (edge);
863 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
864 int i;
865
866 fprintf (f,
867 "%*s%s/%i %s\n%*s freq:%4.2f",
868 indent, "", callee->name (), callee->order,
869 !edge->inline_failed
870 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
871 indent, "", edge->sreal_frequency ().to_double ());
872
873 if (es)
874 fprintf (f, " loop depth:%2i size:%2i time: %2i",
875 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
876
877 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
878 ipa_size_summary *ss = ipa_size_summaries->get (callee);
879 if (s != NULL)
880 fprintf (f, " callee size:%2i stack:%2i",
881 (int) (ss->size / ipa_fn_summary::size_scale),
882 (int) s->estimated_stack_size);
883
884 if (es && es->predicate)
885 {
886 fprintf (f, " predicate: ");
887 es->predicate->dump (f, info->conds);
888 }
889 else
890 fprintf (f, "\n");
891 if (es && es->param.exists ())
892 for (i = 0; i < (int) es->param.length (); i++)
893 {
894 int prob = es->param[i].change_prob;
895
896 if (!prob)
897 fprintf (f, "%*s op%i is compile time invariant\n",
898 indent + 2, "", i);
899 else if (prob != REG_BR_PROB_BASE)
900 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
901 prob * 100.0 / REG_BR_PROB_BASE);
902 }
903 if (!edge->inline_failed)
904 {
905 ipa_size_summary *ss = ipa_size_summaries->get (callee);
906 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
907 indent + 2, "",
908 (int) ipa_get_stack_frame_offset (callee),
909 (int) ss->estimated_self_stack_size);
910 dump_ipa_call_summary (f, indent + 2, callee, info);
911 }
912 }
913 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
914 {
915 class ipa_call_summary *es = ipa_call_summaries->get (edge);
916 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
917 " time: %2i",
918 indent, "",
919 es->loop_depth,
920 edge->sreal_frequency ().to_double (), es->call_stmt_size,
921 es->call_stmt_time);
922 if (es->predicate)
923 {
924 fprintf (f, "predicate: ");
925 es->predicate->dump (f, info->conds);
926 }
927 else
928 fprintf (f, "\n");
929 }
930 }
931
932
933 void
934 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
935 {
936 if (node->definition)
937 {
938 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
939 class ipa_size_summary *ss = ipa_size_summaries->get (node);
940 if (s != NULL)
941 {
942 size_time_entry *e;
943 int i;
944 fprintf (f, "IPA function summary for %s", node->dump_name ());
945 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
946 fprintf (f, " always_inline");
947 if (s->inlinable)
948 fprintf (f, " inlinable");
949 if (s->fp_expressions)
950 fprintf (f, " fp_expression");
951 fprintf (f, "\n global time: %f\n", s->time.to_double ());
952 fprintf (f, " self size: %i\n", ss->self_size);
953 fprintf (f, " global size: %i\n", ss->size);
954 fprintf (f, " min size: %i\n", s->min_size);
955 fprintf (f, " self stack: %i\n",
956 (int) ss->estimated_self_stack_size);
957 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
958 if (s->growth)
959 fprintf (f, " estimated growth:%i\n", (int) s->growth);
960 if (s->scc_no)
961 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
962 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
963 {
964 fprintf (f, " size:%f, time:%f",
965 (double) e->size / ipa_fn_summary::size_scale,
966 e->time.to_double ());
967 if (e->exec_predicate != true)
968 {
969 fprintf (f, ", executed if:");
970 e->exec_predicate.dump (f, s->conds, 0);
971 }
972 if (e->exec_predicate != e->nonconst_predicate)
973 {
974 fprintf (f, ", nonconst if:");
975 e->nonconst_predicate.dump (f, s->conds, 0);
976 }
977 fprintf (f, "\n");
978 }
979 if (s->loop_iterations)
980 {
981 fprintf (f, " loop iterations:");
982 s->loop_iterations->dump (f, s->conds);
983 }
984 if (s->loop_stride)
985 {
986 fprintf (f, " loop stride:");
987 s->loop_stride->dump (f, s->conds);
988 }
989 fprintf (f, " calls:\n");
990 dump_ipa_call_summary (f, 4, node, s);
991 fprintf (f, "\n");
992 }
993 else
994 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
995 }
996 }
997
998 DEBUG_FUNCTION void
999 ipa_debug_fn_summary (struct cgraph_node *node)
1000 {
1001 ipa_dump_fn_summary (stderr, node);
1002 }
1003
1004 void
1005 ipa_dump_fn_summaries (FILE *f)
1006 {
1007 struct cgraph_node *node;
1008
1009 FOR_EACH_DEFINED_FUNCTION (node)
1010 if (!node->inlined_to)
1011 ipa_dump_fn_summary (f, node);
1012 }
1013
1014 /* Callback of walk_aliased_vdefs. Flags that it has been invoked by
1015 setting the boolean variable pointed to by DATA to true. */
1016
1017 static bool
1018 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1019 void *data)
1020 {
1021 bool *b = (bool *) data;
1022 *b = true;
1023 return true;
1024 }
1025
1026 /* If OP refers to the value of a function parameter, return the
1027 corresponding parameter. If non-NULL, the size of the memory load (or of
1028 the SSA_NAME of the PARM_DECL) will be stored to *SIZE_P in that case too. */
1029
1030 static tree
1031 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1032 poly_int64 *size_p)
1033 {
1034 /* SSA_NAME referring to parm default def? */
1035 if (TREE_CODE (op) == SSA_NAME
1036 && SSA_NAME_IS_DEFAULT_DEF (op)
1037 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1038 {
1039 if (size_p)
1040 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1041 return SSA_NAME_VAR (op);
1042 }
1043 /* Non-SSA parm reference? */
1044 if (TREE_CODE (op) == PARM_DECL)
1045 {
1046 bool modified = false;
1047
1048 ao_ref refd;
1049 ao_ref_init (&refd, op);
1050 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1051 mark_modified, &modified, NULL, NULL,
1052 fbi->aa_walk_budget + 1);
1053 if (walked < 0)
1054 {
1055 fbi->aa_walk_budget = 0;
1056 return NULL_TREE;
1057 }
1058 if (!modified)
1059 {
1060 if (size_p)
1061 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1062 return op;
1063 }
1064 }
1065 return NULL_TREE;
1066 }
1067
1068 /* If OP refers to the value of a function parameter, return the
1069 corresponding parameter. Also traverse chains of SSA register assignments.
1070 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1071 stored to *SIZE_P in that case too. */
1072
1073 static tree
1074 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1075 poly_int64 *size_p)
1076 {
1077 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1078 if (res)
1079 return res;
1080
1081 if (TREE_CODE (op) == SSA_NAME
1082 && !SSA_NAME_IS_DEFAULT_DEF (op)
1083 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1084 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1085 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1086 size_p);
1087 return NULL_TREE;
1088 }
1089
1090 /* If OP refers to a value of a function parameter or value loaded from an
1091 aggregate passed to a parameter (either by value or reference), return TRUE
1092 and store the number of the parameter to *INDEX_P, the access size into
1093 *SIZE_P, and information whether and how it has been loaded from an
1094 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1095 statement in which OP is used or loaded. */
1096
1097 static bool
1098 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1099 gimple *stmt, tree op, int *index_p,
1100 poly_int64 *size_p,
1101 struct agg_position_info *aggpos)
1102 {
1103 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1104
1105 gcc_checking_assert (aggpos);
1106 if (res)
1107 {
1108 *index_p = ipa_get_param_decl_index (fbi->info, res);
1109 if (*index_p < 0)
1110 return false;
1111 aggpos->agg_contents = false;
1112 aggpos->by_ref = false;
1113 return true;
1114 }
1115
1116 if (TREE_CODE (op) == SSA_NAME)
1117 {
1118 if (SSA_NAME_IS_DEFAULT_DEF (op)
1119 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1120 return false;
1121 stmt = SSA_NAME_DEF_STMT (op);
1122 op = gimple_assign_rhs1 (stmt);
1123 if (!REFERENCE_CLASS_P (op))
1124 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1125 aggpos);
1126 }
1127
1128 aggpos->agg_contents = true;
1129 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1130 stmt, op, index_p, &aggpos->offset,
1131 size_p, &aggpos->by_ref);
1132 }
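
/* For instance (an illustrative sketch):

     int f (struct s *p) { return p->fld; }

   For the load "p->fld" this returns true with *INDEX_P set to the index
   of P, AGGPOS->agg_contents true, AGGPOS->by_ref true and
   AGGPOS->offset set to the offset of FLD, provided ipa_load_from_parm_agg
   can show the aggregate contents are unchanged since function entry.  */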
1133
1134 /* See if the statement might disappear after inlining.
1135 0 - means not eliminated
1136 1 - half of the statements go away
1137 2 - it is eliminated for sure.
1138 We are not terribly sophisticated, basically looking for simple abstraction
1139 penalty wrappers. */
1140
1141 static int
1142 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1143 {
1144 enum gimple_code code = gimple_code (stmt);
1145 enum tree_code rhs_code;
1146
1147 if (!optimize)
1148 return 0;
1149
1150 switch (code)
1151 {
1152 case GIMPLE_RETURN:
1153 return 2;
1154 case GIMPLE_ASSIGN:
1155 if (gimple_num_ops (stmt) != 2)
1156 return 0;
1157
1158 rhs_code = gimple_assign_rhs_code (stmt);
1159
1160 /* Casts of parameters, loads from parameters passed by reference
1161 and stores to the return value or parameters are often free after
1162 inlining due to SRA and further combining.
1163 Assume that half of the statements go away. */
1164 if (CONVERT_EXPR_CODE_P (rhs_code)
1165 || rhs_code == VIEW_CONVERT_EXPR
1166 || rhs_code == ADDR_EXPR
1167 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1168 {
1169 tree rhs = gimple_assign_rhs1 (stmt);
1170 tree lhs = gimple_assign_lhs (stmt);
1171 tree inner_rhs = get_base_address (rhs);
1172 tree inner_lhs = get_base_address (lhs);
1173 bool rhs_free = false;
1174 bool lhs_free = false;
1175
1176 if (!inner_rhs)
1177 inner_rhs = rhs;
1178 if (!inner_lhs)
1179 inner_lhs = lhs;
1180
1181 /* Reads of a parameter are expected to be free. */
1182 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1183 rhs_free = true;
1184 /* Match expressions of form &this->field. Those will most likely
1185 combine with something upstream after inlining. */
1186 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1187 {
1188 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1189 if (TREE_CODE (op) == PARM_DECL)
1190 rhs_free = true;
1191 else if (TREE_CODE (op) == MEM_REF
1192 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1193 NULL))
1194 rhs_free = true;
1195 }
1196
1197 /* When a parameter is not an SSA register because its address is taken
1198 and it is just copied into one, the statement will be completely
1199 free after inlining (we will copy propagate backward). */
1200 if (rhs_free && is_gimple_reg (lhs))
1201 return 2;
1202
1203 /* Reads of parameters passed by reference are
1204 expected to be free (i.e. optimized out after inlining). */
1205 if (TREE_CODE (inner_rhs) == MEM_REF
1206 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1207 rhs_free = true;
1208
1209 /* Copying a parameter passed by reference into a gimple register is
1210 probably also going to copy propagate, but we can't be quite
1211 sure. */
1212 if (rhs_free && is_gimple_reg (lhs))
1213 lhs_free = true;
1214
1215 /* Writes to parameters, parameters passed by value and the return value
1216 (either directly or passed via invisible reference) are free.
1217 
1218 TODO: We ought to handle a testcase like
1219 struct a {int a,b;};
1220 struct a
1221 returnstruct (void)
1222 {
1223 struct a a ={1,2};
1224 return a;
1225 }
1226 
1227 This translates into:
1228 
1229 returnstruct ()
1230 {
1231 int a$b;
1232 int a$a;
1233 struct a a;
1234 struct a D.2739;
1235
1236 <bb 2>:
1237 D.2739.a = 1;
1238 D.2739.b = 2;
1239 return D.2739;
1240
1241 }
1242 For that we would need to copy the ipa-split logic detecting writes
1243 to the return value. */
1244 if (TREE_CODE (inner_lhs) == PARM_DECL
1245 || TREE_CODE (inner_lhs) == RESULT_DECL
1246 || (TREE_CODE (inner_lhs) == MEM_REF
1247 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1248 NULL)
1249 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1250 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1251 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1252 (inner_lhs,
1253 0))) == RESULT_DECL))))
1254 lhs_free = true;
1255 if (lhs_free
1256 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1257 rhs_free = true;
1258 if (lhs_free && rhs_free)
1259 return 1;
1260 }
1261 return 0;
1262 default:
1263 return 0;
1264 }
1265 }
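
/* For example, a typical abstraction penalty wrapper

     int get_x (struct pt *p) { return p->x; }

   consists of a load from a parameter passed by reference followed by a
   return: the load is rated 1 (expected to disappear about half of the
   time, via SRA and copy propagation) and the GIMPLE_RETURN is rated 2
   (eliminated for sure), so such a wrapper is predicted to be nearly
   free once inlined.  (A sketch of the intent; the exact rating follows
   the rules above.)  */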
1266
1267 /* Analyze EXPR if it represents a series of simple operations performed on
1268 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1269 AGGPOS have the same meaning as in unmodified_parm_or_parm_agg_item.
1270 Type of the parameter or load from an aggregate via the parameter is
1271 stored in *TYPE_P. Operations on the parameter are recorded to
1272 PARAM_OPS_P if it is not NULL. */
1273
1274 static bool
1275 decompose_param_expr (struct ipa_func_body_info *fbi,
1276 gimple *stmt, tree expr,
1277 int *index_p, tree *type_p,
1278 struct agg_position_info *aggpos,
1279 expr_eval_ops *param_ops_p = NULL)
1280 {
1281 int op_limit = param_ipa_max_param_expr_ops;
1282 int op_count = 0;
1283
1284 if (param_ops_p)
1285 *param_ops_p = NULL;
1286
1287 while (true)
1288 {
1289 expr_eval_op eval_op;
1290 unsigned rhs_count;
1291 unsigned cst_count = 0;
1292
1293 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1294 aggpos))
1295 {
1296 tree type = TREE_TYPE (expr);
1297
1298 if (aggpos->agg_contents)
1299 {
1300 /* Stop if the expression contains a bit-field. */
1301 if (TREE_CODE (expr) == BIT_FIELD_REF
1302 || contains_bitfld_component_ref_p (expr))
1303 break;
1304 }
1305
1306 *type_p = type;
1307 return true;
1308 }
1309
1310 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1311 break;
1312
1313 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1314 break;
1315
1316 switch (gimple_assign_rhs_class (stmt))
1317 {
1318 case GIMPLE_SINGLE_RHS:
1319 expr = gimple_assign_rhs1 (stmt);
1320 continue;
1321
1322 case GIMPLE_UNARY_RHS:
1323 rhs_count = 1;
1324 break;
1325
1326 case GIMPLE_BINARY_RHS:
1327 rhs_count = 2;
1328 break;
1329
1330 case GIMPLE_TERNARY_RHS:
1331 rhs_count = 3;
1332 break;
1333
1334 default:
1335 goto fail;
1336 }
1337
1338 /* Stop if expression is too complex. */
1339 if (op_count++ == op_limit)
1340 break;
1341
1342 if (param_ops_p)
1343 {
1344 eval_op.code = gimple_assign_rhs_code (stmt);
1345 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1346 eval_op.val[0] = NULL_TREE;
1347 eval_op.val[1] = NULL_TREE;
1348 }
1349
1350 expr = NULL_TREE;
1351 for (unsigned i = 0; i < rhs_count; i++)
1352 {
1353 tree op = gimple_op (stmt, i + 1);
1354
1355 gcc_assert (op && !TYPE_P (op));
1356 if (is_gimple_ip_invariant (op))
1357 {
1358 if (++cst_count == rhs_count)
1359 goto fail;
1360
1361 eval_op.val[cst_count - 1] = op;
1362 }
1363 else if (!expr)
1364 {
1365 /* Found a non-constant operand; record its index among the rhs
1366 operands. */
1367 eval_op.index = i;
1368 expr = op;
1369 }
1370 else
1371 {
1372 /* Found more than one non-constant operand. */
1373 goto fail;
1374 }
1375 }
1376
1377 if (param_ops_p)
1378 vec_safe_insert (*param_ops_p, 0, eval_op);
1379 }
1380
1381 /* Failed to decompose; free resources and return. */
1382 fail:
1383 if (param_ops_p)
1384 vec_free (*param_ops_p);
1385
1386 return false;
1387 }
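
/* Decomposition sketch: given

     _1 = param_3(D) + 4;
     _2 = _1 * 8;
     if (_2 != 0) ...

   calling decompose_param_expr on _2 returns true, sets *INDEX_P to the
   index of param_3 and records the operation chain in *PARAM_OPS_P
   innermost first ("+ 4" then "* 8"), which
   evaluate_conditions_for_known_args later replays on the known
   parameter value to fold the condition.  */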
1388
1389 /* If BB ends with a conditional that we can turn into predicates, attach
1390 the corresponding predicates to the CFG edges. */
1391
1392 static void
1393 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1394 class ipa_fn_summary *summary,
1395 class ipa_node_params *params_summary,
1396 basic_block bb)
1397 {
1398 gimple *last;
1399 tree op, op2;
1400 int index;
1401 struct agg_position_info aggpos;
1402 enum tree_code code, inverted_code;
1403 edge e;
1404 edge_iterator ei;
1405 gimple *set_stmt;
1406 tree param_type;
1407 expr_eval_ops param_ops;
1408
1409 last = last_stmt (bb);
1410 if (!last || gimple_code (last) != GIMPLE_COND)
1411 return;
1412 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1413 return;
1414 op = gimple_cond_lhs (last);
1415
1416 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1417 &param_ops))
1418 {
1419 code = gimple_cond_code (last);
1420 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1421
1422 FOR_EACH_EDGE (e, ei, bb->succs)
1423 {
1424 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1425 ? code : inverted_code);
1426 /* invert_tree_comparison will return ERROR_MARK on FP
1427 comparisons that are not EQ/NE instead of returning the proper
1428 unordered one. Be sure it is not confused with NON_CONSTANT.
1429 
1430 And if the edge's target is the final block of the diamond CFG
1431 of this conditional statement, we do not need to compute a
1432 predicate for the edge because the final block's predicate must
1433 be at least as broad as that of the first block of the statement. */
1434 if (this_code != ERROR_MARK
1435 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1436 {
1437 predicate p
1438 = add_condition (summary, params_summary, index,
1439 param_type, &aggpos,
1440 this_code, gimple_cond_rhs (last), param_ops);
1441 e->aux = edge_predicate_pool.allocate ();
1442 *(predicate *) e->aux = p;
1443 }
1444 }
1445 vec_free (param_ops);
1446 }
1447
1448 if (TREE_CODE (op) != SSA_NAME)
1449 return;
1450 /* Special case
1451 if (builtin_constant_p (op))
1452 constant_code
1453 else
1454 nonconstant_code.
1455 Here we can predicate nonconstant_code. We can't
1456 really handle constant_code since we have no predicate
1457 for this, and also the constant code is not known to be
1458 optimized away when the inliner doesn't see that the operand is
1459 constant. Other optimizers might think otherwise. */
1460 if (gimple_cond_code (last) != NE_EXPR
1461 || !integer_zerop (gimple_cond_rhs (last)))
1462 return;
1463 set_stmt = SSA_NAME_DEF_STMT (op);
1464 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1465 || gimple_call_num_args (set_stmt) != 1)
1466 return;
1467 op2 = gimple_call_arg (set_stmt, 0);
1468 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1469 return;
1470 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1471 {
1472 predicate p = add_condition (summary, params_summary, index,
1473 param_type, &aggpos,
1474 predicate::is_not_constant, NULL_TREE);
1475 e->aux = edge_predicate_pool.allocate ();
1476 *(predicate *) e->aux = p;
1477 }
1478 }
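
/* For instance, for a block ending in "if (param_1(D) > 10)", the true
   edge receives a predicate recording "param > 10" for param_1's index
   and the false edge the inverted "param <= 10"; edges whose destination
   post-dominates the block get no predicate, as explained above.  (An
   illustrative sketch; param_1 is a hypothetical SSA name.)  */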
1479
1480
1481 /* If BB ends with a switch that we can turn into predicates, attach
1482 the corresponding predicates to the CFG edges. */
1483
1484 static void
1485 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1486 class ipa_fn_summary *summary,
1487 class ipa_node_params *params_summary,
1488 basic_block bb)
1489 {
1490 gimple *lastg;
1491 tree op;
1492 int index;
1493 struct agg_position_info aggpos;
1494 edge e;
1495 edge_iterator ei;
1496 size_t n;
1497 size_t case_idx;
1498 tree param_type;
1499 expr_eval_ops param_ops;
1500
1501 lastg = last_stmt (bb);
1502 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1503 return;
1504 gswitch *last = as_a <gswitch *> (lastg);
1505 op = gimple_switch_index (last);
1506 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1507 &param_ops))
1508 return;
1509
1510 auto_vec<std::pair<tree, tree> > ranges;
1511 tree type = TREE_TYPE (op);
1512 int bound_limit = param_ipa_max_switch_predicate_bounds;
1513 int bound_count = 0;
1514 wide_int vr_wmin, vr_wmax;
1515 value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax);
1516
1517 FOR_EACH_EDGE (e, ei, bb->succs)
1518 {
1519 e->aux = edge_predicate_pool.allocate ();
1520 *(predicate *) e->aux = false;
1521 }
1522
1523 e = gimple_switch_edge (cfun, last, 0);
1524 /* Set BOUND_COUNT to the maximum count to bypass computing the predicate
1525 for the default case if its target basic block is in the convergence
1526 point of all switch cases, which can be determined by checking whether
1527 it post-dominates the switch statement. */
1528 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1529 bound_count = INT_MAX;
1530
1531 n = gimple_switch_num_labels (last);
1532 for (case_idx = 1; case_idx < n; ++case_idx)
1533 {
1534 tree cl = gimple_switch_label (last, case_idx);
1535 tree min = CASE_LOW (cl);
1536 tree max = CASE_HIGH (cl);
1537 predicate p;
1538
1539 e = gimple_switch_edge (cfun, last, case_idx);
1540
1541 /* The case value might not have the same type as the switch expression;
1542 extend the value based on the expression's type. */
1543 if (TREE_TYPE (min) != type)
1544 min = wide_int_to_tree (type, wi::to_wide (min));
1545
1546 if (!max)
1547 max = min;
1548 else if (TREE_TYPE (max) != type)
1549 max = wide_int_to_tree (type, wi::to_wide (max));
1550
1551 /* The case's target basic block is in the convergence point of all
1552 switch cases, so its predicate should be at least as broad as that of
1553 the switch statement. */
1554 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1555 p = true;
1556 else if (min == max)
1557 p = add_condition (summary, params_summary, index, param_type,
1558 &aggpos, EQ_EXPR, min, param_ops);
1559 else
1560 {
1561 predicate p1, p2;
1562 p1 = add_condition (summary, params_summary, index, param_type,
1563 &aggpos, GE_EXPR, min, param_ops);
1564 p2 = add_condition (summary, params_summary,index, param_type,
1565 &aggpos, LE_EXPR, max, param_ops);
1566 p = p1 & p2;
1567 }
1568 *(class predicate *) e->aux
1569 = p.or_with (summary->conds, *(class predicate *) e->aux);
1570
1571 /* If there are too many disjoint case ranges, the predicate for the
1572 default case might become too complicated, so add a limit here. */
1573 if (bound_count > bound_limit)
1574 continue;
1575
1576 bool new_range = true;
1577
1578 if (!ranges.is_empty ())
1579 {
1580 wide_int curr_wmin = wi::to_wide (min);
1581 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1582
1583 /* Merge case ranges if they are contiguous. */
1584 if (curr_wmin == last_wmax + 1)
1585 new_range = false;
1586 else if (vr_type == VR_ANTI_RANGE)
1587 {
1588 /* If two disjoint case ranges can be connected by the anti-range
1589 of the switch index, combine them into one range. */
1590 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1591 vr_type = VR_UNDEFINED;
1592 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1593 new_range = false;
1594 }
1595 }
1596
1597 /* Create/extend a case range. Also count the endpoints of the range
1598 set; this number nearly equals the number of conditions that we will
1599 create for the predicate of the default case. */
1600 if (new_range)
1601 {
1602 bound_count += (min == max) ? 1 : 2;
1603 ranges.safe_push (std::make_pair (min, max));
1604 }
1605 else
1606 {
1607 bound_count += (ranges.last ().first == ranges.last ().second);
1608 ranges.last ().second = max;
1609 }
1610 }
1611
1612 e = gimple_switch_edge (cfun, last, 0);
1613 if (bound_count > bound_limit)
1614 {
1615 *(class predicate *) e->aux = true;
1616 vec_free (param_ops);
1617 return;
1618 }
1619
1620 predicate p_seg = true;
1621 predicate p_all = false;
1622
1623 if (vr_type != VR_RANGE)
1624 {
1625 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1626 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1627 }
1628
1629 /* Construct a predicate to represent the default range set, which is the
1630 negation of all case ranges. A case range is classified as containing
1631 single or non-single values. Suppose a piece of the case ranges as follows.
1632 
1633 [D1...D2] [S1] ... [Sn] [D3...D4]
1634 
1635 To represent the default case's range set between two non-single-value
1636 case ranges (from D2 to D3), we construct the predicate as:
1637 
1638 D2 < x < D3 && x != S1 && ... && x != Sn
1639 */
1640 for (size_t i = 0; i < ranges.length (); i++)
1641 {
1642 tree min = ranges[i].first;
1643 tree max = ranges[i].second;
1644
1645 if (min == max)
1646 p_seg &= add_condition (summary, params_summary, index,
1647 param_type, &aggpos, NE_EXPR,
1648 min, param_ops);
1649 else
1650 {
1651 /* Do not create a sub-predicate for a range that is beyond the lower
1652 bound of the switch index. */
1653 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1654 {
1655 p_seg &= add_condition (summary, params_summary, index,
1656 param_type, &aggpos,
1657 LT_EXPR, min, param_ops);
1658 p_all = p_all.or_with (summary->conds, p_seg);
1659 }
1660
1661 /* Do not create a sub-predicate for a range that is beyond the upper
1662 bound of the switch index. */
1663 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1664 {
1665 p_seg = false;
1666 break;
1667 }
1668
1669 p_seg = add_condition (summary, params_summary, index,
1670 param_type, &aggpos, GT_EXPR,
1671 max, param_ops);
1672 }
1673 }
1674
1675 p_all = p_all.or_with (summary->conds, p_seg);
1676 *(class predicate *) e->aux
1677 = p_all.or_with (summary->conds, *(class predicate *) e->aux);
1678
1679 vec_free (param_ops);
1680 }
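
/* Default-case sketch: for

     switch (param)
       {
       case 0: ...
       case 3 ... 5: ...
       default: ...
       }

   the edge to the default label ends up with (roughly) the predicate
   "param != 0 && (param < 3 || param > 5)", further pruned by any value
   range known for the switch index and capped by bound_limit above.  */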
1681
1682
1683 /* For each BB in NODE attach to its AUX pointer the predicate under
1684 which it is executable. */
1685
1686 static void
1687 compute_bb_predicates (struct ipa_func_body_info *fbi,
1688 struct cgraph_node *node,
1689 class ipa_fn_summary *summary,
1690 class ipa_node_params *params_summary)
1691 {
1692 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1693 bool done = false;
1694 basic_block bb;
1695
1696 FOR_EACH_BB_FN (bb, my_function)
1697 {
1698 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1699 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1700 }
1701
1702 /* Entry block is always executable. */
1703 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1704 = edge_predicate_pool.allocate ();
1705 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1706
1707 /* A simple dataflow propagation of predicates forward in the CFG.
1708 TODO: work in reverse postorder. */
1709 while (!done)
1710 {
1711 done = true;
1712 FOR_EACH_BB_FN (bb, my_function)
1713 {
1714 predicate p = false;
1715 edge e;
1716 edge_iterator ei;
1717 FOR_EACH_EDGE (e, ei, bb->preds)
1718 {
1719 if (e->src->aux)
1720 {
1721 predicate this_bb_predicate
1722 = *(predicate *) e->src->aux;
1723 if (e->aux)
1724 this_bb_predicate &= (*(class predicate *) e->aux);
1725 p = p.or_with (summary->conds, this_bb_predicate);
1726 if (p == true)
1727 break;
1728 }
1729 }
1730 if (p != false)
1731 {
1732 basic_block pdom_bb;
1733
1734 if (!bb->aux)
1735 {
1736 done = false;
1737 bb->aux = edge_predicate_pool.allocate ();
1738 *((predicate *) bb->aux) = p;
1739 }
1740 else if (p != *(predicate *) bb->aux)
1741 {
1742 /* This OR operation is needed to ensure monotone data flow
1743 in the case we hit the limit on the number of clauses and the
1744 and/or operations above give approximate answers. */
1745 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1746 if (p != *(predicate *) bb->aux)
1747 {
1748 done = false;
1749 *((predicate *) bb->aux) = p;
1750 }
1751 }
1752
1753 /* For a switch/if statement, we can OR-combine the predicates of all
1754 its cases/branches to get the predicate for the basic block at their
1755 convergence point, but sometimes this will generate a very
1756 complicated predicate. Actually, we can get a simplified
1757 predicate in another way by using the fact that the predicate
1758 for a basic block must also hold true for its post dominators.
1759 To be specific, the basic block at the convergence point of a
1760 conditional statement should include the predicate of the
1761 statement. */
1762 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1763 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
1764 ;
1765 else if (!pdom_bb->aux)
1766 {
1767 done = false;
1768 pdom_bb->aux = edge_predicate_pool.allocate ();
1769 *((predicate *) pdom_bb->aux) = p;
1770 }
1771 else if (p != *(predicate *) pdom_bb->aux)
1772 {
1773 p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux);
1774 if (p != *(predicate *) pdom_bb->aux)
1775 {
1776 done = false;
1777 *((predicate *) pdom_bb->aux) = p;
1778 }
1779 }
1780 }
1781 }
1782 }
1783 }
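
/* Propagation sketch: in a diamond

	 bb2 (if param > 0)
	 /	 \
       bb3	 bb4
	 \	 /
	  bb5

   bb3 receives the predicate "param > 0" and bb4 its negation, while
   bb5, being bb2's immediate post-dominator, directly inherits bb2's
   predicate rather than the OR of its incoming edge predicates.  */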
1784
1785
1786 /* Return a predicate specifying when EXPR might have a result that is
1787 not a compile time constant. */
1788
1789 static predicate
1790 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
1791 class ipa_fn_summary *summary,
1792 class ipa_node_params *params_summary,
1793 tree expr,
1794 vec<predicate> nonconstant_names)
1795 {
1796 tree parm;
1797 int index;
1798
1799 while (UNARY_CLASS_P (expr))
1800 expr = TREE_OPERAND (expr, 0);
1801
1802 parm = unmodified_parm (fbi, NULL, expr, NULL);
1803 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1804 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
1805 predicate::changed, NULL_TREE);
1806 if (is_gimple_min_invariant (expr))
1807 return false;
1808 if (TREE_CODE (expr) == SSA_NAME)
1809 return nonconstant_names[SSA_NAME_VERSION (expr)];
1810 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1811 {
1812 predicate p1
1813 = will_be_nonconstant_expr_predicate (fbi, summary,
1814 params_summary,
1815 TREE_OPERAND (expr, 0),
1816 nonconstant_names);
1817 if (p1 == true)
1818 return p1;
1819
1820 predicate p2
1821 = will_be_nonconstant_expr_predicate (fbi, summary,
1822 params_summary,
1823 TREE_OPERAND (expr, 1),
1824 nonconstant_names);
1825 return p1.or_with (summary->conds, p2);
1826 }
1827 else if (TREE_CODE (expr) == COND_EXPR)
1828 {
1829 predicate p1
1830 = will_be_nonconstant_expr_predicate (fbi, summary,
1831 params_summary,
1832 TREE_OPERAND (expr, 0),
1833 nonconstant_names);
1834 if (p1 == true)
1835 return p1;
1836
1837 predicate p2
1838 = will_be_nonconstant_expr_predicate (fbi, summary,
1839 params_summary,
1840 TREE_OPERAND (expr, 1),
1841 nonconstant_names);
1842 if (p2 == true)
1843 return p2;
1844 p1 = p1.or_with (summary->conds, p2);
1845 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
1846 params_summary,
1847 TREE_OPERAND (expr, 2),
1848 nonconstant_names);
1849 return p2.or_with (summary->conds, p1);
1850 }
1851 else if (TREE_CODE (expr) == CALL_EXPR)
1852 return true;
1853 else
1854 {
1855 debug_tree (expr);
1856 gcc_unreachable ();
1857 }
1858 return false;
1859 }
1860
1861
1862 /* Return a predicate specifying when STMT might have a result that is
1863 not a compile time constant. */
1864
1865 static predicate
1866 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1867 class ipa_fn_summary *summary,
1868 class ipa_node_params *params_summary,
1869 gimple *stmt,
1870 vec<predicate> nonconstant_names)
1871 {
1872 predicate p = true;
1873 ssa_op_iter iter;
1874 tree use;
1875 tree param_type = NULL_TREE;
1876 predicate op_non_const;
1877 bool is_load;
1878 int base_index;
1879 struct agg_position_info aggpos;
1880
1881 /* Check which statements might be optimized away
1882 when their arguments are constant. */
1883 if (gimple_code (stmt) != GIMPLE_ASSIGN
1884 && gimple_code (stmt) != GIMPLE_COND
1885 && gimple_code (stmt) != GIMPLE_SWITCH
1886 && (gimple_code (stmt) != GIMPLE_CALL
1887 || !(gimple_call_flags (stmt) & ECF_CONST)))
1888 return p;
1889
1890 /* Stores will stay anyway. */
1891 if (gimple_store_p (stmt))
1892 return p;
1893
1894 is_load = gimple_assign_load_p (stmt);
1895
1896 /* Loads can be optimized when the value is known. */
1897 if (is_load)
1898 {
1899 tree op = gimple_assign_rhs1 (stmt);
1900 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
1901 &aggpos))
1902 return p;
1903 }
1904 else
1905 base_index = -1;
1906
1907 /* See if we understand all operands before we start
1908 adding conditionals. */
1909 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1910 {
1911 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1912 /* For arguments we can build a condition. */
1913 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1914 continue;
1915 if (TREE_CODE (use) != SSA_NAME)
1916 return p;
1917 /* If we know when the operand is constant,
1918 we can still say something useful. */
1919 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1920 continue;
1921 return p;
1922 }
1923
1924 if (is_load)
1925 op_non_const =
1926 add_condition (summary, params_summary,
1927 base_index, param_type, &aggpos,
1928 predicate::changed, NULL_TREE);
1929 else
1930 op_non_const = false;
1931 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1932 {
1933 tree parm = unmodified_parm (fbi, stmt, use, NULL);
1934 int index;
1935
1936 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1937 {
1938 if (index != base_index)
1939 p = add_condition (summary, params_summary, index,
1940 TREE_TYPE (parm), NULL,
1941 predicate::changed, NULL_TREE);
1942 else
1943 continue;
1944 }
1945 else
1946 p = nonconstant_names[SSA_NAME_VERSION (use)];
1947 op_non_const = p.or_with (summary->conds, op_non_const);
1948 }
1949 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1950 && gimple_op (stmt, 0)
1951 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1952 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1953 = op_non_const;
1954 return op_non_const;
1955 }
1956
1957 struct record_modified_bb_info
1958 {
1959 tree op;
1960 bitmap bb_set;
1961 gimple *stmt;
1962 };
1963
1964 /* A value is initialized in INIT_BB and used in USE_BB. We want to compute
1965 the probability that it changes between the initialization and the use.
1966 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
1967 is in a different loop nest, we can do better.
1968 This is all just an estimate. In theory we would look for a minimal cut
1969 separating INIT_BB and USE_BB, but we only want to anticipate loop
1970 invariant motion anyway. */
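/* For instance (illustrative only): if INIT_BB sits in an inner loop
   executing ~1000 times while the loop enclosing both INIT_BB and
   USE_BB iterates only ~10 times, get_minimal_bb returns the header
   of the common loop (count ~10) instead of INIT_BB, modelling that
   loop invariant motion would hoist the computation there.  */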
1971
1972 static basic_block
1973 get_minimal_bb (basic_block init_bb, basic_block use_bb)
1974 {
1975 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1976 if (l && l->header->count < init_bb->count)
1977 return l->header;
1978 return init_bb;
1979 }
1980
1981 /* Callback of walk_aliased_vdefs. Record basic blocks where the value may
1982 be set, except for info->stmt. */
1983
1984 static bool
1985 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1986 {
1987 struct record_modified_bb_info *info =
1988 (struct record_modified_bb_info *) data;
1989 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1990 return false;
1991 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
1992 return false;
1993 bitmap_set_bit (info->bb_set,
1994 SSA_NAME_IS_DEFAULT_DEF (vdef)
1995 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1996 : get_minimal_bb
1997 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1998 gimple_bb (info->stmt))->index);
1999 if (dump_file)
2000 {
2001 fprintf (dump_file, " Param ");
2002 print_generic_expr (dump_file, info->op, TDF_SLIM);
2003 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2004 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2005 get_minimal_bb
2006 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2007 gimple_bb (info->stmt))->index);
2008 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2009 }
2010 return false;
2011 }
2012
2013 /* Return the probability (based on REG_BR_PROB_BASE) that the I-th parameter
2014 of STMT will change since the last invocation of STMT.
2015 
2016 Value 0 is reserved for compile-time invariants.
2017 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2018 ought to be REG_BR_PROB_BASE / estimated_iters. */
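/* For example (a sketch): for a call inside a loop

     x_1 = foo ();
     for (i = 0; i < 1000; i++)
       bar (x_1);

   the call executes ~1000 times per definition of x_1, so the returned
   probability for that argument is roughly REG_BR_PROB_BASE / 1000,
   though per the clamping below never less than 1.  */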
2019
2020 static int
2021 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2022 {
2023 tree op = gimple_call_arg (stmt, i);
2024 basic_block bb = gimple_bb (stmt);
2025
2026 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2027 op = TREE_OPERAND (op, 0);
2028
2029 tree base = get_base_address (op);
2030
2031 /* Global invariants never change. */
2032 if (is_gimple_min_invariant (base))
2033 return 0;
2034
2035 /* We would have to do non-trivial analysis to really work out the
2036 probability of the value changing (e.g. when the init statement
2037 is in a sibling loop of the call).
2038 
2039 We make a conservative estimate: when the call is executed N times more
2040 often than the statement defining the value, we take the frequency 1/N. */
2041 if (TREE_CODE (base) == SSA_NAME)
2042 {
2043 profile_count init_count;
2044
2045 if (!bb->count.nonzero_p ())
2046 return REG_BR_PROB_BASE;
2047
2048 if (SSA_NAME_IS_DEFAULT_DEF (base))
2049 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2050 else
2051 init_count = get_minimal_bb
2052 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2053 gimple_bb (stmt))->count;
2054
2055 if (init_count < bb->count)
2056 return MAX ((init_count.to_sreal_scale (bb->count)
2057 * REG_BR_PROB_BASE).to_int (), 1);
2058 return REG_BR_PROB_BASE;
2059 }
2060 else
2061 {
2062 ao_ref refd;
2063 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2064 struct record_modified_bb_info info;
2065 tree init = ctor_for_folding (base);
2066
2067 if (init != error_mark_node)
2068 return 0;
2069 if (!bb->count.nonzero_p ())
2070 return REG_BR_PROB_BASE;
2071 if (dump_file)
2072 {
2073 fprintf (dump_file, " Analyzing param change probability of ");
2074 print_generic_expr (dump_file, op, TDF_SLIM);
2075 fprintf (dump_file, "\n");
2076 }
2077 ao_ref_init (&refd, op);
2078 info.op = op;
2079 info.stmt = stmt;
2080 info.bb_set = BITMAP_ALLOC (NULL);
2081 int walked
2082 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2083 NULL, NULL, fbi->aa_walk_budget);
2084 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2085 {
2086 if (dump_file)
2087 {
2088 if (walked < 0)
2089 fprintf (dump_file, " Ran out of AA walking budget.\n");
2090 else
2091 fprintf (dump_file, " Set in same BB as used.\n");
2092 }
2093 BITMAP_FREE (info.bb_set);
2094 return REG_BR_PROB_BASE;
2095 }
2096
2097 bitmap_iterator bi;
2098 unsigned index;
2099 /* Look up the most frequent update of the value and assume that
2100 it dominates all the others; precise analysis here is difficult. */
2101 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2102 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2103 if (dump_file)
2104 {
2105 fprintf (dump_file, " Set with count ");
2106 max.dump (dump_file);
2107 fprintf (dump_file, " and used with count ");
2108 bb->count.dump (dump_file);
2109 fprintf (dump_file, " freq %f\n",
2110 max.to_sreal_scale (bb->count).to_double ());
2111 }
2112
2113 BITMAP_FREE (info.bb_set);
2114 if (max < bb->count)
2115 return MAX ((max.to_sreal_scale (bb->count)
2116 * REG_BR_PROB_BASE).to_int (), 1);
2117 return REG_BR_PROB_BASE;
2118 }
2119 }
2120
2121 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2122 sub-graph and whether the predicate the condition depends on is known. If
2123 so, return true and store the predicate in *P. */
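/* The (half) diamond matched here looks like, for illustration:

     first_bb:  if (a_1 > 0) goto bb_then; else goto bb_join;
     bb_then:   ...                        <- single pred, single succ
     bb_join:   x_3 = PHI <x_1, x_2>       <- BB

   and *P becomes the predicate under which the controlling condition
   is not a compile-time constant.  */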
2124
2125 static bool
2126 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2127 ipa_fn_summary *summary,
2128 class ipa_node_params *params_summary,
2129 basic_block bb,
2130 predicate *p,
2131 vec<predicate> nonconstant_names)
2132 {
2133 edge e;
2134 edge_iterator ei;
2135 basic_block first_bb = NULL;
2136 gimple *stmt;
2137
2138 if (single_pred_p (bb))
2139 {
2140 *p = false;
2141 return true;
2142 }
2143
2144 FOR_EACH_EDGE (e, ei, bb->preds)
2145 {
2146 if (single_succ_p (e->src))
2147 {
2148 if (!single_pred_p (e->src))
2149 return false;
2150 if (!first_bb)
2151 first_bb = single_pred (e->src);
2152 else if (single_pred (e->src) != first_bb)
2153 return false;
2154 }
2155 else
2156 {
2157 if (!first_bb)
2158 first_bb = e->src;
2159 else if (e->src != first_bb)
2160 return false;
2161 }
2162 }
2163
2164 if (!first_bb)
2165 return false;
2166
2167 stmt = last_stmt (first_bb);
2168 if (!stmt
2169 || gimple_code (stmt) != GIMPLE_COND
2170 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2171 return false;
2172
2173 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2174 gimple_cond_lhs (stmt),
2175 nonconstant_names);
2176 if (*p == true)
2177 return false;
2178 else
2179 return true;
2180 }
2181
2182 /* Given a PHI statement in a function described by inline properties SUMMARY,
2183 with *P being the predicate describing whether the selected PHI argument is
2184 known, store a predicate for the result of the PHI statement into
2185 NONCONSTANT_NAMES, if possible. */
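/* E.g. for x_3 = PHI <5(bb2), y_2(bb3)> (an illustrative sketch), the
   constant argument contributes nothing, so the stored predicate is *P
   OR-ed with the nonconstant predicate of y_2.  */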
2186
2187 static void
2188 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2189 predicate *p,
2190 vec<predicate> nonconstant_names)
2191 {
2192 unsigned i;
2193
2194 for (i = 0; i < gimple_phi_num_args (phi); i++)
2195 {
2196 tree arg = gimple_phi_arg (phi, i)->def;
2197 if (!is_gimple_min_invariant (arg))
2198 {
2199 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2200 *p = p->or_with (summary->conds,
2201 nonconstant_names[SSA_NAME_VERSION (arg)]);
2202 if (*p == true)
2203 return;
2204 }
2205 }
2206
2207 if (dump_file && (dump_flags & TDF_DETAILS))
2208 {
2209 fprintf (dump_file, "\t\tphi predicate: ");
2210 p->dump (dump_file, summary->conds);
2211 }
2212 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2213 }
2214
2215 /* For a typical usage of __builtin_expect (a<=b, 1), we
2216 may introduce an extra relation stmt:
2217 With the builtin, we have
2218 t1 = a <= b;
2219 t2 = (long int) t1;
2220 t3 = __builtin_expect (t2, 1);
2221 if (t3 != 0)
2222 goto ...
2223 Without the builtin, we have
2224 if (a <= b)
2225 goto ...
2226 This affects the size/time estimation and may have
2227 an impact on early inlining.
2228 Find this pattern here and fix it up later. */
2229
2230 static gimple *
2231 find_foldable_builtin_expect (basic_block bb)
2232 {
2233 gimple_stmt_iterator bsi;
2234
2235 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2236 {
2237 gimple *stmt = gsi_stmt (bsi);
2238 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2239 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2240 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2241 {
2242 tree var = gimple_call_lhs (stmt);
2243 tree arg = gimple_call_arg (stmt, 0);
2244 use_operand_p use_p;
2245 gimple *use_stmt;
2246 bool match = false;
2247 bool done = false;
2248
2249 if (!var || !arg)
2250 continue;
2251 gcc_assert (TREE_CODE (var) == SSA_NAME);
2252
2253 while (TREE_CODE (arg) == SSA_NAME)
2254 {
2255 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2256 if (!is_gimple_assign (stmt_tmp))
2257 break;
2258 switch (gimple_assign_rhs_code (stmt_tmp))
2259 {
2260 case LT_EXPR:
2261 case LE_EXPR:
2262 case GT_EXPR:
2263 case GE_EXPR:
2264 case EQ_EXPR:
2265 case NE_EXPR:
2266 match = true;
2267 done = true;
2268 break;
2269 CASE_CONVERT:
2270 break;
2271 default:
2272 done = true;
2273 break;
2274 }
2275 if (done)
2276 break;
2277 arg = gimple_assign_rhs1 (stmt_tmp);
2278 }
2279
2280 if (match && single_imm_use (var, &use_p, &use_stmt)
2281 && gimple_code (use_stmt) == GIMPLE_COND)
2282 return use_stmt;
2283 }
2284 }
2285 return NULL;
2286 }
2287
2288 /* Return true when the basic block contains only clobbers followed by RESX.
2289 Such BBs are kept around to make removal of dead stores possible in the
2290 presence of EH and will be optimized out by optimize_clobbers later in the
2291 game.
2292 
2293 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2294 that can be clobber-only, too. When it is false, the RESX is not necessary
2295 at the end of the basic block. */
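/* Such a block typically looks like (illustrative GIMPLE only):

     <bb 13> :            <- reached via an EH edge
     obj ={v} {CLOBBER};
     tmp ={v} {CLOBBER};
     resx 2;  */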
2296
2297 static bool
2298 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2299 {
2300 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2301 edge_iterator ei;
2302 edge e;
2303
2304 if (need_eh)
2305 {
2306 if (gsi_end_p (gsi))
2307 return false;
2308 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2309 return false;
2310 gsi_prev (&gsi);
2311 }
2312 else if (!single_succ_p (bb))
2313 return false;
2314
2315 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2316 {
2317 gimple *stmt = gsi_stmt (gsi);
2318 if (is_gimple_debug (stmt))
2319 continue;
2320 if (gimple_clobber_p (stmt))
2321 continue;
2322 if (gimple_code (stmt) == GIMPLE_LABEL)
2323 break;
2324 return false;
2325 }
2326
2327 /* See if all predecessors are either throws or clobber-only BBs. */
2328 FOR_EACH_EDGE (e, ei, bb->preds)
2329 if (!(e->flags & EDGE_EH)
2330 && !clobber_only_eh_bb_p (e->src, false))
2331 return false;
2332
2333 return true;
2334 }
2335
2336 /* Return true if STMT computes a floating point expression that may be
2337 affected by -ffast-math and similar flags. */
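/* E.g. "t_2 = a_1 * 4.0;" is such an expression, and so is
   "i_2 = (int) f_1;" since the use f_1 has a float type, while pure
   integer arithmetic is not (an illustrative sketch).  */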
2338
2339 static bool
2340 fp_expression_p (gimple *stmt)
2341 {
2342 ssa_op_iter i;
2343 tree op;
2344
2345 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2346 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2347 return true;
2348 return false;
2349 }
2350
2351 /* Analyze the function body of NODE.
2352 EARLY indicates a run from the early optimization pipeline. */
2353
2354 static void
2355 analyze_function_body (struct cgraph_node *node, bool early)
2356 {
2357 sreal time = param_uninlined_function_time;
2358 /* Estimate static overhead for function prologue/epilogue and alignment. */
2359 int size = param_uninlined_function_insns;
2360 /* Benefits are scaled by the probability of elimination, which is in the
2361 range <0,2>. */
2362 basic_block bb;
2363 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2364 sreal freq;
2365 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2366 class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node);
2367 predicate bb_predicate;
2368 struct ipa_func_body_info fbi;
2369 vec<predicate> nonconstant_names = vNULL;
2370 int nblocks, n;
2371 int *order;
2372 gimple *fix_builtin_expect_stmt;
2373
2374 gcc_assert (my_function && my_function->cfg);
2375 gcc_assert (cfun == my_function);
2376
2377 memset (&fbi, 0, sizeof (fbi));
2378 vec_free (info->conds);
2379 info->conds = NULL;
2380 vec_free (info->size_time_table);
2381 info->size_time_table = NULL;
2382
2383 /* When optimizing and analyzing for the IPA inliner, initialize the loop
2384 optimizer so we can produce proper inline hints.
2385 
2386 When optimizing and analyzing for the early inliner, initialize node params
2387 so we can produce correct BB predicates. */
2388
2389 if (opt_for_fn (node->decl, optimize))
2390 {
2391 calculate_dominance_info (CDI_DOMINATORS);
2392 calculate_dominance_info (CDI_POST_DOMINATORS);
2393 if (!early)
2394 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2395 else
2396 {
2397 ipa_check_create_node_params ();
2398 ipa_initialize_node_params (node);
2399 }
2400
2401 if (ipa_node_params_sum)
2402 {
2403 fbi.node = node;
2404 fbi.info = IPA_NODE_REF (node);
2405 fbi.bb_infos = vNULL;
2406 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2407 fbi.param_count = count_formal_params (node->decl);
2408 fbi.aa_walk_budget = param_ipa_max_aa_steps;
2409
2410 nonconstant_names.safe_grow_cleared
2411 (SSANAMES (my_function)->length ());
2412 }
2413 }
2414
2415 if (dump_file)
2416 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2417 node->name ());
2418
2419 /* When we run into the maximal number of entries, we assign everything to
2420 the constant truth case. Be sure to have it in the list. */
2421 bb_predicate = true;
2422 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2423
2424 bb_predicate = predicate::not_inlined ();
2425 info->account_size_time (param_uninlined_function_insns
2426 * ipa_fn_summary::size_scale,
2427 param_uninlined_function_time,
2428 bb_predicate,
2429 bb_predicate);
2430
2431 if (fbi.info)
2432 compute_bb_predicates (&fbi, node, info, params_summary);
2433 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2434 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2435 for (n = 0; n < nblocks; n++)
2436 {
2437 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2438 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2439 if (clobber_only_eh_bb_p (bb))
2440 {
2441 if (dump_file && (dump_flags & TDF_DETAILS))
2442 fprintf (dump_file, "\n Ignoring BB %i;"
2443 " it will be optimized away by cleanup_clobbers\n",
2444 bb->index);
2445 continue;
2446 }
2447
2448 /* TODO: Obviously predicates can be propagated down across the CFG. */
2449 if (fbi.info)
2450 {
2451 if (bb->aux)
2452 bb_predicate = *(predicate *) bb->aux;
2453 else
2454 bb_predicate = false;
2455 }
2456 else
2457 bb_predicate = true;
2458
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2460 {
2461 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2462 bb_predicate.dump (dump_file, info->conds);
2463 }
2464
2465 if (fbi.info && nonconstant_names.exists ())
2466 {
2467 predicate phi_predicate;
2468 bool first_phi = true;
2469
2470 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2471 gsi_next (&bsi))
2472 {
2473 if (first_phi
2474 && !phi_result_unknown_predicate (&fbi, info,
2475 params_summary,
2476 bb,
2477 &phi_predicate,
2478 nonconstant_names))
2479 break;
2480 first_phi = false;
2481 if (dump_file && (dump_flags & TDF_DETAILS))
2482 {
2483 fprintf (dump_file, " ");
2484 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2485 }
2486 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2487 nonconstant_names);
2488 }
2489 }
2490
2491 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2492
2493 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2494 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2495 {
2496 gimple *stmt = gsi_stmt (bsi);
2497 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2498 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2499 int prob;
2500 predicate will_be_nonconstant;
2501
2502 /* This relation stmt should be folded after we remove the
2503 builtin_expect call. Adjust the cost here. */
2504 if (stmt == fix_builtin_expect_stmt)
2505 {
2506 this_size--;
2507 this_time--;
2508 }
2509
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 {
2512 fprintf (dump_file, " ");
2513 print_gimple_stmt (dump_file, stmt, 0);
2514 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2515 freq.to_double (), this_size,
2516 this_time);
2517 }
2518
2519 if (is_gimple_call (stmt)
2520 && !gimple_call_internal_p (stmt))
2521 {
2522 struct cgraph_edge *edge = node->get_edge (stmt);
2523 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2524
2525 /* Special case: results of BUILT_IN_CONSTANT_P will always be
2526 resolved as constant. We however don't want to optimize
2527 out the cgraph edges. */
2528 if (nonconstant_names.exists ()
2529 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2530 && gimple_call_lhs (stmt)
2531 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2532 {
2533 predicate false_p = false;
2534 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2535 = false_p;
2536 }
2537 if (ipa_node_params_sum)
2538 {
2539 int count = gimple_call_num_args (stmt);
2540 int i;
2541
2542 if (count)
2543 es->param.safe_grow_cleared (count);
2544 for (i = 0; i < count; i++)
2545 {
2546 int prob = param_change_prob (&fbi, stmt, i);
2547 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2548 es->param[i].change_prob = prob;
2549 }
2550 }
2551
2552 es->call_stmt_size = this_size;
2553 es->call_stmt_time = this_time;
2554 es->loop_depth = bb_loop_depth (bb);
2555 edge_set_predicate (edge, &bb_predicate);
2556 if (edge->speculative)
2557 {
2558 cgraph_edge *direct, *indirect;
2559 ipa_ref *ref;
2560 edge->speculative_call_info (direct, indirect, ref);
2561 gcc_assert (direct == edge);
2562 ipa_call_summary *es2
2563 = ipa_call_summaries->get_create (indirect);
2564 ipa_call_summaries->duplicate (edge, indirect,
2565 es, es2);
2566 }
2567 }
2568
2569 /* TODO: When a conditional jump or switch is known to be constant, but
2570 we did not translate it into the predicates, we really can account
2571 for just the maximum of the possible paths. */
2572 if (fbi.info)
2573 will_be_nonconstant
2574 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2575 stmt, nonconstant_names);
2576 else
2577 will_be_nonconstant = true;
2578 if (this_time || this_size)
2579 {
2580 sreal final_time = (sreal)this_time * freq;
2581
2582 prob = eliminated_by_inlining_prob (&fbi, stmt);
2583 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2584 fprintf (dump_file,
2585 "\t\t50%% will be eliminated by inlining\n");
2586 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2587 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2588
2589 class predicate p = bb_predicate & will_be_nonconstant;
2590
2591 /* We can ignore a statement when we have proved it is never going
2592 to be executed, but we cannot do that for call statements
2593 because edges are accounted for specially. */
2594
2595 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2596 {
2597 time += final_time;
2598 size += this_size;
2599 }
2600
2601 /* We account everything but the calls. Calls have their own
2602 size/time info attached to cgraph edges. This is necessary
2603 in order to make the cost disappear after inlining. */
2604 if (!is_gimple_call (stmt))
2605 {
2606 if (prob)
2607 {
2608 predicate ip = bb_predicate & predicate::not_inlined ();
2609 info->account_size_time (this_size * prob,
2610 (final_time * prob) / 2, ip,
2611 p);
2612 }
2613 if (prob != 2)
2614 info->account_size_time (this_size * (2 - prob),
2615 (final_time * (2 - prob) / 2),
2616 bb_predicate,
2617 p);
2618 }
2619
2620 if (!info->fp_expressions && fp_expression_p (stmt))
2621 {
2622 info->fp_expressions = true;
2623 if (dump_file)
2624 fprintf (dump_file, " fp_expression set\n");
2625 }
2626 }
2627
2628 /* Account for the cost of address calculations in the statements. */
2629 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
2630 {
2631 for (tree op = gimple_op (stmt, i);
2632 op && handled_component_p (op);
2633 op = TREE_OPERAND (op, 0))
2634 if ((TREE_CODE (op) == ARRAY_REF
2635 || TREE_CODE (op) == ARRAY_RANGE_REF)
2636 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
2637 {
2638 predicate p = bb_predicate;
2639 if (fbi.info)
2640 p = p & will_be_nonconstant_expr_predicate
2641 (&fbi, info, params_summary,
2642 TREE_OPERAND (op, 1),
2643 nonconstant_names);
2644 if (p != false)
2645 {
2646 time += freq;
2647 size += 1;
2648 if (dump_file)
2649 fprintf (dump_file,
2650 "\t\tAccounting address calculation.\n");
2651 info->account_size_time (ipa_fn_summary::size_scale,
2652 freq,
2653 bb_predicate,
2654 p);
2655 }
2656 }
2657 }
2658
2659 }
2660 }
2661 free (order);
2662
2663 if (nonconstant_names.exists () && !early)
2664 {
2665 class loop *loop;
2666 predicate loop_iterations = true;
2667 predicate loop_stride = true;
2668
2669 if (dump_file && (dump_flags & TDF_DETAILS))
2670 flow_loops_dump (dump_file, NULL, 0);
2671 scev_initialize ();
2672 FOR_EACH_LOOP (loop, 0)
2673 {
2674 vec<edge> exits;
2675 edge ex;
2676 unsigned int j;
2677 class tree_niter_desc niter_desc;
2678 bb_predicate = *(predicate *) loop->header->aux;
2679
2680 exits = get_loop_exit_edges (loop);
2681 FOR_EACH_VEC_ELT (exits, j, ex)
2682 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2683 && !is_gimple_min_invariant (niter_desc.niter))
2684 {
2685 predicate will_be_nonconstant
2686 = will_be_nonconstant_expr_predicate (&fbi, info,
2687 params_summary,
2688 niter_desc.niter,
2689 nonconstant_names);
2690 if (will_be_nonconstant != true)
2691 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2692 if (will_be_nonconstant != true
2693 && will_be_nonconstant != false)
2694 /* This is slightly imprecise. We may want to represent each
2695 loop with an independent predicate. */
2696 loop_iterations &= will_be_nonconstant;
2697 }
2698 exits.release ();
2699 }
2700
2701 /* To avoid quadratic behavior we analyze stride predicates only
2702 with respect to the containing loop. Thus we simply iterate
2703 over all defs in the outermost loop body. */
2704 for (loop = loops_for_fn (cfun)->tree_root->inner;
2705 loop != NULL; loop = loop->next)
2706 {
2707 basic_block *body = get_loop_body (loop);
2708 for (unsigned i = 0; i < loop->num_nodes; i++)
2709 {
2710 gimple_stmt_iterator gsi;
2711 bb_predicate = *(predicate *) body[i]->aux;
2712 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2713 gsi_next (&gsi))
2714 {
2715 gimple *stmt = gsi_stmt (gsi);
2716
2717 if (!is_gimple_assign (stmt))
2718 continue;
2719
2720 tree def = gimple_assign_lhs (stmt);
2721 if (TREE_CODE (def) != SSA_NAME)
2722 continue;
2723
2724 affine_iv iv;
2725 if (!simple_iv (loop_containing_stmt (stmt),
2726 loop_containing_stmt (stmt),
2727 def, &iv, true)
2728 || is_gimple_min_invariant (iv.step))
2729 continue;
2730
2731 predicate will_be_nonconstant
2732 = will_be_nonconstant_expr_predicate (&fbi, info,
2733 params_summary,
2734 iv.step,
2735 nonconstant_names);
2736 if (will_be_nonconstant != true)
2737 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2738 if (will_be_nonconstant != true
2739 && will_be_nonconstant != false)
2740 /* This is slightly imprecise. We may want to represent
2741 each loop with an independent predicate. */
2742 loop_stride = loop_stride & will_be_nonconstant;
2743 }
2744 }
2745 free (body);
2746 }
2747 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2748 set_hint_predicate (&s->loop_iterations, loop_iterations);
2749 set_hint_predicate (&s->loop_stride, loop_stride);
2750 scev_finalize ();
2751 }
2752 FOR_ALL_BB_FN (bb, my_function)
2753 {
2754 edge e;
2755 edge_iterator ei;
2756
2757 if (bb->aux)
2758 edge_predicate_pool.remove ((predicate *)bb->aux);
2759 bb->aux = NULL;
2760 FOR_EACH_EDGE (e, ei, bb->succs)
2761 {
2762 if (e->aux)
2763 edge_predicate_pool.remove ((predicate *) e->aux);
2764 e->aux = NULL;
2765 }
2766 }
2767 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2768 ipa_size_summary *ss = ipa_size_summaries->get (node);
2769 s->time = time;
2770 ss->self_size = size;
2771 nonconstant_names.release ();
2772 ipa_release_body_info (&fbi);
2773 if (opt_for_fn (node->decl, optimize))
2774 {
2775 if (!early)
2776 loop_optimizer_finalize ();
2777 else if (!ipa_edge_args_sum)
2778 ipa_free_all_node_params ();
2779 free_dominance_info (CDI_DOMINATORS);
2780 free_dominance_info (CDI_POST_DOMINATORS);
2781 }
2782 if (dump_file)
2783 {
2784 fprintf (dump_file, "\n");
2785 ipa_dump_fn_summary (dump_file, node);
2786 }
2787 }
2788
2789
2790 /* Compute function summary.
2791 EARLY is true when we compute parameters during early opts. */
2792
2793 void
2794 compute_fn_summary (struct cgraph_node *node, bool early)
2795 {
2796 HOST_WIDE_INT self_stack_size;
2797 struct cgraph_edge *e;
2798
2799 gcc_assert (!node->inlined_to);
2800
2801 if (!ipa_fn_summaries)
2802 ipa_fn_summary_alloc ();
2803
2804 /* Create a new ipa_fn_summary. */
2805 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
2806 ipa_fn_summaries->remove (node);
2807 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2808 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
2809
2810 /* Estimate the stack size for the function if we're optimizing. */
2811 self_stack_size = optimize && !node->thunk.thunk_p
2812 ? estimated_stack_frame_size (node) : 0;
2813 size_info->estimated_self_stack_size = self_stack_size;
2814 info->estimated_stack_size = self_stack_size;
2815
2816 if (node->thunk.thunk_p)
2817 {
2818 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
2819 predicate t = true;
2820
2821 node->can_change_signature = false;
2822 es->call_stmt_size = eni_size_weights.call_cost;
2823 es->call_stmt_time = eni_time_weights.call_cost;
2824 info->account_size_time (ipa_fn_summary::size_scale
2825 * param_uninlined_function_thunk_insns,
2826 param_uninlined_function_thunk_time, t, t);
2827 t = predicate::not_inlined ();
2828 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2829 ipa_update_overall_fn_summary (node);
2830 size_info->self_size = size_info->size;
2831 if (stdarg_p (TREE_TYPE (node->decl)))
2832 {
2833 info->inlinable = false;
2834 node->callees->inline_failed = CIF_VARIADIC_THUNK;
2835 }
2836 else
2837 info->inlinable = true;
2838 }
2839 else
2840 {
2841 /* Even is_gimple_min_invariant relies on current_function_decl. */
2842 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2843
2844 /* Can this function be inlined at all? */
2845 if (!opt_for_fn (node->decl, optimize)
2846 && !lookup_attribute ("always_inline",
2847 DECL_ATTRIBUTES (node->decl)))
2848 info->inlinable = false;
2849 else
2850 info->inlinable = tree_inlinable_function_p (node->decl);
2851
2852 /* Type attributes can use parameter indices to describe them. */
2853 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl))
2854 /* Likewise for #pragma omp declare simd functions or functions
2855 with simd attribute. */
2856 || lookup_attribute ("omp declare simd",
2857 DECL_ATTRIBUTES (node->decl)))
2858 node->can_change_signature = false;
2859 else
2860 {
2861 /* Otherwise, inlinable functions can always change their signature. */
2862 if (info->inlinable)
2863 node->can_change_signature = true;
2864 else
2865 {
2866 /* Functions calling builtin_apply_args or va_start cannot change signature. */
2867 for (e = node->callees; e; e = e->next_callee)
2868 {
2869 tree cdecl = e->callee->decl;
2870 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS)
2871 || fndecl_built_in_p (cdecl, BUILT_IN_VA_START))
2872 break;
2873 }
2874 node->can_change_signature = !e;
2875 }
2876 }
2877 analyze_function_body (node, early);
2878 pop_cfun ();
2879 }
2880 for (e = node->callees; e; e = e->next_callee)
2881 if (e->callee->comdat_local_p ())
2882 break;
2883 node->calls_comdat_local = (e != NULL);
2884
2885 /* Inlining characteristics are maintained by cgraph_mark_inline. */
2886 size_info->size = size_info->self_size;
2887 info->estimated_stack_size = size_info->estimated_self_stack_size;
2888
2889 /* The code above should compute exactly the same result as
2890 ipa_update_overall_fn_summary, but because the computation happens in
2891 a different order, roundoff errors result in slight changes. */
2892 ipa_update_overall_fn_summary (node);
2893 /* In LTO mode we may have speculative edges set. */
2894 gcc_assert (in_lto_p || size_info->size == size_info->self_size);
2895 }
2896
2897
2898 /* Compute parameters of functions used by the inliner, using
2899 current_function_decl. */
2900
2901 static unsigned int
2902 compute_fn_summary_for_current (void)
2903 {
2904 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2905 return 0;
2906 }
2907
2908 /* Estimate the benefit of devirtualizing indirect edge IE, given KNOWN_VALS,
2909 KNOWN_CONTEXTS and KNOWN_AGGS. */
2910
2911 static bool
2912 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2913 int *size, int *time,
2914 vec<tree> known_vals,
2915 vec<ipa_polymorphic_call_context> known_contexts,
2916 vec<ipa_agg_value_set> known_aggs)
2917 {
2918 tree target;
2919 struct cgraph_node *callee;
2920 class ipa_fn_summary *isummary;
2921 enum availability avail;
2922 bool speculative;
2923
2924 if (!known_vals.exists () && !known_contexts.exists ())
2925 return false;
2926 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2927 return false;
2928
2929 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2930 known_aggs, &speculative);
2931 if (!target || speculative)
2932 return false;
2933
2934 /* Account for difference in cost between indirect and direct calls. */
2935 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2936 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2937 gcc_checking_assert (*time >= 0);
2938 gcc_checking_assert (*size >= 0);
2939
2940 callee = cgraph_node::get (target);
2941 if (!callee || !callee->definition)
2942 return false;
2943 callee = callee->function_symbol (&avail);
2944 if (avail < AVAIL_AVAILABLE)
2945 return false;
2946 isummary = ipa_fn_summaries->get (callee);
2947 if (isummary == NULL)
2948 return false;
2949
2950 return isummary->inlinable;
2951 }
2952
2953 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for the size and time needed
2954 to handle edge E.
2955 Set HINTS if the edge may be devirtualized.
2956 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe the context of the call
2957 site. */
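/* In outline, the accounting below is (a sketch):

     size += call_stmt_size * ipa_fn_summary::size_scale;
     time += call_stmt_time * e->sreal_frequency ();

   with call_stmt_size/time possibly reduced first when the call can
   be devirtualized to a known inlinable target.  */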
2958
2959 static inline void
2960 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2961 sreal *time,
2962 vec<tree> known_vals,
2963 vec<ipa_polymorphic_call_context> known_contexts,
2964 vec<ipa_agg_value_set> known_aggs,
2965 ipa_hints *hints)
2966 {
2967 class ipa_call_summary *es = ipa_call_summaries->get (e);
2968 int call_size = es->call_stmt_size;
2969 int call_time = es->call_stmt_time;
2970 int cur_size;
2971
2972 if (!e->callee && hints && e->maybe_hot_p ()
2973 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2974 known_vals, known_contexts, known_aggs))
2975 *hints |= INLINE_HINT_indirect_call;
2976 cur_size = call_size * ipa_fn_summary::size_scale;
2977 *size += cur_size;
2978 if (min_size)
2979 *min_size += cur_size;
2980 if (time)
2981 *time += ((sreal)call_time) * e->sreal_frequency ();
2982 }
2983
2984
2985 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2986 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2987 describe the context of the call site.
2988 
2989 Helper for estimate_calls_size_and_time, which does the same but
2990 (in most cases) faster. */
2991
2992 static void
2993 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
2994 int *min_size, sreal *time,
2995 ipa_hints *hints,
2996 clause_t possible_truths,
2997 vec<tree> known_vals,
2998 vec<ipa_polymorphic_call_context> known_contexts,
2999 vec<ipa_agg_value_set> known_aggs)
3000 {
3001 struct cgraph_edge *e;
3002 for (e = node->callees; e; e = e->next_callee)
3003 {
3004 if (!e->inline_failed)
3005 {
3006 gcc_checking_assert (!ipa_call_summaries->get (e));
3007 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3008 hints,
3009 possible_truths,
3010 known_vals, known_contexts,
3011 known_aggs);
3012 continue;
3013 }
3014 class ipa_call_summary *es = ipa_call_summaries->get (e);
3015
3016 /* Do not care about zero-sized builtins. */
3017 if (!es->call_stmt_size)
3018 {
3019 gcc_checking_assert (!es->call_stmt_time);
3020 continue;
3021 }
3022 if (!es->predicate
3023 || es->predicate->evaluate (possible_truths))
3024 {
3025 /* Predicates of calls shall not use NOT_CHANGED codes,
3026 so we do not need to compute probabilities. */
3027 estimate_edge_size_and_time (e, size,
3028 es->predicate ? NULL : min_size,
3029 time,
3030 known_vals, known_contexts,
3031 known_aggs, hints);
3032 }
3033 }
3034 for (e = node->indirect_calls; e; e = e->next_callee)
3035 {
3036 class ipa_call_summary *es = ipa_call_summaries->get (e);
3037 if (!es->predicate
3038 || es->predicate->evaluate (possible_truths))
3039 estimate_edge_size_and_time (e, size,
3040 es->predicate ? NULL : min_size,
3041 time,
3042 known_vals, known_contexts, known_aggs,
3043 hints);
3044 }
3045 }
3046
3047 /* Populate sum->call_size_time_table for edges from NODE. */
3048
3049 static void
3050 summarize_calls_size_and_time (struct cgraph_node *node,
3051 ipa_fn_summary *sum)
3052 {
3053 struct cgraph_edge *e;
3054 for (e = node->callees; e; e = e->next_callee)
3055 {
3056 if (!e->inline_failed)
3057 {
3058 gcc_checking_assert (!ipa_call_summaries->get (e));
3059 summarize_calls_size_and_time (e->callee, sum);
3060 continue;
3061 }
3062 int size = 0;
3063 sreal time = 0;
3064
3065 estimate_edge_size_and_time (e, &size, NULL, &time,
3066 vNULL, vNULL, vNULL, NULL);
3067
3068 struct predicate pred = true;
3069 class ipa_call_summary *es = ipa_call_summaries->get (e);
3070
3071 if (es->predicate)
3072 pred = *es->predicate;
3073 sum->account_size_time (size, time, pred, pred, true);
3074 }
3075 for (e = node->indirect_calls; e; e = e->next_callee)
3076 {
3077 int size = 0;
3078 sreal time = 0;
3079
3080 estimate_edge_size_and_time (e, &size, NULL, &time,
3081 vNULL, vNULL, vNULL, NULL);
3082 struct predicate pred = true;
3083 class ipa_call_summary *es = ipa_call_summaries->get (e);
3084
3085 if (es->predicate)
3086 pred = *es->predicate;
3087 sum->account_size_time (size, time, pred, pred, true);
3088 }
3089 }
3090
3091 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3092 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
3093 describe the context of the call site. */
3094
3095 static void
3096 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3097 int *min_size, sreal *time,
3098 ipa_hints *hints,
3099 clause_t possible_truths,
3100 vec<tree> known_vals,
3101 vec<ipa_polymorphic_call_context> known_contexts,
3102 vec<ipa_agg_value_set> known_aggs)
3103 {
3104 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3105 bool use_table = true;
3106
3107 gcc_assert (node->callees || node->indirect_calls);
3108
3109 /* During early inlining we do not calculate info for very
3110 large functions and thus there is no need for producing
3111 summaries. */
3112 if (!ipa_node_params_sum)
3113 use_table = false;
3114 /* Do not calculate summaries for simple wrappers; it is a waste
3115 of memory. */
3116 else if (node->callees && node->indirect_calls
3117 && node->callees->inline_failed && !node->callees->next_callee)
3118 use_table = false;
3119 /* If there is an indirect edge that may be optimized, we need
3120 to go the slow way. */
3121 else if ((known_vals.length ()
3122 || known_contexts.length ()
3123 || known_aggs.length ()) && hints)
3124 {
3125 class ipa_node_params *params_summary = IPA_NODE_REF (node);
3126 unsigned int nargs = params_summary
3127 ? ipa_get_param_count (params_summary) : 0;
3128
3129 for (unsigned int i = 0; i < nargs && use_table; i++)
3130 {
3131 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3132 && ((known_vals.length () > i && known_vals[i])
3133 || (known_aggs.length () > i
3134 && known_aggs[i].items.length ())))
3135 use_table = false;
3136 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3137 && (known_contexts.length () > i
3138 && !known_contexts[i].useless_p ()))
3139 use_table = false;
3140 }
3141 }
3142
3143 /* Fast path is via the call size time table. */
3144 if (use_table)
3145 {
3146 /* Build summary if it is absent. */
3147 if (!sum->call_size_time_table)
3148 {
3149 predicate true_pred = true;
3150 sum->account_size_time (0, 0, true_pred, true_pred, true);
3151 summarize_calls_size_and_time (node, sum);
3152 }
3153
3154 int old_size = *size;
3155 sreal old_time = time ? *time : 0;
3156
3157 if (min_size)
3158 *min_size += (*sum->call_size_time_table)[0].size;
3159
3160 unsigned int i;
3161 size_time_entry *e;
3162
3163 /* Walk the table and account sizes and times. */
3164 for (i = 0; vec_safe_iterate (sum->call_size_time_table, i, &e);
3165 i++)
3166 if (e->exec_predicate.evaluate (possible_truths))
3167 {
3168 *size += e->size;
3169 if (time)
3170 *time += e->time;
3171 }
3172
3173 /* Be careful and see if both methods agree. */
3174 if ((flag_checking || dump_file)
3175 /* Do not try to sanity check when we know we lost some
3176 precision. */
3177 && sum->call_size_time_table->length ()
3178 < ipa_fn_summary::max_size_time_table_size)
3179 {
3180 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3181 possible_truths, known_vals,
3182 known_contexts, known_aggs);
3183 gcc_assert (*size == old_size);
3184 if (time && (*time - old_time > 1 || *time - old_time < -1)
3185 && dump_file)
3186 fprintf (dump_file, "Time mismatch in call summary %f!=%f",
3187 old_time.to_double (),
3188 time->to_double ());
3189 }
3190 }
3191 /* Slow path by walking all edges. */
3192 else
3193 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3194 possible_truths, known_vals, known_contexts,
3195 known_aggs);
3196 }
3197
3198 /* Default constructor for ipa call context.
3199 Memory allocation of known_vals, known_contexts
3200 and known_aggs vectors is owned by the caller, but can
3201 be released by ipa_call_context::release.
3202
3203 inline_param_summary is owned by the caller. */
3204 ipa_call_context::ipa_call_context (cgraph_node *node,
3205 clause_t possible_truths,
3206 clause_t nonspec_possible_truths,
3207 vec<tree> known_vals,
3208 vec<ipa_polymorphic_call_context>
3209 known_contexts,
3210 vec<ipa_agg_value_set> known_aggs,
3211 vec<inline_param_summary>
3212 inline_param_summary)
3213 : m_node (node), m_possible_truths (possible_truths),
3214 m_nonspec_possible_truths (nonspec_possible_truths),
3215 m_inline_param_summary (inline_param_summary),
3216 m_known_vals (known_vals),
3217 m_known_contexts (known_contexts),
3218 m_known_aggs (known_aggs)
3219 {
3220 }
3221
3222 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3223
3224 void
3225 ipa_call_context::duplicate_from (const ipa_call_context &ctx)
3226 {
3227 m_node = ctx.m_node;
3228 m_possible_truths = ctx.m_possible_truths;
3229 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3230 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3231 unsigned int nargs = params_summary
3232 ? ipa_get_param_count (params_summary) : 0;
3233
3234 m_inline_param_summary = vNULL;
3235 /* Copy the info only if there is at least one useful entry. */
3236 if (ctx.m_inline_param_summary.exists ())
3237 {
3238 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3239
3240 for (unsigned int i = 0; i < n; i++)
3241 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3242 && !ctx.m_inline_param_summary[i].useless_p ())
3243 {
3244 m_inline_param_summary
3245 = ctx.m_inline_param_summary.copy ();
3246 break;
3247 }
3248 }
3249 m_known_vals = vNULL;
3250 if (ctx.m_known_vals.exists ())
3251 {
3252 unsigned int n = MIN (ctx.m_known_vals.length (), nargs);
3253
3254 for (unsigned int i = 0; i < n; i++)
3255 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3256 && ctx.m_known_vals[i])
3257 {
3258 m_known_vals = ctx.m_known_vals.copy ();
3259 break;
3260 }
3261 }
3262
3263 m_known_contexts = vNULL;
3264 if (ctx.m_known_contexts.exists ())
3265 {
3266 unsigned int n = MIN (ctx.m_known_contexts.length (), nargs);
3267
3268 for (unsigned int i = 0; i < n; i++)
3269 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3270 && !ctx.m_known_contexts[i].useless_p ())
3271 {
3272 m_known_contexts = ctx.m_known_contexts.copy ();
3273 break;
3274 }
3275 }
3276
3277 m_known_aggs = vNULL;
3278 if (ctx.m_known_aggs.exists ())
3279 {
3280 unsigned int n = MIN (ctx.m_known_aggs.length (), nargs);
3281
3282 for (unsigned int i = 0; i < n; i++)
3283 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3284 && !ctx.m_known_aggs[i].is_empty ())
3285 {
3286 m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs);
3287 break;
3288 }
3289 }
3290 }
3291
3292 /* Release memory used by known_vals/contexts/aggs vectors.
3293 If ALL is true, release also inline_param_summary.
3294 This happens when the context was previously duplicated to be stored
3295 in the cache. */
3296
3297 void
3298 ipa_call_context::release (bool all)
3299 {
3300 /* See if the context is initialized in the first place. */
3301 if (!m_node)
3302 return;
3303 m_known_vals.release ();
3304 m_known_contexts.release ();
3305 ipa_release_agg_values (m_known_aggs);
3306 if (all)
3307 m_inline_param_summary.release ();
3308 }
3309
3310 /* Return true if CTX describes the same call context as THIS. */
3311
3312 bool
3313 ipa_call_context::equal_to (const ipa_call_context &ctx)
3314 {
3315 if (m_node != ctx.m_node
3316 || m_possible_truths != ctx.m_possible_truths
3317 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3318 return false;
3319
3320 class ipa_node_params *params_summary = IPA_NODE_REF (m_node);
3321 unsigned int nargs = params_summary
3322 ? ipa_get_param_count (params_summary) : 0;
3323
3324 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3325 {
3326 for (unsigned int i = 0; i < nargs; i++)
3327 {
3328 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3329 continue;
3330 if (i >= m_inline_param_summary.length ()
3331 || m_inline_param_summary[i].useless_p ())
3332 {
3333 if (i < ctx.m_inline_param_summary.length ()
3334 && !ctx.m_inline_param_summary[i].useless_p ())
3335 return false;
3336 continue;
3337 }
3338 if (i >= ctx.m_inline_param_summary.length ()
3339 || ctx.m_inline_param_summary[i].useless_p ())
3340 {
3341 if (i < m_inline_param_summary.length ()
3342 && !m_inline_param_summary[i].useless_p ())
3343 return false;
3344 continue;
3345 }
3346 if (!m_inline_param_summary[i].equal_to
3347 (ctx.m_inline_param_summary[i]))
3348 return false;
3349 }
3350 }
3351 if (m_known_vals.exists () || ctx.m_known_vals.exists ())
3352 {
3353 for (unsigned int i = 0; i < nargs; i++)
3354 {
3355 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3356 continue;
3357 if (i >= m_known_vals.length () || !m_known_vals[i])
3358 {
3359 if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i])
3360 return false;
3361 continue;
3362 }
3363 if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i])
3364 {
3365 if (i < m_known_vals.length () && m_known_vals[i])
3366 return false;
3367 continue;
3368 }
3369 if (m_known_vals[i] != ctx.m_known_vals[i])
3370 return false;
3371 }
3372 }
3373 if (m_known_contexts.exists () || ctx.m_known_contexts.exists ())
3374 {
3375 for (unsigned int i = 0; i < nargs; i++)
3376 {
3377 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3378 continue;
3379 if (i >= m_known_contexts.length ()
3380 || m_known_contexts[i].useless_p ())
3381 {
3382 if (i < ctx.m_known_contexts.length ()
3383 && !ctx.m_known_contexts[i].useless_p ())
3384 return false;
3385 continue;
3386 }
3387 if (i >= ctx.m_known_contexts.length ()
3388 || ctx.m_known_contexts[i].useless_p ())
3389 {
3390 if (i < m_known_contexts.length ()
3391 && !m_known_contexts[i].useless_p ())
3392 return false;
3393 continue;
3394 }
3395 if (!m_known_contexts[i].equal_to
3396 (ctx.m_known_contexts[i]))
3397 return false;
3398 }
3399 }
3400 if (m_known_aggs.exists () || ctx.m_known_aggs.exists ())
3401 {
3402 for (unsigned int i = 0; i < nargs; i++)
3403 {
3404 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3405 continue;
3406 if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ())
3407 {
3408 if (i < ctx.m_known_aggs.length ()
3409 && !ctx.m_known_aggs[i].is_empty ())
3410 return false;
3411 continue;
3412 }
3413 if (i >= ctx.m_known_aggs.length ()
3414 || ctx.m_known_aggs[i].is_empty ())
3415 {
3416 if (i < m_known_aggs.length ()
3417 && !m_known_aggs[i].is_empty ())
3418 return false;
3419 continue;
3420 }
3421 if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i]))
3422 return false;
3423 }
3424 }
3425 return true;
3426 }
3427
3428 /* Estimate the size and time needed to execute the call in the given context.
3429 Additionally determine the hints implied by the context. Finally compute
3430 the minimal size needed for the call that is independent of the call context
3431 and can be used for fast estimates. Return the values in RET_SIZE,
3432 RET_MIN_SIZE, RET_TIME, RET_NONSPECIALIZED_TIME and RET_HINTS. */
3433
3434 void
3435 ipa_call_context::estimate_size_and_time (int *ret_size,
3436 int *ret_min_size,
3437 sreal *ret_time,
3438 sreal *ret_nonspecialized_time,
3439 ipa_hints *ret_hints)
3440 {
3441 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3442 size_time_entry *e;
3443 int size = 0;
3444 sreal time = 0;
3445 int min_size = 0;
3446 ipa_hints hints = 0;
3447 int i;
3448
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 {
3451 bool found = false;
3452 fprintf (dump_file, " Estimating body: %s/%i\n"
3453 " Known to be false: ", m_node->name (),
3454 m_node->order);
3455
3456 for (i = predicate::not_inlined_condition;
3457 i < (predicate::first_dynamic_condition
3458 + (int) vec_safe_length (info->conds)); i++)
3459 if (!(m_possible_truths & (1 << i)))
3460 {
3461 if (found)
3462 fprintf (dump_file, ", ");
3463 found = true;
3464 dump_condition (dump_file, info->conds, i);
3465 }
3466 }
3467
3468 if (m_node->callees || m_node->indirect_calls)
3469 estimate_calls_size_and_time (m_node, &size, &min_size,
3470 ret_time ? &time : NULL,
3471 ret_hints ? &hints : NULL, m_possible_truths,
3472 m_known_vals, m_known_contexts, m_known_aggs);
3473
3474 sreal nonspecialized_time = time;
3475
3476 min_size += (*info->size_time_table)[0].size;
3477 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3478 {
3479 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3480
3481 /* Because predicates are conservative, it can happen that nonconst is 1
3482 but exec is 0. */
3483 if (exec)
3484 {
3485 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3486
3487 gcc_checking_assert (e->time >= 0);
3488 gcc_checking_assert (time >= 0);
3489
3490 /* We compute the specialized size only because the size of the
3491 nonspecialized copy is context independent.
3492 
3493 The difference between nonspecialized and specialized execution is
3494 that the nonspecialized copy will not have optimized out the
3495 computations known to be constant in a specialized setting. */
3496 if (nonconst)
3497 size += e->size;
3498 if (!ret_time)
3499 continue;
3500 nonspecialized_time += e->time;
3501 if (!nonconst)
3502 ;
3503 else if (!m_inline_param_summary.exists ())
3504 {
3505 if (nonconst)
3506 time += e->time;
3507 }
3508 else
3509 {
3510 int prob = e->nonconst_predicate.probability
3511 (info->conds, m_possible_truths,
3512 m_inline_param_summary);
3513 gcc_checking_assert (prob >= 0);
3514 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3515 if (prob == REG_BR_PROB_BASE)
3516 time += e->time;
3517 else
3518 time += e->time * prob / REG_BR_PROB_BASE;
3519 }
3520 gcc_checking_assert (time >= 0);
3521 }
3522 }
3523 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
3524 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
3525 gcc_checking_assert (min_size >= 0);
3526 gcc_checking_assert (size >= 0);
3527 gcc_checking_assert (time >= 0);
3528 /* nonspecialized_time should always be bigger than the specialized time.
3529 Roundoff issues however may get in the way. */
3530 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3531
3532 /* Roundoff issues may make the specialized time bigger than the
3533 nonspecialized time. We do not really want that to happen because some
3534 heuristics may get confused by seeing negative speedups. */
3535 if (time > nonspecialized_time)
3536 time = nonspecialized_time;
3537
3538 if (ret_hints)
3539 {
3540 if (info->loop_iterations
3541 && !info->loop_iterations->evaluate (m_possible_truths))
3542 hints |= INLINE_HINT_loop_iterations;
3543 if (info->loop_stride
3544 && !info->loop_stride->evaluate (m_possible_truths))
3545 hints |= INLINE_HINT_loop_stride;
3546 if (info->scc_no)
3547 hints |= INLINE_HINT_in_scc;
3548 if (DECL_DECLARED_INLINE_P (m_node->decl))
3549 hints |= INLINE_HINT_declared_inline;
3550 }
3551
3552 size = RDIV (size, ipa_fn_summary::size_scale);
3553 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3554
3555 if (dump_file && (dump_flags & TDF_DETAILS))
3556 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
3557 time.to_double (), nonspecialized_time.to_double ());
3558 if (ret_time)
3559 *ret_time = time;
3560 if (ret_nonspecialized_time)
3561 *ret_nonspecialized_time = nonspecialized_time;
3562 if (ret_size)
3563 *ret_size = size;
3564 if (ret_min_size)
3565 *ret_min_size = min_size;
3566 if (ret_hints)
3567 *ret_hints = hints;
3568 return;
3569 }
3570
3571
3572 /* Estimate the size and time needed to execute NODE, assuming that the
3573 parameters known to be constant at the call site are propagated.
3574 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
3575 and types for the parameters. */
3576
3577 void
3578 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
3579 vec<tree> known_vals,
3580 vec<ipa_polymorphic_call_context>
3581 known_contexts,
3582 vec<ipa_agg_value_set> known_aggs,
3583 int *ret_size, sreal *ret_time,
3584 sreal *ret_nonspec_time,
3585 ipa_hints *hints)
3586 {
3587 clause_t clause, nonspec_clause;
3588
3589 /* TODO: Also pass known value ranges. */
3590 evaluate_conditions_for_known_args (node, false, known_vals, vNULL,
3591 known_aggs, &clause, &nonspec_clause);
3592 ipa_call_context ctx (node, clause, nonspec_clause,
3593 known_vals, known_contexts,
3594 known_aggs, vNULL);
3595 ctx.estimate_size_and_time (ret_size, NULL, ret_time,
3596 ret_nonspec_time, hints);
3597 }
3598
3599 /* Return the stack frame offset where the frame of NODE is supposed to
3600 start inside the function it is inlined into.
3601 Return 0 for functions that are not inlined. */
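/* For example (an illustrative sketch): if C was inlined into B and B
   into A, the frame of C starts at

     estimated_self_stack_size (A) + estimated_self_stack_size (B)

   accumulated by walking the chain of callers below.  */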
3602
3603 HOST_WIDE_INT
3604 ipa_get_stack_frame_offset (struct cgraph_node *node)
3605 {
3606 HOST_WIDE_INT offset = 0;
3607 if (!node->inlined_to)
3608 return 0;
3609 node = node->callers->caller;
3610 while (true)
3611 {
3612 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
3613 if (!node->inlined_to)
3614 return offset;
3615 node = node->callers->caller;
3616 }
3617 }
3618
3619
3620 /* Update summary information of inline clones after inlining.
3621 Compute peak stack usage. */
3622
3623 static void
3624 inline_update_callee_summaries (struct cgraph_node *node, int depth)
3625 {
3626 struct cgraph_edge *e;
3627
3628 ipa_propagate_frequency (node);
3629 for (e = node->callees; e; e = e->next_callee)
3630 {
3631 if (!e->inline_failed)
3632 inline_update_callee_summaries (e->callee, depth);
3633 else
3634 ipa_call_summaries->get (e)->loop_depth += depth;
3635 }
3636 for (e = node->indirect_calls; e; e = e->next_callee)
3637 ipa_call_summaries->get (e)->loop_depth += depth;
3638 }
3639
3640 /* Update change_prob of EDGE after INLINED_EDGE has been inlined.
3641 When function A is inlined into B, and A calls C with a parameter that
3642 changes with probability PROB1, and that parameter is known to be a
3643 passthrough of an argument of B that changes with probability PROB2,
3644 the probability of change is now PROB1*PROB2. */
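/* E.g. (an illustrative sketch) with PROB1 == REG_BR_PROB_BASE / 2 and
   PROB2 == REG_BR_PROB_BASE / 4, the combined probability is about
   REG_BR_PROB_BASE / 8; when both inputs are nonzero the result is
   clamped to at least 1, as done below.  */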
3645
3646 static void
3647 remap_edge_change_prob (struct cgraph_edge *inlined_edge,
3648 struct cgraph_edge *edge)
3649 {
3650 if (ipa_node_params_sum)
3651 {
3652 int i;
3653 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3654 if (!args)
3655 return;
3656 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3657 class ipa_call_summary *inlined_es
3658 = ipa_call_summaries->get (inlined_edge);
3659
3660 if (es->param.length () == 0)
3661 return;
3662
3663 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
3664 {
3665 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3666 if (jfunc->type == IPA_JF_PASS_THROUGH
3667 || jfunc->type == IPA_JF_ANCESTOR)
3668 {
3669 int id = jfunc->type == IPA_JF_PASS_THROUGH
3670 ? ipa_get_jf_pass_through_formal_id (jfunc)
3671 : ipa_get_jf_ancestor_formal_id (jfunc);
3672 if (id < (int) inlined_es->param.length ())
3673 {
3674 int prob1 = es->param[i].change_prob;
3675 int prob2 = inlined_es->param[id].change_prob;
3676 int prob = combine_probabilities (prob1, prob2);
3677
3678 if (prob1 && prob2 && !prob)
3679 prob = 1;
3680
3681 es->param[i].change_prob = prob;
3682 }
3683 }
3684 }
3685 }
3686 }
3687
3688 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
3689 
3690 Remap predicates of callees of NODE. The remaining arguments match
3691 predicate::remap_after_inlining.
3692 
3693 Also update change probabilities. */
3694
3695 static void
3696 remap_edge_summaries (struct cgraph_edge *inlined_edge,
3697 struct cgraph_node *node,
3698 class ipa_fn_summary *info,
3699 class ipa_node_params *params_summary,
3700 class ipa_fn_summary *callee_info,
3701 vec<int> operand_map,
3702 vec<int> offset_map,
3703 clause_t possible_truths,
3704 predicate *toplev_predicate)
3705 {
3706 struct cgraph_edge *e, *next;
3707 for (e = node->callees; e; e = next)
3708 {
3709 predicate p;
3710 next = e->next_callee;
3711
3712 if (e->inline_failed)
3713 {
3714 class ipa_call_summary *es = ipa_call_summaries->get (e);
3715 remap_edge_change_prob (inlined_edge, e);
3716
3717 if (es->predicate)
3718 {
3719 p = es->predicate->remap_after_inlining
3720 (info, params_summary,
3721 callee_info, operand_map,
3722 offset_map, possible_truths,
3723 *toplev_predicate);
3724 edge_set_predicate (e, &p);
3725 }
3726 else
3727 edge_set_predicate (e, toplev_predicate);
3728 }
3729 else
3730 remap_edge_summaries (inlined_edge, e->callee, info,
3731 params_summary, callee_info,
3732 operand_map, offset_map, possible_truths,
3733 toplev_predicate);
3734 }
3735 for (e = node->indirect_calls; e; e = next)
3736 {
3737 class ipa_call_summary *es = ipa_call_summaries->get (e);
3738 predicate p;
3739 next = e->next_callee;
3740
3741 remap_edge_change_prob (inlined_edge, e);
3742 if (es->predicate)
3743 {
3744 p = es->predicate->remap_after_inlining
3745 (info, params_summary,
3746 callee_info, operand_map, offset_map,
3747 possible_truths, *toplev_predicate);
3748 edge_set_predicate (e, &p);
3749 }
3750 else
3751 edge_set_predicate (e, toplev_predicate);
3752 }
3753 }
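
/* Example: if a surviving callee edge of NODE carries the predicate
   "param 1 is constant" and OPERAND_MAP maps the inlined body's parameter 1
   to the caller's parameter 3, the edge predicate is rewritten to
   "param 3 is constant" conjoined with TOPLEV_PREDICATE.  Calls whose edges
   were themselves inlined are handled by recursing into their callees.  */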
3754
3755 /* Remap predicate *HINT after inlining (like remap_edge_summaries does for edge predicates) and store the result back into *HINT. */
3756
3757 static void
3758 remap_hint_predicate (class ipa_fn_summary *info,
3759 class ipa_node_params *params_summary,
3760 class ipa_fn_summary *callee_info,
3761 predicate **hint,
3762 vec<int> operand_map,
3763 vec<int> offset_map,
3764 clause_t possible_truths,
3765 predicate *toplev_predicate)
3766 {
3767 predicate p;
3768
3769 if (!*hint)
3770 return;
3771 p = (*hint)->remap_after_inlining
3772 (info, params_summary, callee_info,
3773 operand_map, offset_map,
3774 possible_truths, *toplev_predicate);
3775 if (p != false && p != true)
3776 {
3777 if (!*hint)
3778 set_hint_predicate (hint, p);
3779 else
3780 **hint &= p;
3781 }
3782 }
3783
3784 /* We inlined EDGE. Update summary of the function we inlined into. */
3785
3786 void
3787 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
3788 {
3789 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
3790 struct cgraph_node *to = (edge->caller->inlined_to
3791 ? edge->caller->inlined_to : edge->caller);
3792 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
3793 clause_t clause = 0; /* not_inline is known to be false. */
3794 size_time_entry *e;
3795 auto_vec<int, 8> operand_map;
3796 auto_vec<int, 8> offset_map;
3797 int i;
3798 predicate toplev_predicate;
3799 class ipa_call_summary *es = ipa_call_summaries->get (edge);
3800 class ipa_node_params *params_summary = (ipa_node_params_sum
3801 ? IPA_NODE_REF (to) : NULL);
3802
3803 if (es->predicate)
3804 toplev_predicate = *es->predicate;
3805 else
3806 toplev_predicate = true;
3807
3808 info->fp_expressions |= callee_info->fp_expressions;
3809
3810 if (callee_info->conds)
3811 evaluate_properties_for_edge (edge, true, &clause,
3812 NULL, NULL, NULL, NULL);
3813 if (ipa_node_params_sum && callee_info->conds)
3814 {
3815 class ipa_edge_args *args = IPA_EDGE_REF (edge);
3816 int count = args ? ipa_get_cs_argument_count (args) : 0;
3817 int i;
3818
3819 if (count)
3820 {
3821 operand_map.safe_grow_cleared (count);
3822 offset_map.safe_grow_cleared (count);
3823 }
3824 for (i = 0; i < count; i++)
3825 {
3826 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3827 int map = -1;
3828
3829 /* TODO: handle non-NOPs when merging. */
3830 if (jfunc->type == IPA_JF_PASS_THROUGH)
3831 {
3832 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3833 map = ipa_get_jf_pass_through_formal_id (jfunc);
3834 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3835 offset_map[i] = -1;
3836 }
3837 else if (jfunc->type == IPA_JF_ANCESTOR)
3838 {
3839 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3840 if (offset >= 0 && offset < INT_MAX)
3841 {
3842 map = ipa_get_jf_ancestor_formal_id (jfunc);
3843 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3844 offset = -1;
3845 offset_map[i] = offset;
3846 }
3847 }
3848 operand_map[i] = map;
3849 gcc_assert (map < ipa_get_param_count (params_summary));
3850 }
3851 }
3852 sreal freq = edge->sreal_frequency ();
3853 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3854 {
3855 predicate p;
3856 p = e->exec_predicate.remap_after_inlining
3857 (info, params_summary,
3858 callee_info, operand_map,
3859 offset_map, clause,
3860 toplev_predicate);
3861 predicate nonconstp;
3862 nonconstp = e->nonconst_predicate.remap_after_inlining
3863 (info, params_summary,
3864 callee_info, operand_map,
3865 offset_map, clause,
3866 toplev_predicate);
3867 if (p != false && nonconstp != false)
3868 {
3869 sreal add_time = ((sreal)e->time * freq);
3870 int prob = e->nonconst_predicate.probability (callee_info->conds,
3871 clause, es->param);
3872 if (prob != REG_BR_PROB_BASE)
3873 add_time = add_time * prob / REG_BR_PROB_BASE;
3874 if (prob != REG_BR_PROB_BASE
3875 && dump_file && (dump_flags & TDF_DETAILS))
3876 {
3877 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3878 (double) prob / REG_BR_PROB_BASE);
3879 }
3880 info->account_size_time (e->size, add_time, p, nonconstp);
3881 }
3882 }
3883 remap_edge_summaries (edge, edge->callee, info, params_summary,
3884 callee_info, operand_map,
3885 offset_map, clause, &toplev_predicate);
3886 remap_hint_predicate (info, params_summary, callee_info,
3887 &callee_info->loop_iterations,
3888 operand_map, offset_map, clause, &toplev_predicate);
3889 remap_hint_predicate (info, params_summary, callee_info,
3890 &callee_info->loop_stride,
3891 operand_map, offset_map, clause, &toplev_predicate);
3892
3893 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
3894 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
3895
3896 if (info->estimated_stack_size < peak)
3897 info->estimated_stack_size = peak;
3898
3899 inline_update_callee_summaries (edge->callee, es->loop_depth);
3900
3901 /* Free summaries that are not maintained for inline clones/edges. */
3902 ipa_call_summaries->remove (edge);
3903 ipa_fn_summaries->remove (edge->callee);
3904 ipa_remove_from_growth_caches (edge);
3905 }
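
/* Example of the parameter remapping above (hypothetical jump function):
   if EDGE passes the caller's parameter 2 unchanged to the callee's
   parameter 0 (a NOP_EXPR pass-through), then operand_map[0] == 2 and every
   callee predicate conditioned on "param 0" is re-expressed in terms of the
   caller's "param 2" before its size/time entry is accounted into INFO.  */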
3906
3907 /* For performance reasons ipa_merge_fn_summary_after_inlining does not
3908 update overall size and time. Recompute them here. */
3909
3910 void
3911 ipa_update_overall_fn_summary (struct cgraph_node *node)
3912 {
3913 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
3914 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
3915 size_time_entry *e;
3916 int i;
3917
3918 size_info->size = 0;
3919 info->time = 0;
3920 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3921 {
3922 size_info->size += e->size;
3923 info->time += e->time;
3924 }
3925 info->min_size = (*info->size_time_table)[0].size;
3926 vec_free (info->call_size_time_table);
3927 if (node->callees || node->indirect_calls)
3928 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
3929 &info->time, NULL,
3930 ~(clause_t) (1 << predicate::false_condition),
3931 vNULL, vNULL, vNULL);
3932 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
3933 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
3934 }
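
/* Typical use (a sketch): after merging a chain of edges into NODE with
   ipa_merge_fn_summary_after_inlining, call
   ipa_update_overall_fn_summary (node) once to refresh the cumulative size
   and time, instead of paying for a recomputation after every merge.  */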
3935
3936
3937 /* Perform the intraprocedural analysis of NODE that is required to
3938 inline indirect calls. */
3939
3940 static void
3941 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3942 {
3943 ipa_analyze_node (node);
3944 if (dump_file && (dump_flags & TDF_DETAILS))
3945 {
3946 ipa_print_node_params (dump_file, node);
3947 ipa_print_node_jump_functions (dump_file, node);
3948 }
3949 }
3950
3951
3952 /* Compute the function summary of NODE for use by the inliner. */
3953
3954 void
3955 inline_analyze_function (struct cgraph_node *node)
3956 {
3957 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3958
3959 if (dump_file)
3960 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3961 node->name (), node->order);
3962 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3963 inline_indirect_intraprocedural_analysis (node);
3964 compute_fn_summary (node, false);
3965 if (!optimize)
3966 {
3967 struct cgraph_edge *e;
3968 for (e = node->callees; e; e = e->next_callee)
3969 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3970 for (e = node->indirect_calls; e; e = e->next_callee)
3971 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3972 }
3973
3974 pop_cfun ();
3975 }
3976
3977
3978 /* Called when a new function is inserted into the callgraph late. */
3979
3980 void
3981 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3982 {
3983 inline_analyze_function (node);
3984 }
3985
3986 /* Compute function summaries for all defined, non-alias functions. */
3987
3988 static void
3989 ipa_fn_summary_generate (void)
3990 {
3991 struct cgraph_node *node;
3992
3993 FOR_EACH_DEFINED_FUNCTION (node)
3994 if (DECL_STRUCT_FUNCTION (node->decl))
3995 node->versionable = tree_versionable_function_p (node->decl);
3996
3997 ipa_fn_summary_alloc ();
3998
3999 ipa_fn_summaries->enable_insertion_hook ();
4000
4001 ipa_register_cgraph_hooks ();
4002
4003 FOR_EACH_DEFINED_FUNCTION (node)
4004 if (!node->alias
4005 && (flag_generate_lto || flag_generate_offload || flag_wpa
4006 || opt_for_fn (node->decl, optimize)))
4007 inline_analyze_function (node);
4008 }
4009
4010
4011 /* Read inline summary for edge E from IB. */
4012
4013 static void
4014 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4015 bool prevails)
4016 {
4017 class ipa_call_summary *es = prevails
4018 ? ipa_call_summaries->get_create (e) : NULL;
4019 predicate p;
4020 int length, i;
4021
4022 int size = streamer_read_uhwi (ib);
4023 int time = streamer_read_uhwi (ib);
4024 int depth = streamer_read_uhwi (ib);
4025
4026 if (es)
4027 {
4028 es->call_stmt_size = size;
4029 es->call_stmt_time = time;
4030 es->loop_depth = depth;
4031 }
4032
4033 bitpack_d bp = streamer_read_bitpack (ib);
4034 if (es)
4035 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4036 else
4037 bp_unpack_value (&bp, 1);
4038
4039 p.stream_in (ib);
4040 if (es)
4041 edge_set_predicate (e, &p);
4042 length = streamer_read_uhwi (ib);
4043 if (length && es && e->possibly_call_in_translation_unit_p ())
4044 {
4045 es->param.safe_grow_cleared (length);
4046 for (i = 0; i < length; i++)
4047 es->param[i].change_prob = streamer_read_uhwi (ib);
4048 }
4049 else
4050 {
4051 for (i = 0; i < length; i++)
4052 streamer_read_uhwi (ib);
4053 }
4054 }
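
/* For reference, the per-edge stream layout (kept in sync with
   write_ipa_call_summary below): uhwi call_stmt_size, uhwi call_stmt_time,
   uhwi loop_depth, a bitpack holding is_return_callee_uncaptured, the edge
   predicate, then a uhwi parameter count followed by one uhwi change_prob
   per parameter.  */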
4055
4056
4057 /* Stream in inline summaries from the section. */
4058
4059 static void
4060 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4061 size_t len)
4062 {
4063 const struct lto_function_header *header =
4064 (const struct lto_function_header *) data;
4065 const int cfg_offset = sizeof (struct lto_function_header);
4066 const int main_offset = cfg_offset + header->cfg_size;
4067 const int string_offset = main_offset + header->main_size;
4068 class data_in *data_in;
4069 unsigned int i, count2, j;
4070 unsigned int f_count;
4071
4072 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4073 file_data->mode_table);
4074
4075 data_in =
4076 lto_data_in_create (file_data, (const char *) data + string_offset,
4077 header->string_size, vNULL);
4078 f_count = streamer_read_uhwi (&ib);
4079 for (i = 0; i < f_count; i++)
4080 {
4081 unsigned int index;
4082 struct cgraph_node *node;
4083 class ipa_fn_summary *info;
4084 class ipa_node_params *params_summary;
4085 class ipa_size_summary *size_info;
4086 lto_symtab_encoder_t encoder;
4087 struct bitpack_d bp;
4088 struct cgraph_edge *e;
4089 predicate p;
4090
4091 index = streamer_read_uhwi (&ib);
4092 encoder = file_data->symtab_node_encoder;
4093 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4094 index));
4095 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4096 params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL;
4097 size_info = node->prevailing_p ()
4098 ? ipa_size_summaries->get_create (node) : NULL;
4099
4100 int stack_size = streamer_read_uhwi (&ib);
4101 int size = streamer_read_uhwi (&ib);
4102 sreal time = sreal::stream_in (&ib);
4103
4104 if (info)
4105 {
4106 info->estimated_stack_size
4107 = size_info->estimated_self_stack_size = stack_size;
4108 size_info->size = size_info->self_size = size;
4109 info->time = time;
4110 }
4111
4112 bp = streamer_read_bitpack (&ib);
4113 if (info)
4114 {
4115 info->inlinable = bp_unpack_value (&bp, 1);
/* Consume the placeholder bit that ipa_fn_summary_write streams between
   inlinable and fp_expressions; without this, fp_expressions would read
   the placeholder and always come out false. */
bp_unpack_value (&bp, 1);
4116 info->fp_expressions = bp_unpack_value (&bp, 1);
4117 }
4118 else
4119 {
4120 bp_unpack_value (&bp, 1);
4121 bp_unpack_value (&bp, 1);
bp_unpack_value (&bp, 1);
4122 }
4123
4124 count2 = streamer_read_uhwi (&ib);
4125 gcc_assert (!info || !info->conds);
4126 if (info)
4127 vec_safe_reserve_exact (info->conds, count2);
4128 for (j = 0; j < count2; j++)
4129 {
4130 struct condition c;
4131 unsigned int k, count3;
4132 c.operand_num = streamer_read_uhwi (&ib);
4133 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4134 c.type = stream_read_tree (&ib, data_in);
4135 c.val = stream_read_tree (&ib, data_in);
4136 bp = streamer_read_bitpack (&ib);
4137 c.agg_contents = bp_unpack_value (&bp, 1);
4138 c.by_ref = bp_unpack_value (&bp, 1);
4139 if (c.agg_contents)
4140 c.offset = streamer_read_uhwi (&ib);
4141 count3 = streamer_read_uhwi (&ib);
4142 c.param_ops = NULL;
4143 if (info)
4144 vec_safe_reserve_exact (c.param_ops, count3);
4145 if (params_summary)
4146 ipa_set_param_used_by_ipa_predicates
4147 (params_summary, c.operand_num, true);
4148 for (k = 0; k < count3; k++)
4149 {
4150 struct expr_eval_op op;
4151 enum gimple_rhs_class rhs_class;
4152 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4153 op.type = stream_read_tree (&ib, data_in);
4154 switch (rhs_class = get_gimple_rhs_class (op.code))
4155 {
4156 case GIMPLE_UNARY_RHS:
4157 op.index = 0;
4158 op.val[0] = NULL_TREE;
4159 op.val[1] = NULL_TREE;
4160 break;
4161
4162 case GIMPLE_BINARY_RHS:
4163 case GIMPLE_TERNARY_RHS:
4164 bp = streamer_read_bitpack (&ib);
4165 op.index = bp_unpack_value (&bp, 2);
4166 op.val[0] = stream_read_tree (&ib, data_in);
4167 if (rhs_class == GIMPLE_BINARY_RHS)
4168 op.val[1] = NULL_TREE;
4169 else
4170 op.val[1] = stream_read_tree (&ib, data_in);
4171 break;
4172
4173 default:
4174 fatal_error (UNKNOWN_LOCATION,
4175 "invalid fnsummary in LTO stream");
4176 }
4177 if (info)
4178 c.param_ops->quick_push (op);
4179 }
4180 if (info)
4181 info->conds->quick_push (c);
4182 }
4183 count2 = streamer_read_uhwi (&ib);
4184 gcc_assert (!info || !info->size_time_table);
4185 if (info && count2)
4186 vec_safe_reserve_exact (info->size_time_table, count2);
4187 for (j = 0; j < count2; j++)
4188 {
4189 class size_time_entry e;
4190
4191 e.size = streamer_read_uhwi (&ib);
4192 e.time = sreal::stream_in (&ib);
4193 e.exec_predicate.stream_in (&ib);
4194 e.nonconst_predicate.stream_in (&ib);
4195
4196 if (info)
4197 info->size_time_table->quick_push (e);
4198 }
4199
4200 p.stream_in (&ib);
4201 if (info)
4202 set_hint_predicate (&info->loop_iterations, p);
4203 p.stream_in (&ib);
4204 if (info)
4205 set_hint_predicate (&info->loop_stride, p);
4206 for (e = node->callees; e; e = e->next_callee)
4207 read_ipa_call_summary (&ib, e, info != NULL);
4208 for (e = node->indirect_calls; e; e = e->next_callee)
4209 read_ipa_call_summary (&ib, e, info != NULL);
4210 }
4211
4212 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4213 len);
4214 lto_data_in_delete (data_in);
4215 }
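
/* The per-node record consumed above is produced by ipa_fn_summary_write:
   node index, self stack size, self size, time, a bitpack of flags, the
   condition table, the size/time table, the loop_iterations and loop_stride
   hint predicates, and finally one call summary per callee and indirect
   edge.  */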
4216
4217
4218 /* Read inline summaries. Jump functions are shared between ipa-cp
4219 and the inliner, so when ipa-cp is active, we don't need to stream
4220 them twice. */
4221
4222 static void
4223 ipa_fn_summary_read (void)
4224 {
4225 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4226 struct lto_file_decl_data *file_data;
4227 unsigned int j = 0;
4228
4229 ipa_fn_summary_alloc ();
4230
4231 while ((file_data = file_data_vec[j++]))
4232 {
4233 size_t len;
4234 const char *data
4235 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4236 &len);
4237 if (data)
4238 inline_read_section (file_data, data, len);
4239 else
4240 /* Fatal error here. We do not want to support compiling ltrans units
4241 with a different version of the compiler or different flags than the
4242 WPA unit, so this should never happen. */
4243 fatal_error (input_location,
4244 "ipa inline summary is missing in input file");
4245 }
4246 ipa_register_cgraph_hooks ();
4247 if (!flag_ipa_cp)
4248 ipa_prop_read_jump_functions ();
4249
4250 gcc_assert (ipa_fn_summaries);
4251 ipa_fn_summaries->enable_insertion_hook ();
4252 }
4253
4254
4255 /* Write inline summary for edge E to OB. */
4256
4257 static void
4258 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4259 {
4260 class ipa_call_summary *es = ipa_call_summaries->get (e);
4261 int i;
4262
4263 streamer_write_uhwi (ob, es->call_stmt_size);
4264 streamer_write_uhwi (ob, es->call_stmt_time);
4265 streamer_write_uhwi (ob, es->loop_depth);
4266
4267 bitpack_d bp = bitpack_create (ob->main_stream);
4268 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4269 streamer_write_bitpack (&bp);
4270
4271 if (es->predicate)
4272 es->predicate->stream_out (ob);
4273 else
4274 streamer_write_uhwi (ob, 0);
4275 streamer_write_uhwi (ob, es->param.length ());
4276 for (i = 0; i < (int) es->param.length (); i++)
4277 streamer_write_uhwi (ob, es->param[i].change_prob);
4278 }
4279
4280
4281 /* Write inline summaries for all nodes in SET.
4282 Jump functions are shared between ipa-cp and the inliner, so when ipa-cp
4283 is active, we don't need to write them twice. */
4284
4285 static void
4286 ipa_fn_summary_write (void)
4287 {
4288 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4289 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4290 unsigned int count = 0;
4291 int i;
4292
4293 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4294 {
4295 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4296 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4297 if (cnode && cnode->definition && !cnode->alias)
4298 count++;
4299 }
4300 streamer_write_uhwi (ob, count);
4301
4302 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
4303 {
4304 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
4305 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
4306 if (cnode && cnode->definition && !cnode->alias)
4307 {
4308 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4309 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4310 struct bitpack_d bp;
4311 struct cgraph_edge *edge;
4312 int i;
4313 size_time_entry *e;
4314 struct condition *c;
4315
4316 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4317 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4318 streamer_write_hwi (ob, size_info->self_size);
4319 info->time.stream_out (ob);
4320 bp = bitpack_create (ob->main_stream);
4321 bp_pack_value (&bp, info->inlinable, 1);
4322 bp_pack_value (&bp, false, 1); /* Placeholder for a removed flag; consumed and ignored by inline_read_section. */
4323 bp_pack_value (&bp, info->fp_expressions, 1);
4324 streamer_write_bitpack (&bp);
4325 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4326 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4327 {
4328 int j;
4329 struct expr_eval_op *op;
4330
4331 streamer_write_uhwi (ob, c->operand_num);
4332 streamer_write_uhwi (ob, c->code);
4333 stream_write_tree (ob, c->type, true);
4334 stream_write_tree (ob, c->val, true);
4335 bp = bitpack_create (ob->main_stream);
4336 bp_pack_value (&bp, c->agg_contents, 1);
4337 bp_pack_value (&bp, c->by_ref, 1);
4338 streamer_write_bitpack (&bp);
4339 if (c->agg_contents)
4340 streamer_write_uhwi (ob, c->offset);
4341 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4342 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4343 {
4344 streamer_write_uhwi (ob, op->code);
4345 stream_write_tree (ob, op->type, true);
4346 if (op->val[0])
4347 {
4348 bp = bitpack_create (ob->main_stream);
4349 bp_pack_value (&bp, op->index, 2);
4350 streamer_write_bitpack (&bp);
4351 stream_write_tree (ob, op->val[0], true);
4352 if (op->val[1])
4353 stream_write_tree (ob, op->val[1], true);
4354 }
4355 }
4356 }
4357 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
4358 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
4359 {
4360 streamer_write_uhwi (ob, e->size);
4361 e->time.stream_out (ob);
4362 e->exec_predicate.stream_out (ob);
4363 e->nonconst_predicate.stream_out (ob);
4364 }
4365 if (info->loop_iterations)
4366 info->loop_iterations->stream_out (ob);
4367 else
4368 streamer_write_uhwi (ob, 0);
4369 if (info->loop_stride)
4370 info->loop_stride->stream_out (ob);
4371 else
4372 streamer_write_uhwi (ob, 0);
4373 for (edge = cnode->callees; edge; edge = edge->next_callee)
4374 write_ipa_call_summary (ob, edge);
4375 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4376 write_ipa_call_summary (ob, edge);
4377 }
4378 }
4379 streamer_write_char_stream (ob->main_stream, 0);
4380 produce_asm (ob, NULL);
4381 destroy_output_block (ob);
4382
4383 if (!flag_ipa_cp)
4384 ipa_prop_write_jump_functions ();
4385 }
4386
4387
4388 /* Release function summary. */
4389
4390 void
4391 ipa_free_fn_summary (void)
4392 {
4393 if (!ipa_call_summaries)
4394 return;
4395 ggc_delete (ipa_fn_summaries);
4396 ipa_fn_summaries = NULL;
4397 delete ipa_call_summaries;
4398 ipa_call_summaries = NULL;
4399 edge_predicate_pool.release ();
4400 /* During IPA this is one of the largest data structures to release. */
4401 if (flag_wpa)
4402 ggc_trim ();
4403 }
4404
4405 /* Release function size summary. */
4406
4407 void
4408 ipa_free_size_summary (void)
4409 {
4410 if (!ipa_size_summaries)
4411 return;
4412 delete ipa_size_summaries;
4413 ipa_size_summaries = NULL;
4414 }
4415
4416 namespace {
4417
4418 const pass_data pass_data_local_fn_summary =
4419 {
4420 GIMPLE_PASS, /* type */
4421 "local-fnsummary", /* name */
4422 OPTGROUP_INLINE, /* optinfo_flags */
4423 TV_INLINE_PARAMETERS, /* tv_id */
4424 0, /* properties_required */
4425 0, /* properties_provided */
4426 0, /* properties_destroyed */
4427 0, /* todo_flags_start */
4428 0, /* todo_flags_finish */
4429 };
4430
4431 class pass_local_fn_summary : public gimple_opt_pass
4432 {
4433 public:
4434 pass_local_fn_summary (gcc::context *ctxt)
4435 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4436 {}
4437
4438 /* opt_pass methods: */
4439 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
4440 virtual unsigned int execute (function *)
4441 {
4442 return compute_fn_summary_for_current ();
4443 }
4444
4445 }; // class pass_local_fn_summary
4446
4447 } // anon namespace
4448
4449 gimple_opt_pass *
4450 make_pass_local_fn_summary (gcc::context *ctxt)
4451 {
4452 return new pass_local_fn_summary (ctxt);
4453 }
4454
4455
4456 /* Free inline summary. */
4457
4458 namespace {
4459
4460 const pass_data pass_data_ipa_free_fn_summary =
4461 {
4462 SIMPLE_IPA_PASS, /* type */
4463 "free-fnsummary", /* name */
4464 OPTGROUP_NONE, /* optinfo_flags */
4465 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
4466 0, /* properties_required */
4467 0, /* properties_provided */
4468 0, /* properties_destroyed */
4469 0, /* todo_flags_start */
4470 0, /* todo_flags_finish */
4471 };
4472
4473 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
4474 {
4475 public:
4476 pass_ipa_free_fn_summary (gcc::context *ctxt)
4477 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
4478 small_p (false)
4479 {}
4480
4481 /* opt_pass methods: */
4482 opt_pass *clone () { return new pass_ipa_free_fn_summary (m_ctxt); }
4483 void set_pass_param (unsigned int n, bool param)
4484 {
4485 gcc_assert (n == 0);
4486 small_p = param;
4487 }
4488 virtual bool gate (function *) { return true; }
4489 virtual unsigned int execute (function *)
4490 {
4491 ipa_free_fn_summary ();
4492 if (!flag_wpa)
4493 ipa_free_size_summary ();
4494 return 0;
4495 }
4496
4497 private:
4498 bool small_p;
4499 }; // class pass_ipa_free_fn_summary
4500
4501 } // anon namespace
4502
4503 simple_ipa_opt_pass *
4504 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
4505 {
4506 return new pass_ipa_free_fn_summary (ctxt);
4507 }
4508
4509 namespace {
4510
4511 const pass_data pass_data_ipa_fn_summary =
4512 {
4513 IPA_PASS, /* type */
4514 "fnsummary", /* name */
4515 OPTGROUP_INLINE, /* optinfo_flags */
4516 TV_IPA_FNSUMMARY, /* tv_id */
4517 0, /* properties_required */
4518 0, /* properties_provided */
4519 0, /* properties_destroyed */
4520 0, /* todo_flags_start */
4521 ( TODO_dump_symtab ), /* todo_flags_finish */
4522 };
4523
4524 class pass_ipa_fn_summary : public ipa_opt_pass_d
4525 {
4526 public:
4527 pass_ipa_fn_summary (gcc::context *ctxt)
4528 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
4529 ipa_fn_summary_generate, /* generate_summary */
4530 ipa_fn_summary_write, /* write_summary */
4531 ipa_fn_summary_read, /* read_summary */
4532 NULL, /* write_optimization_summary */
4533 NULL, /* read_optimization_summary */
4534 NULL, /* stmt_fixup */
4535 0, /* function_transform_todo_flags_start */
4536 NULL, /* function_transform */
4537 NULL) /* variable_transform */
4538 {}
4539
4540 /* opt_pass methods: */
4541 virtual unsigned int execute (function *) { return 0; }
4542
4543 }; // class pass_ipa_fn_summary
4544
4545 } // anon namespace
4546
4547 ipa_opt_pass_d *
4548 make_pass_ipa_fn_summary (gcc::context *ctxt)
4549 {
4550 return new pass_ipa_fn_summary (ctxt);
4551 }
4552
4553 /* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
4554 within the same process. For use by toplev::finalize. */
4555
4556 void
4557 ipa_fnsummary_c_finalize (void)
4558 {
4559 ipa_free_fn_summary ();
4560 }