gcc/ipa-cp.cc
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
3
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 /* Interprocedural constant propagation (IPA-CP).
24
25 The goal of this transformation is to
26
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
30
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
 33       certain contexts but the estimated tradeoff between speedup and code size
 34       growth is deemed good.
35
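      As an illustrative sketch (the functions below are hypothetical, not part
      of GCC itself), given

        static int mul (int scale, int x) { return x * scale; }
        int f (int x) { return mul (2, x); }
        int g (int x) { return mul (2, x + 1); }

      both call-sites pass the constant 2 as SCALE, so under 1) MUL can be
      transformed assuming SCALE == 2; had the constant held only for some
      callers, a specialized clone could still be created for those contexts
      under 2).
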
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
38
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
 42    jump functions across the call graph to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
47
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
52
53
54 First stage - intraprocedural analysis
55 =======================================
56
57 This phase computes jump_function and modification flags.
58
 59    A jump function for a call-site represents the values passed as actual
 60    arguments of that call-site. In principle, there are three types of
61 values:
62
63 Pass through - the caller's formal parameter is passed as an actual
 64    argument, possibly with an operation performed on it first.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
67
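      For instance (an illustrative sketch, not taken from the sources; CALLEE,
      CALLER and GET are hypothetical), in

        extern int get (void);
        void callee (int a, int b, int c);
        void caller (int x) { callee (x + 1, 7, get ()); }

      the first actual argument is described by a pass-through jump function
      (CALLER's parameter X with the operation PLUS_EXPR by 1), the second by a
      constant jump function holding 7 and the third by an unknown one.
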
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
70
71 ipcp_generate_summary() is the main function of the first stage.
72
73 Second stage - interprocedural analysis
74 ========================================
75
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
 78    It uses a different algorithm than the one in Callahan's original paper.
79
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
86
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
92
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
 94    second stage.
95
 96    Third stage - materialization of clones, call statement updates.
 97    =================================================================
98
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
101 the second stage. */
102
103 #define INCLUDE_ALGORITHM
104 #include "config.h"
105 #include "system.h"
106 #include "coretypes.h"
107 #include "backend.h"
108 #include "tree.h"
109 #include "gimple-expr.h"
110 #include "gimple.h"
111 #include "predict.h"
112 #include "sreal.h"
113 #include "alloc-pool.h"
114 #include "tree-pass.h"
115 #include "cgraph.h"
116 #include "diagnostic.h"
117 #include "fold-const.h"
118 #include "gimple-iterator.h"
119 #include "gimple-fold.h"
120 #include "symbol-summary.h"
121 #include "tree-vrp.h"
122 #include "ipa-cp.h"
123 #include "ipa-prop.h"
124 #include "tree-pretty-print.h"
125 #include "tree-inline.h"
126 #include "ipa-fnsummary.h"
127 #include "ipa-utils.h"
128 #include "tree-ssa-ccp.h"
129 #include "stringpool.h"
130 #include "attribs.h"
131 #include "dbgcnt.h"
132 #include "symtab-clones.h"
133 #include "gimple-range.h"
134
135
136 /* Allocation pools for values and their sources in ipa-cp. */
137
138 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
139 ("IPA-CP constant values");
140
141 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
142 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
143
144 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
145 ("IPA-CP value sources");
146
147 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
148 ("IPA_CP aggregate lattices");
149
150 /* Base count to use in heuristics when using profile feedback. */
151
152 static profile_count base_count;
153
154 /* Original overall size of the program. */
155
156 static long overall_size, orig_overall_size;
157
158 /* Node name to unique clone suffix number map. */
159 static hash_map<const char *, unsigned> *clone_num_suffixes;
160
161 /* Return the param lattices structure corresponding to the Ith formal
162 parameter of the function described by INFO. */
163 static inline class ipcp_param_lattices *
164 ipa_get_parm_lattices (class ipa_node_params *info, int i)
165 {
166 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
167 gcc_checking_assert (!info->ipcp_orig_node);
168 return &(info->lattices[i]);
169 }
170
171 /* Return the lattice corresponding to the scalar value of the Ith formal
172 parameter of the function described by INFO. */
173 static inline ipcp_lattice<tree> *
174 ipa_get_scalar_lat (class ipa_node_params *info, int i)
175 {
176 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
177 return &plats->itself;
178 }
179
 180 /* Return the lattice corresponding to the polymorphic call context of the Ith
 181    formal parameter of the function described by INFO. */
182 static inline ipcp_lattice<ipa_polymorphic_call_context> *
183 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
184 {
185 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
186 return &plats->ctxlat;
187 }
188
189 /* Return whether LAT is a lattice with a single constant and without an
190 undefined value. */
191
192 template <typename valtype>
193 inline bool
194 ipcp_lattice<valtype>::is_single_const ()
195 {
196 if (bottom || contains_variable || values_count != 1)
197 return false;
198 else
199 return true;
200 }
201
202 /* Return true iff X and Y should be considered equal values by IPA-CP. */
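/* For example (an illustrative note, not from the sources): two ADDR_EXPRs
   pointing to two distinct constant-pool decls that share the same
   DECL_INITIAL, say the string "foo", compare equal here, whereas ordinary
   trees must satisfy operand_equal_p.  */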
203
204 bool
205 values_equal_for_ipcp_p (tree x, tree y)
206 {
207 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
208
209 if (x == y)
210 return true;
211
212 if (TREE_CODE (x) == ADDR_EXPR
213 && TREE_CODE (y) == ADDR_EXPR
214 && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
215 || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
216 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
217 && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
218 || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
219 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
220 return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
221 || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
222 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
223 else
224 return operand_equal_p (x, y, 0);
225 }
226
227 /* Print V which is extracted from a value in a lattice to F. */
228
229 static void
230 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
231 {
232 v.dump(f, false);
233 }
234
235 /* Print a lattice LAT to F. */
236
237 template <typename valtype>
238 void
239 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
240 {
241 ipcp_value<valtype> *val;
242 bool prev = false;
243
244 if (bottom)
245 {
246 fprintf (f, "BOTTOM\n");
247 return;
248 }
249
250 if (!values_count && !contains_variable)
251 {
252 fprintf (f, "TOP\n");
253 return;
254 }
255
256 if (contains_variable)
257 {
258 fprintf (f, "VARIABLE");
259 prev = true;
260 if (dump_benefits)
261 fprintf (f, "\n");
262 }
263
264 for (val = values; val; val = val->next)
265 {
266 if (dump_benefits && prev)
267 fprintf (f, " ");
268 else if (!dump_benefits && prev)
269 fprintf (f, ", ");
270 else
271 prev = true;
272
273 print_ipcp_constant_value (f, val->value);
274
275 if (dump_sources)
276 {
277 ipcp_value_source<valtype> *s;
278
279 if (val->self_recursion_generated_p ())
280 fprintf (f, " [self_gen(%i), from:",
281 val->self_recursion_generated_level);
282 else
283 fprintf (f, " [scc: %i, from:", val->scc_no);
284 for (s = val->sources; s; s = s->next)
285 fprintf (f, " %i(%f)", s->cs->caller->order,
286 s->cs->sreal_frequency ().to_double ());
287 fprintf (f, "]");
288 }
289
290 if (dump_benefits)
291 fprintf (f, " [loc_time: %g, loc_size: %i, "
292 "prop_time: %g, prop_size: %i]\n",
293 val->local_time_benefit.to_double (), val->local_size_cost,
294 val->prop_time_benefit.to_double (), val->prop_size_cost);
295 }
296 if (!dump_benefits)
297 fprintf (f, "\n");
298 }
299
300 void
301 ipcp_bits_lattice::print (FILE *f)
302 {
303 if (top_p ())
304 fprintf (f, " Bits unknown (TOP)\n");
305 else if (bottom_p ())
306 fprintf (f, " Bits unusable (BOTTOM)\n");
307 else
308 {
309 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
310 fprintf (f, ", mask = "); print_hex (get_mask (), f);
311 fprintf (f, "\n");
312 }
313 }
314
315 /* Print value range lattice to F. */
316
317 void
318 ipcp_vr_lattice::print (FILE * f)
319 {
320 m_vr.dump (f);
321 }
322
323 /* Print all ipcp_lattices of all functions to F. */
324
325 static void
326 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
327 {
328 struct cgraph_node *node;
329 int i, count;
330
331 fprintf (f, "\nLattices:\n");
332 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
333 {
334 class ipa_node_params *info;
335
336 info = ipa_node_params_sum->get (node);
337 /* Skip unoptimized functions and constprop clones since we don't make
338 lattices for them. */
339 if (!info || info->ipcp_orig_node)
340 continue;
341 fprintf (f, " Node: %s:\n", node->dump_name ());
342 count = ipa_get_param_count (info);
343 for (i = 0; i < count; i++)
344 {
345 struct ipcp_agg_lattice *aglat;
346 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
347 fprintf (f, " param [%d]: ", i);
348 plats->itself.print (f, dump_sources, dump_benefits);
349 fprintf (f, " ctxs: ");
350 plats->ctxlat.print (f, dump_sources, dump_benefits);
351 plats->bits_lattice.print (f);
352 fprintf (f, " ");
353 plats->m_value_range.print (f);
354 fprintf (f, "\n");
355 if (plats->virt_call)
356 fprintf (f, " virt_call flag set\n");
357
358 if (plats->aggs_bottom)
359 {
360 fprintf (f, " AGGS BOTTOM\n");
361 continue;
362 }
363 if (plats->aggs_contain_variable)
364 fprintf (f, " AGGS VARIABLE\n");
365 for (aglat = plats->aggs; aglat; aglat = aglat->next)
366 {
367 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
368 plats->aggs_by_ref ? "ref " : "", aglat->offset);
369 aglat->print (f, dump_sources, dump_benefits);
370 }
371 }
372 }
373 }
374
375 /* Determine whether it is at all technically possible to create clones of NODE
376 and store this information in the ipa_node_params structure associated
377 with NODE. */
378
379 static void
380 determine_versionability (struct cgraph_node *node,
381 class ipa_node_params *info)
382 {
383 const char *reason = NULL;
384
385 /* There are a number of generic reasons functions cannot be versioned. We
386 also cannot remove parameters if there are type attributes such as fnspec
387 present. */
388 if (node->alias || node->thunk)
389 reason = "alias or thunk";
390 else if (!node->versionable)
391 reason = "not a tree_versionable_function";
392 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
393 reason = "insufficient body availability";
394 else if (!opt_for_fn (node->decl, optimize)
395 || !opt_for_fn (node->decl, flag_ipa_cp))
396 reason = "non-optimized function";
397 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
398 {
399 /* Ideally we should clone the SIMD clones themselves and create
400 vector copies of them, so IPA-cp and SIMD clones can happily
401 coexist, but that may not be worth the effort. */
402 reason = "function has SIMD clones";
403 }
404 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
405 {
406 /* Ideally we should clone the target clones themselves and create
407 copies of them, so IPA-cp and target clones can happily
408 coexist, but that may not be worth the effort. */
409 reason = "function target_clones attribute";
410 }
 411    /* Don't clone decls local to a comdat group; cloning them breaks things, and
 412       for C++ decloned constructors, inlining is always better anyway. */
413 else if (node->comdat_local_p ())
414 reason = "comdat-local function";
415 else if (node->calls_comdat_local)
416 {
417 /* TODO: call is versionable if we make sure that all
418 callers are inside of a comdat group. */
419 reason = "calls comdat-local function";
420 }
421
422 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
423 work only when inlined. Cloning them may still lead to better code
424 because ipa-cp will not give up on cloning further. If the function is
 425    external, however, this leads to wrong code because we may end up producing an
 426    offline copy of the function. */
427 if (DECL_EXTERNAL (node->decl))
428 for (cgraph_edge *edge = node->callees; !reason && edge;
429 edge = edge->next_callee)
430 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
431 {
432 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
433 reason = "external function which calls va_arg_pack";
434 if (DECL_FUNCTION_CODE (edge->callee->decl)
435 == BUILT_IN_VA_ARG_PACK_LEN)
436 reason = "external function which calls va_arg_pack_len";
437 }
438
439 if (reason && dump_file && !node->alias && !node->thunk)
440 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
441 node->dump_name (), reason);
442
443 info->versionable = (reason == NULL);
444 }
445
446 /* Return true if it is at all technically possible to create clones of a
447 NODE. */
448
449 static bool
450 ipcp_versionable_function_p (struct cgraph_node *node)
451 {
452 ipa_node_params *info = ipa_node_params_sum->get (node);
453 return info && info->versionable;
454 }
455
456 /* Structure holding accumulated information about callers of a node. */
457
458 struct caller_statistics
459 {
460 /* If requested (see below), self-recursive call counts are summed into this
461 field. */
462 profile_count rec_count_sum;
463 /* The sum of all ipa counts of all the other (non-recursive) calls. */
464 profile_count count_sum;
465 /* Sum of all frequencies for all calls. */
466 sreal freq_sum;
467 /* Number of calls and hot calls respectively. */
468 int n_calls, n_hot_calls;
 469   /* If ITSELF is set (see below), also count the number of non-self-recursive
 470      calls. */
471 int n_nonrec_calls;
472 /* If non-NULL, this is the node itself and calls from it should have their
473 counts included in rec_count_sum and not count_sum. */
474 cgraph_node *itself;
475 };
476
 477 /* Initialize fields of STATS to zeroes and optionally set it up so that
 478    self-recursive edges from ITSELF are counted separately. */
479
480 static inline void
481 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
482 {
483 stats->rec_count_sum = profile_count::zero ();
484 stats->count_sum = profile_count::zero ();
485 stats->n_calls = 0;
486 stats->n_hot_calls = 0;
487 stats->n_nonrec_calls = 0;
488 stats->freq_sum = 0;
489 stats->itself = itself;
490 }
491
492 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
493 non-thunk incoming edges to NODE. */
494
495 static bool
496 gather_caller_stats (struct cgraph_node *node, void *data)
497 {
498 struct caller_statistics *stats = (struct caller_statistics *) data;
499 struct cgraph_edge *cs;
500
501 for (cs = node->callers; cs; cs = cs->next_caller)
502 if (!cs->caller->thunk)
503 {
504 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
505 if (info && info->node_dead)
506 continue;
507
508 if (cs->count.ipa ().initialized_p ())
509 {
510 if (stats->itself && stats->itself == cs->caller)
511 stats->rec_count_sum += cs->count.ipa ();
512 else
513 stats->count_sum += cs->count.ipa ();
514 }
515 stats->freq_sum += cs->sreal_frequency ();
516 stats->n_calls++;
517 if (stats->itself && stats->itself != cs->caller)
518 stats->n_nonrec_calls++;
519
520 if (cs->maybe_hot_p ())
521 stats->n_hot_calls ++;
522 }
523 return false;
524
525 }
526
 527 /* Return true if this NODE is a viable candidate for cloning. */
528
529 static bool
530 ipcp_cloning_candidate_p (struct cgraph_node *node)
531 {
532 struct caller_statistics stats;
533
534 gcc_checking_assert (node->has_gimple_body_p ());
535
536 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
537 {
538 if (dump_file)
539 fprintf (dump_file, "Not considering %s for cloning; "
540 "-fipa-cp-clone disabled.\n",
541 node->dump_name ());
542 return false;
543 }
544
545 if (node->optimize_for_size_p ())
546 {
547 if (dump_file)
548 fprintf (dump_file, "Not considering %s for cloning; "
549 "optimizing it for size.\n",
550 node->dump_name ());
551 return false;
552 }
553
554 init_caller_stats (&stats);
555 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
556
557 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
558 {
559 if (dump_file)
560 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
561 node->dump_name ());
562 return true;
563 }
564
 565   /* When a profile is available and the function is hot, propagate into it even
 566      if its calls seem cold; constant propagation can improve the function's
 567      speed significantly. */
568 if (stats.count_sum > profile_count::zero ()
569 && node->count.ipa ().initialized_p ())
570 {
571 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
572 {
573 if (dump_file)
574 fprintf (dump_file, "Considering %s for cloning; "
575 "usually called directly.\n",
576 node->dump_name ());
577 return true;
578 }
579 }
580 if (!stats.n_hot_calls)
581 {
582 if (dump_file)
583 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
584 node->dump_name ());
585 return false;
586 }
587 if (dump_file)
588 fprintf (dump_file, "Considering %s for cloning.\n",
589 node->dump_name ());
590 return true;
591 }
592
593 template <typename valtype>
594 class value_topo_info
595 {
596 public:
597 /* Head of the linked list of topologically sorted values. */
598 ipcp_value<valtype> *values_topo;
599 /* Stack for creating SCCs, represented by a linked list too. */
600 ipcp_value<valtype> *stack;
601 /* Counter driving the algorithm in add_val_to_toposort. */
602 int dfs_counter;
603
604 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
605 {}
606 void add_val (ipcp_value<valtype> *cur_val);
607 void propagate_effects ();
608 };
609
610 /* Arrays representing a topological ordering of call graph nodes and a stack
611 of nodes used during constant propagation and also data required to perform
612 topological sort of values and propagation of benefits in the determined
613 order. */
614
615 class ipa_topo_info
616 {
617 public:
618 /* Array with obtained topological order of cgraph nodes. */
619 struct cgraph_node **order;
620 /* Stack of cgraph nodes used during propagation within SCC until all values
621 in the SCC stabilize. */
622 struct cgraph_node **stack;
623 int nnodes, stack_top;
624
625 value_topo_info<tree> constants;
626 value_topo_info<ipa_polymorphic_call_context> contexts;
627
628 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
629 constants ()
630 {}
631 };
632
 633 /* Skip edges from and to nodes without ipa-cp enabled.
 634    Ignore symbols that are not available. */
635
636 static bool
637 ignore_edge_p (cgraph_edge *e)
638 {
639 enum availability avail;
640 cgraph_node *ultimate_target
641 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
642
643 return (avail <= AVAIL_INTERPOSABLE
644 || !opt_for_fn (ultimate_target->decl, optimize)
645 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
646 }
647
648 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
649
650 static void
651 build_toporder_info (class ipa_topo_info *topo)
652 {
653 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
654 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
655
656 gcc_checking_assert (topo->stack_top == 0);
657 topo->nnodes = ipa_reduced_postorder (topo->order, true,
658 ignore_edge_p);
659 }
660
661 /* Free information about strongly connected components and the arrays in
662 TOPO. */
663
664 static void
665 free_toporder_info (class ipa_topo_info *topo)
666 {
667 ipa_free_postorder_info ();
668 free (topo->order);
669 free (topo->stack);
670 }
671
672 /* Add NODE to the stack in TOPO, unless it is already there. */
673
674 static inline void
675 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
676 {
677 ipa_node_params *info = ipa_node_params_sum->get (node);
678 if (info->node_enqueued)
679 return;
680 info->node_enqueued = 1;
681 topo->stack[topo->stack_top++] = node;
682 }
683
684 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
685 is empty. */
686
687 static struct cgraph_node *
688 pop_node_from_stack (class ipa_topo_info *topo)
689 {
690 if (topo->stack_top)
691 {
692 struct cgraph_node *node;
693 topo->stack_top--;
694 node = topo->stack[topo->stack_top];
695 ipa_node_params_sum->get (node)->node_enqueued = 0;
696 return node;
697 }
698 else
699 return NULL;
700 }
701
702 /* Set lattice LAT to bottom and return true if it previously was not set as
703 such. */
704
705 template <typename valtype>
706 inline bool
707 ipcp_lattice<valtype>::set_to_bottom ()
708 {
709 bool ret = !bottom;
710 bottom = true;
711 return ret;
712 }
713
714 /* Mark lattice as containing an unknown value and return true if it previously
715 was not marked as such. */
716
717 template <typename valtype>
718 inline bool
719 ipcp_lattice<valtype>::set_contains_variable ()
720 {
721 bool ret = !contains_variable;
722 contains_variable = true;
723 return ret;
724 }
725
726 /* Set all aggregate lattices in PLATS to bottom and return true if they were
727 not previously set as such. */
728
729 static inline bool
730 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
731 {
732 bool ret = !plats->aggs_bottom;
733 plats->aggs_bottom = true;
734 return ret;
735 }
736
737 /* Mark all aggregate lattices in PLATS as containing an unknown value and
738 return true if they were not previously marked as such. */
739
740 static inline bool
741 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
742 {
743 bool ret = !plats->aggs_contain_variable;
744 plats->aggs_contain_variable = true;
745 return ret;
746 }
747
748 bool
749 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
750 {
751 return meet_with_1 (other.m_vr);
752 }
753
754 /* Meet the current value of the lattice with the range described by
755 P_VR. */
756
757 bool
758 ipcp_vr_lattice::meet_with (const vrange &p_vr)
759 {
760 return meet_with_1 (p_vr);
761 }
762
763 /* Meet the current value of the lattice with the range described by
764 OTHER_VR. Return TRUE if anything changed. */
765
766 bool
767 ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
768 {
769 if (bottom_p ())
770 return false;
771
772 if (other_vr.varying_p ())
773 return set_to_bottom ();
774
775 bool res;
776 if (flag_checking)
777 {
778 Value_Range save (m_vr);
779 res = m_vr.union_ (other_vr);
780 gcc_assert (res == (m_vr != save));
781 }
782 else
783 res = m_vr.union_ (other_vr);
784 return res;
785 }
786
787 /* Return true if value range information in the lattice is yet unknown. */
788
789 bool
790 ipcp_vr_lattice::top_p () const
791 {
792 return m_vr.undefined_p ();
793 }
794
795 /* Return true if value range information in the lattice is known to be
796 unusable. */
797
798 bool
799 ipcp_vr_lattice::bottom_p () const
800 {
801 return m_vr.varying_p ();
802 }
803
804 /* Set value range information in the lattice to bottom. Return true if it
805 previously was in a different state. */
806
807 bool
808 ipcp_vr_lattice::set_to_bottom ()
809 {
810 if (m_vr.varying_p ())
811 return false;
812
813 /* Setting an unsupported type here forces the temporary to default
814 to unsupported_range, which can handle VARYING/DEFINED ranges,
815 but nothing else (union, intersect, etc). This allows us to set
816 bottoms on any ranges, and is safe as all users of the lattice
817 check for bottom first. */
818 m_vr.set_type (void_type_node);
819 m_vr.set_varying (void_type_node);
820
821 return true;
822 }
823
824 /* Set lattice value to bottom, if it already isn't the case. */
825
826 bool
827 ipcp_bits_lattice::set_to_bottom ()
828 {
829 if (bottom_p ())
830 return false;
831 m_lattice_val = IPA_BITS_VARYING;
832 m_value = 0;
833 m_mask = -1;
834 return true;
835 }
836
837 /* Set to constant if it isn't already. Only meant to be called
838 when switching state from TOP. */
839
840 bool
841 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
842 {
843 gcc_assert (top_p ());
844 m_lattice_val = IPA_BITS_CONSTANT;
845 m_value = wi::bit_and (wi::bit_not (mask), value);
846 m_mask = mask;
847 return true;
848 }
849
850 /* Return true if any of the known bits are non-zero. */
851
852 bool
853 ipcp_bits_lattice::known_nonzero_p () const
854 {
855 if (!constant_p ())
856 return false;
857 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
858 }
859
 860 /* Convert OPERAND to <value, mask> form. */
861
862 void
863 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
864 {
865 wide_int get_nonzero_bits (const_tree);
866
867 if (TREE_CODE (operand) == INTEGER_CST)
868 {
869 *valuep = wi::to_widest (operand);
870 *maskp = 0;
871 }
872 else
873 {
874 *valuep = 0;
875 *maskp = -1;
876 }
877 }
878
 879 /* Meet operation, similar to ccp_lattice_meet: any bit position at which
 880    m_value and VALUE differ is dropped to varying, i.e. becomes set in the
 881    mask. Return true if the mask has changed.
 882    This function assumes that the lattice value is in CONSTANT state. If
 883    DROP_ALL_ONES, mask out any known bits with value one afterwards. */
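/* A worked example (illustrative only): meeting a lattice whose m_value is
   0b0101 with m_mask 0 (all low bits known) with VALUE 0b0111 and MASK 0 sets
   bit 1 of the mask (the only bit where the two values differ), keeps m_value
   at 0b0101 after masking, and returns true because the mask changed.  */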
884
885 bool
886 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
887 unsigned precision, bool drop_all_ones)
888 {
889 gcc_assert (constant_p ());
890
891 widest_int old_mask = m_mask;
892 m_mask = (m_mask | mask) | (m_value ^ value);
893 if (drop_all_ones)
894 m_mask |= m_value;
895 m_value &= ~m_mask;
896
897 if (wi::sext (m_mask, precision) == -1)
898 return set_to_bottom ();
899
900 return m_mask != old_mask;
901 }
902
 903 /* Meet the bits lattice with an operand
 904    described by <VALUE, MASK, PRECISION>. */
905
906 bool
907 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
908 unsigned precision)
909 {
910 if (bottom_p ())
911 return false;
912
913 if (top_p ())
914 {
915 if (wi::sext (mask, precision) == -1)
916 return set_to_bottom ();
917 return set_to_constant (value, mask);
918 }
919
920 return meet_with_1 (value, mask, precision, false);
921 }
922
923 /* Meet bits lattice with the result of bit_value_binop (other, operand)
924 if code is binary operation or bit_value_unop (other) if code is unary op.
925 In the case when code is nop_expr, no adjustment is required. If
926 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
927
928 bool
929 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
930 signop sgn, enum tree_code code, tree operand,
931 bool drop_all_ones)
932 {
933 if (other.bottom_p ())
934 return set_to_bottom ();
935
936 if (bottom_p () || other.top_p ())
937 return false;
938
939 widest_int adjusted_value, adjusted_mask;
940
941 if (TREE_CODE_CLASS (code) == tcc_binary)
942 {
943 tree type = TREE_TYPE (operand);
944 widest_int o_value, o_mask;
945 get_value_and_mask (operand, &o_value, &o_mask);
946
947 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
948 sgn, precision, other.get_value (), other.get_mask (),
949 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
950
951 if (wi::sext (adjusted_mask, precision) == -1)
952 return set_to_bottom ();
953 }
954
955 else if (TREE_CODE_CLASS (code) == tcc_unary)
956 {
957 bit_value_unop (code, sgn, precision, &adjusted_value,
958 &adjusted_mask, sgn, precision, other.get_value (),
959 other.get_mask ());
960
961 if (wi::sext (adjusted_mask, precision) == -1)
962 return set_to_bottom ();
963 }
964
965 else
966 return set_to_bottom ();
967
968 if (top_p ())
969 {
970 if (drop_all_ones)
971 {
972 adjusted_mask |= adjusted_value;
973 adjusted_value &= ~adjusted_mask;
974 }
975 if (wi::sext (adjusted_mask, precision) == -1)
976 return set_to_bottom ();
977 return set_to_constant (adjusted_value, adjusted_mask);
978 }
979 else
980 return meet_with_1 (adjusted_value, adjusted_mask, precision,
981 drop_all_ones);
982 }
983
984 /* Dump the contents of the list to FILE. */
985
986 void
987 ipa_argagg_value_list::dump (FILE *f)
988 {
989 bool comma = false;
990 for (const ipa_argagg_value &av : m_elts)
991 {
992 fprintf (f, "%s %i[%u]=", comma ? "," : "",
993 av.index, av.unit_offset);
994 print_generic_expr (f, av.value);
995 if (av.by_ref)
996 fprintf (f, "(by_ref)");
997 if (av.killed)
998 fprintf (f, "(killed)");
999 comma = true;
1000 }
1001 fprintf (f, "\n");
1002 }
1003
1004 /* Dump the contents of the list to stderr. */
1005
1006 void
1007 ipa_argagg_value_list::debug ()
1008 {
1009 dump (stderr);
1010 }
1011
1012 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1013 NULL if there is no such constant. */
1014
1015 const ipa_argagg_value *
1016 ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1017 {
1018 ipa_argagg_value key;
1019 key.index = index;
1020 key.unit_offset = unit_offset;
1021 const ipa_argagg_value *res
1022 = std::lower_bound (m_elts.begin (), m_elts.end (), key,
1023 [] (const ipa_argagg_value &elt,
1024 const ipa_argagg_value &val)
1025 {
1026 if (elt.index < val.index)
1027 return true;
1028 if (elt.index > val.index)
1029 return false;
1030 if (elt.unit_offset < val.unit_offset)
1031 return true;
1032 return false;
1033 });
1034
1035 if (res == m_elts.end ()
1036 || res->index != index
1037 || res->unit_offset != unit_offset)
1038 res = nullptr;
1039
1040 /* TODO: perhaps remove the check (that the underlying array is indeed
1041 sorted) if it turns out it can be too slow? */
1042 if (!flag_checking)
1043 return res;
1044
1045 const ipa_argagg_value *slow_res = NULL;
1046 int prev_index = -1;
1047 unsigned prev_unit_offset = 0;
1048 for (const ipa_argagg_value &av : m_elts)
1049 {
1050 gcc_assert (prev_index < 0
1051 || prev_index < av.index
1052 || prev_unit_offset < av.unit_offset);
1053 prev_index = av.index;
1054 prev_unit_offset = av.unit_offset;
1055 if (av.index == index
1056 && av.unit_offset == unit_offset)
1057 slow_res = &av;
1058 }
1059 gcc_assert (res == slow_res);
1060
1061 return res;
1062 }
1063
1064 /* Return the first item describing a constant stored for parameter with INDEX,
1065 regardless of offset or reference, or NULL if there is no such constant. */
1066
1067 const ipa_argagg_value *
1068 ipa_argagg_value_list::get_elt_for_index (int index) const
1069 {
1070 const ipa_argagg_value *res
1071 = std::lower_bound (m_elts.begin (), m_elts.end (), index,
1072 [] (const ipa_argagg_value &elt, unsigned idx)
1073 {
1074 return elt.index < idx;
1075 });
1076 if (res == m_elts.end ()
1077 || res->index != index)
1078 res = nullptr;
1079 return res;
1080 }
1081
1082 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1083 performing any check of whether value is passed by reference, or NULL_TREE
1084 if there is no such constant. */
1085
1086 tree
1087 ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1088 {
1089 const ipa_argagg_value *av = get_elt (index, unit_offset);
1090 return av ? av->value : NULL_TREE;
1091 }
1092
1093 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1094 passed by reference or not according to BY_REF, or NULL_TREE if there is
1095 no such constant. */
1096
1097 tree
1098 ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1099 bool by_ref) const
1100 {
1101 const ipa_argagg_value *av = get_elt (index, unit_offset);
1102 if (av && av->by_ref == by_ref)
1103 return av->value;
1104 return NULL_TREE;
1105 }
1106
1107 /* Return true if all elements present in OTHER are also present in this
1108 list. */
1109
1110 bool
1111 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1112 {
1113 unsigned j = 0;
1114 for (unsigned i = 0; i < other.m_elts.size (); i++)
1115 {
1116 unsigned other_index = other.m_elts[i].index;
1117 unsigned other_offset = other.m_elts[i].unit_offset;
1118
1119 while (j < m_elts.size ()
1120 && (m_elts[j].index < other_index
1121 || (m_elts[j].index == other_index
1122 && m_elts[j].unit_offset < other_offset)))
1123 j++;
1124
1125 if (j >= m_elts.size ()
1126 || m_elts[j].index != other_index
1127 || m_elts[j].unit_offset != other_offset
1128 || m_elts[j].by_ref != other.m_elts[i].by_ref
1129 || !m_elts[j].value
1130 || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
1131 return false;
1132 }
1133 return true;
1134 }
1135
 1136 /* Push all items in this list that describe parameter SRC_INDEX into RES as
 1137    ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
 1138    offsets, but skip those which would end up with a negative offset. */
1139
1140 void
1141 ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1142 unsigned dest_index,
1143 unsigned unit_delta,
1144 vec<ipa_argagg_value> *res) const
1145 {
1146 const ipa_argagg_value *av = get_elt_for_index (src_index);
1147 if (!av)
1148 return;
1149 unsigned prev_unit_offset = 0;
1150 bool first = true;
1151 for (; av < m_elts.end (); ++av)
1152 {
1153 if (av->index > src_index)
1154 return;
1155 if (av->index == src_index
1156 && (av->unit_offset >= unit_delta)
1157 && av->value)
1158 {
1159 ipa_argagg_value new_av;
1160 gcc_checking_assert (av->value);
1161 new_av.value = av->value;
1162 new_av.unit_offset = av->unit_offset - unit_delta;
1163 new_av.index = dest_index;
1164 new_av.by_ref = av->by_ref;
1165 gcc_assert (!av->killed);
1166 new_av.killed = false;
1167
1168 /* Quick check that the offsets we push are indeed increasing. */
1169 gcc_assert (first
1170 || new_av.unit_offset > prev_unit_offset);
1171 prev_unit_offset = new_av.unit_offset;
1172 first = false;
1173
1174 res->safe_push (new_av);
1175 }
1176 }
1177 }
1178
 1179 /* Push to RES information from single-constant aggregate value lattices in
 1180    PLATS, describing them as values of parameter DEST_INDEX at their original
 1181    offset minus UNIT_DELTA. Return true if any item has been pushed to RES. */
1182
1183 static bool
1184 push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1185 unsigned unit_delta,
1186 vec<ipa_argagg_value> *res)
1187 {
1188 if (plats->aggs_contain_variable)
1189 return false;
1190
1191 bool pushed_sth = false;
1192 bool first = true;
1193 unsigned prev_unit_offset = 0;
1194 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1195 if (aglat->is_single_const ()
1196 && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1197 {
1198 ipa_argagg_value iav;
1199 iav.value = aglat->values->value;
1200 iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1201 iav.index = dest_index;
1202 iav.by_ref = plats->aggs_by_ref;
1203 iav.killed = false;
1204
1205 gcc_assert (first
1206 || iav.unit_offset > prev_unit_offset);
1207 prev_unit_offset = iav.unit_offset;
1208 first = false;
1209
1210 pushed_sth = true;
1211 res->safe_push (iav);
1212 }
1213 return pushed_sth;
1214 }
1215
 1216 /* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
 1217    Return the number of remaining valid entries. */
1218
1219 static unsigned
1220 intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1221 const vec<ipa_argagg_value> &other)
1222 {
1223 unsigned valid_entries = 0;
1224 unsigned j = 0;
1225 for (unsigned i = 0; i < elts.length (); i++)
1226 {
1227 if (!elts[i].value)
1228 continue;
1229
1230 unsigned this_index = elts[i].index;
1231 unsigned this_offset = elts[i].unit_offset;
1232
1233 while (j < other.length ()
1234 && (other[j].index < this_index
1235 || (other[j].index == this_index
1236 && other[j].unit_offset < this_offset)))
1237 j++;
1238
1239 if (j >= other.length ())
1240 {
1241 elts[i].value = NULL_TREE;
1242 continue;
1243 }
1244
1245 if (other[j].index == this_index
1246 && other[j].unit_offset == this_offset
1247 && other[j].by_ref == elts[i].by_ref
1248 && other[j].value
1249 && values_equal_for_ipcp_p (other[j].value, elts[i].value))
1250 valid_entries++;
1251 else
1252 elts[i].value = NULL_TREE;
1253 }
1254 return valid_entries;
1255 }
1256
 1257 /* Mark both aggregate and scalar lattices as containing an unknown variable;
 1258    return true if any of them has not been marked as such so far. */
1259
1260 static inline bool
1261 set_all_contains_variable (class ipcp_param_lattices *plats)
1262 {
1263 bool ret;
1264 ret = plats->itself.set_contains_variable ();
1265 ret |= plats->ctxlat.set_contains_variable ();
1266 ret |= set_agg_lats_contain_variable (plats);
1267 ret |= plats->bits_lattice.set_to_bottom ();
1268 ret |= plats->m_value_range.set_to_bottom ();
1269 return ret;
1270 }
1271
1272 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1273 points to by the number of callers to NODE. */
1274
1275 static bool
1276 count_callers (cgraph_node *node, void *data)
1277 {
1278 int *caller_count = (int *) data;
1279
1280 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1281 /* Local thunks can be handled transparently, but if the thunk cannot
1282 be optimized out, count it as a real use. */
1283 if (!cs->caller->thunk || !cs->caller->local)
1284 ++*caller_count;
1285 return false;
1286 }
1287
 1288 /* Worker of call_for_symbol_thunks_and_aliases, supposed to be called on a
 1289    node known to have exactly one caller. Set that caller's corresponding flag. */
1290
1291 static bool
1292 set_single_call_flag (cgraph_node *node, void *)
1293 {
1294 cgraph_edge *cs = node->callers;
1295 /* Local thunks can be handled transparently, skip them. */
1296 while (cs && cs->caller->thunk && cs->caller->local)
1297 cs = cs->next_caller;
1298 if (cs)
1299 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1300 {
1301 info->node_calling_single_call = true;
1302 return true;
1303 }
1304 return false;
1305 }
1306
1307 /* Initialize ipcp_lattices. */
1308
1309 static void
1310 initialize_node_lattices (struct cgraph_node *node)
1311 {
1312 ipa_node_params *info = ipa_node_params_sum->get (node);
1313 struct cgraph_edge *ie;
1314 bool disable = false, variable = false;
1315 int i;
1316
1317 gcc_checking_assert (node->has_gimple_body_p ());
1318
1319 if (!ipa_get_param_count (info))
1320 disable = true;
1321 else if (node->local)
1322 {
1323 int caller_count = 0;
1324 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1325 true);
1326 gcc_checking_assert (caller_count > 0);
1327 if (caller_count == 1)
1328 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1329 NULL, true);
1330 }
1331 else
1332 {
 1333       /* When cloning is allowed, we can assume that externally visible
 1334          functions are not called. We will compensate for this by cloning
 1335          later. */
1336 if (ipcp_versionable_function_p (node)
1337 && ipcp_cloning_candidate_p (node))
1338 variable = true;
1339 else
1340 disable = true;
1341 }
1342
1343 if (dump_file && (dump_flags & TDF_DETAILS)
1344 && !node->alias && !node->thunk)
1345 {
1346 fprintf (dump_file, "Initializing lattices of %s\n",
1347 node->dump_name ());
1348 if (disable || variable)
1349 fprintf (dump_file, " Marking all lattices as %s\n",
1350 disable ? "BOTTOM" : "VARIABLE");
1351 }
1352
1353 auto_vec<bool, 16> surviving_params;
1354 bool pre_modified = false;
1355
1356 clone_info *cinfo = clone_info::get (node);
1357
1358 if (!disable && cinfo && cinfo->param_adjustments)
1359 {
1360 /* At the moment all IPA optimizations should use the number of
1361 parameters of the prevailing decl as the m_always_copy_start.
 1362          Handling any other value would complicate the code below, so for the
 1363          time being let's only assert it is so. */
1364 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1365 == ipa_get_param_count (info))
1366 || cinfo->param_adjustments->m_always_copy_start < 0);
1367
1368 pre_modified = true;
1369 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1370
1371 if (dump_file && (dump_flags & TDF_DETAILS)
1372 && !node->alias && !node->thunk)
1373 {
1374 bool first = true;
1375 for (int j = 0; j < ipa_get_param_count (info); j++)
1376 {
1377 if (j < (int) surviving_params.length ()
1378 && surviving_params[j])
1379 continue;
1380 if (first)
1381 {
1382 fprintf (dump_file,
1383 " The following parameters are dead on arrival:");
1384 first = false;
1385 }
1386 fprintf (dump_file, " %u", j);
1387 }
1388 if (!first)
1389 fprintf (dump_file, "\n");
1390 }
1391 }
1392
1393 for (i = 0; i < ipa_get_param_count (info); i++)
1394 {
1395 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1396 tree type = ipa_get_type (info, i);
1397 if (disable
1398 || !ipa_get_type (info, i)
1399 || (pre_modified && (surviving_params.length () <= (unsigned) i
1400 || !surviving_params[i])))
1401 {
1402 plats->itself.set_to_bottom ();
1403 plats->ctxlat.set_to_bottom ();
1404 set_agg_lats_to_bottom (plats);
1405 plats->bits_lattice.set_to_bottom ();
1406 plats->m_value_range.init (type);
1407 plats->m_value_range.set_to_bottom ();
1408 }
1409 else
1410 {
1411 plats->m_value_range.init (type);
1412 if (variable)
1413 set_all_contains_variable (plats);
1414 }
1415 }
1416
1417 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1418 if (ie->indirect_info->polymorphic
1419 && ie->indirect_info->param_index >= 0)
1420 {
1421 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1422 ipa_get_parm_lattices (info,
1423 ie->indirect_info->param_index)->virt_call = 1;
1424 }
1425 }
1426
1427 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1428 PARAM_TYPE. */
1429
1430 static bool
1431 ipacp_value_safe_for_type (tree param_type, tree value)
1432 {
1433 tree val_type = TREE_TYPE (value);
1434 if (param_type == val_type
1435 || useless_type_conversion_p (param_type, val_type)
1436 || fold_convertible_p (param_type, value))
1437 return true;
1438 else
1439 return false;
1440 }
1441
1442 /* Return the result of a (possibly arithmetic) operation on the constant
 1443    value INPUT. OPERAND is the 2nd operand for a binary operation. RES_TYPE is
1444 the type of the parameter to which the result is passed. Return
1445 NULL_TREE if that cannot be determined or be considered an
1446 interprocedural invariant. */
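/* For example (an illustrative note, not from the sources): with OPCODE
   PLUS_EXPR, INPUT 3, OPERAND 4 and an integer RES_TYPE, the folded result is
   the interprocedural invariant 7; with OPCODE NOP_EXPR, INPUT is simply
   returned unchanged.  */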
1447
1448 static tree
1449 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1450 tree res_type)
1451 {
1452 tree res;
1453
1454 if (opcode == NOP_EXPR)
1455 return input;
1456 if (!is_gimple_ip_invariant (input))
1457 return NULL_TREE;
1458
1459 if (opcode == ASSERT_EXPR)
1460 {
1461 if (values_equal_for_ipcp_p (input, operand))
1462 return input;
1463 else
1464 return NULL_TREE;
1465 }
1466
1467 if (!res_type)
1468 {
1469 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1470 res_type = boolean_type_node;
1471 else if (expr_type_first_operand_type_p (opcode))
1472 res_type = TREE_TYPE (input);
1473 else
1474 return NULL_TREE;
1475 }
1476
1477 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1478 res = fold_unary (opcode, res_type, input);
1479 else
1480 res = fold_binary (opcode, res_type, input, operand);
1481
1482 if (res && !is_gimple_ip_invariant (res))
1483 return NULL_TREE;
1484
1485 return res;
1486 }
1487
1488 /* Return the result of a (possibly arithmetic) pass through jump function
1489 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1490 to which the result is passed. Return NULL_TREE if that cannot be
1491 determined or be considered an interprocedural invariant. */
1492
1493 static tree
1494 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1495 tree res_type)
1496 {
1497 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1498 input,
1499 ipa_get_jf_pass_through_operand (jfunc),
1500 res_type);
1501 }
1502
1503 /* Return the result of an ancestor jump function JFUNC on the constant value
1504 INPUT. Return NULL_TREE if that cannot be determined. */
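/* For example (an illustrative note; OBJ is hypothetical): if INPUT is the
   ADDR_EXPR &obj and the ancestor offset recorded in JFUNC is 64 bits, the
   result is the address of a MEM_REF eight bytes into OBJ, while a zero
   offset returns INPUT itself.  */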
1505
1506 static tree
1507 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1508 {
1509 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1510 if (TREE_CODE (input) == ADDR_EXPR)
1511 {
1512 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1513 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1514 if (known_eq (off, 0))
1515 return input;
1516 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1517 return build1 (ADDR_EXPR, TREE_TYPE (input),
1518 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1519 build_int_cst (ptr_type_node, byte_offset)));
1520 }
1521 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1522 && zerop (input))
1523 return input;
1524 else
1525 return NULL_TREE;
1526 }
1527
1528 /* Determine whether JFUNC evaluates to a single known constant value and if
1529 so, return it. Otherwise return NULL. INFO describes the caller node or
1530 the one it is inlined to, so that pass-through jump functions can be
1531 evaluated. PARM_TYPE is the type of the parameter to which the result is
1532 passed. */
1533
1534 tree
1535 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1536 tree parm_type)
1537 {
1538 if (jfunc->type == IPA_JF_CONST)
1539 return ipa_get_jf_constant (jfunc);
1540 else if (jfunc->type == IPA_JF_PASS_THROUGH
1541 || jfunc->type == IPA_JF_ANCESTOR)
1542 {
1543 tree input;
1544 int idx;
1545
1546 if (jfunc->type == IPA_JF_PASS_THROUGH)
1547 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1548 else
1549 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1550
1551 if (info->ipcp_orig_node)
1552 input = info->known_csts[idx];
1553 else
1554 {
1555 ipcp_lattice<tree> *lat;
1556
1557 if (info->lattices.is_empty ()
1558 || idx >= ipa_get_param_count (info))
1559 return NULL_TREE;
1560 lat = ipa_get_scalar_lat (info, idx);
1561 if (!lat->is_single_const ())
1562 return NULL_TREE;
1563 input = lat->values->value;
1564 }
1565
1566 if (!input)
1567 return NULL_TREE;
1568
1569 if (jfunc->type == IPA_JF_PASS_THROUGH)
1570 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1571 else
1572 return ipa_get_jf_ancestor_result (jfunc, input);
1573 }
1574 else
1575 return NULL_TREE;
1576 }
1577
 1578 /* Determine whether JFUNC evaluates to a single known polymorphic context,
 1579    given that INFO describes the caller node or the one it is inlined to, CS is
 1580    the call graph edge corresponding to JFUNC and CSIDX is the index of the
 1581    described parameter. */
1582
1583 ipa_polymorphic_call_context
1584 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1585 ipa_jump_func *jfunc)
1586 {
1587 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1588 ipa_polymorphic_call_context ctx;
1589 ipa_polymorphic_call_context *edge_ctx
1590 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1591
1592 if (edge_ctx && !edge_ctx->useless_p ())
1593 ctx = *edge_ctx;
1594
1595 if (jfunc->type == IPA_JF_PASS_THROUGH
1596 || jfunc->type == IPA_JF_ANCESTOR)
1597 {
1598 ipa_polymorphic_call_context srcctx;
1599 int srcidx;
1600 bool type_preserved = true;
1601 if (jfunc->type == IPA_JF_PASS_THROUGH)
1602 {
1603 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1604 return ctx;
1605 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1606 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1607 }
1608 else
1609 {
1610 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1611 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1612 }
1613 if (info->ipcp_orig_node)
1614 {
1615 if (info->known_contexts.exists ())
1616 srcctx = info->known_contexts[srcidx];
1617 }
1618 else
1619 {
1620 if (info->lattices.is_empty ()
1621 || srcidx >= ipa_get_param_count (info))
1622 return ctx;
1623 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1624 lat = ipa_get_poly_ctx_lat (info, srcidx);
1625 if (!lat->is_single_const ())
1626 return ctx;
1627 srcctx = lat->values->value;
1628 }
1629 if (srcctx.useless_p ())
1630 return ctx;
1631 if (jfunc->type == IPA_JF_ANCESTOR)
1632 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1633 if (!type_preserved)
1634 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1635 srcctx.combine_with (ctx);
1636 return srcctx;
1637 }
1638
1639 return ctx;
1640 }
1641
1642 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1643 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
 1644    the result is a range that is neither VARYING nor UNDEFINED. */
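/* For example (an illustrative note, not from the sources): converting the
   int range [1, 10] through NOP_EXPR to a short DST_TYPE yields [1, 10] and
   the function returns true, whereas a conversion whose result would be
   VARYING makes it return false.  */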
1645
1646 static bool
1647 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1648 const vrange &src_vr,
1649 enum tree_code operation,
1650 tree dst_type, tree src_type)
1651 {
1652 if (!ipa_supports_p (dst_type) || !ipa_supports_p (src_type))
1653 return false;
1654
1655 range_op_handler handler (operation);
1656 if (!handler)
1657 return false;
1658
1659 Value_Range varying (dst_type);
1660 varying.set_varying (dst_type);
1661
1662 return (handler.operand_check_p (dst_type, src_type, dst_type)
1663 && handler.fold_range (dst_vr, dst_type, src_vr, varying)
1664 && !dst_vr.varying_p ()
1665 && !dst_vr.undefined_p ());
1666 }
1667
 1668 /* Same as above, but the SRC_VR argument is an IPA_VR which must
 1669    first be extracted into a vrange. */
1670
1671 static bool
1672 ipa_vr_operation_and_type_effects (vrange &dst_vr,
1673 const ipa_vr &src_vr,
1674 enum tree_code operation,
1675 tree dst_type, tree src_type)
1676 {
1677 Value_Range tmp;
1678 src_vr.get_vrange (tmp);
1679 return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
1680 dst_type, src_type);
1681 }
1682
 1683 /* Determine the range of JFUNC given that INFO describes the caller node or
 1684    the one it is inlined to, CS is the call graph edge corresponding to JFUNC
 1685    and PARM_TYPE is the type of the parameter. */
1686
1687 void
1688 ipa_value_range_from_jfunc (vrange &vr,
1689 ipa_node_params *info, cgraph_edge *cs,
1690 ipa_jump_func *jfunc, tree parm_type)
1691 {
1692 vr.set_undefined ();
1693
1694 if (jfunc->m_vr)
1695 ipa_vr_operation_and_type_effects (vr,
1696 *jfunc->m_vr,
1697 NOP_EXPR, parm_type,
1698 jfunc->m_vr->type ());
1699 if (vr.singleton_p ())
1700 return;
1701 if (jfunc->type == IPA_JF_PASS_THROUGH)
1702 {
1703 int idx;
1704 ipcp_transformation *sum
1705 = ipcp_get_transformation_summary (cs->caller->inlined_to
1706 ? cs->caller->inlined_to
1707 : cs->caller);
1708 if (!sum || !sum->m_vr)
1709 return;
1710
1711 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1712
1713 if (!(*sum->m_vr)[idx].known_p ())
1714 return;
1715 tree vr_type = ipa_get_type (info, idx);
1716 Value_Range srcvr;
1717 (*sum->m_vr)[idx].get_vrange (srcvr);
1718
1719 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1720
1721 if (TREE_CODE_CLASS (operation) == tcc_unary)
1722 {
1723 Value_Range res (parm_type);
1724
1725 if (ipa_vr_operation_and_type_effects (res,
1726 srcvr,
1727 operation, parm_type,
1728 vr_type))
1729 vr.intersect (res);
1730 }
1731 else
1732 {
1733 Value_Range op_res (vr_type);
1734 Value_Range res (vr_type);
1735 tree op = ipa_get_jf_pass_through_operand (jfunc);
1736 Value_Range op_vr (TREE_TYPE (op));
1737 range_op_handler handler (operation);
1738
1739 ipa_range_set_and_normalize (op_vr, op);
1740
1741 if (!handler
1742 || !op_res.supports_type_p (vr_type)
1743 || !handler.fold_range (op_res, vr_type, srcvr, op_vr))
1744 op_res.set_varying (vr_type);
1745
1746 if (ipa_vr_operation_and_type_effects (res,
1747 op_res,
1748 NOP_EXPR, parm_type,
1749 vr_type))
1750 vr.intersect (res);
1751 }
1752 }
1753 }
1754
1755 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1756 single known constant value and if so, return it. Otherwise return NULL.
 1757    NODE and INFO describe the caller node or the one it is inlined to, and
 1758    its related info. */
1759
1760 tree
1761 ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1762 const ipa_agg_jf_item *item)
1763 {
1764 tree value = NULL_TREE;
1765 int src_idx;
1766
1767 if (item->offset < 0
1768 || item->jftype == IPA_JF_UNKNOWN
1769 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
1770 return NULL_TREE;
1771
1772 if (item->jftype == IPA_JF_CONST)
1773 return item->value.constant;
1774
1775 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1776 || item->jftype == IPA_JF_LOAD_AGG);
1777
1778 src_idx = item->value.pass_through.formal_id;
1779
1780 if (info->ipcp_orig_node)
1781 {
1782 if (item->jftype == IPA_JF_PASS_THROUGH)
1783 value = info->known_csts[src_idx];
1784 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
1785 {
1786 ipa_argagg_value_list avl (ts);
1787 value = avl.get_value (src_idx,
1788 item->value.load_agg.offset / BITS_PER_UNIT,
1789 item->value.load_agg.by_ref);
1790 }
1791 }
1792 else if (!info->lattices.is_empty ())
1793 {
1794 class ipcp_param_lattices *src_plats
1795 = ipa_get_parm_lattices (info, src_idx);
1796
1797 if (item->jftype == IPA_JF_PASS_THROUGH)
1798 {
1799 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1800
1801 if (!lat->is_single_const ())
1802 return NULL_TREE;
1803
1804 value = lat->values->value;
1805 }
1806 else if (src_plats->aggs
1807 && !src_plats->aggs_bottom
1808 && !src_plats->aggs_contain_variable
1809 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1810 {
1811 struct ipcp_agg_lattice *aglat;
1812
1813 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1814 {
1815 if (aglat->offset > item->value.load_agg.offset)
1816 break;
1817
1818 if (aglat->offset == item->value.load_agg.offset)
1819 {
1820 if (aglat->is_single_const ())
1821 value = aglat->values->value;
1822 break;
1823 }
1824 }
1825 }
1826 }
1827
1828 if (!value)
1829 return NULL_TREE;
1830
1831 if (item->jftype == IPA_JF_LOAD_AGG)
1832 {
1833 tree load_type = item->value.load_agg.type;
1834 tree value_type = TREE_TYPE (value);
1835
1836 /* Ensure value type is compatible with load type. */
1837 if (!useless_type_conversion_p (load_type, value_type))
1838 return NULL_TREE;
1839 }
1840
1841 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1842 value,
1843 item->value.pass_through.operand,
1844 item->type);
1845 }
1846
 1847 /* Process all items in AGG_JFUNC relative to caller (or the node the original
 1848    caller is inlined to) NODE which is described by INFO and push the results to
 1849    RES as describing values passed in parameter DST_INDEX. */
1850
1851 void
1852 ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
1853 ipa_agg_jump_function *agg_jfunc,
1854 unsigned dst_index,
1855 vec<ipa_argagg_value> *res)
1856 {
1857 unsigned prev_unit_offset = 0;
1858 bool first = true;
1859
1860 for (const ipa_agg_jf_item &item : agg_jfunc->items)
1861 {
1862 tree value = ipa_agg_value_from_jfunc (info, node, &item);
1863 if (!value)
1864 continue;
1865
1866 ipa_argagg_value iav;
1867 iav.value = value;
1868 iav.unit_offset = item.offset / BITS_PER_UNIT;
1869 iav.index = dst_index;
1870 iav.by_ref = agg_jfunc->by_ref;
1871 iav.killed = 0;
1872
1873 gcc_assert (first
1874 || iav.unit_offset > prev_unit_offset);
1875 prev_unit_offset = iav.unit_offset;
1876 first = false;
1877
1878 res->safe_push (iav);
1879 }
1880 }
1881
1882 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1883 bottom, not containing a variable component and without any known value at
1884 the same time. */
1885
1886 DEBUG_FUNCTION void
1887 ipcp_verify_propagated_values (void)
1888 {
1889 struct cgraph_node *node;
1890
1891 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1892 {
1893 ipa_node_params *info = ipa_node_params_sum->get (node);
1894 if (!opt_for_fn (node->decl, flag_ipa_cp)
1895 || !opt_for_fn (node->decl, optimize))
1896 continue;
1897 int i, count = ipa_get_param_count (info);
1898
1899 for (i = 0; i < count; i++)
1900 {
1901 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1902
1903 if (!lat->bottom
1904 && !lat->contains_variable
1905 && lat->values_count == 0)
1906 {
1907 if (dump_file)
1908 {
1909 symtab->dump (dump_file);
1910 fprintf (dump_file, "\nIPA lattices after constant "
1911 "propagation, before gcc_unreachable:\n");
1912 print_all_lattices (dump_file, true, false);
1913 }
1914
1915 gcc_unreachable ();
1916 }
1917 }
1918 }
1919 }
1920
1921 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1922
1923 static bool
1924 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1925 ipa_polymorphic_call_context y)
1926 {
1927 return x.equal_to (y);
1928 }
1929
1930
1931 /* Add a new value source to the value represented by THIS, marking that a
1932 value comes from edge CS and (if the underlying jump function is a
1933 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1934 parameter described by SRC_IDX. OFFSET is negative if the source was the
1935 scalar value of the parameter itself; otherwise it is the offset within an aggregate. */
1936
1937 template <typename valtype>
1938 void
1939 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1940 int src_idx, HOST_WIDE_INT offset)
1941 {
1942 ipcp_value_source<valtype> *src;
1943
1944 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1945 src->offset = offset;
1946 src->cs = cs;
1947 src->val = src_val;
1948 src->index = src_idx;
1949
1950 src->next = sources;
1951 sources = src;
1952 }
1953
1954 /* Allocate a new ipcp_value holding a tree constant, initialize its value to CST,
1955 set its self-recursion generation level to SAME_LAT_GEN_LEVEL and clear all other fields. */
1956
1957 static ipcp_value<tree> *
1958 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1959 {
1960 ipcp_value<tree> *val;
1961
1962 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1963 val->value = cst;
1964 val->self_recursion_generated_level = same_lat_gen_level;
1965 return val;
1966 }
1967
1968 /* Allocate a new ipcp_value holding a polymorphic context, initialize its value to CTX,
1969 set its self-recursion generation level to SAME_LAT_GEN_LEVEL and clear all other fields. */
1970
1971 static ipcp_value<ipa_polymorphic_call_context> *
1972 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1973 unsigned same_lat_gen_level)
1974 {
1975 ipcp_value<ipa_polymorphic_call_context> *val;
1976
1977 val = new (ipcp_poly_ctx_values_pool.allocate ())
1978 ipcp_value<ipa_polymorphic_call_context>();
1979 val->value = ctx;
1980 val->self_recursion_generated_level = same_lat_gen_level;
1981 return val;
1982 }
1983
1984 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1985 SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the same
1986 meaning. OFFSET -1 means the source is scalar and not a part of an
1987 aggregate. If non-NULL, VAL_P records address of existing or newly added
1988 ipcp_value.
1989
1990 If the value is generated for a self-recursive call as a result of an
1991 arithmetic pass-through jump-function acting on a value in the same lattice,
1992 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1993 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1994
1995 template <typename valtype>
1996 bool
1997 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1998 ipcp_value<valtype> *src_val,
1999 int src_idx, HOST_WIDE_INT offset,
2000 ipcp_value<valtype> **val_p,
2001 unsigned same_lat_gen_level)
2002 {
2003 ipcp_value<valtype> *val, *last_val = NULL;
2004
2005 if (val_p)
2006 *val_p = NULL;
2007
2008 if (bottom)
2009 return false;
2010
2011 for (val = values; val; last_val = val, val = val->next)
2012 if (values_equal_for_ipcp_p (val->value, newval))
2013 {
2014 if (val_p)
2015 *val_p = val;
2016
2017 if (val->self_recursion_generated_level < same_lat_gen_level)
2018 val->self_recursion_generated_level = same_lat_gen_level;
2019
2020 if (ipa_edge_within_scc (cs))
2021 {
2022 ipcp_value_source<valtype> *s;
2023 for (s = val->sources; s; s = s->next)
2024 if (s->cs == cs && s->val == src_val)
2025 break;
2026 if (s)
2027 return false;
2028 }
2029
2030 val->add_source (cs, src_val, src_idx, offset);
2031 return false;
2032 }
2033
2034 if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2035 param_ipa_cp_value_list_size))
2036 {
2037 /* We can only free sources, not the values themselves, because sources
2038 of other values in this SCC might point to them. */
2039 for (val = values; val; val = val->next)
2040 {
2041 while (val->sources)
2042 {
2043 ipcp_value_source<valtype> *src = val->sources;
2044 val->sources = src->next;
2045 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2046 }
2047 }
2048 values = NULL;
2049 return set_to_bottom ();
2050 }
2051
2052 values_count++;
2053 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2054 val->add_source (cs, src_val, src_idx, offset);
2055 val->next = NULL;
2056
2057 /* Add the new value to the end of the value list, which can reduce the number
2058 of iterations of the propagation stage for recursive functions. */
2059 if (last_val)
2060 last_val->next = val;
2061 else
2062 values = val;
2063
2064 if (val_p)
2065 *val_p = val;
2066
2067 return true;
2068 }
2069
2070 /* A helper function that returns the result of the operation specified by OPCODE on
2071 the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected type for the
2072 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2073 acting as its second operand. If non-NULL, RES_TYPE is the expected type of
2074 the result. */
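
/* For example (hypothetical values): with OPCODE PLUS_EXPR, SRC_VAL holding
   the constant 3 and OPND2 the constant 4, the function returns the constant
   7 folded to RES_TYPE, or NULL_TREE if the folding fails or the types are
   incompatible.  */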
2075
2076 static tree
2077 get_val_across_arith_op (enum tree_code opcode,
2078 tree opnd1_type,
2079 tree opnd2,
2080 ipcp_value<tree> *src_val,
2081 tree res_type)
2082 {
2083 tree opnd1 = src_val->value;
2084
2085 /* Skip source values that are incompatible with the specified type. */
2086 if (opnd1_type
2087 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2088 return NULL_TREE;
2089
2090 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2091 }
2092
2093 /* Propagate values through an arithmetic transformation described by a jump
2094 function associated with edge CS, taking values from SRC_LAT and putting
2095 them into DEST_LAT. OPND1_TYPE is the expected type for the values in SRC_LAT.
2096 OPND2 is a constant value if the transformation is a binary operation.
2097 SRC_OFFSET specifies the offset in an aggregate if SRC_LAT describes the
2098 lattice of a part of the aggregate. SRC_IDX is the index of the source parameter.
2099 RES_TYPE is the type of the result being propagated. Return true if
2100 DEST_LAT changed. */
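
/* A hypothetical illustration of the self-recursion handling below: for

     void recur (int i)
     {
       if (i < 8)
         recur (i + 1);
       do_work (i);
     }

   the self-recursive call is described by a pass-through jump function with
   operation PLUS_EXPR and operand 1, and SRC_LAT is the same lattice as
   DEST_LAT.  Starting from a seed value such as 0 (coming from an outside
   call recur (0)), a limited number of further values 1, 2, 3, ... (bounded
   by param_ipa_cp_max_recursive_depth) is generated, which is what makes
   recursive cloning possible.  */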
2101
2102 static bool
2103 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2104 enum tree_code opcode,
2105 tree opnd1_type,
2106 tree opnd2,
2107 ipcp_lattice<tree> *src_lat,
2108 ipcp_lattice<tree> *dest_lat,
2109 HOST_WIDE_INT src_offset,
2110 int src_idx,
2111 tree res_type)
2112 {
2113 ipcp_value<tree> *src_val;
2114 bool ret = false;
2115
2116 /* Due to circular dependencies, propagating within an SCC through an arithmetic
2117 transformation would create an infinite number of values. But for a
2118 self-feeding recursive function, we can allow propagation up to a limited
2119 count, and this enables a simple kind of recursive function versioning.
2120 For other scenarios, we simply make the lattices bottom. */
2121 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2122 {
2123 int i;
2124
2125 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2126 param_ipa_cp_max_recursive_depth);
2127 if (src_lat != dest_lat || max_recursive_depth < 1)
2128 return dest_lat->set_contains_variable ();
2129
2130 /* No benefit if recursive execution has low probability. */
2131 if (cs->sreal_frequency () * 100
2132 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2133 param_ipa_cp_min_recursive_probability))
2134 return dest_lat->set_contains_variable ();
2135
2136 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2137
2138 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2139 {
2140 /* We currently do not use self-recursively generated values as propagation
2141 sources; this is absolutely conservative, but it avoids explosion
2142 of the lattice's value space, especially when one recursive function
2143 calls another recursive one. */
2144 if (src_val->self_recursion_generated_p ())
2145 {
2146 ipcp_value_source<tree> *s;
2147
2148 /* If the lattice has already been propagated for the call site,
2149 no need to do that again. */
2150 for (s = src_val->sources; s; s = s->next)
2151 if (s->cs == cs)
2152 return dest_lat->set_contains_variable ();
2153 }
2154 else
2155 val_seeds.safe_push (src_val);
2156 }
2157
2158 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2159
2160 /* Recursively generate lattice values with a limited count. */
2161 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2162 {
2163 for (int j = 1; j < max_recursive_depth; j++)
2164 {
2165 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2166 src_val, res_type);
2167 if (!cstval
2168 || !ipacp_value_safe_for_type (res_type, cstval))
2169 break;
2170
2171 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2172 src_offset, &src_val, j);
2173 gcc_checking_assert (src_val);
2174 }
2175 }
2176 ret |= dest_lat->set_contains_variable ();
2177 }
2178 else
2179 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2180 {
2181 /* We currently do not use self-recursively generated values as propagation
2182 sources, otherwise the value space of a normal lattice could easily
2183 overflow. */
2184 if (src_val->self_recursion_generated_p ())
2185 {
2186 ret |= dest_lat->set_contains_variable ();
2187 continue;
2188 }
2189
2190 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2191 src_val, res_type);
2192 if (cstval
2193 && ipacp_value_safe_for_type (res_type, cstval))
2194 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2195 src_offset);
2196 else
2197 ret |= dest_lat->set_contains_variable ();
2198 }
2199
2200 return ret;
2201 }
2202
2203 /* Propagate values through a pass-through jump function JFUNC associated with
2204 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2205 is the index of the source parameter. PARM_TYPE is the type of the
2206 parameter to which the result is passed. */
2207
2208 static bool
2209 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2210 ipcp_lattice<tree> *src_lat,
2211 ipcp_lattice<tree> *dest_lat, int src_idx,
2212 tree parm_type)
2213 {
2214 return propagate_vals_across_arith_jfunc (cs,
2215 ipa_get_jf_pass_through_operation (jfunc),
2216 NULL_TREE,
2217 ipa_get_jf_pass_through_operand (jfunc),
2218 src_lat, dest_lat, -1, src_idx, parm_type);
2219 }
2220
2221 /* Propagate values through an ancestor jump function JFUNC associated with
2222 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2223 is the index of the source parameter. */
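
/* A hypothetical illustration: with

     struct outer { int pad; struct inner in; };

     void caller (struct outer *o) { callee (&o->in); }

   the argument is described by an ancestor jump function whose offset is the
   bit offset of the IN field within struct outer; any pointer constant known
   for O in the caller is adjusted by that offset (via
   ipa_get_jf_ancestor_result) before being added to the destination
   lattice.  */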
2224
2225 static bool
2226 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2227 struct ipa_jump_func *jfunc,
2228 ipcp_lattice<tree> *src_lat,
2229 ipcp_lattice<tree> *dest_lat, int src_idx,
2230 tree param_type)
2231 {
2232 ipcp_value<tree> *src_val;
2233 bool ret = false;
2234
2235 if (ipa_edge_within_scc (cs))
2236 return dest_lat->set_contains_variable ();
2237
2238 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2239 {
2240 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2241
2242 if (t && ipacp_value_safe_for_type (param_type, t))
2243 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2244 else
2245 ret |= dest_lat->set_contains_variable ();
2246 }
2247
2248 return ret;
2249 }
2250
2251 /* Propagate scalar values across jump function JFUNC that is associated with
2252 edge CS and put the values into DEST_LAT. PARAM_TYPE is the type of the
2253 parameter to which the result is passed. */
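
/* A hypothetical illustration: for a call such as

     callee (7, x, x + 3);

   where X is a formal parameter of the caller, the first argument is
   described by an IPA_JF_CONST jump function and the constant 7 is added
   directly to the destination lattice; the second and third arguments are
   pass-through jump functions (the third one carrying a PLUS_EXPR operation)
   and are handled by propagate_vals_across_pass_through.  */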
2254
2255 static bool
2256 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2257 struct ipa_jump_func *jfunc,
2258 ipcp_lattice<tree> *dest_lat,
2259 tree param_type)
2260 {
2261 if (dest_lat->bottom)
2262 return false;
2263
2264 if (jfunc->type == IPA_JF_CONST)
2265 {
2266 tree val = ipa_get_jf_constant (jfunc);
2267 if (ipacp_value_safe_for_type (param_type, val))
2268 return dest_lat->add_value (val, cs, NULL, 0);
2269 else
2270 return dest_lat->set_contains_variable ();
2271 }
2272 else if (jfunc->type == IPA_JF_PASS_THROUGH
2273 || jfunc->type == IPA_JF_ANCESTOR)
2274 {
2275 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2276 ipcp_lattice<tree> *src_lat;
2277 int src_idx;
2278 bool ret;
2279
2280 if (jfunc->type == IPA_JF_PASS_THROUGH)
2281 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2282 else
2283 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2284
2285 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2286 if (src_lat->bottom)
2287 return dest_lat->set_contains_variable ();
2288
2289 /* If we would need to clone the caller and cannot, do not propagate. */
2290 if (!ipcp_versionable_function_p (cs->caller)
2291 && (src_lat->contains_variable
2292 || (src_lat->values_count > 1)))
2293 return dest_lat->set_contains_variable ();
2294
2295 if (jfunc->type == IPA_JF_PASS_THROUGH)
2296 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2297 dest_lat, src_idx,
2298 param_type);
2299 else
2300 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2301 src_idx, param_type);
2302
2303 if (src_lat->contains_variable)
2304 ret |= dest_lat->set_contains_variable ();
2305
2306 return ret;
2307 }
2308
2309 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2310 use them for indirect inlining); we should propagate them too. */
2311 return dest_lat->set_contains_variable ();
2312 }
2313
2314 /* Propagate polymorphic call contexts across jump function JFUNC that is
2315 associated with edge CS and describes argument IDX, and put the values into DEST_LAT. */
2316
2317 static bool
2318 propagate_context_across_jump_function (cgraph_edge *cs,
2319 ipa_jump_func *jfunc, int idx,
2320 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2321 {
2322 if (dest_lat->bottom)
2323 return false;
2324 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2325 bool ret = false;
2326 bool added_sth = false;
2327 bool type_preserved = true;
2328
2329 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2330 = ipa_get_ith_polymorhic_call_context (args, idx);
2331
2332 if (edge_ctx_ptr)
2333 edge_ctx = *edge_ctx_ptr;
2334
2335 if (jfunc->type == IPA_JF_PASS_THROUGH
2336 || jfunc->type == IPA_JF_ANCESTOR)
2337 {
2338 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2339 int src_idx;
2340 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2341
2342 /* TODO: Once we figure out how to propagate speculations, it will
2343 probably be a good idea to switch to speculation if type_preserved is
2344 not set instead of punting. */
2345 if (jfunc->type == IPA_JF_PASS_THROUGH)
2346 {
2347 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2348 goto prop_fail;
2349 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2350 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2351 }
2352 else
2353 {
2354 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2355 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2356 }
2357
2358 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2359 /* If we would need to clone the caller and cannot, do not propagate. */
2360 if (!ipcp_versionable_function_p (cs->caller)
2361 && (src_lat->contains_variable
2362 || (src_lat->values_count > 1)))
2363 goto prop_fail;
2364
2365 ipcp_value<ipa_polymorphic_call_context> *src_val;
2366 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2367 {
2368 ipa_polymorphic_call_context cur = src_val->value;
2369
2370 if (!type_preserved)
2371 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2372 if (jfunc->type == IPA_JF_ANCESTOR)
2373 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2374 /* TODO: In cases we know how the context is going to be used,
2375 we can improve the result by passing proper OTR_TYPE. */
2376 cur.combine_with (edge_ctx);
2377 if (!cur.useless_p ())
2378 {
2379 if (src_lat->contains_variable
2380 && !edge_ctx.equal_to (cur))
2381 ret |= dest_lat->set_contains_variable ();
2382 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2383 added_sth = true;
2384 }
2385 }
2386 }
2387
2388 prop_fail:
2389 if (!added_sth)
2390 {
2391 if (!edge_ctx.useless_p ())
2392 ret |= dest_lat->add_value (edge_ctx, cs);
2393 else
2394 ret |= dest_lat->set_contains_variable ();
2395 }
2396
2397 return ret;
2398 }
2399
2400 /* Propagate known bits across jump function JFUNC that is associated with
2401 edge CS and describes argument IDX, and update DEST_LATTICE accordingly. */
2402
2403 bool
2404 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2405 ipa_jump_func *jfunc,
2406 ipcp_bits_lattice *dest_lattice)
2407 {
2408 if (dest_lattice->bottom_p ())
2409 return false;
2410
2411 enum availability availability;
2412 cgraph_node *callee = cs->callee->function_symbol (&availability);
2413 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2414 tree parm_type = ipa_get_type (callee_info, idx);
2415
2416 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2417 transform for these cases. Similarly, we can have bad type mismatches
2418 with LTO, avoid doing anything with those too. */
2419 if (!parm_type
2420 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2421 {
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2424 "param %i of %s is NULL or unsuitable for bits propagation\n",
2425 idx, cs->callee->dump_name ());
2426
2427 return dest_lattice->set_to_bottom ();
2428 }
2429
2430 unsigned precision = TYPE_PRECISION (parm_type);
2431 signop sgn = TYPE_SIGN (parm_type);
2432
2433 if (jfunc->type == IPA_JF_PASS_THROUGH
2434 || jfunc->type == IPA_JF_ANCESTOR)
2435 {
2436 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2437 tree operand = NULL_TREE;
2438 enum tree_code code;
2439 unsigned src_idx;
2440 bool keep_null = false;
2441
2442 if (jfunc->type == IPA_JF_PASS_THROUGH)
2443 {
2444 code = ipa_get_jf_pass_through_operation (jfunc);
2445 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2446 if (code != NOP_EXPR)
2447 operand = ipa_get_jf_pass_through_operand (jfunc);
2448 }
2449 else
2450 {
2451 code = POINTER_PLUS_EXPR;
2452 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2453 unsigned HOST_WIDE_INT offset
2454 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2455 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2456 operand = build_int_cstu (size_type_node, offset);
2457 }
2458
2459 class ipcp_param_lattices *src_lats
2460 = ipa_get_parm_lattices (caller_info, src_idx);
2461
2462 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2463 For example, consider:
2464 int f(int x)
2465 {
2466 g (x & 0xff);
2467 }
2468 Assume the lattice for x is bottom; however, we can still propagate the
2469 known bits of x & 0xff (all bits above the low eight are zero), which get
2470 computed during the ccp1 pass and are stored in the jump function during the analysis stage. */
2471
2472 if (!src_lats->bits_lattice.bottom_p ())
2473 {
2474 bool drop_all_ones
2475 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2476
2477 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2478 sgn, code, operand, drop_all_ones);
2479 }
2480 }
2481
2482 Value_Range vr (parm_type);
2483 if (jfunc->m_vr)
2484 {
2485 jfunc->m_vr->get_vrange (vr);
2486 if (!vr.undefined_p () && !vr.varying_p ())
2487 {
2488 irange_bitmask bm = vr.get_bitmask ();
2489 widest_int mask
2490 = widest_int::from (bm.mask (), TYPE_SIGN (parm_type));
2491 widest_int value
2492 = widest_int::from (bm.value (), TYPE_SIGN (parm_type));
2493 return dest_lattice->meet_with (value, mask, precision);
2494 }
2495 }
2496 return dest_lattice->set_to_bottom ();
2497 }
2498
2499 /* Propagate value range across jump function JFUNC that is associated with
2500 edge CS and whose corresponding callee parameter has type PARAM_TYPE, and
2501 update DEST_PLATS accordingly. */
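
/* A hypothetical illustration: for

     void caller (int x)
     {
       callee (x + 1);
       callee2 (42);
     }

   the first call uses a pass-through jump function with operation PLUS_EXPR
   and operand 1; whatever range is known for X in the caller's lattice is
   folded through that operation before being met with the destination
   lattice.  The second call uses an IPA_JF_CONST jump function, so the
   singleton range [42, 42] is met with the lattice of callee2's
   parameter.  */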
2502
2503 static bool
2504 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2505 class ipcp_param_lattices *dest_plats,
2506 tree param_type)
2507 {
2508 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2509
2510 if (dest_lat->bottom_p ())
2511 return false;
2512
2513 if (!param_type
2514 || (!INTEGRAL_TYPE_P (param_type)
2515 && !POINTER_TYPE_P (param_type)))
2516 return dest_lat->set_to_bottom ();
2517
2518 if (jfunc->type == IPA_JF_PASS_THROUGH)
2519 {
2520 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2521 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2522 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2523 class ipcp_param_lattices *src_lats
2524 = ipa_get_parm_lattices (caller_info, src_idx);
2525 tree operand_type = ipa_get_type (caller_info, src_idx);
2526
2527 if (src_lats->m_value_range.bottom_p ())
2528 return dest_lat->set_to_bottom ();
2529
2530 Value_Range vr (param_type);
2531 if (TREE_CODE_CLASS (operation) == tcc_unary)
2532 ipa_vr_operation_and_type_effects (vr,
2533 src_lats->m_value_range.m_vr,
2534 operation, param_type,
2535 operand_type);
2536 /* A crude way to prevent an unbounded number of value range updates
2537 in SCC components. We should allow a limited number of updates within
2538 an SCC, too. */
2539 else if (!ipa_edge_within_scc (cs))
2540 {
2541 tree op = ipa_get_jf_pass_through_operand (jfunc);
2542 Value_Range op_vr (TREE_TYPE (op));
2543 Value_Range op_res (param_type);
2544 range_op_handler handler (operation);
2545
2546 ipa_range_set_and_normalize (op_vr, op);
2547
2548 if (!handler
2549 || !ipa_supports_p (operand_type)
2550 || !handler.fold_range (op_res, operand_type,
2551 src_lats->m_value_range.m_vr, op_vr))
2552 op_res.set_varying (param_type);
2553
2554 ipa_vr_operation_and_type_effects (vr,
2555 op_res,
2556 NOP_EXPR, param_type,
2557 operand_type);
2558 }
2559 if (!vr.undefined_p () && !vr.varying_p ())
2560 {
2561 if (jfunc->m_vr)
2562 {
2563 Value_Range jvr (param_type);
2564 if (ipa_vr_operation_and_type_effects (jvr, *jfunc->m_vr,
2565 NOP_EXPR,
2566 param_type,
2567 jfunc->m_vr->type ()))
2568 vr.intersect (jvr);
2569 }
2570 return dest_lat->meet_with (vr);
2571 }
2572 }
2573 else if (jfunc->type == IPA_JF_CONST)
2574 {
2575 tree val = ipa_get_jf_constant (jfunc);
2576 if (TREE_CODE (val) == INTEGER_CST)
2577 {
2578 val = fold_convert (param_type, val);
2579 if (TREE_OVERFLOW_P (val))
2580 val = drop_tree_overflow (val);
2581
2582 Value_Range tmpvr (val, val);
2583 return dest_lat->meet_with (tmpvr);
2584 }
2585 }
2586
2587 Value_Range vr (param_type);
2588 if (jfunc->m_vr
2589 && ipa_vr_operation_and_type_effects (vr, *jfunc->m_vr, NOP_EXPR,
2590 param_type,
2591 jfunc->m_vr->type ()))
2592 return dest_lat->meet_with (vr);
2593 else
2594 return dest_lat->set_to_bottom ();
2595 }
2596
2597 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2598 NEW_AGGS_BY_REF and if not, mark all aggregate lattices as bottom and return true (in all
2599 other cases, return false). If there are no aggregate items, set
2600 aggs_by_ref to NEW_AGGS_BY_REF. */
2601
2602 static bool
2603 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2604 bool new_aggs_by_ref)
2605 {
2606 if (dest_plats->aggs)
2607 {
2608 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2609 {
2610 set_agg_lats_to_bottom (dest_plats);
2611 return true;
2612 }
2613 }
2614 else
2615 dest_plats->aggs_by_ref = new_aggs_by_ref;
2616 return false;
2617 }
2618
2619 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***AGLAT is an
2620 already existing lattice for the given OFFSET and VAL_SIZE, marking all skipped
2621 lattices as containing variable and checking for overlaps. If there is no
2622 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2623 it with offset, size and contains_variable set to PRE_EXISTING, and return true,
2624 unless there are too many already. If there are too many, return false. If
2625 there are overlaps, turn the whole DEST_PLATS to bottom and return false. If any
2626 skipped lattices were newly marked as containing variable, set *CHANGE to
2627 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2628
2629 static bool
2630 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2631 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2632 struct ipcp_agg_lattice ***aglat,
2633 bool pre_existing, bool *change, int max_agg_items)
2634 {
2635 gcc_checking_assert (offset >= 0);
2636
2637 while (**aglat && (**aglat)->offset < offset)
2638 {
2639 if ((**aglat)->offset + (**aglat)->size > offset)
2640 {
2641 set_agg_lats_to_bottom (dest_plats);
2642 return false;
2643 }
2644 *change |= (**aglat)->set_contains_variable ();
2645 *aglat = &(**aglat)->next;
2646 }
2647
2648 if (**aglat && (**aglat)->offset == offset)
2649 {
2650 if ((**aglat)->size != val_size)
2651 {
2652 set_agg_lats_to_bottom (dest_plats);
2653 return false;
2654 }
2655 gcc_assert (!(**aglat)->next
2656 || (**aglat)->next->offset >= offset + val_size);
2657 return true;
2658 }
2659 else
2660 {
2661 struct ipcp_agg_lattice *new_al;
2662
2663 if (**aglat && (**aglat)->offset < offset + val_size)
2664 {
2665 set_agg_lats_to_bottom (dest_plats);
2666 return false;
2667 }
2668 if (dest_plats->aggs_count == max_agg_items)
2669 return false;
2670 dest_plats->aggs_count++;
2671 new_al = ipcp_agg_lattice_pool.allocate ();
2672
2673 new_al->offset = offset;
2674 new_al->size = val_size;
2675 new_al->contains_variable = pre_existing;
2676
2677 new_al->next = **aglat;
2678 **aglat = new_al;
2679 return true;
2680 }
2681 }
2682
2683 /* Mark AGLAT and all other aggregate lattices reachable via next pointers as
2684 containing an unknown value. */
2685
2686 static bool
2687 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2688 {
2689 bool ret = false;
2690 while (aglat)
2691 {
2692 ret |= aglat->set_contains_variable ();
2693 aglat = aglat->next;
2694 }
2695 return ret;
2696 }
2697
2698 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2699 OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
2700 parameter used for lattice value sources. Return true if DEST_PLATS changed
2701 in any way. */
2702
2703 static bool
2704 merge_aggregate_lattices (struct cgraph_edge *cs,
2705 class ipcp_param_lattices *dest_plats,
2706 class ipcp_param_lattices *src_plats,
2707 int src_idx, HOST_WIDE_INT offset_delta)
2708 {
2709 bool pre_existing = dest_plats->aggs != NULL;
2710 struct ipcp_agg_lattice **dst_aglat;
2711 bool ret = false;
2712
2713 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2714 return true;
2715 if (src_plats->aggs_bottom)
2716 return set_agg_lats_contain_variable (dest_plats);
2717 if (src_plats->aggs_contain_variable)
2718 ret |= set_agg_lats_contain_variable (dest_plats);
2719 dst_aglat = &dest_plats->aggs;
2720
2721 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2722 param_ipa_max_agg_items);
2723 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2724 src_aglat;
2725 src_aglat = src_aglat->next)
2726 {
2727 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2728
2729 if (new_offset < 0)
2730 continue;
2731 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2732 &dst_aglat, pre_existing, &ret, max_agg_items))
2733 {
2734 struct ipcp_agg_lattice *new_al = *dst_aglat;
2735
2736 dst_aglat = &(*dst_aglat)->next;
2737 if (src_aglat->bottom)
2738 {
2739 ret |= new_al->set_contains_variable ();
2740 continue;
2741 }
2742 if (src_aglat->contains_variable)
2743 ret |= new_al->set_contains_variable ();
2744 for (ipcp_value<tree> *val = src_aglat->values;
2745 val;
2746 val = val->next)
2747 ret |= new_al->add_value (val->value, cs, val, src_idx,
2748 src_aglat->offset);
2749 }
2750 else if (dest_plats->aggs_bottom)
2751 return true;
2752 }
2753 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2754 return ret;
2755 }
2756
2757 /* Determine whether there is anything to propagate from SRC_PLATS through a
2758 pass-through JFUNC and if so, whether it conforms to the
2759 rules about propagating values passed by reference. */
2760
2761 static bool
2762 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2763 struct ipa_jump_func *jfunc)
2764 {
2765 return src_plats->aggs
2766 && (!src_plats->aggs_by_ref
2767 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2768 }
2769
2770 /* Propagate values through ITEM, jump function for a part of an aggregate,
2771 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2772 associated with the jump function. Return true if AGLAT changed in any
2773 way. */
2774
2775 static bool
2776 propagate_aggregate_lattice (struct cgraph_edge *cs,
2777 struct ipa_agg_jf_item *item,
2778 struct ipcp_agg_lattice *aglat)
2779 {
2780 class ipa_node_params *caller_info;
2781 class ipcp_param_lattices *src_plats;
2782 struct ipcp_lattice<tree> *src_lat;
2783 HOST_WIDE_INT src_offset;
2784 int src_idx;
2785 tree load_type;
2786 bool ret;
2787
2788 if (item->jftype == IPA_JF_CONST)
2789 {
2790 tree value = item->value.constant;
2791
2792 gcc_checking_assert (is_gimple_ip_invariant (value));
2793 return aglat->add_value (value, cs, NULL, 0);
2794 }
2795
2796 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2797 || item->jftype == IPA_JF_LOAD_AGG);
2798
2799 caller_info = ipa_node_params_sum->get (cs->caller);
2800 src_idx = item->value.pass_through.formal_id;
2801 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2802
2803 if (item->jftype == IPA_JF_PASS_THROUGH)
2804 {
2805 load_type = NULL_TREE;
2806 src_lat = &src_plats->itself;
2807 src_offset = -1;
2808 }
2809 else
2810 {
2811 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2812 struct ipcp_agg_lattice *src_aglat;
2813
2814 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2815 if (src_aglat->offset >= load_offset)
2816 break;
2817
2818 load_type = item->value.load_agg.type;
2819 if (!src_aglat
2820 || src_aglat->offset > load_offset
2821 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2822 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2823 return aglat->set_contains_variable ();
2824
2825 src_lat = src_aglat;
2826 src_offset = load_offset;
2827 }
2828
2829 if (src_lat->bottom
2830 || (!ipcp_versionable_function_p (cs->caller)
2831 && !src_lat->is_single_const ()))
2832 return aglat->set_contains_variable ();
2833
2834 ret = propagate_vals_across_arith_jfunc (cs,
2835 item->value.pass_through.operation,
2836 load_type,
2837 item->value.pass_through.operand,
2838 src_lat, aglat,
2839 src_offset,
2840 src_idx,
2841 item->type);
2842
2843 if (src_lat->contains_variable)
2844 ret |= aglat->set_contains_variable ();
2845
2846 return ret;
2847 }
2848
2849 /* Propagate aggregate values across jump function JFUNC that is associated with
2850 edge CS and put the values into DEST_PLATS. */
2851
2852 static bool
2853 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2854 struct ipa_jump_func *jfunc,
2855 class ipcp_param_lattices *dest_plats)
2856 {
2857 bool ret = false;
2858
2859 if (dest_plats->aggs_bottom)
2860 return false;
2861
2862 if (jfunc->type == IPA_JF_PASS_THROUGH
2863 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2864 {
2865 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2866 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2867 class ipcp_param_lattices *src_plats;
2868
2869 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2870 if (agg_pass_through_permissible_p (src_plats, jfunc))
2871 {
2872 /* Currently we do not produce clobber aggregate jump
2873 functions, replace with merging when we do. */
2874 gcc_assert (!jfunc->agg.items);
2875 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2876 src_idx, 0);
2877 return ret;
2878 }
2879 }
2880 else if (jfunc->type == IPA_JF_ANCESTOR
2881 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2882 {
2883 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2884 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2885 class ipcp_param_lattices *src_plats;
2886
2887 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2888 if (src_plats->aggs && src_plats->aggs_by_ref)
2889 {
2890 /* Currently we do not produce clobber aggregate jump
2891 functions, replace with merging when we do. */
2892 gcc_assert (!jfunc->agg.items);
2893 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2894 ipa_get_jf_ancestor_offset (jfunc));
2895 }
2896 else if (!src_plats->aggs_by_ref)
2897 ret |= set_agg_lats_to_bottom (dest_plats);
2898 else
2899 ret |= set_agg_lats_contain_variable (dest_plats);
2900 return ret;
2901 }
2902
2903 if (jfunc->agg.items)
2904 {
2905 bool pre_existing = dest_plats->aggs != NULL;
2906 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2907 struct ipa_agg_jf_item *item;
2908 int i;
2909
2910 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2911 return true;
2912
2913 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2914 param_ipa_max_agg_items);
2915 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2916 {
2917 HOST_WIDE_INT val_size;
2918
2919 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2920 continue;
2921 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2922
2923 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2924 &aglat, pre_existing, &ret, max_agg_items))
2925 {
2926 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2927 aglat = &(*aglat)->next;
2928 }
2929 else if (dest_plats->aggs_bottom)
2930 return true;
2931 }
2932
2933 ret |= set_chain_of_aglats_contains_variable (*aglat);
2934 }
2935 else
2936 ret |= set_agg_lats_contain_variable (dest_plats);
2937
2938 return ret;
2939 }
2940
2941 /* Return true if on the way from CS->caller to the final (non-alias and
2942 non-thunk) destination, the call passes through a thunk. */
2943
2944 static bool
2945 call_passes_through_thunk (cgraph_edge *cs)
2946 {
2947 cgraph_node *alias_or_thunk = cs->callee;
2948 while (alias_or_thunk->alias)
2949 alias_or_thunk = alias_or_thunk->get_alias_target ();
2950 return alias_or_thunk->thunk;
2951 }
2952
2953 /* Propagate constants from the caller to the callee of CS. Return true if
2954 any lattice of the callee changed. */
2955
2956 static bool
2957 propagate_constants_across_call (struct cgraph_edge *cs)
2958 {
2959 class ipa_node_params *callee_info;
2960 enum availability availability;
2961 cgraph_node *callee;
2962 class ipa_edge_args *args;
2963 bool ret = false;
2964 int i, args_count, parms_count;
2965
2966 callee = cs->callee->function_symbol (&availability);
2967 if (!callee->definition)
2968 return false;
2969 gcc_checking_assert (callee->has_gimple_body_p ());
2970 callee_info = ipa_node_params_sum->get (callee);
2971 if (!callee_info)
2972 return false;
2973
2974 args = ipa_edge_args_sum->get (cs);
2975 parms_count = ipa_get_param_count (callee_info);
2976 if (parms_count == 0)
2977 return false;
2978 if (!args
2979 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2980 || !opt_for_fn (cs->caller->decl, optimize))
2981 {
2982 for (i = 0; i < parms_count; i++)
2983 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2984 i));
2985 return ret;
2986 }
2987 args_count = ipa_get_cs_argument_count (args);
2988
2989 /* If this call goes through a thunk we must not propagate to the first (0th)
2990 parameter. However, we might need to uncover a thunk from below a series
2991 of aliases first. */
2992 if (call_passes_through_thunk (cs))
2993 {
2994 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2995 0));
2996 i = 1;
2997 }
2998 else
2999 i = 0;
3000
3001 for (; (i < args_count) && (i < parms_count); i++)
3002 {
3003 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3004 class ipcp_param_lattices *dest_plats;
3005 tree param_type = ipa_get_type (callee_info, i);
3006
3007 dest_plats = ipa_get_parm_lattices (callee_info, i);
3008 if (availability == AVAIL_INTERPOSABLE)
3009 ret |= set_all_contains_variable (dest_plats);
3010 else
3011 {
3012 ret |= propagate_scalar_across_jump_function (cs, jump_func,
3013 &dest_plats->itself,
3014 param_type);
3015 ret |= propagate_context_across_jump_function (cs, jump_func, i,
3016 &dest_plats->ctxlat);
3017 ret
3018 |= propagate_bits_across_jump_function (cs, i, jump_func,
3019 &dest_plats->bits_lattice);
3020 ret |= propagate_aggs_across_jump_function (cs, jump_func,
3021 dest_plats);
3022 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3023 ret |= propagate_vr_across_jump_function (cs, jump_func,
3024 dest_plats, param_type);
3025 else
3026 ret |= dest_plats->m_value_range.set_to_bottom ();
3027 }
3028 }
3029 for (; i < parms_count; i++)
3030 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
3031
3032 return ret;
3033 }
3034
3035 /* If an indirect edge IE can be turned into a direct one based on constants
3036 in KNOWN_CSTS, polymorphic call contexts in KNOWN_CONTEXTS and aggregate
3037 values in AVS, return the destination. Store into *SPECULATIVE whether the
3038 discovered target is only a speculative guess. */
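
/* A hypothetical illustration of the non-polymorphic case handled below: if
   a caller does

     apply (process_item, data);

   and the lattice of APPLY's first parameter only contains the constant
   &process_item, then an indirect call made through that parameter inside
   APPLY can be resolved to a direct call to process_item.  */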
3039
3040 static tree
3041 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3042 const vec<tree> &known_csts,
3043 const vec<ipa_polymorphic_call_context> &known_contexts,
3044 const ipa_argagg_value_list &avs,
3045 bool *speculative)
3046 {
3047 int param_index = ie->indirect_info->param_index;
3048 HOST_WIDE_INT anc_offset;
3049 tree t = NULL;
3050 tree target = NULL;
3051
3052 *speculative = false;
3053
3054 if (param_index == -1)
3055 return NULL_TREE;
3056
3057 if (!ie->indirect_info->polymorphic)
3058 {
3059 tree t = NULL;
3060
3061 if (ie->indirect_info->agg_contents)
3062 {
3063 t = NULL;
3064 if ((unsigned) param_index < known_csts.length ()
3065 && known_csts[param_index])
3066 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3067 ie->indirect_info->offset,
3068 ie->indirect_info->by_ref);
3069
3070 if (!t && ie->indirect_info->guaranteed_unmodified)
3071 t = avs.get_value (param_index,
3072 ie->indirect_info->offset / BITS_PER_UNIT,
3073 ie->indirect_info->by_ref);
3074 }
3075 else if ((unsigned) param_index < known_csts.length ())
3076 t = known_csts[param_index];
3077
3078 if (t
3079 && TREE_CODE (t) == ADDR_EXPR
3080 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3081 return TREE_OPERAND (t, 0);
3082 else
3083 return NULL_TREE;
3084 }
3085
3086 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3087 return NULL_TREE;
3088
3089 gcc_assert (!ie->indirect_info->agg_contents);
3090 gcc_assert (!ie->indirect_info->by_ref);
3091 anc_offset = ie->indirect_info->offset;
3092
3093 t = NULL;
3094
3095 if ((unsigned) param_index < known_csts.length ()
3096 && known_csts[param_index])
3097 t = ipa_find_agg_cst_from_init (known_csts[param_index],
3098 ie->indirect_info->offset, true);
3099
3100 /* Try to work out the value of the virtual table pointer in replacements
3101 or known aggregate values. */
3102 if (!t)
3103 t = avs.get_value (param_index,
3104 ie->indirect_info->offset / BITS_PER_UNIT,
3105 true);
3106
3107 /* If we found the virtual table pointer, lookup the target. */
3108 if (t)
3109 {
3110 tree vtable;
3111 unsigned HOST_WIDE_INT offset;
3112 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3113 {
3114 bool can_refer;
3115 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3116 vtable, offset, &can_refer);
3117 if (can_refer)
3118 {
3119 if (!target
3120 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3121 || !possible_polymorphic_call_target_p
3122 (ie, cgraph_node::get (target)))
3123 {
3124 /* Do not speculate builtin_unreachable, it is stupid! */
3125 if (ie->indirect_info->vptr_changed)
3126 return NULL;
3127 target = ipa_impossible_devirt_target (ie, target);
3128 }
3129 *speculative = ie->indirect_info->vptr_changed;
3130 if (!*speculative)
3131 return target;
3132 }
3133 }
3134 }
3135
3136 /* Do we know the constant value of the pointer? */
3137 if (!t && (unsigned) param_index < known_csts.length ())
3138 t = known_csts[param_index];
3139
3140 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3141
3142 ipa_polymorphic_call_context context;
3143 if (known_contexts.length () > (unsigned int) param_index)
3144 {
3145 context = known_contexts[param_index];
3146 context.offset_by (anc_offset);
3147 if (ie->indirect_info->vptr_changed)
3148 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3149 ie->indirect_info->otr_type);
3150 if (t)
3151 {
3152 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3153 (t, ie->indirect_info->otr_type, anc_offset);
3154 if (!ctx2.useless_p ())
3155 context.combine_with (ctx2, ie->indirect_info->otr_type);
3156 }
3157 }
3158 else if (t)
3159 {
3160 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3161 anc_offset);
3162 if (ie->indirect_info->vptr_changed)
3163 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3164 ie->indirect_info->otr_type);
3165 }
3166 else
3167 return NULL_TREE;
3168
3169 vec <cgraph_node *>targets;
3170 bool final;
3171
3172 targets = possible_polymorphic_call_targets
3173 (ie->indirect_info->otr_type,
3174 ie->indirect_info->otr_token,
3175 context, &final);
3176 if (!final || targets.length () > 1)
3177 {
3178 struct cgraph_node *node;
3179 if (*speculative)
3180 return target;
3181 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3182 || ie->speculative || !ie->maybe_hot_p ())
3183 return NULL;
3184 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3185 ie->indirect_info->otr_token,
3186 context);
3187 if (node)
3188 {
3189 *speculative = true;
3190 target = node->decl;
3191 }
3192 else
3193 return NULL;
3194 }
3195 else
3196 {
3197 *speculative = false;
3198 if (targets.length () == 1)
3199 target = targets[0]->decl;
3200 else
3201 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3202 }
3203
3204 if (target && !possible_polymorphic_call_target_p (ie,
3205 cgraph_node::get (target)))
3206 {
3207 if (*speculative)
3208 return NULL;
3209 target = ipa_impossible_devirt_target (ie, target);
3210 }
3211
3212 return target;
3213 }
3214
3215 /* If an indirect edge IE can be turned into a direct one based on data in
3216 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3217 whether the discovered target is only a speculative guess. */
3218
3219 tree
3220 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3221 ipa_call_arg_values *avals,
3222 bool *speculative)
3223 {
3224 ipa_argagg_value_list avl (avals);
3225 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3226 avals->m_known_contexts,
3227 avl, speculative);
3228 }
3229
3230 /* Calculate devirtualization time bonus for NODE, assuming we know information
3231 about arguments stored in AVALS. */
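
/* A worked example with hypothetical numbers: if param_max_inline_insns_auto
   is 40, then for each indirect call whose target can be resolved, a
   non-speculative, inlinable callee of size 8 (<= 40 / 4) contributes
   1 + 31 = 32 to the bonus, while a speculative target of the same size
   contributes only 1 + 15 = 16 (the bonus is halved, rounding down).  */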
3232
3233 static int
3234 devirtualization_time_bonus (struct cgraph_node *node,
3235 ipa_auto_call_arg_values *avals)
3236 {
3237 struct cgraph_edge *ie;
3238 int res = 0;
3239
3240 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3241 {
3242 struct cgraph_node *callee;
3243 class ipa_fn_summary *isummary;
3244 enum availability avail;
3245 tree target;
3246 bool speculative;
3247
3248 ipa_argagg_value_list avl (avals);
3249 target = ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3250 avals->m_known_contexts,
3251 avl, &speculative);
3252 if (!target)
3253 continue;
3254
3255 /* Only bare minimum benefit for clearly un-inlineable targets. */
3256 res += 1;
3257 callee = cgraph_node::get (target);
3258 if (!callee || !callee->definition)
3259 continue;
3260 callee = callee->function_symbol (&avail);
3261 if (avail < AVAIL_AVAILABLE)
3262 continue;
3263 isummary = ipa_fn_summaries->get (callee);
3264 if (!isummary || !isummary->inlinable)
3265 continue;
3266
3267 int size = ipa_size_summaries->get (callee)->size;
3268 /* FIXME: The values below need re-considering and perhaps also
3269 integrating into the cost metrics, at least in some very basic way. */
3270 int max_inline_insns_auto
3271 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3272 if (size <= max_inline_insns_auto / 4)
3273 res += 31 / ((int)speculative + 1);
3274 else if (size <= max_inline_insns_auto / 2)
3275 res += 15 / ((int)speculative + 1);
3276 else if (size <= max_inline_insns_auto
3277 || DECL_DECLARED_INLINE_P (callee->decl))
3278 res += 7 / ((int)speculative + 1);
3279 }
3280
3281 return res;
3282 }
3283
3284 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3285
3286 static int
3287 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3288 {
3289 int result = 0;
3290 ipa_hints hints = estimates.hints;
3291 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3292 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3293
3294 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3295
3296 if (hints & INLINE_HINT_loop_iterations)
3297 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3298
3299 if (hints & INLINE_HINT_loop_stride)
3300 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3301
3302 return result;
3303 }
3304
3305 /* If there is a reason to penalize the function described by NODE and INFO in
3306 the cloning goodness evaluation, scale down EVALUATION accordingly and return it. */
3307
3308 static inline sreal
3309 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3310 sreal evaluation)
3311 {
3312 if (info->node_within_scc && !info->node_is_self_scc)
3313 evaluation = (evaluation
3314 * (100 - opt_for_fn (node->decl,
3315 param_ipa_cp_recursion_penalty))) / 100;
3316
3317 if (info->node_calling_single_call)
3318 evaluation = (evaluation
3319 * (100 - opt_for_fn (node->decl,
3320 param_ipa_cp_single_call_penalty)))
3321 / 100;
3322
3323 return evaluation;
3324 }
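
/* For example (hypothetical numbers): with param_ipa_cp_recursion_penalty set
   to 40, a node that is part of a non-trivial SCC but does not only call
   itself has its evaluation scaled by (100 - 40) / 100, i.e. reduced to 60%
   of its original value, before it is compared against
   param_ipa_cp_eval_threshold.  */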
3325
3326 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3327 and SIZE_COST, the sum of frequencies of incoming edges to the potential new
3328 clone in FREQ_SUM and the sum of their profile counts in COUNT_SUM. */
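
/* A worked example with hypothetical numbers for the no-profile case: with
   time_benefit = 10, freq_sum = 2.0 and size_cost = 40, the raw evaluation
   is 10 * 2.0 / 40 = 0.5; after applying any penalties and scaling by 1000
   it becomes at most 500, so cloning is deemed worthwhile only if
   param_ipa_cp_eval_threshold does not exceed 500.  */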
3329
3330 static bool
3331 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3332 sreal freq_sum, profile_count count_sum,
3333 int size_cost)
3334 {
3335 if (time_benefit == 0
3336 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3337 || node->optimize_for_size_p ())
3338 return false;
3339
3340 gcc_assert (size_cost > 0);
3341
3342 ipa_node_params *info = ipa_node_params_sum->get (node);
3343 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3344 if (count_sum.nonzero_p ())
3345 {
3346 gcc_assert (base_count.nonzero_p ());
3347 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3348 sreal evaluation = (time_benefit * factor) / size_cost;
3349 evaluation = incorporate_penalties (node, info, evaluation);
3350 evaluation *= 1000;
3351
3352 if (dump_file && (dump_flags & TDF_DETAILS))
3353 {
3354 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3355 "size: %i, count_sum: ", time_benefit.to_double (),
3356 size_cost);
3357 count_sum.dump (dump_file);
3358 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3359 info->node_within_scc
3360 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3361 info->node_calling_single_call ? ", single_call" : "",
3362 evaluation.to_double (), eval_threshold);
3363 }
3364
3365 return evaluation.to_int () >= eval_threshold;
3366 }
3367 else
3368 {
3369 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3370 evaluation = incorporate_penalties (node, info, evaluation);
3371 evaluation *= 1000;
3372
3373 if (dump_file && (dump_flags & TDF_DETAILS))
3374 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3375 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3376 "threshold: %i\n",
3377 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3378 info->node_within_scc
3379 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3380 info->node_calling_single_call ? ", single_call" : "",
3381 evaluation.to_double (), eval_threshold);
3382
3383 return evaluation.to_int () >= eval_threshold;
3384 }
3385 }
3386
3387 /* Grow vectors in AVALS and fill them with information about values of
3388 parameters that are known to be independent of the context. Only calculate
3389 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3390 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3391 parameters will be stored in it.
3392
3393 TODO: Also grow context independent value range vectors. */
3394
3395 static bool
3396 gather_context_independent_values (class ipa_node_params *info,
3397 ipa_auto_call_arg_values *avals,
3398 bool calculate_aggs,
3399 int *removable_params_cost)
3400 {
3401 int i, count = ipa_get_param_count (info);
3402 bool ret = false;
3403
3404 avals->m_known_vals.safe_grow_cleared (count, true);
3405 avals->m_known_contexts.safe_grow_cleared (count, true);
3406
3407 if (removable_params_cost)
3408 *removable_params_cost = 0;
3409
3410 for (i = 0; i < count; i++)
3411 {
3412 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3413 ipcp_lattice<tree> *lat = &plats->itself;
3414
3415 if (lat->is_single_const ())
3416 {
3417 ipcp_value<tree> *val = lat->values;
3418 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3419 avals->m_known_vals[i] = val->value;
3420 if (removable_params_cost)
3421 *removable_params_cost
3422 += estimate_move_cost (TREE_TYPE (val->value), false);
3423 ret = true;
3424 }
3425 else if (removable_params_cost
3426 && !ipa_is_param_used (info, i))
3427 *removable_params_cost
3428 += ipa_get_param_move_cost (info, i);
3429
3430 if (!ipa_is_param_used (info, i))
3431 continue;
3432
3433 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3434 /* Do not account known context as reason for cloning. We can see
3435 if it permits devirtualization. */
3436 if (ctxlat->is_single_const ())
3437 avals->m_known_contexts[i] = ctxlat->values->value;
3438
3439 if (calculate_aggs)
3440 ret |= push_agg_values_from_plats (plats, i, 0, &avals->m_known_aggs);
3441 }
3442
3443 return ret;
3444 }
3445
3446 /* Perform time and size measurement of NODE with the context given in AVALS,
3447 calculate the benefit compared to the node without specialization and store
3448 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3449 context-independent or unused removable parameters and EST_MOVE_COST, the
3450 estimated movement cost of the considered parameter. */
3451
3452 static void
3453 perform_estimation_of_a_value (cgraph_node *node,
3454 ipa_auto_call_arg_values *avals,
3455 int removable_params_cost, int est_move_cost,
3456 ipcp_value_base *val)
3457 {
3458 sreal time_benefit;
3459 ipa_call_estimates estimates;
3460
3461 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3462
3463 /* Extern inline functions have no cloning local time benefits because they
3464 will be inlined anyway. The only reason to clone them is if it enables
3465 optimization in any of the functions they call. */
3466 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3467 time_benefit = 0;
3468 else
3469 time_benefit = (estimates.nonspecialized_time - estimates.time)
3470 + (devirtualization_time_bonus (node, avals)
3471 + hint_time_bonus (node, estimates)
3472 + removable_params_cost + est_move_cost);
3473
3474 int size = estimates.size;
3475 gcc_checking_assert (size >=0);
3476 /* The inliner-heuristics based estimates may think that in certain
3477 contexts some functions do not have any size at all but we want
3478 all specializations to have at least a tiny cost, if only to avoid
3479 dividing by zero. */
3480 if (size == 0)
3481 size = 1;
3482
3483 val->local_time_benefit = time_benefit;
3484 val->local_size_cost = size;
3485 }
3486
3487 /* Get the overall limit of growth based on parameters extracted from NODE.
3488 It does not really make sense to mix functions with different overall growth
3489 limits, but it is possible, and if it happens, we do not want to select one
3490 limit at random. */
3491
3492 static long
3493 get_max_overall_size (cgraph_node *node)
3494 {
3495 long max_new_size = orig_overall_size;
3496 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3497 if (max_new_size < large_unit)
3498 max_new_size = large_unit;
3499 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3500 max_new_size += max_new_size * unit_growth / 100 + 1;
3501 return max_new_size;
3502 }
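
/* For instance (hypothetical numbers): with orig_overall_size = 20000,
   param_ipa_cp_large_unit_insns = 16000 and param_ipa_cp_unit_growth = 10,
   the function returns 20000 + 20000 * 10 / 100 + 1 = 22001.  */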
3503
3504 /* Return true if NODE should be cloned just for a parameter removal, possibly
3505 dumping a reason if not. */
3506
3507 static bool
3508 clone_for_param_removal_p (cgraph_node *node)
3509 {
3510 if (!node->can_change_signature)
3511 {
3512 if (dump_file && (dump_flags & TDF_DETAILS))
3513 fprintf (dump_file, " Not considering cloning to remove parameters, "
3514 "function cannot change signature.\n");
3515 return false;
3516 }
3517 if (node->can_be_local_p ())
3518 {
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 fprintf (dump_file, " Not considering cloning to remove parameters, "
3521 "IPA-SRA can do it potentially better.\n");
3522 return false;
3523 }
3524 return true;
3525 }
3526
3527 /* Iterate over known values of parameters of NODE and estimate the local
3528 effects they have in terms of time and size. */
3529
3530 static void
3531 estimate_local_effects (struct cgraph_node *node)
3532 {
3533 ipa_node_params *info = ipa_node_params_sum->get (node);
3534 int count = ipa_get_param_count (info);
3535 bool always_const;
3536 int removable_params_cost;
3537
3538 if (!count || !ipcp_versionable_function_p (node))
3539 return;
3540
3541 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3543
3544 ipa_auto_call_arg_values avals;
3545 always_const = gather_context_independent_values (info, &avals, true,
3546 &removable_params_cost);
3547 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3548 if (always_const || devirt_bonus
3549 || (removable_params_cost && clone_for_param_removal_p (node)))
3550 {
3551 struct caller_statistics stats;
3552 ipa_call_estimates estimates;
3553
3554 init_caller_stats (&stats);
3555 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3556 false);
3557 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3558 sreal time = estimates.nonspecialized_time - estimates.time;
3559 time += devirt_bonus;
3560 time += hint_time_bonus (node, estimates);
3561 time += removable_params_cost;
3562 int size = estimates.size - stats.n_calls * removable_params_cost;
3563
3564 if (dump_file)
3565 fprintf (dump_file, " - context independent values, size: %i, "
3566 "time_benefit: %f\n", size, (time).to_double ());
3567
3568 if (size <= 0 || node->local)
3569 {
3570 info->do_clone_for_all_contexts = true;
3571
3572 if (dump_file)
3573 fprintf (dump_file, " Decided to specialize for all "
3574 "known contexts, code not going to grow.\n");
3575 }
3576 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3577 stats.count_sum, size))
3578 {
3579 if (size + overall_size <= get_max_overall_size (node))
3580 {
3581 info->do_clone_for_all_contexts = true;
3582 overall_size += size;
3583
3584 if (dump_file)
3585 fprintf (dump_file, " Decided to specialize for all "
3586 "known contexts, growth (to %li) deemed "
3587 "beneficial.\n", overall_size);
3588 }
3589 else if (dump_file && (dump_flags & TDF_DETAILS))
3590 fprintf (dump_file, " Not cloning for all contexts because "
3591 "maximum unit size would be reached with %li.\n",
3592 size + overall_size);
3593 }
3594 else if (dump_file && (dump_flags & TDF_DETAILS))
3595 fprintf (dump_file, " Not cloning for all contexts because "
3596 "!good_cloning_opportunity_p.\n");
3597
3598 }
3599
3600 for (int i = 0; i < count; i++)
3601 {
3602 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3603 ipcp_lattice<tree> *lat = &plats->itself;
3604 ipcp_value<tree> *val;
3605
3606 if (lat->bottom
3607 || !lat->values
3608 || avals.m_known_vals[i])
3609 continue;
3610
3611 for (val = lat->values; val; val = val->next)
3612 {
3613 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3614 avals.m_known_vals[i] = val->value;
3615
3616 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3617 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3618 emc, val);
3619
3620 if (dump_file && (dump_flags & TDF_DETAILS))
3621 {
3622 fprintf (dump_file, " - estimates for value ");
3623 print_ipcp_constant_value (dump_file, val->value);
3624 fprintf (dump_file, " for ");
3625 ipa_dump_param (dump_file, info, i);
3626 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3627 val->local_time_benefit.to_double (),
3628 val->local_size_cost);
3629 }
3630 }
3631 avals.m_known_vals[i] = NULL_TREE;
3632 }
3633
3634 for (int i = 0; i < count; i++)
3635 {
3636 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3637
3638 if (!plats->virt_call)
3639 continue;
3640
3641 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3642 ipcp_value<ipa_polymorphic_call_context> *val;
3643
3644 if (ctxlat->bottom
3645 || !ctxlat->values
3646 || !avals.m_known_contexts[i].useless_p ())
3647 continue;
3648
3649 for (val = ctxlat->values; val; val = val->next)
3650 {
3651 avals.m_known_contexts[i] = val->value;
3652 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3653 0, val);
3654
3655 if (dump_file && (dump_flags & TDF_DETAILS))
3656 {
3657 fprintf (dump_file, " - estimates for polymorphic context ");
3658 print_ipcp_constant_value (dump_file, val->value);
3659 fprintf (dump_file, " for ");
3660 ipa_dump_param (dump_file, info, i);
3661 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3662 val->local_time_benefit.to_double (),
3663 val->local_size_cost);
3664 }
3665 }
3666 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3667 }
3668
3669 unsigned all_ctx_len = avals.m_known_aggs.length ();
3670 auto_vec<ipa_argagg_value, 32> all_ctx;
3671 all_ctx.reserve_exact (all_ctx_len);
3672 all_ctx.splice (avals.m_known_aggs);
3673 avals.m_known_aggs.safe_grow_cleared (all_ctx_len + 1);
3674
3675 unsigned j = 0;
3676 for (int index = 0; index < count; index++)
3677 {
3678 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, index);
3679
3680 if (plats->aggs_bottom || !plats->aggs)
3681 continue;
3682
3683 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3684 {
3685 ipcp_value<tree> *val;
3686 if (aglat->bottom || !aglat->values
3687 /* If the following is true, the one value is already part of all
3688 context estimations. */
3689 || (!plats->aggs_contain_variable
3690 && aglat->is_single_const ()))
3691 continue;
3692
3693 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3694 while (j < all_ctx_len
3695 && (all_ctx[j].index < index
3696 || (all_ctx[j].index == index
3697 && all_ctx[j].unit_offset < unit_offset)))
3698 {
3699 avals.m_known_aggs[j] = all_ctx[j];
3700 j++;
3701 }
3702
3703 for (unsigned k = j; k < all_ctx_len; k++)
3704 avals.m_known_aggs[k+1] = all_ctx[k];
3705
3706 for (val = aglat->values; val; val = val->next)
3707 {
3708 avals.m_known_aggs[j].value = val->value;
3709 avals.m_known_aggs[j].unit_offset = unit_offset;
3710 avals.m_known_aggs[j].index = index;
3711 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3712 avals.m_known_aggs[j].killed = false;
3713
3714 perform_estimation_of_a_value (node, &avals,
3715 removable_params_cost, 0, val);
3716
3717 if (dump_file && (dump_flags & TDF_DETAILS))
3718 {
3719 fprintf (dump_file, " - estimates for value ");
3720 print_ipcp_constant_value (dump_file, val->value);
3721 fprintf (dump_file, " for ");
3722 ipa_dump_param (dump_file, info, index);
3723 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3724 "]: time_benefit: %g, size: %i\n",
3725 plats->aggs_by_ref ? "ref " : "",
3726 aglat->offset,
3727 val->local_time_benefit.to_double (),
3728 val->local_size_cost);
3729 }
3730 }
3731 }
3732 }
3733 }
3734
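/* A purely hypothetical illustration of what the estimates above try to
   capture: if every known context passes n == 8 to the function below,
   specializing it for that value turns the loop step and the division into
   compile-time constants, so the estimated time benefit is substantial while
   the size cost is roughly one extra copy of the body.

     int
     sum_every_nth (const int *a, int len, int n)
     {
       int s = 0;
       for (int i = 0; i < len; i += n)
         s += a[i] / n;
       return s;
     }
*/
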
3735
3736 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3737 topological sort of values. */
3738
3739 template <typename valtype>
3740 void
3741 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3742 {
3743 ipcp_value_source<valtype> *src;
3744
3745 if (cur_val->dfs)
3746 return;
3747
3748 dfs_counter++;
3749 cur_val->dfs = dfs_counter;
3750 cur_val->low_link = dfs_counter;
3751
3752 cur_val->topo_next = stack;
3753 stack = cur_val;
3754 cur_val->on_stack = true;
3755
3756 for (src = cur_val->sources; src; src = src->next)
3757 if (src->val)
3758 {
3759 if (src->val->dfs == 0)
3760 {
3761 add_val (src->val);
3762 if (src->val->low_link < cur_val->low_link)
3763 cur_val->low_link = src->val->low_link;
3764 }
3765 else if (src->val->on_stack
3766 && src->val->dfs < cur_val->low_link)
3767 cur_val->low_link = src->val->dfs;
3768 }
3769
3770 if (cur_val->dfs == cur_val->low_link)
3771 {
3772 ipcp_value<valtype> *v, *scc_list = NULL;
3773
3774 do
3775 {
3776 v = stack;
3777 stack = v->topo_next;
3778 v->on_stack = false;
3779 v->scc_no = cur_val->dfs;
3780
3781 v->scc_next = scc_list;
3782 scc_list = v;
3783 }
3784 while (v != cur_val);
3785
3786 cur_val->topo_next = values_topo;
3787 values_topo = cur_val;
3788 }
3789 }
3790
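/* A sketch of why values can form cycles here (both functions are made up):
   in

     int g (int i);

     int
     f (int i)
     {
       return i ? g (i) + 1 : 0;
     }

     int
     g (int i)
     {
       return i ? f (i) + 2 : 0;
     }

   a constant value of I in f is a source of the same constant value of I in
   g and vice versa, so the two ipcp_values form one SCC, which the DFS above
   (Tarjan's algorithm) identifies.  */
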
3791 /* Add all values in lattices associated with NODE to the topological sort if
3792 they are not there yet. */
3793
3794 static void
3795 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3796 {
3797 ipa_node_params *info = ipa_node_params_sum->get (node);
3798 int i, count = ipa_get_param_count (info);
3799
3800 for (i = 0; i < count; i++)
3801 {
3802 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3803 ipcp_lattice<tree> *lat = &plats->itself;
3804 struct ipcp_agg_lattice *aglat;
3805
3806 if (!lat->bottom)
3807 {
3808 ipcp_value<tree> *val;
3809 for (val = lat->values; val; val = val->next)
3810 topo->constants.add_val (val);
3811 }
3812
3813 if (!plats->aggs_bottom)
3814 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3815 if (!aglat->bottom)
3816 {
3817 ipcp_value<tree> *val;
3818 for (val = aglat->values; val; val = val->next)
3819 topo->constants.add_val (val);
3820 }
3821
3822 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3823 if (!ctxlat->bottom)
3824 {
3825 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3826 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3827 topo->contexts.add_val (ctxval);
3828 }
3829 }
3830 }
3831
3832 /* One pass of constant propagation along the call graph edges, from callers
3833 to callees (requires topological ordering in TOPO), iterating over strongly
3834 connected components. */
3835
3836 static void
3837 propagate_constants_topo (class ipa_topo_info *topo)
3838 {
3839 int i;
3840
3841 for (i = topo->nnodes - 1; i >= 0; i--)
3842 {
3843 unsigned j;
3844 struct cgraph_node *v, *node = topo->order[i];
3845 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3846
3847 /* First, iteratively propagate within the strongly connected component
3848 until all lattices stabilize. */
3849 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3850 if (v->has_gimple_body_p ())
3851 {
3852 if (opt_for_fn (v->decl, flag_ipa_cp)
3853 && opt_for_fn (v->decl, optimize))
3854 push_node_to_stack (topo, v);
3855 /* When V is not optimized, we cannot push it to the stack, but
3856 we still need to set the lattices of all its callees to bottom. */
3857 else
3858 {
3859 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3860 propagate_constants_across_call (cs);
3861 }
3862 }
3863
3864 v = pop_node_from_stack (topo);
3865 while (v)
3866 {
3867 struct cgraph_edge *cs;
3868 class ipa_node_params *info = NULL;
3869 bool self_scc = true;
3870
3871 for (cs = v->callees; cs; cs = cs->next_callee)
3872 if (ipa_edge_within_scc (cs))
3873 {
3874 cgraph_node *callee = cs->callee->function_symbol ();
3875
3876 if (v != callee)
3877 self_scc = false;
3878
3879 if (!info)
3880 {
3881 info = ipa_node_params_sum->get (v);
3882 info->node_within_scc = true;
3883 }
3884
3885 if (propagate_constants_across_call (cs))
3886 push_node_to_stack (topo, callee);
3887 }
3888
3889 if (info)
3890 info->node_is_self_scc = self_scc;
3891
3892 v = pop_node_from_stack (topo);
3893 }
3894
3895 /* Afterwards, propagate along edges leading out of the SCC, calculate
3896 the local effects of the discovered constants and add all valid values to
3897 their topological sort. */
3898 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3899 if (v->has_gimple_body_p ()
3900 && opt_for_fn (v->decl, flag_ipa_cp)
3901 && opt_for_fn (v->decl, optimize))
3902 {
3903 struct cgraph_edge *cs;
3904
3905 estimate_local_effects (v);
3906 add_all_node_vals_to_toposort (v, topo);
3907 for (cs = v->callees; cs; cs = cs->next_callee)
3908 if (!ipa_edge_within_scc (cs))
3909 propagate_constants_across_call (cs);
3910 }
3911 cycle_nodes.release ();
3912 }
3913 }
3914
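/* For example (the call graph is invented): with main -> a, a <-> b and
   b -> c, the SCC {a, b} is first iterated internally until the lattices of
   both functions stop changing, then local effects are estimated for a and b,
   and only afterwards are constants propagated along the edge from b to c.  */
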
3915 /* Propagate the estimated effects of individual values along the topological
3916 order from the dependent values to those they depend on. */
3917
3918 template <typename valtype>
3919 void
3920 value_topo_info<valtype>::propagate_effects ()
3921 {
3922 ipcp_value<valtype> *base;
3923 hash_set<ipcp_value<valtype> *> processed_srcvals;
3924
3925 for (base = values_topo; base; base = base->topo_next)
3926 {
3927 ipcp_value_source<valtype> *src;
3928 ipcp_value<valtype> *val;
3929 sreal time = 0;
3930 HOST_WIDE_INT size = 0;
3931
3932 for (val = base; val; val = val->scc_next)
3933 {
3934 time = time + val->local_time_benefit + val->prop_time_benefit;
3935 size = size + val->local_size_cost + val->prop_size_cost;
3936 }
3937
3938 for (val = base; val; val = val->scc_next)
3939 {
3940 processed_srcvals.empty ();
3941 for (src = val->sources; src; src = src->next)
3942 if (src->val
3943 && src->cs->maybe_hot_p ())
3944 {
3945 if (!processed_srcvals.add (src->val))
3946 {
3947 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3948 if (prop_size < INT_MAX)
3949 src->val->prop_size_cost = prop_size;
3950 else
3951 continue;
3952 }
3953
3954 int special_factor = 1;
3955 if (val->same_scc (src->val))
3956 special_factor
3957 = opt_for_fn(src->cs->caller->decl,
3958 param_ipa_cp_recursive_freq_factor);
3959 else if (val->self_recursion_generated_p ()
3960 && (src->cs->callee->function_symbol ()
3961 == src->cs->caller))
3962 {
3963 int max_recur_gen_depth
3964 = opt_for_fn(src->cs->caller->decl,
3965 param_ipa_cp_max_recursive_depth);
3966 special_factor = max_recur_gen_depth
3967 - val->self_recursion_generated_level + 1;
3968 }
3969
3970 src->val->prop_time_benefit
3971 += time * special_factor * src->cs->sreal_frequency ();
3972 }
3973
3974 if (size < INT_MAX)
3975 {
3976 val->prop_time_benefit = time;
3977 val->prop_size_cost = size;
3978 }
3979 else
3980 {
3981 val->prop_time_benefit = 0;
3982 val->prop_size_cost = 0;
3983 }
3984 }
3985 }
3986 }
3987
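/* A small made-up example of the propagation above: if a value in a callee
   has local_time_benefit 10 and no further dependencies, and it is brought
   about only by a caller's value through a single hot edge with sreal
   frequency 2, then the caller's value accumulates a prop_time_benefit of
   (10 + 0) * 1 * 2 == 20 (the special factor being 1 because the two values
   are neither in the same SCC nor self-recursion-generated).  */
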
3988 /* Callback for qsort to sort counts of all edges in descending order. */
3989
3990 static int
3991 compare_edge_profile_counts (const void *a, const void *b)
3992 {
3993 const profile_count *cnt1 = (const profile_count *) a;
3994 const profile_count *cnt2 = (const profile_count *) b;
3995
3996 if (*cnt1 < *cnt2)
3997 return 1;
3998 if (*cnt1 > *cnt2)
3999 return -1;
4000 return 0;
4001 }
4002
4003
4004 /* Propagate constants, polymorphic contexts and their effects from the
4005 summaries interprocedurally. */
4006
4007 static void
4008 ipcp_propagate_stage (class ipa_topo_info *topo)
4009 {
4010 struct cgraph_node *node;
4011
4012 if (dump_file)
4013 fprintf (dump_file, "\n Propagating constants:\n\n");
4014
4015 base_count = profile_count::uninitialized ();
4016
4017 bool compute_count_base = false;
4018 unsigned base_count_pos_percent = 0;
4019 FOR_EACH_DEFINED_FUNCTION (node)
4020 {
4021 if (node->has_gimple_body_p ()
4022 && opt_for_fn (node->decl, flag_ipa_cp)
4023 && opt_for_fn (node->decl, optimize))
4024 {
4025 ipa_node_params *info = ipa_node_params_sum->get (node);
4026 determine_versionability (node, info);
4027
4028 unsigned nlattices = ipa_get_param_count (info);
4029 info->lattices.safe_grow_cleared (nlattices, true);
4030 initialize_node_lattices (node);
4031 }
4032 ipa_size_summary *s = ipa_size_summaries->get (node);
4033 if (node->definition && !node->alias && s != NULL)
4034 overall_size += s->self_size;
4035 if (node->count.ipa ().initialized_p ())
4036 {
4037 compute_count_base = true;
4038 unsigned pos_percent = opt_for_fn (node->decl,
4039 param_ipa_cp_profile_count_base);
4040 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4041 }
4042 }
4043
4044 if (compute_count_base)
4045 {
4046 auto_vec<profile_count> all_edge_counts;
4047 all_edge_counts.reserve_exact (symtab->edges_count);
4048 FOR_EACH_DEFINED_FUNCTION (node)
4049 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4050 {
4051 profile_count count = cs->count.ipa ();
4052 if (!count.nonzero_p ())
4053 continue;
4054
4055 enum availability avail;
4056 cgraph_node *tgt
4057 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4058 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4059 if (info && info->versionable)
4060 all_edge_counts.quick_push (count);
4061 }
4062
4063 if (!all_edge_counts.is_empty ())
4064 {
4065 gcc_assert (base_count_pos_percent <= 100);
4066 all_edge_counts.qsort (compare_edge_profile_counts);
4067
4068 unsigned base_count_pos
4069 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4070 base_count = all_edge_counts[base_count_pos];
4071
4072 if (dump_file)
4073 {
4074 fprintf (dump_file, "\nSelected base_count from %u edges at "
4075 "position %u, arriving at: ", all_edge_counts.length (),
4076 base_count_pos);
4077 base_count.dump (dump_file);
4078 fprintf (dump_file, "\n");
4079 }
4080 }
4081 else if (dump_file)
4082 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4083 "continuing as if without profile feedback.\n");
4084 }
4085
4086 orig_overall_size = overall_size;
4087
4088 if (dump_file)
4089 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4090
4091 propagate_constants_topo (topo);
4092 if (flag_checking)
4093 ipcp_verify_propagated_values ();
4094 topo->constants.propagate_effects ();
4095 topo->contexts.propagate_effects ();
4096
4097 if (dump_file)
4098 {
4099 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4100 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4101 }
4102 }
4103
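/* An example of the base_count selection above (the counts are invented):
   with candidate edge IPA counts {1000, 400, 300, 200, 100, 50, 40, 30, 20,
   10} already sorted in descending order and
   --param ipa-cp-profile-count-base == 10, base_count_pos is
   10 * 10 / 100 == 1 and base_count becomes 400.  */
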
4104 /* Discover newly direct outgoing edges from NODE which is a new clone with
4105 known KNOWN_CSTS and make them direct. */
4106
4107 static void
4108 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4109 vec<tree> known_csts,
4110 vec<ipa_polymorphic_call_context>
4111 known_contexts,
4112 vec<ipa_argagg_value, va_gc> *aggvals)
4113 {
4114 struct cgraph_edge *ie, *next_ie;
4115 bool found = false;
4116
4117 for (ie = node->indirect_calls; ie; ie = next_ie)
4118 {
4119 tree target;
4120 bool speculative;
4121
4122 next_ie = ie->next_callee;
4123 ipa_argagg_value_list avs (aggvals);
4124 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4125 avs, &speculative);
4126 if (target)
4127 {
4128 bool agg_contents = ie->indirect_info->agg_contents;
4129 bool polymorphic = ie->indirect_info->polymorphic;
4130 int param_index = ie->indirect_info->param_index;
4131 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4132 speculative);
4133 found = true;
4134
4135 if (cs && !agg_contents && !polymorphic)
4136 {
4137 ipa_node_params *info = ipa_node_params_sum->get (node);
4138 int c = ipa_get_controlled_uses (info, param_index);
4139 if (c != IPA_UNDESCRIBED_USE
4140 && !ipa_get_param_load_dereferenced (info, param_index))
4141 {
4142 struct ipa_ref *to_del;
4143
4144 c--;
4145 ipa_set_controlled_uses (info, param_index, c);
4146 if (dump_file && (dump_flags & TDF_DETAILS))
4147 fprintf (dump_file, " controlled uses count of param "
4148 "%i bumped down to %i\n", param_index, c);
4149 if (c == 0
4150 && (to_del = node->find_reference (cs->callee, NULL, 0,
4151 IPA_REF_ADDR)))
4152 {
4153 if (dump_file && (dump_flags & TDF_DETAILS))
4154 fprintf (dump_file, " and even removing its "
4155 "cloning-created reference\n");
4156 to_del->remove_reference ();
4157 }
4158 }
4159 }
4160 }
4161 }
4162 /* Turning indirect calls into direct calls will improve the overall summary. */
4163 if (found)
4164 ipa_update_overall_fn_summary (node);
4165 }
4166
4167 class edge_clone_summary;
4168 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4169
4170 /* Edge clone summary. */
4171
4172 class edge_clone_summary
4173 {
4174 public:
4175 /* Default constructor. */
4176 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4177
4178 /* Default destructor. */
4179 ~edge_clone_summary ()
4180 {
4181 if (prev_clone)
4182 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4183 if (next_clone)
4184 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4185 }
4186
4187 cgraph_edge *prev_clone;
4188 cgraph_edge *next_clone;
4189 };
4190
4191 class edge_clone_summary_t:
4192 public call_summary <edge_clone_summary *>
4193 {
4194 public:
4195 edge_clone_summary_t (symbol_table *symtab):
4196 call_summary <edge_clone_summary *> (symtab)
4197 {
4198 m_initialize_when_cloning = true;
4199 }
4200
4201 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4202 edge_clone_summary *src_data,
4203 edge_clone_summary *dst_data) final override;
4204 };
4205
4206 /* Edge duplication hook. */
4207
4208 void
4209 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4210 edge_clone_summary *src_data,
4211 edge_clone_summary *dst_data)
4212 {
4213 if (src_data->next_clone)
4214 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4215 dst_data->prev_clone = src_edge;
4216 dst_data->next_clone = src_data->next_clone;
4217 src_data->next_clone = dst_edge;
4218 }
4219
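/* In other words, if an edge E already has clones chained as E -> E1 -> E2
   and E1 is duplicated into E1' (all edge names being purely illustrative),
   the hook above splices the copy right after its origin, yielding
   E -> E1 -> E1' -> E2.  */
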
4220 /* Return true if CS calls DEST or its clone for all contexts. When
4221 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4222 edges from/to an all-context clone. */
4223
4224 static bool
4225 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4226 bool allow_recursion_to_clone)
4227 {
4228 enum availability availability;
4229 cgraph_node *callee = cs->callee->function_symbol (&availability);
4230
4231 if (availability <= AVAIL_INTERPOSABLE)
4232 return false;
4233 if (callee == dest)
4234 return true;
4235 if (!allow_recursion_to_clone && cs->caller == callee)
4236 return false;
4237
4238 ipa_node_params *info = ipa_node_params_sum->get (callee);
4239 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4240 }
4241
4242 /* Return true if edge CS does bring about the value described by SRC to
4243 DEST_VAL of node DEST or its clone for all contexts. */
4244
4245 static bool
4246 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4247 cgraph_node *dest, ipcp_value<tree> *dest_val)
4248 {
4249 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4250
4251 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4252 || caller_info->node_dead)
4253 return false;
4254
4255 if (!src->val)
4256 return true;
4257
4258 if (caller_info->ipcp_orig_node)
4259 {
4260 tree t = NULL_TREE;
4261 if (src->offset == -1)
4262 t = caller_info->known_csts[src->index];
4263 else if (ipcp_transformation *ts
4264 = ipcp_get_transformation_summary (cs->caller))
4265 {
4266 ipa_argagg_value_list avl (ts);
4267 t = avl.get_value (src->index, src->offset / BITS_PER_UNIT);
4268 }
4269 return (t != NULL_TREE
4270 && values_equal_for_ipcp_p (src->val->value, t));
4271 }
4272 else
4273 {
4274 if (src->val == dest_val)
4275 return true;
4276
4277 struct ipcp_agg_lattice *aglat;
4278 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4279 src->index);
4280 if (src->offset == -1)
4281 return (plats->itself.is_single_const ()
4282 && values_equal_for_ipcp_p (src->val->value,
4283 plats->itself.values->value));
4284 else
4285 {
4286 if (plats->aggs_bottom || plats->aggs_contain_variable)
4287 return false;
4288 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4289 if (aglat->offset == src->offset)
4290 return (aglat->is_single_const ()
4291 && values_equal_for_ipcp_p (src->val->value,
4292 aglat->values->value));
4293 }
4294 return false;
4295 }
4296 }
4297
4298 /* Return true if edge CS does bring about the value described by SRC to
4299 DST_VAL of node DEST or its clone for all contexts. */
4300
4301 static bool
4302 cgraph_edge_brings_value_p (cgraph_edge *cs,
4303 ipcp_value_source<ipa_polymorphic_call_context> *src,
4304 cgraph_node *dest,
4305 ipcp_value<ipa_polymorphic_call_context> *)
4306 {
4307 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4308
4309 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4310 || caller_info->node_dead)
4311 return false;
4312 if (!src->val)
4313 return true;
4314
4315 if (caller_info->ipcp_orig_node)
4316 return (caller_info->known_contexts.length () > (unsigned) src->index)
4317 && values_equal_for_ipcp_p (src->val->value,
4318 caller_info->known_contexts[src->index]);
4319
4320 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4321 src->index);
4322 return plats->ctxlat.is_single_const ()
4323 && values_equal_for_ipcp_p (src->val->value,
4324 plats->ctxlat.values->value);
4325 }
4326
4327 /* Get the next clone in the linked list of clones of an edge. */
4328
4329 static inline struct cgraph_edge *
4330 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4331 {
4332 edge_clone_summary *s = edge_clone_summaries->get (cs);
4333 return s != NULL ? s->next_clone : NULL;
4334 }
4335
4336 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4337 of them is viable and hot, return true. In that case, for those that still
4338 hold, add their edge frequency and their number and cumulative profile
4339 counts of self-recursive and other edges into *FREQ_SUM, *CALLER_COUNT,
4340 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4341
4342 template <typename valtype>
4343 static bool
4344 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4345 sreal *freq_sum, int *caller_count,
4346 profile_count *rec_count_sum,
4347 profile_count *nonrec_count_sum)
4348 {
4349 ipcp_value_source<valtype> *src;
4350 sreal freq = 0;
4351 int count = 0;
4352 profile_count rec_cnt = profile_count::zero ();
4353 profile_count nonrec_cnt = profile_count::zero ();
4354 bool hot = false;
4355 bool non_self_recursive = false;
4356
4357 for (src = val->sources; src; src = src->next)
4358 {
4359 struct cgraph_edge *cs = src->cs;
4360 while (cs)
4361 {
4362 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4363 {
4364 count++;
4365 freq += cs->sreal_frequency ();
4366 hot |= cs->maybe_hot_p ();
4367 if (cs->caller != dest)
4368 {
4369 non_self_recursive = true;
4370 if (cs->count.ipa ().initialized_p ())
4371 nonrec_cnt += cs->count.ipa ();
4372 }
4373 else if (cs->count.ipa ().initialized_p ())
4374 rec_cnt += cs->count.ipa ();
4375 }
4376 cs = get_next_cgraph_edge_clone (cs);
4377 }
4378 }
4379
4380 /* If the only edges bringing a value are self-recursive ones, do not bother
4381 evaluating it. */
4382 if (!non_self_recursive)
4383 return false;
4384
4385 *freq_sum = freq;
4386 *caller_count = count;
4387 *rec_count_sum = rec_cnt;
4388 *nonrec_count_sum = nonrec_cnt;
4389
4390 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4391 {
4392 struct cgraph_edge *cs;
4393
4394 /* Cold non-SCC source edge could trigger hot recursive execution of
4395 function. Consider the case as hot and rely on following cost model
4396 computation to further select right one. */
4397 for (cs = dest->callers; cs; cs = cs->next_caller)
4398 if (cs->caller == dest && cs->maybe_hot_p ())
4399 return true;
4400 }
4401
4402 return hot;
4403 }
4404
4405 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4406 to let a non-self-recursive caller be the first element. Thus, we can
4407 simplify intersecting operations on values that arrive from all of these
4408 callers, especially when there exists self-recursive call. Return true if
4409 this kind of adjustment is possible. */
4410
4411 static bool
4412 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4413 cgraph_node *node)
4414 {
4415 for (unsigned i = 0; i < callers.length (); i++)
4416 {
4417 cgraph_edge *cs = callers[i];
4418
4419 if (cs->caller != node)
4420 {
4421 if (i > 0)
4422 {
4423 callers[i] = callers[0];
4424 callers[0] = cs;
4425 }
4426 return true;
4427 }
4428 }
4429 return false;
4430 }
4431
4432 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4433 is assumed their number is known and equal to CALLER_COUNT. */
4434
4435 template <typename valtype>
4436 static vec<cgraph_edge *>
4437 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4438 int caller_count)
4439 {
4440 ipcp_value_source<valtype> *src;
4441 vec<cgraph_edge *> ret;
4442
4443 ret.create (caller_count);
4444 for (src = val->sources; src; src = src->next)
4445 {
4446 struct cgraph_edge *cs = src->cs;
4447 while (cs)
4448 {
4449 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4450 ret.quick_push (cs);
4451 cs = get_next_cgraph_edge_clone (cs);
4452 }
4453 }
4454
4455 if (caller_count > 1)
4456 adjust_callers_for_value_intersection (ret, dest);
4457
4458 return ret;
4459 }
4460
4461 /* Construct a replacement map for a known VALUE of a formal parameter PARM_NUM.
4462 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4463 should be set to true when the reference created for the constant should be
4464 a load one and not an address one because the corresponding parameter p is
4465 only used as *p. */
4466
4467 static struct ipa_replace_map *
4468 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4469 bool force_load_ref)
4470 {
4471 struct ipa_replace_map *replace_map;
4472
4473 replace_map = ggc_alloc<ipa_replace_map> ();
4474 if (dump_file)
4475 {
4476 fprintf (dump_file, " replacing ");
4477 ipa_dump_param (dump_file, info, parm_num);
4478
4479 fprintf (dump_file, " with const ");
4480 print_generic_expr (dump_file, value);
4481
4482 if (force_load_ref)
4483 fprintf (dump_file, " - forcing load reference\n");
4484 else
4485 fprintf (dump_file, "\n");
4486 }
4487 replace_map->parm_num = parm_num;
4488 replace_map->new_tree = value;
4489 replace_map->force_load_ref = force_load_ref;
4490 return replace_map;
4491 }
4492
4493 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4494 one, otherwise it will be referred to as the original node. */
4495
4496 static void
4497 dump_profile_updates (cgraph_node *node, bool spec)
4498 {
4499 if (spec)
4500 fprintf (dump_file, " setting count of the specialized node %s to ",
4501 node->dump_name ());
4502 else
4503 fprintf (dump_file, " setting count of the original node %s to ",
4504 node->dump_name ());
4505
4506 node->count.dump (dump_file);
4507 fprintf (dump_file, "\n");
4508 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4509 {
4510 fprintf (dump_file, " edge to %s has count ",
4511 cs->callee->dump_name ());
4512 cs->count.dump (dump_file);
4513 fprintf (dump_file, "\n");
4514 }
4515 }
4516
4517 /* With a partial training run we do not want to assume that the original's
4518 count is zero whenever we redirect all executed edges to the clone. Simply
4519 drop the profile to a local one in this case. In any case, return the new
4520 value. ORIG_NODE is the original node; its count has not been updated yet. */
4521
4522 profile_count
4523 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4524 {
4525 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4526 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4527 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4528 remainder = remainder.guessed_local ();
4529
4530 return remainder;
4531 }
4532
4533 /* Structure to sum counts coming from nodes other than the original node and
4534 its clones. */
4535
4536 struct gather_other_count_struct
4537 {
4538 cgraph_node *orig;
4539 profile_count other_count;
4540 };
4541
4542 /* Worker callback of call_for_symbol_thunks_and_aliases summing up the
4543 profile counts that come from non-self-recursive calls. */
4544
4545 static bool
4546 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4547 {
4548 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4549 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4550 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4551 desc->other_count += cs->count.ipa ();
4552 return false;
4553 }
4554
4555 /* Structure to help analyze if we need to boost counts of some clones of some
4556 non-recursive edges to match the new callee count. */
4557
4558 struct desc_incoming_count_struct
4559 {
4560 cgraph_node *orig;
4561 hash_set <cgraph_edge *> *processed_edges;
4562 profile_count count;
4563 unsigned unproc_orig_rec_edges;
4564 };
4565
4566 /* Go over edges calling NODE and its thunks and gather information about
4567 incoming counts so that we know if we need to make any adjustments. */
4568
4569 static void
4570 analyze_clone_icoming_counts (cgraph_node *node,
4571 desc_incoming_count_struct *desc)
4572 {
4573 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4574 if (cs->caller->thunk)
4575 {
4576 analyze_clone_icoming_counts (cs->caller, desc);
4577 continue;
4578 }
4579 else
4580 {
4581 if (cs->count.initialized_p ())
4582 desc->count += cs->count.ipa ();
4583 if (!desc->processed_edges->contains (cs)
4584 && cs->caller->clone_of == desc->orig)
4585 desc->unproc_orig_rec_edges++;
4586 }
4587 }
4588
4589 /* If caller edge counts of a clone created for a self-recursive arithmetic
4590 jump function must be adjusted because they come from the "seed" clone
4591 for the first value and so have been excessively scaled back as if they were
4592 not recursive calls, adjust them so that the incoming counts of NODE match
4593 its count. NODE is the node or its thunk. */
4594
4595 static void
4596 adjust_clone_incoming_counts (cgraph_node *node,
4597 desc_incoming_count_struct *desc)
4598 {
4599 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4600 if (cs->caller->thunk)
4601 {
4602 adjust_clone_incoming_counts (cs->caller, desc);
4603 profile_count sum = profile_count::zero ();
4604 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4605 if (e->count.initialized_p ())
4606 sum += e->count.ipa ();
4607 cs->count = cs->count.combine_with_ipa_count (sum);
4608 }
4609 else if (!desc->processed_edges->contains (cs)
4610 && cs->caller->clone_of == desc->orig)
4611 {
4612 cs->count += desc->count;
4613 if (dump_file)
4614 {
4615 fprintf (dump_file, " Adjusted count of an incoming edge of "
4616 "a clone %s -> %s to ", cs->caller->dump_name (),
4617 cs->callee->dump_name ());
4618 cs->count.dump (dump_file);
4619 fprintf (dump_file, "\n");
4620 }
4621 }
4622 }
4623
4624 /* When ORIG_NODE has been cloned for values which have been generated for a
4625 self-recursive call as a result of an arithmetic pass-through
4626 jump-functions, adjust its count together with counts of all such clones in
4627 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4628
4629 The function sums the counts of the original node and all its clones that
4630 cannot be attributed to a specific clone because it comes from a
4631 non-recursive edge. This sum is then evenly divided between the clones and
4632 on top of that each one gets all the counts which can be attributed directly
4633 to it. */
4634
4635 static void
4636 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4637 const vec<cgraph_node *> &self_gen_clones)
4638 {
4639 profile_count redist_sum = orig_node->count.ipa ();
4640 if (!(redist_sum > profile_count::zero ()))
4641 return;
4642
4643 if (dump_file)
4644 fprintf (dump_file, " Updating profile of self recursive clone "
4645 "series\n");
4646
4647 gather_other_count_struct gocs;
4648 gocs.orig = orig_node;
4649 gocs.other_count = profile_count::zero ();
4650
4651 auto_vec <profile_count, 8> other_edges_count;
4652 for (cgraph_node *n : self_gen_clones)
4653 {
4654 gocs.other_count = profile_count::zero ();
4655 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4656 &gocs, false);
4657 other_edges_count.safe_push (gocs.other_count);
4658 redist_sum -= gocs.other_count;
4659 }
4660
4661 hash_set<cgraph_edge *> processed_edges;
4662 unsigned i = 0;
4663 for (cgraph_node *n : self_gen_clones)
4664 {
4665 profile_count orig_count = n->count;
4666 profile_count new_count
4667 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4668 new_count = lenient_count_portion_handling (new_count, orig_node);
4669 n->count = new_count;
4670 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4671 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4672 {
4673 cs->count = cs->count.apply_scale (new_count, orig_count);
4674 processed_edges.add (cs);
4675 }
4676 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4677 cs->count = cs->count.apply_scale (new_count, orig_count);
4678
4679 i++;
4680 }
4681
4682 /* There are still going to be edges to ORIG_NODE that have one or more
4683 clones coming from another node clone in SELF_GEN_CLONES and which we
4684 scaled by the same amount, which means that the total incoming sum of
4685 counts to ORIG_NODE will be too high; scale such edges back. */
4686 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4687 {
4688 if (cs->callee->ultimate_alias_target () == orig_node)
4689 {
4690 unsigned den = 0;
4691 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4692 if (e->callee->ultimate_alias_target () == orig_node
4693 && processed_edges.contains (e))
4694 den++;
4695 if (den > 0)
4696 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4697 if (e->callee->ultimate_alias_target () == orig_node
4698 && processed_edges.contains (e))
4699 e->count /= den;
4700 }
4701 }
4702
4703 /* Edges from the seeds of the values generated for arithmetic jump-functions
4704 along self-recursive edges are likely to have fairly low count and so
4705 edges from them to nodes in the self_gen_clones do not correspond to the
4706 artificially distributed count of the nodes, the total sum of incoming
4707 edges to some clones might be too low. Detect this situation and correct
4708 it. */
4709 for (cgraph_node *n : self_gen_clones)
4710 {
4711 if (!(n->count.ipa () > profile_count::zero ()))
4712 continue;
4713
4714 desc_incoming_count_struct desc;
4715 desc.orig = orig_node;
4716 desc.processed_edges = &processed_edges;
4717 desc.count = profile_count::zero ();
4718 desc.unproc_orig_rec_edges = 0;
4719 analyze_clone_icoming_counts (n, &desc);
4720
4721 if (n->count.differs_from_p (desc.count))
4722 {
4723 if (n->count > desc.count
4724 && desc.unproc_orig_rec_edges > 0)
4725 {
4726 desc.count = n->count - desc.count;
4727 desc.count /= desc.unproc_orig_rec_edges;
4728 adjust_clone_incoming_counts (n, &desc);
4729 }
4730 else if (dump_file)
4731 fprintf (dump_file,
4732 " Unable to fix up incoming counts for %s.\n",
4733 n->dump_name ());
4734 }
4735 }
4736
4737 if (dump_file)
4738 for (cgraph_node *n : self_gen_clones)
4739 dump_profile_updates (n, n != orig_node);
4740 return;
4741 }
4742
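/* A made-up numeric example of the distribution above: suppose ORIG_NODE has
   an IPA count of 900, the series consists of ORIG_NODE and two clones, and
   non-recursive edges from outside the series bring counts of 100 and 200 to
   the two clones (and nothing to ORIG_NODE).  Then redist_sum is
   900 - 100 - 200 == 600, each of the three nodes receives 600 / 3 == 200,
   and the two clones additionally keep their directly attributable 100 and
   200, ending up with counts of 200, 300 and 400 respectively.  */
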
4743 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4744 their profile information to reflect this. This function should not be used
4745 for clones generated for arithmetic pass-through jump functions on a
4746 self-recursive call graph edge, that situation is handled by
4747 update_counts_for_self_gen_clones. */
4748
4749 static void
4750 update_profiling_info (struct cgraph_node *orig_node,
4751 struct cgraph_node *new_node)
4752 {
4753 struct caller_statistics stats;
4754 profile_count new_sum;
4755 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4756
4757 if (!(orig_node_count > profile_count::zero ()))
4758 return;
4759
4760 if (dump_file)
4761 {
4762 fprintf (dump_file, " Updating profile from original count: ");
4763 orig_node_count.dump (dump_file);
4764 fprintf (dump_file, "\n");
4765 }
4766
4767 init_caller_stats (&stats, new_node);
4768 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4769 false);
4770 new_sum = stats.count_sum;
4771
4772 bool orig_edges_processed = false;
4773 if (new_sum > orig_node_count)
4774 {
4775 /* TODO: Profile has already gone astray, keep what we have but lower it
4776 to global0 category. */
4777 remainder = orig_node->count.global0 ();
4778
4779 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4780 cs->count = cs->count.global0 ();
4781 for (cgraph_edge *cs = orig_node->indirect_calls;
4782 cs;
4783 cs = cs->next_callee)
4784 cs->count = cs->count.global0 ();
4785 orig_edges_processed = true;
4786 }
4787 else if (stats.rec_count_sum.nonzero_p ())
4788 {
4789 int new_nonrec_calls = stats.n_nonrec_calls;
4790 /* There are self-recursive edges which are likely to bring in the
4791 majority of calls but which we must divide in between the original and
4792 new node. */
4793 init_caller_stats (&stats, orig_node);
4794 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4795 &stats, false);
4796 int orig_nonrec_calls = stats.n_nonrec_calls;
4797 profile_count orig_nonrec_call_count = stats.count_sum;
4798
4799 if (orig_node->local)
4800 {
4801 if (!orig_nonrec_call_count.nonzero_p ())
4802 {
4803 if (dump_file)
4804 fprintf (dump_file, " The original is local and the only "
4805 "incoming edges from non-dead callers with nonzero "
4806 "counts are self-recursive, assuming it is cold.\n");
4807 /* The NEW_NODE count and counts of all its outgoing edges
4808 are still unmodified copies of ORIG_NODE's. Just clear
4809 the latter and bail out. */
4810 profile_count zero;
4811 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4812 zero = profile_count::zero ().guessed_local ();
4813 else
4814 zero = profile_count::adjusted_zero ();
4815 orig_node->count = zero;
4816 for (cgraph_edge *cs = orig_node->callees;
4817 cs;
4818 cs = cs->next_callee)
4819 cs->count = zero;
4820 for (cgraph_edge *cs = orig_node->indirect_calls;
4821 cs;
4822 cs = cs->next_callee)
4823 cs->count = zero;
4824 return;
4825 }
4826 }
4827 else
4828 {
4829 /* Let's behave as if there was another caller that accounts for all
4830 the calls that were either indirect or from other compilation
4831 units. */
4832 orig_nonrec_calls++;
4833 profile_count pretend_caller_count
4834 = (orig_node_count - new_sum - orig_nonrec_call_count
4835 - stats.rec_count_sum);
4836 orig_nonrec_call_count += pretend_caller_count;
4837 }
4838
4839 /* Divide all "unexplained" counts roughly proportionally to sums of
4840 counts of non-recursive calls.
4841
4842 We put rather arbitrary limits on how many counts we claim because the
4843 number of non-self-recursive incoming counts is only a rough guideline
4844 and there are cases (such as mcf) where using it blindly just takes
4845 too many. And if lattices are considered in the opposite order we
4846 could also take too few. */
4847 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4848
4849 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4850 profile_count new_part
4851 = MAX(MIN (unexp.apply_scale (new_sum,
4852 new_sum + orig_nonrec_call_count),
4853 unexp.apply_scale (limit_den - 1, limit_den)),
4854 unexp.apply_scale (new_nonrec_calls, limit_den));
4855 if (dump_file)
4856 {
4857 fprintf (dump_file, " Claiming ");
4858 new_part.dump (dump_file);
4859 fprintf (dump_file, " of unexplained ");
4860 unexp.dump (dump_file);
4861 fprintf (dump_file, " counts because of self-recursive "
4862 "calls\n");
4863 }
4864 new_sum += new_part;
4865 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4866 orig_node);
4867 }
4868 else
4869 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4870 orig_node);
4871
4872 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4873 new_node->count = new_sum;
4874 orig_node->count = remainder;
4875
4876 profile_count orig_new_node_count = orig_node_count;
4877 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4878 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4879 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4880 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4881 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4882
4883 if (!orig_edges_processed)
4884 {
4885 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4886 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4887 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4888 for (cgraph_edge *cs = orig_node->indirect_calls;
4889 cs;
4890 cs = cs->next_callee)
4891 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4892 }
4893
4894 if (dump_file)
4895 {
4896 dump_profile_updates (new_node, true);
4897 dump_profile_updates (orig_node, false);
4898 }
4899 }
4900
4901 /* Update the respective profile of specialized NEW_NODE and the original
4902 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4903 have been redirected to the specialized version. */
4904
4905 static void
4906 update_specialized_profile (struct cgraph_node *new_node,
4907 struct cgraph_node *orig_node,
4908 profile_count redirected_sum)
4909 {
4910 struct cgraph_edge *cs;
4911 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
4912
4913 if (dump_file)
4914 {
4915 fprintf (dump_file, " the sum of counts of redirected edges is ");
4916 redirected_sum.dump (dump_file);
4917 fprintf (dump_file, "\n old ipa count of the original node is ");
4918 orig_node_count.dump (dump_file);
4919 fprintf (dump_file, "\n");
4920 }
4921 if (!(orig_node_count > profile_count::zero ()))
4922 return;
4923
4924 new_node_count = new_node->count;
4925 new_node->count += redirected_sum;
4926 orig_node->count
4927 = lenient_count_portion_handling (orig_node->count - redirected_sum,
4928 orig_node);
4929
4930 for (cs = new_node->callees; cs; cs = cs->next_callee)
4931 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4932
4933 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4934 {
4935 profile_count dec = cs->count.apply_scale (redirected_sum,
4936 orig_node_count);
4937 cs->count -= dec;
4938 }
4939
4940 if (dump_file)
4941 {
4942 dump_profile_updates (new_node, true);
4943 dump_profile_updates (orig_node, false);
4944 }
4945 }
4946
4947 static void adjust_references_in_caller (cgraph_edge *cs,
4948 symtab_node *symbol, int index);
4949
4950 /* Simple structure to pass a symbol and index (with same meaning as parameters
4951 of adjust_references_in_caller) through a void* parameter of a
4952 call_for_symbol_thunks_and_aliases callback. */
4953 struct symbol_and_index_together
4954 {
4955 symtab_node *symbol;
4956 int index;
4957 };
4958
4959 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4960 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4961 static bool
4962 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4963 {
4964 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4965 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4966 if (!cs->caller->thunk)
4967 adjust_references_in_caller (cs, pack->symbol, pack->index);
4968 return false;
4969 }
4970
4971 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4972 variable which is only dereferenced and which is represented by SYMBOL. See
4973 if we can remove the ADDR reference in callers associated with the call. */
4974
4975 static void
4976 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4977 {
4978 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4979 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4980 if (jfunc->type == IPA_JF_CONST)
4981 {
4982 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4983 cs->lto_stmt_uid,
4984 IPA_REF_ADDR);
4985 if (!to_del)
4986 return;
4987 to_del->remove_reference ();
4988 ipa_zap_jf_refdesc (jfunc);
4989 if (dump_file)
4990 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4991 cs->caller->dump_name (), symbol->dump_name ());
4992 return;
4993 }
4994
4995 if (jfunc->type != IPA_JF_PASS_THROUGH
4996 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4997 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4998 return;
4999
5000 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5001 cgraph_node *caller = cs->caller;
5002 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
5003 /* TODO: This consistency check may be too big and not really
5004 that useful. Consider removing it. */
5005 tree cst;
5006 if (caller_info->ipcp_orig_node)
5007 cst = caller_info->known_csts[fidx];
5008 else
5009 {
5010 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
5011 gcc_assert (lat->is_single_const ());
5012 cst = lat->values->value;
5013 }
5014 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5015 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5016 == symbol));
5017
5018 int cuses = ipa_get_controlled_uses (caller_info, fidx);
5019 if (cuses == IPA_UNDESCRIBED_USE)
5020 return;
5021 gcc_assert (cuses > 0);
5022 cuses--;
5023 ipa_set_controlled_uses (caller_info, fidx, cuses);
5024 ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5025 if (dump_file && (dump_flags & TDF_DETAILS))
5026 fprintf (dump_file, " Controlled uses of parameter %i of %s dropped "
5027 "to %i.\n", fidx, caller->dump_name (), cuses);
5028 if (cuses)
5029 return;
5030
5031 if (caller_info->ipcp_orig_node)
5032 {
5033 /* Cloning machinery has created a reference here, we need to either
5034 remove it or change it to a read one. */
5035 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5036 if (to_del)
5037 {
5038 to_del->remove_reference ();
5039 if (dump_file)
5040 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5041 cs->caller->dump_name (), symbol->dump_name ());
5042 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5043 {
5044 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5045 if (dump_file)
5046 fprintf (dump_file,
5047 " ...and replaced it with LOAD one.\n");
5048 }
5049 }
5050 }
5051
5052 symbol_and_index_together pack;
5053 pack.symbol = symbol;
5054 pack.index = fidx;
5055 if (caller->can_change_signature)
5056 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5057 &pack, true);
5058 }
5059
5060
5061 /* Return true if we would like to remove a parameter from NODE when cloning it
5062 with KNOWN_CSTS scalar constants. */
5063
5064 static bool
5065 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5066 {
5067 auto_vec<bool, 16> surviving;
5068 bool filled_vec = false;
5069 ipa_node_params *info = ipa_node_params_sum->get (node);
5070 int i, count = ipa_get_param_count (info);
5071
5072 for (i = 0; i < count; i++)
5073 {
5074 if (!known_csts[i] && ipa_is_param_used (info, i))
5075 continue;
5076
5077 if (!filled_vec)
5078 {
5079 clone_info *info = clone_info::get (node);
5080 if (!info || !info->param_adjustments)
5081 return true;
5082 info->param_adjustments->get_surviving_params (&surviving);
5083 filled_vec = true;
5084 }
5085 if (surviving.length() > (unsigned) i && surviving[i])
5086 return true;
5087 }
5088 return false;
5089 }
5090
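/* A hypothetical case in which this returns true:

     static int
     scale (int x, int factor)
     {
       return x * factor;
     }

   If every caller in the considered subset passes factor == 8, KNOWN_CSTS[1]
   is 8, and as long as no previous IPA adjustment has already dropped that
   parameter, removing it (and folding the constant into the body) is
   worthwhile, so the predicate answers true.  */
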
5091 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5092 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5093 redirect all edges in CALLERS to it. */
5094
5095 static struct cgraph_node *
5096 create_specialized_node (struct cgraph_node *node,
5097 vec<tree> known_csts,
5098 vec<ipa_polymorphic_call_context> known_contexts,
5099 vec<ipa_argagg_value, va_gc> *aggvals,
5100 vec<cgraph_edge *> &callers)
5101 {
5102 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5103 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5104 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5105 struct cgraph_node *new_node;
5106 int i, count = ipa_get_param_count (info);
5107 clone_info *cinfo = clone_info::get (node);
5108 ipa_param_adjustments *old_adjustments = cinfo
5109 ? cinfo->param_adjustments : NULL;
5110 ipa_param_adjustments *new_adjustments;
5111 gcc_assert (!info->ipcp_orig_node);
5112 gcc_assert (node->can_change_signature
5113 || !old_adjustments);
5114
5115 if (old_adjustments)
5116 {
5117 /* At the moment all IPA optimizations should use the number of
5118 parameters of the prevailing decl as the m_always_copy_start.
5119 Handling any other value would complicate the code below, so for the
5120 time being let's only assert it is so. */
5121 gcc_assert (old_adjustments->m_always_copy_start == count
5122 || old_adjustments->m_always_copy_start < 0);
5123 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5124 for (i = 0; i < old_adj_count; i++)
5125 {
5126 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5127 if (!node->can_change_signature
5128 || old_adj->op != IPA_PARAM_OP_COPY
5129 || (!known_csts[old_adj->base_index]
5130 && ipa_is_param_used (info, old_adj->base_index)))
5131 {
5132 ipa_adjusted_param new_adj = *old_adj;
5133
5134 new_adj.prev_clone_adjustment = true;
5135 new_adj.prev_clone_index = i;
5136 vec_safe_push (new_params, new_adj);
5137 }
5138 }
5139 bool skip_return = old_adjustments->m_skip_return;
5140 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5141 ipa_param_adjustments (new_params, count,
5142 skip_return));
5143 }
5144 else if (node->can_change_signature
5145 && want_remove_some_param_p (node, known_csts))
5146 {
5147 ipa_adjusted_param adj;
5148 memset (&adj, 0, sizeof (adj));
5149 adj.op = IPA_PARAM_OP_COPY;
5150 for (i = 0; i < count; i++)
5151 if (!known_csts[i] && ipa_is_param_used (info, i))
5152 {
5153 adj.base_index = i;
5154 adj.prev_clone_index = i;
5155 vec_safe_push (new_params, adj);
5156 }
5157 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5158 ipa_param_adjustments (new_params, count, false));
5159 }
5160 else
5161 new_adjustments = NULL;
5162
5163 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5164 for (i = callers.length () - 1; i >= 0; i--)
5165 {
5166 cgraph_edge *cs = callers[i];
5167 if (cs->caller == node)
5168 {
5169 self_recursive_calls.safe_push (cs);
5170 callers.unordered_remove (i);
5171 }
5172 }
5173 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5174 for (i = 0; i < count; i++)
5175 {
5176 tree t = known_csts[i];
5177 if (!t)
5178 continue;
5179
5180 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5181
5182 bool load_ref = false;
5183 symtab_node *ref_symbol;
5184 if (TREE_CODE (t) == ADDR_EXPR)
5185 {
5186 tree base = get_base_address (TREE_OPERAND (t, 0));
5187 if (TREE_CODE (base) == VAR_DECL
5188 && ipa_get_controlled_uses (info, i) == 0
5189 && ipa_get_param_load_dereferenced (info, i)
5190 && (ref_symbol = symtab_node::get (base)))
5191 {
5192 load_ref = true;
5193 if (node->can_change_signature)
5194 for (cgraph_edge *caller : callers)
5195 adjust_references_in_caller (caller, ref_symbol, i);
5196 }
5197 }
5198
5199 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5200 if (replace_map)
5201 vec_safe_push (replace_trees, replace_map);
5202 }
5203
5204 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5205 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5206 node->decl)));
5207 new_node = node->create_virtual_clone (callers, replace_trees,
5208 new_adjustments, "constprop",
5209 suffix_counter);
5210 suffix_counter++;
5211
5212 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5213 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5214 {
5215 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5216 /* Cloned edges can disappear during cloning as speculation can be
5217 resolved, check that we have one and that it comes from the last
5218 cloning. */
5219 if (cs && cs->caller == new_node)
5220 cs->redirect_callee_duplicating_thunks (new_node);
5221 /* Any future code that would make more than one clone of an outgoing
5222 edge would confuse this mechanism, so let's check that does not
5223 happen. */
5224 gcc_checking_assert (!cs
5225 || !get_next_cgraph_edge_clone (cs)
5226 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5227 }
5228 if (have_self_recursive_calls)
5229 new_node->expand_all_artificial_thunks ();
5230
5231 ipa_set_node_agg_value_chain (new_node, aggvals);
5232 for (const ipa_argagg_value &av : aggvals)
5233 new_node->maybe_create_reference (av.value, NULL);
5234
5235 if (dump_file && (dump_flags & TDF_DETAILS))
5236 {
5237 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5238 if (known_contexts.exists ())
5239 {
5240 for (i = 0; i < count; i++)
5241 if (!known_contexts[i].useless_p ())
5242 {
5243 fprintf (dump_file, " known ctx %i is ", i);
5244 known_contexts[i].dump (dump_file);
5245 }
5246 }
5247 if (aggvals)
5248 {
5249 fprintf (dump_file, " Aggregate replacements:");
5250 ipa_argagg_value_list avs (aggvals);
5251 avs.dump (dump_file);
5252 }
5253 }
5254
5255 new_info = ipa_node_params_sum->get (new_node);
5256 new_info->ipcp_orig_node = node;
5257 new_node->ipcp_clone = true;
5258 new_info->known_csts = known_csts;
5259 new_info->known_contexts = known_contexts;
5260
5261 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts,
5262 aggvals);
5263
5264 return new_node;
5265 }
5266
5267 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5268 pass-through function to itself when the cgraph_node involved is not an
5269 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5270 no-operation pass-through. */
5271
5272 static bool
5273 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5274 bool simple = true)
5275 {
5276 enum availability availability;
5277 if (cs->caller == cs->callee->function_symbol (&availability)
5278 && availability > AVAIL_INTERPOSABLE
5279 && jfunc->type == IPA_JF_PASS_THROUGH
5280 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5281 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5282 && ipa_node_params_sum->get (cs->caller)
5283 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5284 return true;
5285 return false;
5286 }
5287
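/* A minimal made-up example of what this predicate recognizes:

     int
     repeat (int x, int n)
     {
       if (n <= 0)
         return x;
       return repeat (x, n - 1);
     }

   Argument 0 of the recursive call passes parameter 0 through unchanged, so
   it is a no-operation pass-through and is accepted with SIMPLE == true;
   argument 1 is an arithmetic pass-through (n - 1) and is only accepted when
   SIMPLE is false.  */
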
5288 /* Return true if JFUNC, which describes a part of an aggregate represented or
5289 pointed to by the i-th parameter of call CS, is a pass-through function to
5290 itself when the cgraph_node involved is not an IPA-CP clone. When
5291 SIMPLE is true, further check if JFUNC is a simple no-operation
5292 pass-through. */
5293
5294 static bool
5295 self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5296 const ipa_agg_jf_item *jfunc,
5297 int i, bool simple = true)
5298 {
5299 enum availability availability;
5300 if (cs->caller == cs->callee->function_symbol (&availability)
5301 && availability > AVAIL_INTERPOSABLE
5302 && jfunc->jftype == IPA_JF_LOAD_AGG
5303 && jfunc->offset == jfunc->value.load_agg.offset
5304 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5305 && jfunc->value.pass_through.formal_id == i
5306 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5307 && ipa_node_params_sum->get (cs->caller)
5308 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5309 return true;
5310 return false;
5311 }
5312
5313 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5314 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
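/* For example (illustrative only), if the lattice of the second parameter
contains several values but every edge in CALLERS happens to pass the
constant 4 in that position, the blank slot known_csts[1] can be filled
with 4 for the clone created for this subset of callers. */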
5315
5316 static void
5317 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5318 vec<tree> &known_csts,
5319 const vec<cgraph_edge *> &callers)
5320 {
5321 ipa_node_params *info = ipa_node_params_sum->get (node);
5322 int i, count = ipa_get_param_count (info);
5323
5324 for (i = 0; i < count; i++)
5325 {
5326 struct cgraph_edge *cs;
5327 tree newval = NULL_TREE;
5328 int j;
5329 bool first = true;
5330 tree type = ipa_get_type (info, i);
5331
5332 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5333 continue;
5334
5335 FOR_EACH_VEC_ELT (callers, j, cs)
5336 {
5337 struct ipa_jump_func *jump_func;
5338 tree t;
5339
5340 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5341 if (!args
5342 || i >= ipa_get_cs_argument_count (args)
5343 || (i == 0
5344 && call_passes_through_thunk (cs)))
5345 {
5346 newval = NULL_TREE;
5347 break;
5348 }
5349 jump_func = ipa_get_ith_jump_func (args, i);
5350
5351 /* Besides simple pass-through jump function, arithmetic jump
5352 function could also introduce argument-direct-pass-through for
5353 self-feeding recursive call. For example,
5354
5355 fn (int i)
5356 {
5357 fn (i & 1);
5358 }
5359
5360 Given that i is 0, recursive propagation via (i & 1) also gets
5361 0. */
5362 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5363 {
5364 gcc_assert (newval);
5365 t = ipa_get_jf_arith_result (
5366 ipa_get_jf_pass_through_operation (jump_func),
5367 newval,
5368 ipa_get_jf_pass_through_operand (jump_func),
5369 type);
5370 }
5371 else
5372 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5373 jump_func, type);
5374 if (!t
5375 || (newval
5376 && !values_equal_for_ipcp_p (t, newval))
5377 || (!first && !newval))
5378 {
5379 newval = NULL_TREE;
5380 break;
5381 }
5382 else
5383 newval = t;
5384 first = false;
5385 }
5386
5387 if (newval)
5388 {
5389 if (dump_file && (dump_flags & TDF_DETAILS))
5390 {
5391 fprintf (dump_file, " adding an extra known scalar value ");
5392 print_ipcp_constant_value (dump_file, newval);
5393 fprintf (dump_file, " for ");
5394 ipa_dump_param (dump_file, info, i);
5395 fprintf (dump_file, "\n");
5396 }
5397
5398 known_csts[i] = newval;
5399 }
5400 }
5401 }
5402
5403 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5404 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5405 CALLERS. */
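/* For example (illustrative only), if every edge in CALLERS passes a pointer
to an object whose type is known to be the same class A, the resulting
polymorphic context for that parameter records this, enabling later
devirtualization in the clone. */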
5406
5407 static void
5408 find_more_contexts_for_caller_subset (cgraph_node *node,
5409 vec<ipa_polymorphic_call_context>
5410 *known_contexts,
5411 const vec<cgraph_edge *> &callers)
5412 {
5413 ipa_node_params *info = ipa_node_params_sum->get (node);
5414 int i, count = ipa_get_param_count (info);
5415
5416 for (i = 0; i < count; i++)
5417 {
5418 cgraph_edge *cs;
5419
5420 if (ipa_get_poly_ctx_lat (info, i)->bottom
5421 || (known_contexts->exists ()
5422 && !(*known_contexts)[i].useless_p ()))
5423 continue;
5424
5425 ipa_polymorphic_call_context newval;
5426 bool first = true;
5427 int j;
5428
5429 FOR_EACH_VEC_ELT (callers, j, cs)
5430 {
5431 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5432 if (!args
5433 || i >= ipa_get_cs_argument_count (args))
5434 return;
5435 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5436 ipa_polymorphic_call_context ctx;
5437 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5438 cs, i, jfunc);
5439 if (first)
5440 {
5441 newval = ctx;
5442 first = false;
5443 }
5444 else
5445 newval.meet_with (ctx);
5446 if (newval.useless_p ())
5447 break;
5448 }
5449
5450 if (!newval.useless_p ())
5451 {
5452 if (dump_file && (dump_flags & TDF_DETAILS))
5453 {
5454 fprintf (dump_file, " adding an extra known polymorphic "
5455 "context ");
5456 print_ipcp_constant_value (dump_file, newval);
5457 fprintf (dump_file, " for ");
5458 ipa_dump_param (dump_file, info, i);
5459 fprintf (dump_file, "\n");
5460 }
5461
5462 if (!known_contexts->exists ())
5463 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5464 true);
5465 (*known_contexts)[i] = newval;
5466 }
5467
5468 }
5469 }
5470
5471 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5472 RES. If INTERIM is non-NULL, it contains the current interim state of
5473 collected aggregate values which can be used to compute values passed over
5474 self-recursive edges.
5475
5476 This is basically one iteration of push_agg_values_from_edge over one
5477 parameter, which allows for simpler early returns. */
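/* An illustrative sketch of the output: for a call fn (&s) where the caller
knows that s.a is 1 and s.b is 2, two ipa_argagg_value entries are pushed to
RES for this INDEX, carrying those values at the unit offsets of the
respective fields, in order of increasing offset. */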
5478
5479 static void
5480 push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5481 vec<ipa_argagg_value> *res,
5482 const ipa_argagg_value_list *interim)
5483 {
5484 bool agg_values_from_caller = false;
5485 bool agg_jf_preserved = false;
5486 unsigned unit_delta = UINT_MAX;
5487 int src_idx = -1;
5488 ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
5489 index);
5490
5491 if (jfunc->type == IPA_JF_PASS_THROUGH
5492 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5493 {
5494 agg_values_from_caller = true;
5495 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5496 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5497 unit_delta = 0;
5498 }
5499 else if (jfunc->type == IPA_JF_ANCESTOR
5500 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5501 {
5502 agg_values_from_caller = true;
5503 agg_jf_preserved = true;
5504 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5505 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5506 }
5507
5508 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5509 if (agg_values_from_caller)
5510 {
5511 if (caller_info->ipcp_orig_node)
5512 {
5513 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5514 ipcp_transformation *ts
5515 = ipcp_get_transformation_summary (cs->caller);
5516 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5517 ipcp_param_lattices *orig_plats
5518 = ipa_get_parm_lattices (orig_info, src_idx);
5519 if (ts
5520 && orig_plats->aggs
5521 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5522 {
5523 ipa_argagg_value_list src (ts);
5524 src.push_adjusted_values (src_idx, index, unit_delta, res);
5525 return;
5526 }
5527 }
5528 else
5529 {
5530 ipcp_param_lattices *src_plats
5531 = ipa_get_parm_lattices (caller_info, src_idx);
5532 if (src_plats->aggs
5533 && !src_plats->aggs_bottom
5534 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5535 {
5536 if (interim && self_recursive_pass_through_p (cs, jfunc, index))
5537 {
5538 interim->push_adjusted_values (src_idx, index, unit_delta,
5539 res);
5540 return;
5541 }
5542 if (!src_plats->aggs_contain_variable)
5543 {
5544 push_agg_values_from_plats (src_plats, index, unit_delta,
5545 res);
5546 return;
5547 }
5548 }
5549 }
5550 }
5551
5552 if (!jfunc->agg.items)
5553 return;
5554 bool first = true;
5555 unsigned prev_unit_offset = 0;
5556 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5557 {
5558 tree value, srcvalue;
5559 /* Besides simple pass-through aggregate jump function, arithmetic
5560 aggregate jump function could also bring same aggregate value as
5561 parameter passed-in for self-feeding recursive call. For example,
5562
5563 fn (int *i)
5564 {
5565 int j = *i & 1;
5566 fn (&j);
5567 }
5568
5569 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5570 if (interim
5571 && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
5572 && (srcvalue = interim->get_value (index,
5573 agg_jf.offset / BITS_PER_UNIT)))
5574 value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
5575 srcvalue,
5576 agg_jf.value.pass_through.operand,
5577 agg_jf.type);
5578 else
5579 value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
5580 &agg_jf);
5581 if (value)
5582 {
5583 struct ipa_argagg_value iav;
5584 iav.value = value;
5585 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5586 iav.index = index;
5587 iav.by_ref = jfunc->agg.by_ref;
5588 iav.killed = false;
5589
5590 gcc_assert (first
5591 || iav.unit_offset > prev_unit_offset);
5592 prev_unit_offset = iav.unit_offset;
5593 first = false;
5594
5595 res->safe_push (iav);
5596 }
5597 }
5598 return;
5599 }
5600
5601 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5602 description of ultimate callee of CS or the one it was cloned from (the
5603 summary where lattices are). If INTERIM is non-NULL, it contains the
5604 current interim state of collected aggregate values which can be used to
5605 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5606 is true) and to skip values which clearly will not be part of intersection
5607 with INTERIM. */
5608
5609 static void
5610 push_agg_values_from_edge (struct cgraph_edge *cs,
5611 ipa_node_params *dest_info,
5612 vec<ipa_argagg_value> *res,
5613 const ipa_argagg_value_list *interim,
5614 bool optimize_self_recursion)
5615 {
5616 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5617 if (!args)
5618 return;
5619
5620 int count = MIN (ipa_get_param_count (dest_info),
5621 ipa_get_cs_argument_count (args));
5622
5623 unsigned interim_index = 0;
5624 for (int index = 0; index < count; index++)
5625 {
5626 if (interim)
5627 {
5628 while (interim_index < interim->m_elts.size ()
5629 && interim->m_elts[interim_index].value
5630 && interim->m_elts[interim_index].index < index)
5631 interim_index++;
5632 if (interim_index >= interim->m_elts.size ()
5633 || interim->m_elts[interim_index].index > index)
5634 continue;
5635 }
5636
5637 ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
5638 if (!ipa_is_param_used (dest_info, index)
5639 || plats->aggs_bottom)
5640 continue;
5641 push_agg_values_for_index_from_edge (cs, index, res,
5642 optimize_self_recursion ? interim
5643 : NULL);
5644 }
5645 }
5646
5647
5648 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5649 from all of them. Return nullptr if there are none. */
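/* For example (illustrative only), if one caller passes an aggregate known to
contain { .a = 1, .b = 2 } and another passes { .a = 1, .b = 3 }, only the
value of .a survives the intersection and ends up in the returned vector. */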
5650
5651 static struct vec<ipa_argagg_value, va_gc> *
5652 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5653 const vec<cgraph_edge *> &callers)
5654 {
5655 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5656 if (dest_info->ipcp_orig_node)
5657 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5658
5659 /* gather_edges_for_value puts a non-recursive call into the first element of
5660 callers if it can. */
5661 auto_vec<ipa_argagg_value, 32> interim;
5662 push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);
5663
5664 unsigned valid_entries = interim.length ();
5665 if (!valid_entries)
5666 return nullptr;
5667
5668 unsigned caller_count = callers.length ();
5669 for (unsigned i = 1; i < caller_count; i++)
5670 {
5671 auto_vec<ipa_argagg_value, 32> last;
5672 ipa_argagg_value_list avs (&interim);
5673 push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);
5674
5675 valid_entries = intersect_argaggs_with (interim, last);
5676 if (!valid_entries)
5677 return nullptr;
5678 }
5679
5680 vec<ipa_argagg_value, va_gc> *res = NULL;
5681 vec_safe_reserve_exact (res, valid_entries);
5682 for (const ipa_argagg_value &av : interim)
5683 if (av.value)
5684 res->quick_push (av);
5685 gcc_checking_assert (res->length () == valid_entries);
5686 return res;
5687 }
5688
5689 /* Determine whether CS also brings all scalar values that the NODE is
5690 specialized for. */
5691
5692 static bool
5693 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5694 struct cgraph_node *node)
5695 {
5696 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5697 int count = ipa_get_param_count (dest_info);
5698 class ipa_node_params *caller_info;
5699 class ipa_edge_args *args;
5700 int i;
5701
5702 caller_info = ipa_node_params_sum->get (cs->caller);
5703 args = ipa_edge_args_sum->get (cs);
5704 for (i = 0; i < count; i++)
5705 {
5706 struct ipa_jump_func *jump_func;
5707 tree val, t;
5708
5709 val = dest_info->known_csts[i];
5710 if (!val)
5711 continue;
5712
5713 if (i >= ipa_get_cs_argument_count (args))
5714 return false;
5715 jump_func = ipa_get_ith_jump_func (args, i);
5716 t = ipa_value_from_jfunc (caller_info, jump_func,
5717 ipa_get_type (dest_info, i));
5718 if (!t || !values_equal_for_ipcp_p (val, t))
5719 return false;
5720 }
5721 return true;
5722 }
5723
5724 /* Determine whether CS also brings all aggregate values that NODE is
5725 specialized for. */
5726
5727 static bool
5728 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5729 struct cgraph_node *node)
5730 {
5731 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5732 if (!ts || vec_safe_is_empty (ts->m_agg_values))
5733 return true;
5734
5735 const ipa_argagg_value_list existing (ts->m_agg_values);
5736 auto_vec<ipa_argagg_value, 32> edge_values;
5737 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5738 gcc_checking_assert (dest_info->ipcp_orig_node);
5739 dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
5740 push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
5741 const ipa_argagg_value_list avl (&edge_values);
5742 return avl.superset_of_p (existing);
5743 }
5744
5745 /* Given an original NODE and a VAL for which we have already created a
5746 specialized clone, look whether there are incoming edges that still lead
5747 into the old node but now also bring the requested value and conform to all
5748 other criteria such that they can be redirected to the specialized node.
5749 This function can therefore redirect the final edge in a SCC. */
5750
5751 template <typename valtype>
5752 static void
5753 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5754 {
5755 ipcp_value_source<valtype> *src;
5756 profile_count redirected_sum = profile_count::zero ();
5757
5758 for (src = val->sources; src; src = src->next)
5759 {
5760 struct cgraph_edge *cs = src->cs;
5761 while (cs)
5762 {
5763 if (cgraph_edge_brings_value_p (cs, src, node, val)
5764 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5765 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5766 {
5767 if (dump_file)
5768 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5769 cs->caller->dump_name (),
5770 val->spec_node->dump_name ());
5771
5772 cs->redirect_callee_duplicating_thunks (val->spec_node);
5773 val->spec_node->expand_all_artificial_thunks ();
5774 if (cs->count.ipa ().initialized_p ())
5775 redirected_sum = redirected_sum + cs->count.ipa ();
5776 }
5777 cs = get_next_cgraph_edge_clone (cs);
5778 }
5779 }
5780
5781 if (redirected_sum.nonzero_p ())
5782 update_specialized_profile (val->spec_node, node, redirected_sum);
5783 }
5784
5785 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5786
5787 static bool
5788 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5789 {
5790 ipa_polymorphic_call_context *ctx;
5791 int i;
5792
5793 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5794 if (!ctx->useless_p ())
5795 return true;
5796 return false;
5797 }
5798
5799 /* Return a copy of KNOWN_CONTEXTS if it contains at least one useful context, otherwise return vNULL. */
5800
5801 static vec<ipa_polymorphic_call_context>
5802 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5803 {
5804 if (known_contexts_useful_p (known_contexts))
5805 return known_contexts.copy ();
5806 else
5807 return vNULL;
5808 }
5809
5810 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5811 according to VAL and INDEX. Also fill KNOWN_CONTEXTS with a copy of the
5812 useful known contexts, if there are any. */
5813
5814 static void
5815 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5816 vec<tree> *known_csts,
5817 vec<ipa_polymorphic_call_context> *known_contexts,
5818 ipcp_value<tree> *val, int index)
5819 {
5820 *known_csts = avals->m_known_vals.copy ();
5821 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5822 (*known_csts)[index] = val->value;
5823 }
5824
5825 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5826 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5827 INDEX. */
5828
5829 static void
5830 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5831 vec<tree> *known_csts,
5832 vec<ipa_polymorphic_call_context> *known_contexts,
5833 ipcp_value<ipa_polymorphic_call_context> *val,
5834 int index)
5835 {
5836 *known_csts = avals->m_known_vals.copy ();
5837 *known_contexts = avals->m_known_contexts.copy ();
5838 (*known_contexts)[index] = val->value;
5839 }
5840
5841 /* Return true if OFFSET indicates this was not an aggregate value or there is
5842 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5843 AGGVALS list. */
5844
5845 DEBUG_FUNCTION bool
5846 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
5847 int index, HOST_WIDE_INT offset, tree value)
5848 {
5849 if (offset == -1)
5850 return true;
5851
5852 const ipa_argagg_value_list avl (aggvals);
5853 tree v = avl.get_value (index, offset / BITS_PER_UNIT);
5854 return v && values_equal_for_ipcp_p (v, value);
5855 }
5856
5857 /* Return true if OFFSET is minus one; the source of a polymorphic context
5858 cannot be an aggregate value. */
5859
5860 DEBUG_FUNCTION bool
5861 ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
5862 int , HOST_WIDE_INT offset,
5863 ipa_polymorphic_call_context)
5864 {
5865 return offset == -1;
5866 }
5867
5868 /* Decide whether to create a special version of NODE for value VAL of
5869 parameter at the given INDEX. If OFFSET is -1, the value is for the
5870 parameter itself, otherwise it is stored at the given OFFSET of the
5871 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
5872 is a vector which contains clones created for self-recursive calls with an
5873 arithmetic pass-through jump function. */
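/* As an illustration, VAL might be the constant 0 for the first parameter
(INDEX 0, OFFSET -1), or the constant 7 known to be stored at bit offset 64
(byte 8) of the aggregate passed in the second parameter (INDEX 1,
OFFSET 64). */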
5874
5875 template <typename valtype>
5876 static bool
5877 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5878 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
5879 vec<cgraph_node *> *self_gen_clones)
5880 {
5881 int caller_count;
5882 sreal freq_sum;
5883 profile_count count_sum, rec_count_sum;
5884 vec<cgraph_edge *> callers;
5885
5886 if (val->spec_node)
5887 {
5888 perhaps_add_new_callers (node, val);
5889 return false;
5890 }
5891 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5892 {
5893 if (dump_file && (dump_flags & TDF_DETAILS))
5894 fprintf (dump_file, " Ignoring candidate value because "
5895 "maximum unit size would be reached with %li.\n",
5896 val->local_size_cost + overall_size);
5897 return false;
5898 }
5899 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
5900 &rec_count_sum, &count_sum))
5901 return false;
5902
5903 if (!dbg_cnt (ipa_cp_values))
5904 return false;
5905
5906 if (val->self_recursion_generated_p ())
5907 {
5908 /* The edge counts in this case might not have been adjusted yet.
5909 Nevertheless, even if they were, it would be only guesswork which we
5910 can do now. The recursive part of the counts can be derived from the
5911 count of the original node anyway. */
5912 if (node->count.ipa ().nonzero_p ())
5913 {
5914 unsigned dem = self_gen_clones->length () + 1;
5915 rec_count_sum = node->count.ipa () / dem;
5916 }
5917 else
5918 rec_count_sum = profile_count::zero ();
5919 }
5920
5921 /* get_info_about_necessary_edges only sums up ipa counts. */
5922 count_sum += rec_count_sum;
5923
5924 if (dump_file && (dump_flags & TDF_DETAILS))
5925 {
5926 fprintf (dump_file, " - considering value ");
5927 print_ipcp_constant_value (dump_file, val->value);
5928 fprintf (dump_file, " for ");
5929 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
5930 if (offset != -1)
5931 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
5932 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
5933 }
5934
5935 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
5936 freq_sum, count_sum,
5937 val->local_size_cost)
5938 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
5939 freq_sum, count_sum, val->prop_size_cost))
5940 return false;
5941
5942 if (dump_file)
5943 fprintf (dump_file, " Creating a specialized node of %s.\n",
5944 node->dump_name ());
5945
5946 vec<tree> known_csts;
5947 vec<ipa_polymorphic_call_context> known_contexts;
5948
5949 callers = gather_edges_for_value (val, node, caller_count);
5950 if (offset == -1)
5951 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
5952 else
5953 {
5954 known_csts = avals->m_known_vals.copy ();
5955 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5956 }
5957 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5958 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
5959 vec<ipa_argagg_value, va_gc> *aggvals
5960 = find_aggregate_values_for_callers_subset (node, callers);
5961 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
5962 offset, val->value));
5963 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
5964 aggvals, callers);
5965
5966 if (val->self_recursion_generated_p ())
5967 self_gen_clones->safe_push (val->spec_node);
5968 else
5969 update_profiling_info (node, val->spec_node);
5970
5971 callers.release ();
5972 overall_size += val->local_size_cost;
5973 if (dump_file && (dump_flags & TDF_DETAILS))
5974 fprintf (dump_file, " overall size reached %li\n",
5975 overall_size);
5976
5977 /* TODO: If for some lattice there is only one other known value
5978 left, make a special node for it too. */
5979
5980 return true;
5981 }
5982
5983 /* Like irange::contains_p(), but convert VAL to the range of R if
5984 necessary. */
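/* For example, if R describes a 32-bit int range and VAL is an INTEGER_CST of
a wider type, VAL is folded to the type of R before the containment test as
long as its value fits; values that do not fit simply yield false. */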
5985
5986 static inline bool
5987 ipa_range_contains_p (const vrange &r, tree val)
5988 {
5989 if (r.undefined_p ())
5990 return false;
5991
5992 tree type = r.type ();
5993 if (!wi::fits_to_tree_p (wi::to_wide (val), type))
5994 return false;
5995
5996 val = fold_convert (type, val);
5997 return r.contains_p (val);
5998 }
5999
6000 /* Decide whether and what specialized clones of NODE should be created. */
6001
6002 static bool
6003 decide_whether_version_node (struct cgraph_node *node)
6004 {
6005 ipa_node_params *info = ipa_node_params_sum->get (node);
6006 int i, count = ipa_get_param_count (info);
6007 bool ret = false;
6008
6009 if (count == 0)
6010 return false;
6011
6012 if (dump_file && (dump_flags & TDF_DETAILS))
6013 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6014 node->dump_name ());
6015
6016 auto_vec <cgraph_node *, 9> self_gen_clones;
6017 ipa_auto_call_arg_values avals;
6018 gather_context_independent_values (info, &avals, false, NULL);
6019
6020 for (i = 0; i < count; i++)
6021 {
6022 if (!ipa_is_param_used (info, i))
6023 continue;
6024
6025 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6026 ipcp_lattice<tree> *lat = &plats->itself;
6027 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6028
6029 if (!lat->bottom
6030 && !avals.m_known_vals[i])
6031 {
6032 ipcp_value<tree> *val;
6033 for (val = lat->values; val; val = val->next)
6034 {
6035 /* If some values generated for self-recursive calls with
6036 arithmetic jump functions fall outside of the known
6037 range for the parameter, we can skip them. */
6038 if (TREE_CODE (val->value) == INTEGER_CST
6039 && !plats->m_value_range.bottom_p ()
6040 && !ipa_range_contains_p (plats->m_value_range.m_vr,
6041 val->value))
6042 {
6043 /* This can happen also if a constant present in the source
6044 code falls outside of the range of the parameter's type, so we
6045 cannot assert. */
6046 if (dump_file && (dump_flags & TDF_DETAILS))
6047 {
6048 fprintf (dump_file, " - skipping%s value ",
6049 val->self_recursion_generated_p ()
6050 ? " self_recursion_generated" : "");
6051 print_ipcp_constant_value (dump_file, val->value);
6052 fprintf (dump_file, " because it is outside known "
6053 "value range.\n");
6054 }
6055 continue;
6056 }
6057 ret |= decide_about_value (node, i, -1, val, &avals,
6058 &self_gen_clones);
6059 }
6060 }
6061
6062 if (!plats->aggs_bottom)
6063 {
6064 struct ipcp_agg_lattice *aglat;
6065 ipcp_value<tree> *val;
6066 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6067 if (!aglat->bottom && aglat->values
6068 /* If the following is false, the one value has been considered
6069 for cloning for all contexts. */
6070 && (plats->aggs_contain_variable
6071 || !aglat->is_single_const ()))
6072 for (val = aglat->values; val; val = val->next)
6073 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6074 &self_gen_clones);
6075 }
6076
6077 if (!ctxlat->bottom
6078 && avals.m_known_contexts[i].useless_p ())
6079 {
6080 ipcp_value<ipa_polymorphic_call_context> *val;
6081 for (val = ctxlat->values; val; val = val->next)
6082 ret |= decide_about_value (node, i, -1, val, &avals,
6083 &self_gen_clones);
6084 }
6085 }
6086
6087 if (!self_gen_clones.is_empty ())
6088 {
6089 self_gen_clones.safe_push (node);
6090 update_counts_for_self_gen_clones (node, self_gen_clones);
6091 }
6092
6093 if (info->do_clone_for_all_contexts)
6094 {
6095 if (!dbg_cnt (ipa_cp_values))
6096 {
6097 info->do_clone_for_all_contexts = false;
6098 return ret;
6099 }
6100
6101 struct cgraph_node *clone;
6102 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6103
6104 for (int i = callers.length () - 1; i >= 0; i--)
6105 {
6106 cgraph_edge *cs = callers[i];
6107 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6108
6109 if (caller_info && caller_info->node_dead)
6110 callers.unordered_remove (i);
6111 }
6112
6113 if (!adjust_callers_for_value_intersection (callers, node))
6114 {
6115 /* If node is not called by anyone, or all its caller edges are
6116 self-recursive, the node is not really in use, so there is no need to do
6117 cloning. */
6118 info->do_clone_for_all_contexts = false;
6119 return ret;
6120 }
6121
6122 if (dump_file)
6123 fprintf (dump_file, " - Creating a specialized node of %s "
6124 "for all known contexts.\n", node->dump_name ());
6125
6126 vec<tree> known_csts = avals.m_known_vals.copy ();
6127 vec<ipa_polymorphic_call_context> known_contexts
6128 = copy_useful_known_contexts (avals.m_known_contexts);
6129 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6130 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6131 vec<ipa_argagg_value, va_gc> *aggvals
6132 = find_aggregate_values_for_callers_subset (node, callers);
6133
6134 if (!known_contexts_useful_p (known_contexts))
6135 {
6136 known_contexts.release ();
6137 known_contexts = vNULL;
6138 }
6139 clone = create_specialized_node (node, known_csts, known_contexts,
6140 aggvals, callers);
6141 info->do_clone_for_all_contexts = false;
6142 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6143 ret = true;
6144 }
6145
6146 return ret;
6147 }
6148
6149 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6150
6151 static void
6152 spread_undeadness (struct cgraph_node *node)
6153 {
6154 struct cgraph_edge *cs;
6155
6156 for (cs = node->callees; cs; cs = cs->next_callee)
6157 if (ipa_edge_within_scc (cs))
6158 {
6159 struct cgraph_node *callee;
6160 class ipa_node_params *info;
6161
6162 callee = cs->callee->function_symbol (NULL);
6163 info = ipa_node_params_sum->get (callee);
6164
6165 if (info && info->node_dead)
6166 {
6167 info->node_dead = 0;
6168 spread_undeadness (callee);
6169 }
6170 }
6171 }
6172
6173 /* Return true if NODE has a caller from outside of its SCC that is not
6174 dead. Worker callback for cgraph_for_node_and_aliases. */
6175
6176 static bool
6177 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6178 void *data ATTRIBUTE_UNUSED)
6179 {
6180 struct cgraph_edge *cs;
6181
6182 for (cs = node->callers; cs; cs = cs->next_caller)
6183 if (cs->caller->thunk
6184 && cs->caller->call_for_symbol_thunks_and_aliases
6185 (has_undead_caller_from_outside_scc_p, NULL, true))
6186 return true;
6187 else if (!ipa_edge_within_scc (cs))
6188 {
6189 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6190 if (!caller_info /* Unoptimized callers are like dead ones. */
6191 || !caller_info->node_dead)
6192 return true;
6193 }
6194 return false;
6195 }
6196
6197
6198 /* Identify nodes within the same SCC as NODE which are no longer needed
6199 because of new clones and will be removed as unreachable. */
6200
6201 static void
6202 identify_dead_nodes (struct cgraph_node *node)
6203 {
6204 struct cgraph_node *v;
6205 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6206 if (v->local)
6207 {
6208 ipa_node_params *info = ipa_node_params_sum->get (v);
6209 if (info
6210 && !v->call_for_symbol_thunks_and_aliases
6211 (has_undead_caller_from_outside_scc_p, NULL, true))
6212 info->node_dead = 1;
6213 }
6214
6215 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6216 {
6217 ipa_node_params *info = ipa_node_params_sum->get (v);
6218 if (info && !info->node_dead)
6219 spread_undeadness (v);
6220 }
6221
6222 if (dump_file && (dump_flags & TDF_DETAILS))
6223 {
6224 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6225 if (ipa_node_params_sum->get (v)
6226 && ipa_node_params_sum->get (v)->node_dead)
6227 fprintf (dump_file, " Marking node as dead: %s.\n",
6228 v->dump_name ());
6229 }
6230 }
6231
6232 /* The decision stage. Iterate over the topological order of call graph nodes
6233 TOPO and make specialized clones if deemed beneficial. */
6234
6235 static void
6236 ipcp_decision_stage (class ipa_topo_info *topo)
6237 {
6238 int i;
6239
6240 if (dump_file)
6241 fprintf (dump_file, "\nIPA decision stage:\n\n");
6242
6243 for (i = topo->nnodes - 1; i >= 0; i--)
6244 {
6245 struct cgraph_node *node = topo->order[i];
6246 bool change = false, iterate = true;
6247
6248 while (iterate)
6249 {
6250 struct cgraph_node *v;
6251 iterate = false;
6252 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6253 if (v->has_gimple_body_p ()
6254 && ipcp_versionable_function_p (v))
6255 iterate |= decide_whether_version_node (v);
6256
6257 change |= iterate;
6258 }
6259 if (change)
6260 identify_dead_nodes (node);
6261 }
6262 }
6263
6264 /* Look up all VR and bits information that we have discovered and copy it
6265 over to the transformation summary. */
6266
6267 static void
6268 ipcp_store_vr_results (void)
6269 {
6270 cgraph_node *node;
6271
6272 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6273 {
6274 ipa_node_params *info = ipa_node_params_sum->get (node);
6275 bool dumped_sth = false;
6276 bool found_useful_result = false;
6277 bool do_vr = true;
6278 bool do_bits = true;
6279
6280 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6281 {
6282 if (dump_file)
6283 fprintf (dump_file, "Not considering %s for VR discovery "
6284 "and propagate; -fipa-ipa-vrp: disabled.\n",
6285 node->dump_name ());
6286 do_vr = false;
6287 }
6288 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6289 {
6290 if (dump_file)
6291 fprintf (dump_file, "Not considering %s for ipa bitwise "
6292 "propagation ; -fipa-bit-cp: disabled.\n",
6293 node->dump_name ());
6294 do_bits = false;
6295 }
6296 if (!do_bits && !do_vr)
6297 continue;
6298
6299 if (info->ipcp_orig_node)
6300 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6301 if (info->lattices.is_empty ())
6302 /* Newly expanded artificial thunks do not have lattices. */
6303 continue;
6304
6305 unsigned count = ipa_get_param_count (info);
6306 for (unsigned i = 0; i < count; i++)
6307 {
6308 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6309 if (do_vr
6310 && !plats->m_value_range.bottom_p ()
6311 && !plats->m_value_range.top_p ())
6312 {
6313 found_useful_result = true;
6314 break;
6315 }
6316 if (do_bits && plats->bits_lattice.constant_p ())
6317 {
6318 found_useful_result = true;
6319 break;
6320 }
6321 }
6322 if (!found_useful_result)
6323 continue;
6324
6325 ipcp_transformation_initialize ();
6326 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6327 vec_safe_reserve_exact (ts->m_vr, count);
6328
6329 for (unsigned i = 0; i < count; i++)
6330 {
6331 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6332 ipcp_bits_lattice *bits = NULL;
6333
6334 if (do_bits
6335 && plats->bits_lattice.constant_p ()
6336 && dbg_cnt (ipa_cp_bits))
6337 bits = &plats->bits_lattice;
6338
6339 if (do_vr
6340 && !plats->m_value_range.bottom_p ()
6341 && !plats->m_value_range.top_p ()
6342 && dbg_cnt (ipa_cp_vr))
6343 {
6344 if (bits)
6345 {
6346 Value_Range tmp = plats->m_value_range.m_vr;
6347 tree type = ipa_get_type (info, i);
6348 irange_bitmask bm (wide_int::from (bits->get_value (),
6349 TYPE_PRECISION (type),
6350 TYPE_SIGN (type)),
6351 wide_int::from (bits->get_mask (),
6352 TYPE_PRECISION (type),
6353 TYPE_SIGN (type)));
6354 tmp.update_bitmask (bm);
6355 ipa_vr vr (tmp);
6356 ts->m_vr->quick_push (vr);
6357 }
6358 else
6359 {
6360 ipa_vr vr (plats->m_value_range.m_vr);
6361 ts->m_vr->quick_push (vr);
6362 }
6363 }
6364 else if (bits)
6365 {
6366 tree type = ipa_get_type (info, i);
6367 Value_Range tmp;
6368 tmp.set_varying (type);
6369 irange_bitmask bm (wide_int::from (bits->get_value (),
6370 TYPE_PRECISION (type),
6371 TYPE_SIGN (type)),
6372 wide_int::from (bits->get_mask (),
6373 TYPE_PRECISION (type),
6374 TYPE_SIGN (type)));
6375 tmp.update_bitmask (bm);
6376 ipa_vr vr (tmp);
6377 ts->m_vr->quick_push (vr);
6378 }
6379 else
6380 {
6381 ipa_vr vr;
6382 ts->m_vr->quick_push (vr);
6383 }
6384
6385 if (!dump_file || !bits)
6386 continue;
6387
6388 if (!dumped_sth)
6389 {
6390 fprintf (dump_file, "Propagated bits info for function %s:\n",
6391 node->dump_name ());
6392 dumped_sth = true;
6393 }
6394 fprintf (dump_file, " param %i: value = ", i);
6395 print_hex (bits->get_value (), dump_file);
6396 fprintf (dump_file, ", mask = ");
6397 print_hex (bits->get_mask (), dump_file);
6398 fprintf (dump_file, "\n");
6399 }
6400 }
6401 }
6402
6403 /* The IPCP driver. */
6404
6405 static unsigned int
6406 ipcp_driver (void)
6407 {
6408 class ipa_topo_info topo;
6409
6410 if (edge_clone_summaries == NULL)
6411 edge_clone_summaries = new edge_clone_summary_t (symtab);
6412
6413 ipa_check_create_node_params ();
6414 ipa_check_create_edge_args ();
6415 clone_num_suffixes = new hash_map<const char *, unsigned>;
6416
6417 if (dump_file)
6418 {
6419 fprintf (dump_file, "\nIPA structures before propagation:\n");
6420 if (dump_flags & TDF_DETAILS)
6421 ipa_print_all_params (dump_file);
6422 ipa_print_all_jump_functions (dump_file);
6423 }
6424
6425 /* Topological sort. */
6426 build_toporder_info (&topo);
6427 /* Do the interprocedural propagation. */
6428 ipcp_propagate_stage (&topo);
6429 /* Decide what constant propagation and cloning should be performed. */
6430 ipcp_decision_stage (&topo);
6431 /* Store results of value range and bits propagation. */
6432 ipcp_store_vr_results ();
6433
6434 /* Free all IPCP structures. */
6435 delete clone_num_suffixes;
6436 free_toporder_info (&topo);
6437 delete edge_clone_summaries;
6438 edge_clone_summaries = NULL;
6439 ipa_free_all_structures_after_ipa_cp ();
6440 if (dump_file)
6441 fprintf (dump_file, "\nIPA constant propagation end\n");
6442 return 0;
6443 }
6444
6445 /* Initialization and computation of IPCP data structures. This is the initial
6446 intraprocedural analysis of functions, which gathers information to be
6447 propagated later on. */
6448
6449 static void
6450 ipcp_generate_summary (void)
6451 {
6452 struct cgraph_node *node;
6453
6454 if (dump_file)
6455 fprintf (dump_file, "\nIPA constant propagation start:\n");
6456 ipa_register_cgraph_hooks ();
6457
6458 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6459 ipa_analyze_node (node);
6460 }
6461
6462 namespace {
6463
6464 const pass_data pass_data_ipa_cp =
6465 {
6466 IPA_PASS, /* type */
6467 "cp", /* name */
6468 OPTGROUP_NONE, /* optinfo_flags */
6469 TV_IPA_CONSTANT_PROP, /* tv_id */
6470 0, /* properties_required */
6471 0, /* properties_provided */
6472 0, /* properties_destroyed */
6473 0, /* todo_flags_start */
6474 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6475 };
6476
6477 class pass_ipa_cp : public ipa_opt_pass_d
6478 {
6479 public:
6480 pass_ipa_cp (gcc::context *ctxt)
6481 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6482 ipcp_generate_summary, /* generate_summary */
6483 NULL, /* write_summary */
6484 NULL, /* read_summary */
6485 ipcp_write_transformation_summaries, /*
6486 write_optimization_summary */
6487 ipcp_read_transformation_summaries, /*
6488 read_optimization_summary */
6489 NULL, /* stmt_fixup */
6490 0, /* function_transform_todo_flags_start */
6491 ipcp_transform_function, /* function_transform */
6492 NULL) /* variable_transform */
6493 {}
6494
6495 /* opt_pass methods: */
6496 bool gate (function *) final override
6497 {
6498 /* FIXME: We should remove the optimize check after we ensure we never run
6499 IPA passes when not optimizing. */
6500 return (flag_ipa_cp && optimize) || in_lto_p;
6501 }
6502
6503 unsigned int execute (function *) final override { return ipcp_driver (); }
6504
6505 }; // class pass_ipa_cp
6506
6507 } // anon namespace
6508
6509 ipa_opt_pass_d *
6510 make_pass_ipa_cp (gcc::context *ctxt)
6511 {
6512 return new pass_ipa_cp (ctxt);
6513 }
6514
6515 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6516 within the same process. For use by toplev::finalize. */
6517
6518 void
6519 ipa_cp_cc_finalize (void)
6520 {
6521 base_count = profile_count::uninitialized ();
6522 overall_size = 0;
6523 orig_overall_size = 0;
6524 ipcp_free_transformation_sum ();
6525 }
6526
6527 /* Given PARAM which must be a parameter of function FNDECL described by THIS,
6528 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6529 DECL_UID-sorted vector if available (which is pre-computed only if there are
6530 many parameters). Can return -1 if param is static chain not represented
6531 among DECL_ARGUMENTS. */
6532
6533 int
6534 ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6535 {
6536 gcc_assert (TREE_CODE (param) == PARM_DECL);
6537 if (m_uid_to_idx)
6538 {
6539 unsigned puid = DECL_UID (param);
6540 const ipa_uid_to_idx_map_elt *res
6541 = std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
6542 [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6543 {
6544 return elt.uid < uid;
6545 });
6546 if (res == m_uid_to_idx->end ()
6547 || res->uid != puid)
6548 {
6549 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6550 return -1;
6551 }
6552 return res->index;
6553 }
6554
6555 unsigned index = 0;
6556 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6557 if (p == param)
6558 return (int) index;
6559
6560 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6561 return -1;
6562 }
6563
6564 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6565 according to the uid. */
6566
6567 static int
6568 compare_uids (const void *a, const void *b)
6569 {
6570 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6571 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6572 if (e1->uid < e2->uid)
6573 return -1;
6574 if (e1->uid > e2->uid)
6575 return 1;
6576 gcc_unreachable ();
6577 }
6578
6579 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6580 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6581 from parameters to their indices in DECL_ARGUMENTS chain. */
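/* Note that the map is only built for functions with at least 32 parameters;
below that threshold the linear walk in get_param_index is cheap enough. */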
6582
6583 void
6584 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6585 {
6586 int c = count_formal_params (fndecl);
6587 if (c < 32)
6588 return;
6589
6590 m_uid_to_idx = NULL;
6591 vec_safe_reserve (m_uid_to_idx, c, true);
6592 unsigned index = 0;
6593 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6594 {
6595 ipa_uid_to_idx_map_elt elt;
6596 elt.uid = DECL_UID (p);
6597 elt.index = index;
6598 m_uid_to_idx->quick_push (elt);
6599 }
6600 m_uid_to_idx->qsort (compare_uids);
6601 }