]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/ipa-cp.c
Update copyright years.
[thirdparty/gcc.git] / gcc / ipa-cp.c
1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5 <mjambor@suse.cz>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 /* Interprocedural constant propagation (IPA-CP).
24
25 The goal of this transformation is to
26
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
30
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
34 is deemed good.
35
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
38
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
46 calls are redirected.
47
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
52
53
54 First stage - intraprocedural analysis
55 =======================================
56
57 This phase computes jump_function and modification flags.
58
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
61 values:
62
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
67
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
70
71 ipcp_generate_summary() is the main function of the first stage.
72
73 Second stage - interprocedural analysis
74 ========================================
75
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
79
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
86
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
92
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94 third stage.
95
96 Third phase - materialization of clones, call statement updates.
97 ============================================
98
99 This stage is currently performed by call graph code (mainly in cgraphunit.c
100 and tree-inline.c) according to instructions inserted to the call graph by
101 the second stage. */
102
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "tree.h"
108 #include "gimple-expr.h"
109 #include "gimple.h"
110 #include "predict.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
126 #include "attribs.h"
127 #include "dbgcnt.h"
128 #include "symtab-clones.h"
129
130 template <typename valtype> class ipcp_value;
131
132 /* Describes a particular source for an IPA-CP value. */
133
134 template <typename valtype>
135 struct ipcp_value_source
136 {
137 public:
138 /* Aggregate offset of the source, negative if the source is scalar value of
139 the argument itself. */
140 HOST_WIDE_INT offset;
141 /* The incoming edge that brought the value. */
142 cgraph_edge *cs;
143 /* If the jump function that resulted into his value was a pass-through or an
144 ancestor, this is the ipcp_value of the caller from which the described
145 value has been derived. Otherwise it is NULL. */
146 ipcp_value<valtype> *val;
147 /* Next pointer in a linked list of sources of a value. */
148 ipcp_value_source *next;
149 /* If the jump function that resulted into his value was a pass-through or an
150 ancestor, this is the index of the parameter of the caller the jump
151 function references. */
152 int index;
153 };
154
155 /* Common ancestor for all ipcp_value instantiations. */
156
157 class ipcp_value_base
158 {
159 public:
160 /* Time benefit and that specializing the function for this value would bring
161 about in this function alone. */
162 sreal local_time_benefit;
163 /* Time benefit that specializing the function for this value can bring about
164 in it's callees. */
165 sreal prop_time_benefit;
166 /* Size cost that specializing the function for this value would bring about
167 in this function alone. */
168 int local_size_cost;
169 /* Size cost that specializing the function for this value can bring about in
170 it's callees. */
171 int prop_size_cost;
172
173 ipcp_value_base ()
174 : local_time_benefit (0), prop_time_benefit (0),
175 local_size_cost (0), prop_size_cost (0) {}
176 };
177
178 /* Describes one particular value stored in struct ipcp_lattice. */
179
180 template <typename valtype>
181 class ipcp_value : public ipcp_value_base
182 {
183 public:
184 /* The actual value for the given parameter. */
185 valtype value;
186 /* The list of sources from which this value originates. */
187 ipcp_value_source <valtype> *sources = nullptr;
188 /* Next pointers in a linked list of all values in a lattice. */
189 ipcp_value *next = nullptr;
190 /* Next pointers in a linked list of values in a strongly connected component
191 of values. */
192 ipcp_value *scc_next = nullptr;
193 /* Next pointers in a linked list of SCCs of values sorted topologically
194 according their sources. */
195 ipcp_value *topo_next = nullptr;
196 /* A specialized node created for this value, NULL if none has been (so far)
197 created. */
198 cgraph_node *spec_node = nullptr;
199 /* Depth first search number and low link for topological sorting of
200 values. */
201 int dfs = 0;
202 int low_link = 0;
203 /* SCC number to identify values which recursively feed into each other.
204 Values in the same SCC have the same SCC number. */
205 int scc_no = 0;
206 /* Non zero if the value is generated from another value in the same lattice
207 for a self-recursive call, the actual number is how many times the
208 operation has been performed. In the unlikely event of the value being
209 present in two chains fo self-recursive value generation chains, it is the
210 maximum. */
211 unsigned self_recursion_generated_level = 0;
212 /* True if this value is currently on the topo-sort stack. */
213 bool on_stack = false;
214
215 void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
216 HOST_WIDE_INT offset);
217
218 /* Return true if both THIS value and O feed into each other. */
219
220 bool same_scc (const ipcp_value<valtype> *o)
221 {
222 return o->scc_no == scc_no;
223 }
224
225 /* Return true, if a this value has been generated for a self-recursive call as
226 a result of an arithmetic pass-through jump-function acting on a value in
227 the same lattice function. */
228
229 bool self_recursion_generated_p ()
230 {
231 return self_recursion_generated_level > 0;
232 }
233 };
234
235 /* Lattice describing potential values of a formal parameter of a function, or
236 a part of an aggregate. TOP is represented by a lattice with zero values
237 and with contains_variable and bottom flags cleared. BOTTOM is represented
238 by a lattice with the bottom flag set. In that case, values and
239 contains_variable flag should be disregarded. */
240
241 template <typename valtype>
242 struct ipcp_lattice
243 {
244 public:
245 /* The list of known values and types in this lattice. Note that values are
246 not deallocated if a lattice is set to bottom because there may be value
247 sources referencing them. */
248 ipcp_value<valtype> *values;
249 /* Number of known values and types in this lattice. */
250 int values_count;
251 /* The lattice contains a variable component (in addition to values). */
252 bool contains_variable;
253 /* The value of the lattice is bottom (i.e. variable and unusable for any
254 propagation). */
255 bool bottom;
256
257 inline bool is_single_const ();
258 inline bool set_to_bottom ();
259 inline bool set_contains_variable ();
260 bool add_value (valtype newval, cgraph_edge *cs,
261 ipcp_value<valtype> *src_val = NULL,
262 int src_idx = 0, HOST_WIDE_INT offset = -1,
263 ipcp_value<valtype> **val_p = NULL,
264 unsigned same_lat_gen_level = 0);
265 void print (FILE * f, bool dump_sources, bool dump_benefits);
266 };
267
268 /* Lattice of tree values with an offset to describe a part of an
269 aggregate. */
270
271 struct ipcp_agg_lattice : public ipcp_lattice<tree>
272 {
273 public:
274 /* Offset that is being described by this lattice. */
275 HOST_WIDE_INT offset;
276 /* Size so that we don't have to re-compute it every time we traverse the
277 list. Must correspond to TYPE_SIZE of all lat values. */
278 HOST_WIDE_INT size;
279 /* Next element of the linked list. */
280 struct ipcp_agg_lattice *next;
281 };
282
283 /* Lattice of known bits, only capable of holding one value.
284 Bitwise constant propagation propagates which bits of a
285 value are constant.
286 For eg:
287 int f(int x)
288 {
289 return some_op (x);
290 }
291
292 int f1(int y)
293 {
294 if (cond)
295 return f (y & 0xff);
296 else
297 return f (y & 0xf);
298 }
299
300 In the above case, the param 'x' will always have all
301 the bits (except the bits in lsb) set to 0.
302 Hence the mask of 'x' would be 0xff. The mask
303 reflects that the bits in lsb are unknown.
304 The actual propagated value is given by m_value & ~m_mask. */
305
306 class ipcp_bits_lattice
307 {
308 public:
309 bool bottom_p () { return m_lattice_val == IPA_BITS_VARYING; }
310 bool top_p () { return m_lattice_val == IPA_BITS_UNDEFINED; }
311 bool constant_p () { return m_lattice_val == IPA_BITS_CONSTANT; }
312 bool set_to_bottom ();
313 bool set_to_constant (widest_int, widest_int);
314
315 widest_int get_value () { return m_value; }
316 widest_int get_mask () { return m_mask; }
317
318 bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
319 enum tree_code, tree);
320
321 bool meet_with (widest_int, widest_int, unsigned);
322
323 void print (FILE *);
324
325 private:
326 enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
327
328 /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
329 If a bit in mask is set to 0, then the corresponding bit in
330 value is known to be constant. */
331 widest_int m_value, m_mask;
332
333 bool meet_with_1 (widest_int, widest_int, unsigned);
334 void get_value_and_mask (tree, widest_int *, widest_int *);
335 };
336
337 /* Lattice of value ranges. */
338
339 class ipcp_vr_lattice
340 {
341 public:
342 value_range m_vr;
343
344 inline bool bottom_p () const;
345 inline bool top_p () const;
346 inline bool set_to_bottom ();
347 bool meet_with (const value_range *p_vr);
348 bool meet_with (const ipcp_vr_lattice &other);
349 void init () { gcc_assert (m_vr.undefined_p ()); }
350 void print (FILE * f);
351
352 private:
353 bool meet_with_1 (const value_range *other_vr);
354 };
355
356 /* Structure containing lattices for a parameter itself and for pieces of
357 aggregates that are passed in the parameter or by a reference in a parameter
358 plus some other useful flags. */
359
360 class ipcp_param_lattices
361 {
362 public:
363 /* Lattice describing the value of the parameter itself. */
364 ipcp_lattice<tree> itself;
365 /* Lattice describing the polymorphic contexts of a parameter. */
366 ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
367 /* Lattices describing aggregate parts. */
368 ipcp_agg_lattice *aggs;
369 /* Lattice describing known bits. */
370 ipcp_bits_lattice bits_lattice;
371 /* Lattice describing value range. */
372 ipcp_vr_lattice m_value_range;
373 /* Number of aggregate lattices */
374 int aggs_count;
375 /* True if aggregate data were passed by reference (as opposed to by
376 value). */
377 bool aggs_by_ref;
378 /* All aggregate lattices contain a variable component (in addition to
379 values). */
380 bool aggs_contain_variable;
381 /* The value of all aggregate lattices is bottom (i.e. variable and unusable
382 for any propagation). */
383 bool aggs_bottom;
384
385 /* There is a virtual call based on this parameter. */
386 bool virt_call;
387 };
388
389 /* Allocation pools for values and their sources in ipa-cp. */
390
391 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
392 ("IPA-CP constant values");
393
394 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
395 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
396
397 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
398 ("IPA-CP value sources");
399
400 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
401 ("IPA_CP aggregate lattices");
402
403 /* Base count to use in heuristics when using profile feedback. */
404
405 static profile_count base_count;
406
407 /* Original overall size of the program. */
408
409 static long overall_size, orig_overall_size;
410
411 /* Node name to unique clone suffix number map. */
412 static hash_map<const char *, unsigned> *clone_num_suffixes;
413
414 /* Return the param lattices structure corresponding to the Ith formal
415 parameter of the function described by INFO. */
416 static inline class ipcp_param_lattices *
417 ipa_get_parm_lattices (class ipa_node_params *info, int i)
418 {
419 gcc_assert (i >= 0 && i < ipa_get_param_count (info));
420 gcc_checking_assert (!info->ipcp_orig_node);
421 gcc_checking_assert (info->lattices);
422 return &(info->lattices[i]);
423 }
424
425 /* Return the lattice corresponding to the scalar value of the Ith formal
426 parameter of the function described by INFO. */
427 static inline ipcp_lattice<tree> *
428 ipa_get_scalar_lat (class ipa_node_params *info, int i)
429 {
430 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
431 return &plats->itself;
432 }
433
434 /* Return the lattice corresponding to the scalar value of the Ith formal
435 parameter of the function described by INFO. */
436 static inline ipcp_lattice<ipa_polymorphic_call_context> *
437 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
438 {
439 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
440 return &plats->ctxlat;
441 }
442
443 /* Return whether LAT is a lattice with a single constant and without an
444 undefined value. */
445
446 template <typename valtype>
447 inline bool
448 ipcp_lattice<valtype>::is_single_const ()
449 {
450 if (bottom || contains_variable || values_count != 1)
451 return false;
452 else
453 return true;
454 }
455
456 /* Print V which is extracted from a value in a lattice to F. */
457
458 static void
459 print_ipcp_constant_value (FILE * f, tree v)
460 {
461 if (TREE_CODE (v) == ADDR_EXPR
462 && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
463 {
464 fprintf (f, "& ");
465 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
466 }
467 else
468 print_generic_expr (f, v);
469 }
470
471 /* Print V which is extracted from a value in a lattice to F. */
472
473 static void
474 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
475 {
476 v.dump(f, false);
477 }
478
479 /* Print a lattice LAT to F. */
480
481 template <typename valtype>
482 void
483 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
484 {
485 ipcp_value<valtype> *val;
486 bool prev = false;
487
488 if (bottom)
489 {
490 fprintf (f, "BOTTOM\n");
491 return;
492 }
493
494 if (!values_count && !contains_variable)
495 {
496 fprintf (f, "TOP\n");
497 return;
498 }
499
500 if (contains_variable)
501 {
502 fprintf (f, "VARIABLE");
503 prev = true;
504 if (dump_benefits)
505 fprintf (f, "\n");
506 }
507
508 for (val = values; val; val = val->next)
509 {
510 if (dump_benefits && prev)
511 fprintf (f, " ");
512 else if (!dump_benefits && prev)
513 fprintf (f, ", ");
514 else
515 prev = true;
516
517 print_ipcp_constant_value (f, val->value);
518
519 if (dump_sources)
520 {
521 ipcp_value_source<valtype> *s;
522
523 if (val->self_recursion_generated_p ())
524 fprintf (f, " [self_gen(%i), from:",
525 val->self_recursion_generated_level);
526 else
527 fprintf (f, " [scc: %i, from:", val->scc_no);
528 for (s = val->sources; s; s = s->next)
529 fprintf (f, " %i(%f)", s->cs->caller->order,
530 s->cs->sreal_frequency ().to_double ());
531 fprintf (f, "]");
532 }
533
534 if (dump_benefits)
535 fprintf (f, " [loc_time: %g, loc_size: %i, "
536 "prop_time: %g, prop_size: %i]\n",
537 val->local_time_benefit.to_double (), val->local_size_cost,
538 val->prop_time_benefit.to_double (), val->prop_size_cost);
539 }
540 if (!dump_benefits)
541 fprintf (f, "\n");
542 }
543
544 void
545 ipcp_bits_lattice::print (FILE *f)
546 {
547 if (top_p ())
548 fprintf (f, " Bits unknown (TOP)\n");
549 else if (bottom_p ())
550 fprintf (f, " Bits unusable (BOTTOM)\n");
551 else
552 {
553 fprintf (f, " Bits: value = "); print_hex (get_value (), f);
554 fprintf (f, ", mask = "); print_hex (get_mask (), f);
555 fprintf (f, "\n");
556 }
557 }
558
559 /* Print value range lattice to F. */
560
561 void
562 ipcp_vr_lattice::print (FILE * f)
563 {
564 dump_value_range (f, &m_vr);
565 }
566
567 /* Print all ipcp_lattices of all functions to F. */
568
569 static void
570 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
571 {
572 struct cgraph_node *node;
573 int i, count;
574
575 fprintf (f, "\nLattices:\n");
576 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
577 {
578 class ipa_node_params *info;
579
580 info = ipa_node_params_sum->get (node);
581 /* Skip unoptimized functions and constprop clones since we don't make
582 lattices for them. */
583 if (!info || info->ipcp_orig_node)
584 continue;
585 fprintf (f, " Node: %s:\n", node->dump_name ());
586 count = ipa_get_param_count (info);
587 for (i = 0; i < count; i++)
588 {
589 struct ipcp_agg_lattice *aglat;
590 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
591 fprintf (f, " param [%d]: ", i);
592 plats->itself.print (f, dump_sources, dump_benefits);
593 fprintf (f, " ctxs: ");
594 plats->ctxlat.print (f, dump_sources, dump_benefits);
595 plats->bits_lattice.print (f);
596 fprintf (f, " ");
597 plats->m_value_range.print (f);
598 fprintf (f, "\n");
599 if (plats->virt_call)
600 fprintf (f, " virt_call flag set\n");
601
602 if (plats->aggs_bottom)
603 {
604 fprintf (f, " AGGS BOTTOM\n");
605 continue;
606 }
607 if (plats->aggs_contain_variable)
608 fprintf (f, " AGGS VARIABLE\n");
609 for (aglat = plats->aggs; aglat; aglat = aglat->next)
610 {
611 fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
612 plats->aggs_by_ref ? "ref " : "", aglat->offset);
613 aglat->print (f, dump_sources, dump_benefits);
614 }
615 }
616 }
617 }
618
619 /* Determine whether it is at all technically possible to create clones of NODE
620 and store this information in the ipa_node_params structure associated
621 with NODE. */
622
623 static void
624 determine_versionability (struct cgraph_node *node,
625 class ipa_node_params *info)
626 {
627 const char *reason = NULL;
628
629 /* There are a number of generic reasons functions cannot be versioned. We
630 also cannot remove parameters if there are type attributes such as fnspec
631 present. */
632 if (node->alias || node->thunk)
633 reason = "alias or thunk";
634 else if (!node->versionable)
635 reason = "not a tree_versionable_function";
636 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
637 reason = "insufficient body availability";
638 else if (!opt_for_fn (node->decl, optimize)
639 || !opt_for_fn (node->decl, flag_ipa_cp))
640 reason = "non-optimized function";
641 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
642 {
643 /* Ideally we should clone the SIMD clones themselves and create
644 vector copies of them, so IPA-cp and SIMD clones can happily
645 coexist, but that may not be worth the effort. */
646 reason = "function has SIMD clones";
647 }
648 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
649 {
650 /* Ideally we should clone the target clones themselves and create
651 copies of them, so IPA-cp and target clones can happily
652 coexist, but that may not be worth the effort. */
653 reason = "function target_clones attribute";
654 }
655 /* Don't clone decls local to a comdat group; it breaks and for C++
656 decloned constructors, inlining is always better anyway. */
657 else if (node->comdat_local_p ())
658 reason = "comdat-local function";
659 else if (node->calls_comdat_local)
660 {
661 /* TODO: call is versionable if we make sure that all
662 callers are inside of a comdat group. */
663 reason = "calls comdat-local function";
664 }
665
666 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
667 work only when inlined. Cloning them may still lead to better code
668 because ipa-cp will not give up on cloning further. If the function is
669 external this however leads to wrong code because we may end up producing
670 offline copy of the function. */
671 if (DECL_EXTERNAL (node->decl))
672 for (cgraph_edge *edge = node->callees; !reason && edge;
673 edge = edge->next_callee)
674 if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
675 {
676 if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
677 reason = "external function which calls va_arg_pack";
678 if (DECL_FUNCTION_CODE (edge->callee->decl)
679 == BUILT_IN_VA_ARG_PACK_LEN)
680 reason = "external function which calls va_arg_pack_len";
681 }
682
683 if (reason && dump_file && !node->alias && !node->thunk)
684 fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
685 node->dump_name (), reason);
686
687 info->versionable = (reason == NULL);
688 }
689
690 /* Return true if it is at all technically possible to create clones of a
691 NODE. */
692
693 static bool
694 ipcp_versionable_function_p (struct cgraph_node *node)
695 {
696 ipa_node_params *info = ipa_node_params_sum->get (node);
697 return info && info->versionable;
698 }
699
700 /* Structure holding accumulated information about callers of a node. */
701
702 struct caller_statistics
703 {
704 /* If requested (see below), self-recursive call counts are summed into this
705 field. */
706 profile_count rec_count_sum;
707 /* The sum of all ipa counts of all the other (non-recursive) calls. */
708 profile_count count_sum;
709 /* Sum of all frequencies for all calls. */
710 sreal freq_sum;
711 /* Number of calls and hot calls respectively. */
712 int n_calls, n_hot_calls;
713 /* If itself is set up, also count the number of non-self-recursive
714 calls. */
715 int n_nonrec_calls;
716 /* If non-NULL, this is the node itself and calls from it should have their
717 counts included in rec_count_sum and not count_sum. */
718 cgraph_node *itself;
719 };
720
721 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
722 from IGNORED_CALLER are not counted. */
723
724 static inline void
725 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
726 {
727 stats->rec_count_sum = profile_count::zero ();
728 stats->count_sum = profile_count::zero ();
729 stats->n_calls = 0;
730 stats->n_hot_calls = 0;
731 stats->n_nonrec_calls = 0;
732 stats->freq_sum = 0;
733 stats->itself = itself;
734 }
735
736 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
737 non-thunk incoming edges to NODE. */
738
739 static bool
740 gather_caller_stats (struct cgraph_node *node, void *data)
741 {
742 struct caller_statistics *stats = (struct caller_statistics *) data;
743 struct cgraph_edge *cs;
744
745 for (cs = node->callers; cs; cs = cs->next_caller)
746 if (!cs->caller->thunk)
747 {
748 ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
749 if (info && info->node_dead)
750 continue;
751
752 if (cs->count.ipa ().initialized_p ())
753 {
754 if (stats->itself && stats->itself == cs->caller)
755 stats->rec_count_sum += cs->count.ipa ();
756 else
757 stats->count_sum += cs->count.ipa ();
758 }
759 stats->freq_sum += cs->sreal_frequency ();
760 stats->n_calls++;
761 if (stats->itself && stats->itself != cs->caller)
762 stats->n_nonrec_calls++;
763
764 if (cs->maybe_hot_p ())
765 stats->n_hot_calls ++;
766 }
767 return false;
768
769 }
770
771 /* Return true if this NODE is viable candidate for cloning. */
772
773 static bool
774 ipcp_cloning_candidate_p (struct cgraph_node *node)
775 {
776 struct caller_statistics stats;
777
778 gcc_checking_assert (node->has_gimple_body_p ());
779
780 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
781 {
782 if (dump_file)
783 fprintf (dump_file, "Not considering %s for cloning; "
784 "-fipa-cp-clone disabled.\n",
785 node->dump_name ());
786 return false;
787 }
788
789 if (node->optimize_for_size_p ())
790 {
791 if (dump_file)
792 fprintf (dump_file, "Not considering %s for cloning; "
793 "optimizing it for size.\n",
794 node->dump_name ());
795 return false;
796 }
797
798 init_caller_stats (&stats);
799 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
800
801 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
802 {
803 if (dump_file)
804 fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
805 node->dump_name ());
806 return true;
807 }
808
809 /* When profile is available and function is hot, propagate into it even if
810 calls seems cold; constant propagation can improve function's speed
811 significantly. */
812 if (stats.count_sum > profile_count::zero ()
813 && node->count.ipa ().initialized_p ())
814 {
815 if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
816 {
817 if (dump_file)
818 fprintf (dump_file, "Considering %s for cloning; "
819 "usually called directly.\n",
820 node->dump_name ());
821 return true;
822 }
823 }
824 if (!stats.n_hot_calls)
825 {
826 if (dump_file)
827 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
828 node->dump_name ());
829 return false;
830 }
831 if (dump_file)
832 fprintf (dump_file, "Considering %s for cloning.\n",
833 node->dump_name ());
834 return true;
835 }
836
837 template <typename valtype>
838 class value_topo_info
839 {
840 public:
841 /* Head of the linked list of topologically sorted values. */
842 ipcp_value<valtype> *values_topo;
843 /* Stack for creating SCCs, represented by a linked list too. */
844 ipcp_value<valtype> *stack;
845 /* Counter driving the algorithm in add_val_to_toposort. */
846 int dfs_counter;
847
848 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
849 {}
850 void add_val (ipcp_value<valtype> *cur_val);
851 void propagate_effects ();
852 };
853
854 /* Arrays representing a topological ordering of call graph nodes and a stack
855 of nodes used during constant propagation and also data required to perform
856 topological sort of values and propagation of benefits in the determined
857 order. */
858
859 class ipa_topo_info
860 {
861 public:
862 /* Array with obtained topological order of cgraph nodes. */
863 struct cgraph_node **order;
864 /* Stack of cgraph nodes used during propagation within SCC until all values
865 in the SCC stabilize. */
866 struct cgraph_node **stack;
867 int nnodes, stack_top;
868
869 value_topo_info<tree> constants;
870 value_topo_info<ipa_polymorphic_call_context> contexts;
871
872 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
873 constants ()
874 {}
875 };
876
877 /* Skip edges from and to nodes without ipa_cp enabled.
878 Ignore not available symbols. */
879
880 static bool
881 ignore_edge_p (cgraph_edge *e)
882 {
883 enum availability avail;
884 cgraph_node *ultimate_target
885 = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
886
887 return (avail <= AVAIL_INTERPOSABLE
888 || !opt_for_fn (ultimate_target->decl, optimize)
889 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
890 }
891
892 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
893
894 static void
895 build_toporder_info (class ipa_topo_info *topo)
896 {
897 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
898 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
899
900 gcc_checking_assert (topo->stack_top == 0);
901 topo->nnodes = ipa_reduced_postorder (topo->order, true,
902 ignore_edge_p);
903 }
904
905 /* Free information about strongly connected components and the arrays in
906 TOPO. */
907
908 static void
909 free_toporder_info (class ipa_topo_info *topo)
910 {
911 ipa_free_postorder_info ();
912 free (topo->order);
913 free (topo->stack);
914 }
915
916 /* Add NODE to the stack in TOPO, unless it is already there. */
917
918 static inline void
919 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
920 {
921 ipa_node_params *info = ipa_node_params_sum->get (node);
922 if (info->node_enqueued)
923 return;
924 info->node_enqueued = 1;
925 topo->stack[topo->stack_top++] = node;
926 }
927
928 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
929 is empty. */
930
931 static struct cgraph_node *
932 pop_node_from_stack (class ipa_topo_info *topo)
933 {
934 if (topo->stack_top)
935 {
936 struct cgraph_node *node;
937 topo->stack_top--;
938 node = topo->stack[topo->stack_top];
939 ipa_node_params_sum->get (node)->node_enqueued = 0;
940 return node;
941 }
942 else
943 return NULL;
944 }
945
946 /* Set lattice LAT to bottom and return true if it previously was not set as
947 such. */
948
949 template <typename valtype>
950 inline bool
951 ipcp_lattice<valtype>::set_to_bottom ()
952 {
953 bool ret = !bottom;
954 bottom = true;
955 return ret;
956 }
957
958 /* Mark lattice as containing an unknown value and return true if it previously
959 was not marked as such. */
960
961 template <typename valtype>
962 inline bool
963 ipcp_lattice<valtype>::set_contains_variable ()
964 {
965 bool ret = !contains_variable;
966 contains_variable = true;
967 return ret;
968 }
969
970 /* Set all aggregate lattices in PLATS to bottom and return true if they were
971 not previously set as such. */
972
973 static inline bool
974 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
975 {
976 bool ret = !plats->aggs_bottom;
977 plats->aggs_bottom = true;
978 return ret;
979 }
980
981 /* Mark all aggregate lattices in PLATS as containing an unknown value and
982 return true if they were not previously marked as such. */
983
984 static inline bool
985 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
986 {
987 bool ret = !plats->aggs_contain_variable;
988 plats->aggs_contain_variable = true;
989 return ret;
990 }
991
992 bool
993 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
994 {
995 return meet_with_1 (&other.m_vr);
996 }
997
998 /* Meet the current value of the lattice with value range described by VR
999 lattice. */
1000
1001 bool
1002 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1003 {
1004 return meet_with_1 (p_vr);
1005 }
1006
1007 /* Meet the current value of the lattice with value range described by
1008 OTHER_VR lattice. Return TRUE if anything changed. */
1009
1010 bool
1011 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1012 {
1013 if (bottom_p ())
1014 return false;
1015
1016 if (other_vr->varying_p ())
1017 return set_to_bottom ();
1018
1019 value_range save (m_vr);
1020 m_vr.union_ (other_vr);
1021 return !m_vr.equal_p (save);
1022 }
1023
1024 /* Return true if value range information in the lattice is yet unknown. */
1025
1026 bool
1027 ipcp_vr_lattice::top_p () const
1028 {
1029 return m_vr.undefined_p ();
1030 }
1031
1032 /* Return true if value range information in the lattice is known to be
1033 unusable. */
1034
1035 bool
1036 ipcp_vr_lattice::bottom_p () const
1037 {
1038 return m_vr.varying_p ();
1039 }
1040
1041 /* Set value range information in the lattice to bottom. Return true if it
1042 previously was in a different state. */
1043
1044 bool
1045 ipcp_vr_lattice::set_to_bottom ()
1046 {
1047 if (m_vr.varying_p ())
1048 return false;
1049 /* ?? We create all sorts of VARYING ranges for floats, structures,
1050 and other types which we cannot handle as ranges. We should
1051 probably avoid handling them throughout the pass, but it's easier
1052 to create a sensible VARYING here and let the lattice
1053 propagate. */
1054 m_vr.set_varying (integer_type_node);
1055 return true;
1056 }
1057
1058 /* Set lattice value to bottom, if it already isn't the case. */
1059
1060 bool
1061 ipcp_bits_lattice::set_to_bottom ()
1062 {
1063 if (bottom_p ())
1064 return false;
1065 m_lattice_val = IPA_BITS_VARYING;
1066 m_value = 0;
1067 m_mask = -1;
1068 return true;
1069 }
1070
1071 /* Set to constant if it isn't already. Only meant to be called
1072 when switching state from TOP. */
1073
1074 bool
1075 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1076 {
1077 gcc_assert (top_p ());
1078 m_lattice_val = IPA_BITS_CONSTANT;
1079 m_value = wi::bit_and (wi::bit_not (mask), value);
1080 m_mask = mask;
1081 return true;
1082 }
1083
1084 /* Convert operand to value, mask form. */
1085
1086 void
1087 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1088 {
1089 wide_int get_nonzero_bits (const_tree);
1090
1091 if (TREE_CODE (operand) == INTEGER_CST)
1092 {
1093 *valuep = wi::to_widest (operand);
1094 *maskp = 0;
1095 }
1096 else
1097 {
1098 *valuep = 0;
1099 *maskp = -1;
1100 }
1101 }
1102
1103 /* Meet operation, similar to ccp_lattice_meet, we xor values
1104 if this->value, value have different values at same bit positions, we want
1105 to drop that bit to varying. Return true if mask is changed.
1106 This function assumes that the lattice value is in CONSTANT state */
1107
1108 bool
1109 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1110 unsigned precision)
1111 {
1112 gcc_assert (constant_p ());
1113
1114 widest_int old_mask = m_mask;
1115 m_mask = (m_mask | mask) | (m_value ^ value);
1116 m_value &= ~m_mask;
1117
1118 if (wi::sext (m_mask, precision) == -1)
1119 return set_to_bottom ();
1120
1121 return m_mask != old_mask;
1122 }
1123
1124 /* Meet the bits lattice with operand
1125 described by <value, mask, sgn, precision. */
1126
1127 bool
1128 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1129 unsigned precision)
1130 {
1131 if (bottom_p ())
1132 return false;
1133
1134 if (top_p ())
1135 {
1136 if (wi::sext (mask, precision) == -1)
1137 return set_to_bottom ();
1138 return set_to_constant (value, mask);
1139 }
1140
1141 return meet_with_1 (value, mask, precision);
1142 }
1143
1144 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1145 if code is binary operation or bit_value_unop (other) if code is unary op.
1146 In the case when code is nop_expr, no adjustment is required. */
1147
1148 bool
1149 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1150 signop sgn, enum tree_code code, tree operand)
1151 {
1152 if (other.bottom_p ())
1153 return set_to_bottom ();
1154
1155 if (bottom_p () || other.top_p ())
1156 return false;
1157
1158 widest_int adjusted_value, adjusted_mask;
1159
1160 if (TREE_CODE_CLASS (code) == tcc_binary)
1161 {
1162 tree type = TREE_TYPE (operand);
1163 widest_int o_value, o_mask;
1164 get_value_and_mask (operand, &o_value, &o_mask);
1165
1166 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1167 sgn, precision, other.get_value (), other.get_mask (),
1168 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1169
1170 if (wi::sext (adjusted_mask, precision) == -1)
1171 return set_to_bottom ();
1172 }
1173
1174 else if (TREE_CODE_CLASS (code) == tcc_unary)
1175 {
1176 bit_value_unop (code, sgn, precision, &adjusted_value,
1177 &adjusted_mask, sgn, precision, other.get_value (),
1178 other.get_mask ());
1179
1180 if (wi::sext (adjusted_mask, precision) == -1)
1181 return set_to_bottom ();
1182 }
1183
1184 else
1185 return set_to_bottom ();
1186
1187 if (top_p ())
1188 {
1189 if (wi::sext (adjusted_mask, precision) == -1)
1190 return set_to_bottom ();
1191 return set_to_constant (adjusted_value, adjusted_mask);
1192 }
1193 else
1194 return meet_with_1 (adjusted_value, adjusted_mask, precision);
1195 }
1196
1197 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1198 return true is any of them has not been marked as such so far. */
1199
1200 static inline bool
1201 set_all_contains_variable (class ipcp_param_lattices *plats)
1202 {
1203 bool ret;
1204 ret = plats->itself.set_contains_variable ();
1205 ret |= plats->ctxlat.set_contains_variable ();
1206 ret |= set_agg_lats_contain_variable (plats);
1207 ret |= plats->bits_lattice.set_to_bottom ();
1208 ret |= plats->m_value_range.set_to_bottom ();
1209 return ret;
1210 }
1211
1212 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1213 points to by the number of callers to NODE. */
1214
1215 static bool
1216 count_callers (cgraph_node *node, void *data)
1217 {
1218 int *caller_count = (int *) data;
1219
1220 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1221 /* Local thunks can be handled transparently, but if the thunk cannot
1222 be optimized out, count it as a real use. */
1223 if (!cs->caller->thunk || !cs->caller->local)
1224 ++*caller_count;
1225 return false;
1226 }
1227
1228 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1229 the one caller of some other node. Set the caller's corresponding flag. */
1230
1231 static bool
1232 set_single_call_flag (cgraph_node *node, void *)
1233 {
1234 cgraph_edge *cs = node->callers;
1235 /* Local thunks can be handled transparently, skip them. */
1236 while (cs && cs->caller->thunk && cs->caller->local)
1237 cs = cs->next_caller;
1238 if (cs)
1239 if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1240 {
1241 info->node_calling_single_call = true;
1242 return true;
1243 }
1244 return false;
1245 }
1246
1247 /* Initialize ipcp_lattices. */
1248
1249 static void
1250 initialize_node_lattices (struct cgraph_node *node)
1251 {
1252 ipa_node_params *info = ipa_node_params_sum->get (node);
1253 struct cgraph_edge *ie;
1254 bool disable = false, variable = false;
1255 int i;
1256
1257 gcc_checking_assert (node->has_gimple_body_p ());
1258
1259 if (!ipa_get_param_count (info))
1260 disable = true;
1261 else if (node->local)
1262 {
1263 int caller_count = 0;
1264 node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1265 true);
1266 gcc_checking_assert (caller_count > 0);
1267 if (caller_count == 1)
1268 node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1269 NULL, true);
1270 }
1271 else
1272 {
1273 /* When cloning is allowed, we can assume that externally visible
1274 functions are not called. We will compensate this by cloning
1275 later. */
1276 if (ipcp_versionable_function_p (node)
1277 && ipcp_cloning_candidate_p (node))
1278 variable = true;
1279 else
1280 disable = true;
1281 }
1282
1283 if (dump_file && (dump_flags & TDF_DETAILS)
1284 && !node->alias && !node->thunk)
1285 {
1286 fprintf (dump_file, "Initializing lattices of %s\n",
1287 node->dump_name ());
1288 if (disable || variable)
1289 fprintf (dump_file, " Marking all lattices as %s\n",
1290 disable ? "BOTTOM" : "VARIABLE");
1291 }
1292
1293 auto_vec<bool, 16> surviving_params;
1294 bool pre_modified = false;
1295
1296 clone_info *cinfo = clone_info::get (node);
1297
1298 if (!disable && cinfo && cinfo->param_adjustments)
1299 {
1300 /* At the moment all IPA optimizations should use the number of
1301 parameters of the prevailing decl as the m_always_copy_start.
1302 Handling any other value would complicate the code below, so for the
1303 time bing let's only assert it is so. */
1304 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1305 == ipa_get_param_count (info))
1306 || cinfo->param_adjustments->m_always_copy_start < 0);
1307
1308 pre_modified = true;
1309 cinfo->param_adjustments->get_surviving_params (&surviving_params);
1310
1311 if (dump_file && (dump_flags & TDF_DETAILS)
1312 && !node->alias && !node->thunk)
1313 {
1314 bool first = true;
1315 for (int j = 0; j < ipa_get_param_count (info); j++)
1316 {
1317 if (j < (int) surviving_params.length ()
1318 && surviving_params[j])
1319 continue;
1320 if (first)
1321 {
1322 fprintf (dump_file,
1323 " The following parameters are dead on arrival:");
1324 first = false;
1325 }
1326 fprintf (dump_file, " %u", j);
1327 }
1328 if (!first)
1329 fprintf (dump_file, "\n");
1330 }
1331 }
1332
1333 for (i = 0; i < ipa_get_param_count (info); i++)
1334 {
1335 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1336 if (disable
1337 || !ipa_get_type (info, i)
1338 || (pre_modified && (surviving_params.length () <= (unsigned) i
1339 || !surviving_params[i])))
1340 {
1341 plats->itself.set_to_bottom ();
1342 plats->ctxlat.set_to_bottom ();
1343 set_agg_lats_to_bottom (plats);
1344 plats->bits_lattice.set_to_bottom ();
1345 plats->m_value_range.m_vr = value_range ();
1346 plats->m_value_range.set_to_bottom ();
1347 }
1348 else
1349 {
1350 plats->m_value_range.init ();
1351 if (variable)
1352 set_all_contains_variable (plats);
1353 }
1354 }
1355
1356 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1357 if (ie->indirect_info->polymorphic
1358 && ie->indirect_info->param_index >= 0)
1359 {
1360 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1361 ipa_get_parm_lattices (info,
1362 ie->indirect_info->param_index)->virt_call = 1;
1363 }
1364 }
1365
1366 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1367 PARAM_TYPE. */
1368
1369 static bool
1370 ipacp_value_safe_for_type (tree param_type, tree value)
1371 {
1372 tree val_type = TREE_TYPE (value);
1373 if (param_type == val_type
1374 || useless_type_conversion_p (param_type, val_type)
1375 || fold_convertible_p (param_type, value))
1376 return true;
1377 else
1378 return false;
1379 }
1380
1381 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1382
1383 static bool
1384 values_equal_for_ipcp_p (tree x, tree y)
1385 {
1386 gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1387
1388 if (x == y)
1389 return true;
1390
1391 if (TREE_CODE (x) == ADDR_EXPR
1392 && TREE_CODE (y) == ADDR_EXPR
1393 && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1394 && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1395 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1396 DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1397 else
1398 return operand_equal_p (x, y, 0);
1399 }
1400
1401 /* Return the result of a (possibly arithmetic) operation on the constant
1402 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1403 the type of the parameter to which the result is passed. Return
1404 NULL_TREE if that cannot be determined or be considered an
1405 interprocedural invariant. */
1406
1407 static tree
1408 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1409 tree res_type)
1410 {
1411 tree res;
1412
1413 if (opcode == NOP_EXPR)
1414 return input;
1415 if (!is_gimple_ip_invariant (input))
1416 return NULL_TREE;
1417
1418 if (opcode == ASSERT_EXPR)
1419 {
1420 if (values_equal_for_ipcp_p (input, operand))
1421 return input;
1422 else
1423 return NULL_TREE;
1424 }
1425
1426 if (!res_type)
1427 {
1428 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1429 res_type = boolean_type_node;
1430 else if (expr_type_first_operand_type_p (opcode))
1431 res_type = TREE_TYPE (input);
1432 else
1433 return NULL_TREE;
1434 }
1435
1436 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1437 res = fold_unary (opcode, res_type, input);
1438 else
1439 res = fold_binary (opcode, res_type, input, operand);
1440
1441 if (res && !is_gimple_ip_invariant (res))
1442 return NULL_TREE;
1443
1444 return res;
1445 }
1446
1447 /* Return the result of a (possibly arithmetic) pass through jump function
1448 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1449 to which the result is passed. Return NULL_TREE if that cannot be
1450 determined or be considered an interprocedural invariant. */
1451
1452 static tree
1453 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1454 tree res_type)
1455 {
1456 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1457 input,
1458 ipa_get_jf_pass_through_operand (jfunc),
1459 res_type);
1460 }
1461
1462 /* Return the result of an ancestor jump function JFUNC on the constant value
1463 INPUT. Return NULL_TREE if that cannot be determined. */
1464
1465 static tree
1466 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1467 {
1468 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1469 if (TREE_CODE (input) == ADDR_EXPR)
1470 {
1471 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1472 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1473 if (known_eq (off, 0))
1474 return input;
1475 poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1476 return build1 (ADDR_EXPR, TREE_TYPE (input),
1477 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1478 build_int_cst (ptr_type_node, byte_offset)));
1479 }
1480 else
1481 return NULL_TREE;
1482 }
1483
1484 /* Determine whether JFUNC evaluates to a single known constant value and if
1485 so, return it. Otherwise return NULL. INFO describes the caller node or
1486 the one it is inlined to, so that pass-through jump functions can be
1487 evaluated. PARM_TYPE is the type of the parameter to which the result is
1488 passed. */
1489
1490 tree
1491 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1492 tree parm_type)
1493 {
1494 if (jfunc->type == IPA_JF_CONST)
1495 return ipa_get_jf_constant (jfunc);
1496 else if (jfunc->type == IPA_JF_PASS_THROUGH
1497 || jfunc->type == IPA_JF_ANCESTOR)
1498 {
1499 tree input;
1500 int idx;
1501
1502 if (jfunc->type == IPA_JF_PASS_THROUGH)
1503 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1504 else
1505 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1506
1507 if (info->ipcp_orig_node)
1508 input = info->known_csts[idx];
1509 else
1510 {
1511 ipcp_lattice<tree> *lat;
1512
1513 if (!info->lattices
1514 || idx >= ipa_get_param_count (info))
1515 return NULL_TREE;
1516 lat = ipa_get_scalar_lat (info, idx);
1517 if (!lat->is_single_const ())
1518 return NULL_TREE;
1519 input = lat->values->value;
1520 }
1521
1522 if (!input)
1523 return NULL_TREE;
1524
1525 if (jfunc->type == IPA_JF_PASS_THROUGH)
1526 return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1527 else
1528 return ipa_get_jf_ancestor_result (jfunc, input);
1529 }
1530 else
1531 return NULL_TREE;
1532 }
1533
1534 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1535 that INFO describes the caller node or the one it is inlined to, CS is the
1536 call graph edge corresponding to JFUNC and CSIDX index of the described
1537 parameter. */
1538
1539 ipa_polymorphic_call_context
1540 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1541 ipa_jump_func *jfunc)
1542 {
1543 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1544 ipa_polymorphic_call_context ctx;
1545 ipa_polymorphic_call_context *edge_ctx
1546 = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1547
1548 if (edge_ctx && !edge_ctx->useless_p ())
1549 ctx = *edge_ctx;
1550
1551 if (jfunc->type == IPA_JF_PASS_THROUGH
1552 || jfunc->type == IPA_JF_ANCESTOR)
1553 {
1554 ipa_polymorphic_call_context srcctx;
1555 int srcidx;
1556 bool type_preserved = true;
1557 if (jfunc->type == IPA_JF_PASS_THROUGH)
1558 {
1559 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1560 return ctx;
1561 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1562 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1563 }
1564 else
1565 {
1566 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1567 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1568 }
1569 if (info->ipcp_orig_node)
1570 {
1571 if (info->known_contexts.exists ())
1572 srcctx = info->known_contexts[srcidx];
1573 }
1574 else
1575 {
1576 if (!info->lattices
1577 || srcidx >= ipa_get_param_count (info))
1578 return ctx;
1579 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1580 lat = ipa_get_poly_ctx_lat (info, srcidx);
1581 if (!lat->is_single_const ())
1582 return ctx;
1583 srcctx = lat->values->value;
1584 }
1585 if (srcctx.useless_p ())
1586 return ctx;
1587 if (jfunc->type == IPA_JF_ANCESTOR)
1588 srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1589 if (!type_preserved)
1590 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1591 srcctx.combine_with (ctx);
1592 return srcctx;
1593 }
1594
1595 return ctx;
1596 }
1597
1598 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1599 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1600 the result is a range or an anti-range. */
1601
1602 static bool
1603 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1604 value_range *src_vr,
1605 enum tree_code operation,
1606 tree dst_type, tree src_type)
1607 {
1608 range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1609 if (dst_vr->varying_p () || dst_vr->undefined_p ())
1610 return false;
1611 return true;
1612 }
1613
1614 /* Determine value_range of JFUNC given that INFO describes the caller node or
1615 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1616 and PARM_TYPE of the parameter. */
1617
1618 value_range
1619 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1620 ipa_jump_func *jfunc, tree parm_type)
1621 {
1622 value_range vr;
1623 if (jfunc->m_vr)
1624 ipa_vr_operation_and_type_effects (&vr,
1625 jfunc->m_vr,
1626 NOP_EXPR, parm_type,
1627 jfunc->m_vr->type ());
1628 if (vr.singleton_p ())
1629 return vr;
1630 if (jfunc->type == IPA_JF_PASS_THROUGH)
1631 {
1632 int idx;
1633 ipcp_transformation *sum
1634 = ipcp_get_transformation_summary (cs->caller->inlined_to
1635 ? cs->caller->inlined_to
1636 : cs->caller);
1637 if (!sum || !sum->m_vr)
1638 return vr;
1639
1640 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1641
1642 if (!(*sum->m_vr)[idx].known)
1643 return vr;
1644 tree vr_type = ipa_get_type (info, idx);
1645 value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1646 wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1647 (*sum->m_vr)[idx].type);
1648
1649 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1650
1651 if (TREE_CODE_CLASS (operation) == tcc_unary)
1652 {
1653 value_range res;
1654
1655 if (ipa_vr_operation_and_type_effects (&res,
1656 &srcvr,
1657 operation, parm_type,
1658 vr_type))
1659 vr.intersect (res);
1660 }
1661 else
1662 {
1663 value_range op_res, res;
1664 tree op = ipa_get_jf_pass_through_operand (jfunc);
1665 value_range op_vr (op, op);
1666
1667 range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1668 if (ipa_vr_operation_and_type_effects (&res,
1669 &op_res,
1670 NOP_EXPR, parm_type,
1671 vr_type))
1672 vr.intersect (res);
1673 }
1674 }
1675 return vr;
1676 }
1677
1678 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1679 parameter with the given INDEX. */
1680
1681 static tree
1682 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1683 int index)
1684 {
1685 struct ipa_agg_replacement_value *aggval;
1686
1687 aggval = ipa_get_agg_replacements_for_node (node);
1688 while (aggval)
1689 {
1690 if (aggval->offset == offset
1691 && aggval->index == index)
1692 return aggval->value;
1693 aggval = aggval->next;
1694 }
1695 return NULL_TREE;
1696 }
1697
1698 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1699 single known constant value and if so, return it. Otherwise return NULL.
1700 NODE and INFO describes the caller node or the one it is inlined to, and
1701 its related info. */
1702
1703 static tree
1704 ipa_agg_value_from_node (class ipa_node_params *info,
1705 struct cgraph_node *node,
1706 struct ipa_agg_jf_item *item)
1707 {
1708 tree value = NULL_TREE;
1709 int src_idx;
1710
1711 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1712 return NULL_TREE;
1713
1714 if (item->jftype == IPA_JF_CONST)
1715 return item->value.constant;
1716
1717 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1718 || item->jftype == IPA_JF_LOAD_AGG);
1719
1720 src_idx = item->value.pass_through.formal_id;
1721
1722 if (info->ipcp_orig_node)
1723 {
1724 if (item->jftype == IPA_JF_PASS_THROUGH)
1725 value = info->known_csts[src_idx];
1726 else
1727 value = get_clone_agg_value (node, item->value.load_agg.offset,
1728 src_idx);
1729 }
1730 else if (info->lattices)
1731 {
1732 class ipcp_param_lattices *src_plats
1733 = ipa_get_parm_lattices (info, src_idx);
1734
1735 if (item->jftype == IPA_JF_PASS_THROUGH)
1736 {
1737 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1738
1739 if (!lat->is_single_const ())
1740 return NULL_TREE;
1741
1742 value = lat->values->value;
1743 }
1744 else if (src_plats->aggs
1745 && !src_plats->aggs_bottom
1746 && !src_plats->aggs_contain_variable
1747 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1748 {
1749 struct ipcp_agg_lattice *aglat;
1750
1751 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1752 {
1753 if (aglat->offset > item->value.load_agg.offset)
1754 break;
1755
1756 if (aglat->offset == item->value.load_agg.offset)
1757 {
1758 if (aglat->is_single_const ())
1759 value = aglat->values->value;
1760 break;
1761 }
1762 }
1763 }
1764 }
1765
1766 if (!value)
1767 return NULL_TREE;
1768
1769 if (item->jftype == IPA_JF_LOAD_AGG)
1770 {
1771 tree load_type = item->value.load_agg.type;
1772 tree value_type = TREE_TYPE (value);
1773
1774 /* Ensure value type is compatible with load type. */
1775 if (!useless_type_conversion_p (load_type, value_type))
1776 return NULL_TREE;
1777 }
1778
1779 return ipa_get_jf_arith_result (item->value.pass_through.operation,
1780 value,
1781 item->value.pass_through.operand,
1782 item->type);
1783 }
1784
1785 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1786 an aggregate and if so, return it. Otherwise return an empty set. NODE
1787 and INFO describes the caller node or the one it is inlined to, and its
1788 related info. */
1789
1790 struct ipa_agg_value_set
1791 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1792 struct ipa_agg_jump_function *agg_jfunc)
1793 {
1794 struct ipa_agg_value_set agg;
1795 struct ipa_agg_jf_item *item;
1796 int i;
1797
1798 agg.items = vNULL;
1799 agg.by_ref = agg_jfunc->by_ref;
1800
1801 FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1802 {
1803 tree value = ipa_agg_value_from_node (info, node, item);
1804
1805 if (value)
1806 {
1807 struct ipa_agg_value value_item;
1808
1809 value_item.offset = item->offset;
1810 value_item.value = value;
1811
1812 agg.items.safe_push (value_item);
1813 }
1814 }
1815 return agg;
1816 }
1817
1818 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1819 bottom, not containing a variable component and without any known value at
1820 the same time. */
1821
1822 DEBUG_FUNCTION void
1823 ipcp_verify_propagated_values (void)
1824 {
1825 struct cgraph_node *node;
1826
1827 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1828 {
1829 ipa_node_params *info = ipa_node_params_sum->get (node);
1830 if (!opt_for_fn (node->decl, flag_ipa_cp)
1831 || !opt_for_fn (node->decl, optimize))
1832 continue;
1833 int i, count = ipa_get_param_count (info);
1834
1835 for (i = 0; i < count; i++)
1836 {
1837 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1838
1839 if (!lat->bottom
1840 && !lat->contains_variable
1841 && lat->values_count == 0)
1842 {
1843 if (dump_file)
1844 {
1845 symtab->dump (dump_file);
1846 fprintf (dump_file, "\nIPA lattices after constant "
1847 "propagation, before gcc_unreachable:\n");
1848 print_all_lattices (dump_file, true, false);
1849 }
1850
1851 gcc_unreachable ();
1852 }
1853 }
1854 }
1855 }
1856
1857 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1858
1859 static bool
1860 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1861 ipa_polymorphic_call_context y)
1862 {
1863 return x.equal_to (y);
1864 }
1865
1866
1867 /* Add a new value source to the value represented by THIS, marking that a
1868 value comes from edge CS and (if the underlying jump function is a
1869 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1870 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1871 scalar value of the parameter itself or the offset within an aggregate. */
1872
1873 template <typename valtype>
1874 void
1875 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1876 int src_idx, HOST_WIDE_INT offset)
1877 {
1878 ipcp_value_source<valtype> *src;
1879
1880 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1881 src->offset = offset;
1882 src->cs = cs;
1883 src->val = src_val;
1884 src->index = src_idx;
1885
1886 src->next = sources;
1887 sources = src;
1888 }
1889
1890 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1891 SOURCE and clear all other fields. */
1892
1893 static ipcp_value<tree> *
1894 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1895 {
1896 ipcp_value<tree> *val;
1897
1898 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1899 val->value = cst;
1900 val->self_recursion_generated_level = same_lat_gen_level;
1901 return val;
1902 }
1903
1904 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1905 value to SOURCE and clear all other fields. */
1906
1907 static ipcp_value<ipa_polymorphic_call_context> *
1908 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1909 unsigned same_lat_gen_level)
1910 {
1911 ipcp_value<ipa_polymorphic_call_context> *val;
1912
1913 val = new (ipcp_poly_ctx_values_pool.allocate ())
1914 ipcp_value<ipa_polymorphic_call_context>();
1915 val->value = ctx;
1916 val->self_recursion_generated_level = same_lat_gen_level;
1917 return val;
1918 }
1919
1920 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1921 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1922 meaning. OFFSET -1 means the source is scalar and not a part of an
1923 aggregate. If non-NULL, VAL_P records address of existing or newly added
1924 ipcp_value.
1925
1926 If the value is generated for a self-recursive call as a result of an
1927 arithmetic pass-through jump-function acting on a value in the same lattice,
1928 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1929 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1930
1931 template <typename valtype>
1932 bool
1933 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1934 ipcp_value<valtype> *src_val,
1935 int src_idx, HOST_WIDE_INT offset,
1936 ipcp_value<valtype> **val_p,
1937 unsigned same_lat_gen_level)
1938 {
1939 ipcp_value<valtype> *val, *last_val = NULL;
1940
1941 if (val_p)
1942 *val_p = NULL;
1943
1944 if (bottom)
1945 return false;
1946
1947 for (val = values; val; last_val = val, val = val->next)
1948 if (values_equal_for_ipcp_p (val->value, newval))
1949 {
1950 if (val_p)
1951 *val_p = val;
1952
1953 if (val->self_recursion_generated_level < same_lat_gen_level)
1954 val->self_recursion_generated_level = same_lat_gen_level;
1955
1956 if (ipa_edge_within_scc (cs))
1957 {
1958 ipcp_value_source<valtype> *s;
1959 for (s = val->sources; s; s = s->next)
1960 if (s->cs == cs && s->val == src_val)
1961 break;
1962 if (s)
1963 return false;
1964 }
1965
1966 val->add_source (cs, src_val, src_idx, offset);
1967 return false;
1968 }
1969
1970 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1971 param_ipa_cp_value_list_size))
1972 {
1973 /* We can only free sources, not the values themselves, because sources
1974 of other values in this SCC might point to them. */
1975 for (val = values; val; val = val->next)
1976 {
1977 while (val->sources)
1978 {
1979 ipcp_value_source<valtype> *src = val->sources;
1980 val->sources = src->next;
1981 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
1982 }
1983 }
1984 values = NULL;
1985 return set_to_bottom ();
1986 }
1987
1988 values_count++;
1989 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
1990 val->add_source (cs, src_val, src_idx, offset);
1991 val->next = NULL;
1992
1993 /* Add the new value to end of value list, which can reduce iterations
1994 of propagation stage for recursive function. */
1995 if (last_val)
1996 last_val->next = val;
1997 else
1998 values = val;
1999
2000 if (val_p)
2001 *val_p = val;
2002
2003 return true;
2004 }
2005
2006 /* A helper function that returns result of operation specified by OPCODE on
2007 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2008 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2009 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2010 the result. */
2011
2012 static tree
2013 get_val_across_arith_op (enum tree_code opcode,
2014 tree opnd1_type,
2015 tree opnd2,
2016 ipcp_value<tree> *src_val,
2017 tree res_type)
2018 {
2019 tree opnd1 = src_val->value;
2020
2021 /* Skip source values that is incompatible with specified type. */
2022 if (opnd1_type
2023 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2024 return NULL_TREE;
2025
2026 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2027 }
2028
2029 /* Propagate values through an arithmetic transformation described by a jump
2030 function associated with edge CS, taking values from SRC_LAT and putting
2031 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2032 OPND2 is a constant value if transformation is a binary operation.
2033 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2034 a part of the aggregate. SRC_IDX is the index of the source parameter.
2035 RES_TYPE is the value type of result being propagated into. Return true if
2036 DEST_LAT changed. */
2037
2038 static bool
2039 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2040 enum tree_code opcode,
2041 tree opnd1_type,
2042 tree opnd2,
2043 ipcp_lattice<tree> *src_lat,
2044 ipcp_lattice<tree> *dest_lat,
2045 HOST_WIDE_INT src_offset,
2046 int src_idx,
2047 tree res_type)
2048 {
2049 ipcp_value<tree> *src_val;
2050 bool ret = false;
2051
2052 /* Due to circular dependencies, propagating within an SCC through arithmetic
2053 transformation would create infinite number of values. But for
2054 self-feeding recursive function, we could allow propagation in a limited
2055 count, and this can enable a simple kind of recursive function versioning.
2056 For other scenario, we would just make lattices bottom. */
2057 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2058 {
2059 int i;
2060
2061 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2062 param_ipa_cp_max_recursive_depth);
2063 if (src_lat != dest_lat || max_recursive_depth < 1)
2064 return dest_lat->set_contains_variable ();
2065
2066 /* No benefit if recursive execution is in low probability. */
2067 if (cs->sreal_frequency () * 100
2068 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2069 param_ipa_cp_min_recursive_probability))
2070 return dest_lat->set_contains_variable ();
2071
2072 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2073
2074 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2075 {
2076 /* Now we do not use self-recursively generated value as propagation
2077 source, this is absolutely conservative, but could avoid explosion
2078 of lattice's value space, especially when one recursive function
2079 calls another recursive. */
2080 if (src_val->self_recursion_generated_p ())
2081 {
2082 ipcp_value_source<tree> *s;
2083
2084 /* If the lattice has already been propagated for the call site,
2085 no need to do that again. */
2086 for (s = src_val->sources; s; s = s->next)
2087 if (s->cs == cs)
2088 return dest_lat->set_contains_variable ();
2089 }
2090 else
2091 val_seeds.safe_push (src_val);
2092 }
2093
2094 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2095
2096 /* Recursively generate lattice values with a limited count. */
2097 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2098 {
2099 for (int j = 1; j < max_recursive_depth; j++)
2100 {
2101 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2102 src_val, res_type);
2103 if (!cstval
2104 || !ipacp_value_safe_for_type (res_type, cstval))
2105 break;
2106
2107 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2108 src_offset, &src_val, j);
2109 gcc_checking_assert (src_val);
2110 }
2111 }
2112 ret |= dest_lat->set_contains_variable ();
2113 }
2114 else
2115 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2116 {
2117 /* Now we do not use self-recursively generated value as propagation
2118 source, otherwise it is easy to make value space of normal lattice
2119 overflow. */
2120 if (src_val->self_recursion_generated_p ())
2121 {
2122 ret |= dest_lat->set_contains_variable ();
2123 continue;
2124 }
2125
2126 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2127 src_val, res_type);
2128 if (cstval
2129 && ipacp_value_safe_for_type (res_type, cstval))
2130 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2131 src_offset);
2132 else
2133 ret |= dest_lat->set_contains_variable ();
2134 }
2135
2136 return ret;
2137 }
2138
2139 /* Propagate values through a pass-through jump function JFUNC associated with
2140 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2141 is the index of the source parameter. PARM_TYPE is the type of the
2142 parameter to which the result is passed. */
2143
2144 static bool
2145 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2146 ipcp_lattice<tree> *src_lat,
2147 ipcp_lattice<tree> *dest_lat, int src_idx,
2148 tree parm_type)
2149 {
2150 return propagate_vals_across_arith_jfunc (cs,
2151 ipa_get_jf_pass_through_operation (jfunc),
2152 NULL_TREE,
2153 ipa_get_jf_pass_through_operand (jfunc),
2154 src_lat, dest_lat, -1, src_idx, parm_type);
2155 }
2156
2157 /* Propagate values through an ancestor jump function JFUNC associated with
2158 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2159 is the index of the source parameter. */
2160
2161 static bool
2162 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2163 struct ipa_jump_func *jfunc,
2164 ipcp_lattice<tree> *src_lat,
2165 ipcp_lattice<tree> *dest_lat, int src_idx,
2166 tree param_type)
2167 {
2168 ipcp_value<tree> *src_val;
2169 bool ret = false;
2170
2171 if (ipa_edge_within_scc (cs))
2172 return dest_lat->set_contains_variable ();
2173
2174 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2175 {
2176 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2177
2178 if (t && ipacp_value_safe_for_type (param_type, t))
2179 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2180 else
2181 ret |= dest_lat->set_contains_variable ();
2182 }
2183
2184 return ret;
2185 }
2186
2187 /* Propagate scalar values across jump function JFUNC that is associated with
2188 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2189 parameter to which the result is passed. */
2190
2191 static bool
2192 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2193 struct ipa_jump_func *jfunc,
2194 ipcp_lattice<tree> *dest_lat,
2195 tree param_type)
2196 {
2197 if (dest_lat->bottom)
2198 return false;
2199
2200 if (jfunc->type == IPA_JF_CONST)
2201 {
2202 tree val = ipa_get_jf_constant (jfunc);
2203 if (ipacp_value_safe_for_type (param_type, val))
2204 return dest_lat->add_value (val, cs, NULL, 0);
2205 else
2206 return dest_lat->set_contains_variable ();
2207 }
2208 else if (jfunc->type == IPA_JF_PASS_THROUGH
2209 || jfunc->type == IPA_JF_ANCESTOR)
2210 {
2211 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2212 ipcp_lattice<tree> *src_lat;
2213 int src_idx;
2214 bool ret;
2215
2216 if (jfunc->type == IPA_JF_PASS_THROUGH)
2217 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2218 else
2219 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2220
2221 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2222 if (src_lat->bottom)
2223 return dest_lat->set_contains_variable ();
2224
2225 /* If we would need to clone the caller and cannot, do not propagate. */
2226 if (!ipcp_versionable_function_p (cs->caller)
2227 && (src_lat->contains_variable
2228 || (src_lat->values_count > 1)))
2229 return dest_lat->set_contains_variable ();
2230
2231 if (jfunc->type == IPA_JF_PASS_THROUGH)
2232 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2233 dest_lat, src_idx,
2234 param_type);
2235 else
2236 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2237 src_idx, param_type);
2238
2239 if (src_lat->contains_variable)
2240 ret |= dest_lat->set_contains_variable ();
2241
2242 return ret;
2243 }
2244
2245 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2246 use it for indirect inlining), we should propagate them too. */
2247 return dest_lat->set_contains_variable ();
2248 }
2249
2250 /* Propagate scalar values across jump function JFUNC that is associated with
2251 edge CS and describes argument IDX and put the values into DEST_LAT. */
2252
2253 static bool
2254 propagate_context_across_jump_function (cgraph_edge *cs,
2255 ipa_jump_func *jfunc, int idx,
2256 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2257 {
2258 if (dest_lat->bottom)
2259 return false;
2260 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2261 bool ret = false;
2262 bool added_sth = false;
2263 bool type_preserved = true;
2264
2265 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2266 = ipa_get_ith_polymorhic_call_context (args, idx);
2267
2268 if (edge_ctx_ptr)
2269 edge_ctx = *edge_ctx_ptr;
2270
2271 if (jfunc->type == IPA_JF_PASS_THROUGH
2272 || jfunc->type == IPA_JF_ANCESTOR)
2273 {
2274 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2275 int src_idx;
2276 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2277
2278 /* TODO: Once we figure out how to propagate speculations, it will
2279 probably be a good idea to switch to speculation if type_preserved is
2280 not set instead of punting. */
2281 if (jfunc->type == IPA_JF_PASS_THROUGH)
2282 {
2283 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2284 goto prop_fail;
2285 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2286 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2287 }
2288 else
2289 {
2290 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2291 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2292 }
2293
2294 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2295 /* If we would need to clone the caller and cannot, do not propagate. */
2296 if (!ipcp_versionable_function_p (cs->caller)
2297 && (src_lat->contains_variable
2298 || (src_lat->values_count > 1)))
2299 goto prop_fail;
2300
2301 ipcp_value<ipa_polymorphic_call_context> *src_val;
2302 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2303 {
2304 ipa_polymorphic_call_context cur = src_val->value;
2305
2306 if (!type_preserved)
2307 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2308 if (jfunc->type == IPA_JF_ANCESTOR)
2309 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2310 /* TODO: In cases we know how the context is going to be used,
2311 we can improve the result by passing proper OTR_TYPE. */
2312 cur.combine_with (edge_ctx);
2313 if (!cur.useless_p ())
2314 {
2315 if (src_lat->contains_variable
2316 && !edge_ctx.equal_to (cur))
2317 ret |= dest_lat->set_contains_variable ();
2318 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2319 added_sth = true;
2320 }
2321 }
2322 }
2323
2324 prop_fail:
2325 if (!added_sth)
2326 {
2327 if (!edge_ctx.useless_p ())
2328 ret |= dest_lat->add_value (edge_ctx, cs);
2329 else
2330 ret |= dest_lat->set_contains_variable ();
2331 }
2332
2333 return ret;
2334 }
2335
2336 /* Propagate bits across jfunc that is associated with
2337 edge cs and update dest_lattice accordingly. */
2338
2339 bool
2340 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2341 ipa_jump_func *jfunc,
2342 ipcp_bits_lattice *dest_lattice)
2343 {
2344 if (dest_lattice->bottom_p ())
2345 return false;
2346
2347 enum availability availability;
2348 cgraph_node *callee = cs->callee->function_symbol (&availability);
2349 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2350 tree parm_type = ipa_get_type (callee_info, idx);
2351
2352 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2353 transform for these cases. Similarly, we can have bad type mismatches
2354 with LTO, avoid doing anything with those too. */
2355 if (!parm_type
2356 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2357 {
2358 if (dump_file && (dump_flags & TDF_DETAILS))
2359 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2360 "param %i of %s is NULL or unsuitable for bits propagation\n",
2361 idx, cs->callee->dump_name ());
2362
2363 return dest_lattice->set_to_bottom ();
2364 }
2365
2366 unsigned precision = TYPE_PRECISION (parm_type);
2367 signop sgn = TYPE_SIGN (parm_type);
2368
2369 if (jfunc->type == IPA_JF_PASS_THROUGH
2370 || jfunc->type == IPA_JF_ANCESTOR)
2371 {
2372 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2373 tree operand = NULL_TREE;
2374 enum tree_code code;
2375 unsigned src_idx;
2376
2377 if (jfunc->type == IPA_JF_PASS_THROUGH)
2378 {
2379 code = ipa_get_jf_pass_through_operation (jfunc);
2380 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2381 if (code != NOP_EXPR)
2382 operand = ipa_get_jf_pass_through_operand (jfunc);
2383 }
2384 else
2385 {
2386 code = POINTER_PLUS_EXPR;
2387 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2388 unsigned HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2389 operand = build_int_cstu (size_type_node, offset);
2390 }
2391
2392 class ipcp_param_lattices *src_lats
2393 = ipa_get_parm_lattices (caller_info, src_idx);
2394
2395 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2396 for eg consider:
2397 int f(int x)
2398 {
2399 g (x & 0xff);
2400 }
2401 Assume lattice for x is bottom, however we can still propagate
2402 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2403 and we store it in jump function during analysis stage. */
2404
2405 if (src_lats->bits_lattice.bottom_p ()
2406 && jfunc->bits)
2407 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2408 precision);
2409 else
2410 return dest_lattice->meet_with (src_lats->bits_lattice, precision, sgn,
2411 code, operand);
2412 }
2413
2414 else if (jfunc->type == IPA_JF_ANCESTOR)
2415 return dest_lattice->set_to_bottom ();
2416 else if (jfunc->bits)
2417 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2418 precision);
2419 else
2420 return dest_lattice->set_to_bottom ();
2421 }
2422
2423 /* Propagate value range across jump function JFUNC that is associated with
2424 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2425 accordingly. */
2426
2427 static bool
2428 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2429 class ipcp_param_lattices *dest_plats,
2430 tree param_type)
2431 {
2432 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2433
2434 if (dest_lat->bottom_p ())
2435 return false;
2436
2437 if (!param_type
2438 || (!INTEGRAL_TYPE_P (param_type)
2439 && !POINTER_TYPE_P (param_type)))
2440 return dest_lat->set_to_bottom ();
2441
2442 if (jfunc->type == IPA_JF_PASS_THROUGH)
2443 {
2444 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2445 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2446 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2447 class ipcp_param_lattices *src_lats
2448 = ipa_get_parm_lattices (caller_info, src_idx);
2449 tree operand_type = ipa_get_type (caller_info, src_idx);
2450
2451 if (src_lats->m_value_range.bottom_p ())
2452 return dest_lat->set_to_bottom ();
2453
2454 value_range vr;
2455 if (TREE_CODE_CLASS (operation) == tcc_unary)
2456 ipa_vr_operation_and_type_effects (&vr,
2457 &src_lats->m_value_range.m_vr,
2458 operation, param_type,
2459 operand_type);
2460 /* A crude way to prevent unbounded number of value range updates
2461 in SCC components. We should allow limited number of updates within
2462 SCC, too. */
2463 else if (!ipa_edge_within_scc (cs))
2464 {
2465 tree op = ipa_get_jf_pass_through_operand (jfunc);
2466 value_range op_vr (op, op);
2467 value_range op_res,res;
2468
2469 range_fold_binary_expr (&op_res, operation, operand_type,
2470 &src_lats->m_value_range.m_vr, &op_vr);
2471 ipa_vr_operation_and_type_effects (&vr,
2472 &op_res,
2473 NOP_EXPR, param_type,
2474 operand_type);
2475 }
2476 if (!vr.undefined_p () && !vr.varying_p ())
2477 {
2478 if (jfunc->m_vr)
2479 {
2480 value_range jvr;
2481 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2482 NOP_EXPR,
2483 param_type,
2484 jfunc->m_vr->type ()))
2485 vr.intersect (jvr);
2486 }
2487 return dest_lat->meet_with (&vr);
2488 }
2489 }
2490 else if (jfunc->type == IPA_JF_CONST)
2491 {
2492 tree val = ipa_get_jf_constant (jfunc);
2493 if (TREE_CODE (val) == INTEGER_CST)
2494 {
2495 val = fold_convert (param_type, val);
2496 if (TREE_OVERFLOW_P (val))
2497 val = drop_tree_overflow (val);
2498
2499 value_range tmpvr (val, val);
2500 return dest_lat->meet_with (&tmpvr);
2501 }
2502 }
2503
2504 value_range vr;
2505 if (jfunc->m_vr
2506 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2507 param_type,
2508 jfunc->m_vr->type ()))
2509 return dest_lat->meet_with (&vr);
2510 else
2511 return dest_lat->set_to_bottom ();
2512 }
2513
2514 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2515 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2516 other cases, return false). If there are no aggregate items, set
2517 aggs_by_ref to NEW_AGGS_BY_REF. */
2518
2519 static bool
2520 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2521 bool new_aggs_by_ref)
2522 {
2523 if (dest_plats->aggs)
2524 {
2525 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2526 {
2527 set_agg_lats_to_bottom (dest_plats);
2528 return true;
2529 }
2530 }
2531 else
2532 dest_plats->aggs_by_ref = new_aggs_by_ref;
2533 return false;
2534 }
2535
2536 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2537 already existing lattice for the given OFFSET and SIZE, marking all skipped
2538 lattices as containing variable and checking for overlaps. If there is no
2539 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2540 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2541 unless there are too many already. If there are two many, return false. If
2542 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2543 skipped lattices were newly marked as containing variable, set *CHANGE to
2544 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2545
2546 static bool
2547 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2548 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2549 struct ipcp_agg_lattice ***aglat,
2550 bool pre_existing, bool *change, int max_agg_items)
2551 {
2552 gcc_checking_assert (offset >= 0);
2553
2554 while (**aglat && (**aglat)->offset < offset)
2555 {
2556 if ((**aglat)->offset + (**aglat)->size > offset)
2557 {
2558 set_agg_lats_to_bottom (dest_plats);
2559 return false;
2560 }
2561 *change |= (**aglat)->set_contains_variable ();
2562 *aglat = &(**aglat)->next;
2563 }
2564
2565 if (**aglat && (**aglat)->offset == offset)
2566 {
2567 if ((**aglat)->size != val_size)
2568 {
2569 set_agg_lats_to_bottom (dest_plats);
2570 return false;
2571 }
2572 gcc_assert (!(**aglat)->next
2573 || (**aglat)->next->offset >= offset + val_size);
2574 return true;
2575 }
2576 else
2577 {
2578 struct ipcp_agg_lattice *new_al;
2579
2580 if (**aglat && (**aglat)->offset < offset + val_size)
2581 {
2582 set_agg_lats_to_bottom (dest_plats);
2583 return false;
2584 }
2585 if (dest_plats->aggs_count == max_agg_items)
2586 return false;
2587 dest_plats->aggs_count++;
2588 new_al = ipcp_agg_lattice_pool.allocate ();
2589 memset (new_al, 0, sizeof (*new_al));
2590
2591 new_al->offset = offset;
2592 new_al->size = val_size;
2593 new_al->contains_variable = pre_existing;
2594
2595 new_al->next = **aglat;
2596 **aglat = new_al;
2597 return true;
2598 }
2599 }
2600
2601 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2602 containing an unknown value. */
2603
2604 static bool
2605 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2606 {
2607 bool ret = false;
2608 while (aglat)
2609 {
2610 ret |= aglat->set_contains_variable ();
2611 aglat = aglat->next;
2612 }
2613 return ret;
2614 }
2615
2616 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2617 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2618 parameter used for lattice value sources. Return true if DEST_PLATS changed
2619 in any way. */
2620
2621 static bool
2622 merge_aggregate_lattices (struct cgraph_edge *cs,
2623 class ipcp_param_lattices *dest_plats,
2624 class ipcp_param_lattices *src_plats,
2625 int src_idx, HOST_WIDE_INT offset_delta)
2626 {
2627 bool pre_existing = dest_plats->aggs != NULL;
2628 struct ipcp_agg_lattice **dst_aglat;
2629 bool ret = false;
2630
2631 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2632 return true;
2633 if (src_plats->aggs_bottom)
2634 return set_agg_lats_contain_variable (dest_plats);
2635 if (src_plats->aggs_contain_variable)
2636 ret |= set_agg_lats_contain_variable (dest_plats);
2637 dst_aglat = &dest_plats->aggs;
2638
2639 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2640 param_ipa_max_agg_items);
2641 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2642 src_aglat;
2643 src_aglat = src_aglat->next)
2644 {
2645 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2646
2647 if (new_offset < 0)
2648 continue;
2649 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2650 &dst_aglat, pre_existing, &ret, max_agg_items))
2651 {
2652 struct ipcp_agg_lattice *new_al = *dst_aglat;
2653
2654 dst_aglat = &(*dst_aglat)->next;
2655 if (src_aglat->bottom)
2656 {
2657 ret |= new_al->set_contains_variable ();
2658 continue;
2659 }
2660 if (src_aglat->contains_variable)
2661 ret |= new_al->set_contains_variable ();
2662 for (ipcp_value<tree> *val = src_aglat->values;
2663 val;
2664 val = val->next)
2665 ret |= new_al->add_value (val->value, cs, val, src_idx,
2666 src_aglat->offset);
2667 }
2668 else if (dest_plats->aggs_bottom)
2669 return true;
2670 }
2671 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2672 return ret;
2673 }
2674
2675 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2676 pass-through JFUNC and if so, whether it has conform and conforms to the
2677 rules about propagating values passed by reference. */
2678
2679 static bool
2680 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2681 struct ipa_jump_func *jfunc)
2682 {
2683 return src_plats->aggs
2684 && (!src_plats->aggs_by_ref
2685 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2686 }
2687
2688 /* Propagate values through ITEM, jump function for a part of an aggregate,
2689 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2690 associated with the jump function. Return true if AGLAT changed in any
2691 way. */
2692
2693 static bool
2694 propagate_aggregate_lattice (struct cgraph_edge *cs,
2695 struct ipa_agg_jf_item *item,
2696 struct ipcp_agg_lattice *aglat)
2697 {
2698 class ipa_node_params *caller_info;
2699 class ipcp_param_lattices *src_plats;
2700 struct ipcp_lattice<tree> *src_lat;
2701 HOST_WIDE_INT src_offset;
2702 int src_idx;
2703 tree load_type;
2704 bool ret;
2705
2706 if (item->jftype == IPA_JF_CONST)
2707 {
2708 tree value = item->value.constant;
2709
2710 gcc_checking_assert (is_gimple_ip_invariant (value));
2711 return aglat->add_value (value, cs, NULL, 0);
2712 }
2713
2714 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2715 || item->jftype == IPA_JF_LOAD_AGG);
2716
2717 caller_info = ipa_node_params_sum->get (cs->caller);
2718 src_idx = item->value.pass_through.formal_id;
2719 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2720
2721 if (item->jftype == IPA_JF_PASS_THROUGH)
2722 {
2723 load_type = NULL_TREE;
2724 src_lat = &src_plats->itself;
2725 src_offset = -1;
2726 }
2727 else
2728 {
2729 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2730 struct ipcp_agg_lattice *src_aglat;
2731
2732 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2733 if (src_aglat->offset >= load_offset)
2734 break;
2735
2736 load_type = item->value.load_agg.type;
2737 if (!src_aglat
2738 || src_aglat->offset > load_offset
2739 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2740 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2741 return aglat->set_contains_variable ();
2742
2743 src_lat = src_aglat;
2744 src_offset = load_offset;
2745 }
2746
2747 if (src_lat->bottom
2748 || (!ipcp_versionable_function_p (cs->caller)
2749 && !src_lat->is_single_const ()))
2750 return aglat->set_contains_variable ();
2751
2752 ret = propagate_vals_across_arith_jfunc (cs,
2753 item->value.pass_through.operation,
2754 load_type,
2755 item->value.pass_through.operand,
2756 src_lat, aglat,
2757 src_offset,
2758 src_idx,
2759 item->type);
2760
2761 if (src_lat->contains_variable)
2762 ret |= aglat->set_contains_variable ();
2763
2764 return ret;
2765 }
2766
2767 /* Propagate scalar values across jump function JFUNC that is associated with
2768 edge CS and put the values into DEST_LAT. */
2769
2770 static bool
2771 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2772 struct ipa_jump_func *jfunc,
2773 class ipcp_param_lattices *dest_plats)
2774 {
2775 bool ret = false;
2776
2777 if (dest_plats->aggs_bottom)
2778 return false;
2779
2780 if (jfunc->type == IPA_JF_PASS_THROUGH
2781 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2782 {
2783 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2784 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2785 class ipcp_param_lattices *src_plats;
2786
2787 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2788 if (agg_pass_through_permissible_p (src_plats, jfunc))
2789 {
2790 /* Currently we do not produce clobber aggregate jump
2791 functions, replace with merging when we do. */
2792 gcc_assert (!jfunc->agg.items);
2793 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2794 src_idx, 0);
2795 return ret;
2796 }
2797 }
2798 else if (jfunc->type == IPA_JF_ANCESTOR
2799 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2800 {
2801 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2802 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2803 class ipcp_param_lattices *src_plats;
2804
2805 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2806 if (src_plats->aggs && src_plats->aggs_by_ref)
2807 {
2808 /* Currently we do not produce clobber aggregate jump
2809 functions, replace with merging when we do. */
2810 gcc_assert (!jfunc->agg.items);
2811 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2812 ipa_get_jf_ancestor_offset (jfunc));
2813 }
2814 else if (!src_plats->aggs_by_ref)
2815 ret |= set_agg_lats_to_bottom (dest_plats);
2816 else
2817 ret |= set_agg_lats_contain_variable (dest_plats);
2818 return ret;
2819 }
2820
2821 if (jfunc->agg.items)
2822 {
2823 bool pre_existing = dest_plats->aggs != NULL;
2824 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2825 struct ipa_agg_jf_item *item;
2826 int i;
2827
2828 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2829 return true;
2830
2831 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2832 param_ipa_max_agg_items);
2833 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2834 {
2835 HOST_WIDE_INT val_size;
2836
2837 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2838 continue;
2839 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2840
2841 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2842 &aglat, pre_existing, &ret, max_agg_items))
2843 {
2844 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2845 aglat = &(*aglat)->next;
2846 }
2847 else if (dest_plats->aggs_bottom)
2848 return true;
2849 }
2850
2851 ret |= set_chain_of_aglats_contains_variable (*aglat);
2852 }
2853 else
2854 ret |= set_agg_lats_contain_variable (dest_plats);
2855
2856 return ret;
2857 }
2858
2859 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2860 non-thunk) destination, the call passes through a thunk. */
2861
2862 static bool
2863 call_passes_through_thunk (cgraph_edge *cs)
2864 {
2865 cgraph_node *alias_or_thunk = cs->callee;
2866 while (alias_or_thunk->alias)
2867 alias_or_thunk = alias_or_thunk->get_alias_target ();
2868 return alias_or_thunk->thunk;
2869 }
2870
2871 /* Propagate constants from the caller to the callee of CS. INFO describes the
2872 caller. */
2873
2874 static bool
2875 propagate_constants_across_call (struct cgraph_edge *cs)
2876 {
2877 class ipa_node_params *callee_info;
2878 enum availability availability;
2879 cgraph_node *callee;
2880 class ipa_edge_args *args;
2881 bool ret = false;
2882 int i, args_count, parms_count;
2883
2884 callee = cs->callee->function_symbol (&availability);
2885 if (!callee->definition)
2886 return false;
2887 gcc_checking_assert (callee->has_gimple_body_p ());
2888 callee_info = ipa_node_params_sum->get (callee);
2889 if (!callee_info)
2890 return false;
2891
2892 args = ipa_edge_args_sum->get (cs);
2893 parms_count = ipa_get_param_count (callee_info);
2894 if (parms_count == 0)
2895 return false;
2896 if (!args
2897 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2898 || !opt_for_fn (cs->caller->decl, optimize))
2899 {
2900 for (i = 0; i < parms_count; i++)
2901 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2902 i));
2903 return ret;
2904 }
2905 args_count = ipa_get_cs_argument_count (args);
2906
2907 /* If this call goes through a thunk we must not propagate to the first (0th)
2908 parameter. However, we might need to uncover a thunk from below a series
2909 of aliases first. */
2910 if (call_passes_through_thunk (cs))
2911 {
2912 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2913 0));
2914 i = 1;
2915 }
2916 else
2917 i = 0;
2918
2919 for (; (i < args_count) && (i < parms_count); i++)
2920 {
2921 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2922 class ipcp_param_lattices *dest_plats;
2923 tree param_type = ipa_get_type (callee_info, i);
2924
2925 dest_plats = ipa_get_parm_lattices (callee_info, i);
2926 if (availability == AVAIL_INTERPOSABLE)
2927 ret |= set_all_contains_variable (dest_plats);
2928 else
2929 {
2930 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2931 &dest_plats->itself,
2932 param_type);
2933 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2934 &dest_plats->ctxlat);
2935 ret
2936 |= propagate_bits_across_jump_function (cs, i, jump_func,
2937 &dest_plats->bits_lattice);
2938 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2939 dest_plats);
2940 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2941 ret |= propagate_vr_across_jump_function (cs, jump_func,
2942 dest_plats, param_type);
2943 else
2944 ret |= dest_plats->m_value_range.set_to_bottom ();
2945 }
2946 }
2947 for (; i < parms_count; i++)
2948 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2949
2950 return ret;
2951 }
2952
2953 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2954 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter
2955 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2956
2957 static tree
2958 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2959 const vec<tree> &known_csts,
2960 const vec<ipa_polymorphic_call_context> &known_contexts,
2961 const vec<ipa_agg_value_set> &known_aggs,
2962 struct ipa_agg_replacement_value *agg_reps,
2963 bool *speculative)
2964 {
2965 int param_index = ie->indirect_info->param_index;
2966 HOST_WIDE_INT anc_offset;
2967 tree t = NULL;
2968 tree target = NULL;
2969
2970 *speculative = false;
2971
2972 if (param_index == -1)
2973 return NULL_TREE;
2974
2975 if (!ie->indirect_info->polymorphic)
2976 {
2977 tree t = NULL;
2978
2979 if (ie->indirect_info->agg_contents)
2980 {
2981 t = NULL;
2982 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
2983 {
2984 while (agg_reps)
2985 {
2986 if (agg_reps->index == param_index
2987 && agg_reps->offset == ie->indirect_info->offset
2988 && agg_reps->by_ref == ie->indirect_info->by_ref)
2989 {
2990 t = agg_reps->value;
2991 break;
2992 }
2993 agg_reps = agg_reps->next;
2994 }
2995 }
2996 if (!t)
2997 {
2998 const ipa_agg_value_set *agg;
2999 if (known_aggs.length () > (unsigned int) param_index)
3000 agg = &known_aggs[param_index];
3001 else
3002 agg = NULL;
3003 bool from_global_constant;
3004 t = ipa_find_agg_cst_for_param (agg,
3005 (unsigned) param_index
3006 < known_csts.length ()
3007 ? known_csts[param_index]
3008 : NULL,
3009 ie->indirect_info->offset,
3010 ie->indirect_info->by_ref,
3011 &from_global_constant);
3012 if (t
3013 && !from_global_constant
3014 && !ie->indirect_info->guaranteed_unmodified)
3015 t = NULL_TREE;
3016 }
3017 }
3018 else if ((unsigned) param_index < known_csts.length ())
3019 t = known_csts[param_index];
3020
3021 if (t
3022 && TREE_CODE (t) == ADDR_EXPR
3023 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3024 return TREE_OPERAND (t, 0);
3025 else
3026 return NULL_TREE;
3027 }
3028
3029 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3030 return NULL_TREE;
3031
3032 gcc_assert (!ie->indirect_info->agg_contents);
3033 anc_offset = ie->indirect_info->offset;
3034
3035 t = NULL;
3036
3037 /* Try to work out value of virtual table pointer value in replacements. */
3038 if (!t && agg_reps && !ie->indirect_info->by_ref)
3039 {
3040 while (agg_reps)
3041 {
3042 if (agg_reps->index == param_index
3043 && agg_reps->offset == ie->indirect_info->offset
3044 && agg_reps->by_ref)
3045 {
3046 t = agg_reps->value;
3047 break;
3048 }
3049 agg_reps = agg_reps->next;
3050 }
3051 }
3052
3053 /* Try to work out value of virtual table pointer value in known
3054 aggregate values. */
3055 if (!t && known_aggs.length () > (unsigned int) param_index
3056 && !ie->indirect_info->by_ref)
3057 {
3058 const ipa_agg_value_set *agg = &known_aggs[param_index];
3059 t = ipa_find_agg_cst_for_param (agg,
3060 (unsigned) param_index
3061 < known_csts.length ()
3062 ? known_csts[param_index] : NULL,
3063 ie->indirect_info->offset, true);
3064 }
3065
3066 /* If we found the virtual table pointer, lookup the target. */
3067 if (t)
3068 {
3069 tree vtable;
3070 unsigned HOST_WIDE_INT offset;
3071 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3072 {
3073 bool can_refer;
3074 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3075 vtable, offset, &can_refer);
3076 if (can_refer)
3077 {
3078 if (!target
3079 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3080 || !possible_polymorphic_call_target_p
3081 (ie, cgraph_node::get (target)))
3082 {
3083 /* Do not speculate builtin_unreachable, it is stupid! */
3084 if (ie->indirect_info->vptr_changed)
3085 return NULL;
3086 target = ipa_impossible_devirt_target (ie, target);
3087 }
3088 *speculative = ie->indirect_info->vptr_changed;
3089 if (!*speculative)
3090 return target;
3091 }
3092 }
3093 }
3094
3095 /* Do we know the constant value of pointer? */
3096 if (!t && (unsigned) param_index < known_csts.length ())
3097 t = known_csts[param_index];
3098
3099 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3100
3101 ipa_polymorphic_call_context context;
3102 if (known_contexts.length () > (unsigned int) param_index)
3103 {
3104 context = known_contexts[param_index];
3105 context.offset_by (anc_offset);
3106 if (ie->indirect_info->vptr_changed)
3107 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3108 ie->indirect_info->otr_type);
3109 if (t)
3110 {
3111 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3112 (t, ie->indirect_info->otr_type, anc_offset);
3113 if (!ctx2.useless_p ())
3114 context.combine_with (ctx2, ie->indirect_info->otr_type);
3115 }
3116 }
3117 else if (t)
3118 {
3119 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3120 anc_offset);
3121 if (ie->indirect_info->vptr_changed)
3122 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3123 ie->indirect_info->otr_type);
3124 }
3125 else
3126 return NULL_TREE;
3127
3128 vec <cgraph_node *>targets;
3129 bool final;
3130
3131 targets = possible_polymorphic_call_targets
3132 (ie->indirect_info->otr_type,
3133 ie->indirect_info->otr_token,
3134 context, &final);
3135 if (!final || targets.length () > 1)
3136 {
3137 struct cgraph_node *node;
3138 if (*speculative)
3139 return target;
3140 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3141 || ie->speculative || !ie->maybe_hot_p ())
3142 return NULL;
3143 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3144 ie->indirect_info->otr_token,
3145 context);
3146 if (node)
3147 {
3148 *speculative = true;
3149 target = node->decl;
3150 }
3151 else
3152 return NULL;
3153 }
3154 else
3155 {
3156 *speculative = false;
3157 if (targets.length () == 1)
3158 target = targets[0]->decl;
3159 else
3160 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3161 }
3162
3163 if (target && !possible_polymorphic_call_target_p (ie,
3164 cgraph_node::get (target)))
3165 {
3166 if (*speculative)
3167 return NULL;
3168 target = ipa_impossible_devirt_target (ie, target);
3169 }
3170
3171 return target;
3172 }
3173
3174 /* If an indirect edge IE can be turned into a direct one based on data in
3175 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3176 whether the discovered target is only speculative guess. */
3177
3178 tree
3179 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3180 ipa_call_arg_values *avals,
3181 bool *speculative)
3182 {
3183 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3184 avals->m_known_contexts,
3185 avals->m_known_aggs,
3186 NULL, speculative);
3187 }
3188
3189 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3190
3191 tree
3192 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3193 ipa_auto_call_arg_values *avals,
3194 bool *speculative)
3195 {
3196 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3197 avals->m_known_contexts,
3198 avals->m_known_aggs,
3199 NULL, speculative);
3200 }
3201
3202 /* Calculate devirtualization time bonus for NODE, assuming we know information
3203 about arguments stored in AVALS. */
3204
3205 static int
3206 devirtualization_time_bonus (struct cgraph_node *node,
3207 ipa_auto_call_arg_values *avals)
3208 {
3209 struct cgraph_edge *ie;
3210 int res = 0;
3211
3212 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3213 {
3214 struct cgraph_node *callee;
3215 class ipa_fn_summary *isummary;
3216 enum availability avail;
3217 tree target;
3218 bool speculative;
3219
3220 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3221 if (!target)
3222 continue;
3223
3224 /* Only bare minimum benefit for clearly un-inlineable targets. */
3225 res += 1;
3226 callee = cgraph_node::get (target);
3227 if (!callee || !callee->definition)
3228 continue;
3229 callee = callee->function_symbol (&avail);
3230 if (avail < AVAIL_AVAILABLE)
3231 continue;
3232 isummary = ipa_fn_summaries->get (callee);
3233 if (!isummary || !isummary->inlinable)
3234 continue;
3235
3236 int size = ipa_size_summaries->get (callee)->size;
3237 /* FIXME: The values below need re-considering and perhaps also
3238 integrating into the cost metrics, at lest in some very basic way. */
3239 int max_inline_insns_auto
3240 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3241 if (size <= max_inline_insns_auto / 4)
3242 res += 31 / ((int)speculative + 1);
3243 else if (size <= max_inline_insns_auto / 2)
3244 res += 15 / ((int)speculative + 1);
3245 else if (size <= max_inline_insns_auto
3246 || DECL_DECLARED_INLINE_P (callee->decl))
3247 res += 7 / ((int)speculative + 1);
3248 }
3249
3250 return res;
3251 }
3252
3253 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3254
3255 static int
3256 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3257 {
3258 int result = 0;
3259 ipa_hints hints = estimates.hints;
3260 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3261 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3262
3263 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3264
3265 if (hints & INLINE_HINT_loop_iterations)
3266 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3267
3268 if (hints & INLINE_HINT_loop_stride)
3269 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3270
3271 return result;
3272 }
3273
3274 /* If there is a reason to penalize the function described by INFO in the
3275 cloning goodness evaluation, do so. */
3276
3277 static inline sreal
3278 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3279 sreal evaluation)
3280 {
3281 if (info->node_within_scc && !info->node_is_self_scc)
3282 evaluation = (evaluation
3283 * (100 - opt_for_fn (node->decl,
3284 param_ipa_cp_recursion_penalty))) / 100;
3285
3286 if (info->node_calling_single_call)
3287 evaluation = (evaluation
3288 * (100 - opt_for_fn (node->decl,
3289 param_ipa_cp_single_call_penalty)))
3290 / 100;
3291
3292 return evaluation;
3293 }
3294
3295 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3296 and SIZE_COST and with the sum of frequencies of incoming edges to the
3297 potential new clone in FREQUENCIES. */
3298
3299 static bool
3300 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3301 sreal freq_sum, profile_count count_sum,
3302 int size_cost)
3303 {
3304 if (time_benefit == 0
3305 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3306 || node->optimize_for_size_p ())
3307 return false;
3308
3309 gcc_assert (size_cost > 0);
3310
3311 ipa_node_params *info = ipa_node_params_sum->get (node);
3312 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3313 if (count_sum > profile_count::zero ())
3314 {
3315 gcc_assert (base_count > profile_count::zero ());
3316 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3317 sreal evaluation = (time_benefit * factor) / size_cost;
3318 evaluation = incorporate_penalties (node, info, evaluation);
3319 evaluation *= 1000;
3320
3321 if (dump_file && (dump_flags & TDF_DETAILS))
3322 {
3323 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3324 "size: %i, count_sum: ", time_benefit.to_double (),
3325 size_cost);
3326 count_sum.dump (dump_file);
3327 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3328 info->node_within_scc
3329 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3330 info->node_calling_single_call ? ", single_call" : "",
3331 evaluation.to_double (), eval_threshold);
3332 }
3333
3334 return evaluation.to_int () >= eval_threshold;
3335 }
3336 else
3337 {
3338 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3339 evaluation = incorporate_penalties (node, info, evaluation);
3340 evaluation *= 1000;
3341
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3343 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3344 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3345 "threshold: %i\n",
3346 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3347 info->node_within_scc
3348 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3349 info->node_calling_single_call ? ", single_call" : "",
3350 evaluation.to_double (), eval_threshold);
3351
3352 return evaluation.to_int () >= eval_threshold;
3353 }
3354 }
3355
3356 /* Return all context independent values from aggregate lattices in PLATS in a
3357 vector. Return NULL if there are none. */
3358
3359 static vec<ipa_agg_value>
3360 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3361 {
3362 vec<ipa_agg_value> res = vNULL;
3363
3364 if (plats->aggs_bottom
3365 || plats->aggs_contain_variable
3366 || plats->aggs_count == 0)
3367 return vNULL;
3368
3369 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3370 aglat;
3371 aglat = aglat->next)
3372 if (aglat->is_single_const ())
3373 {
3374 struct ipa_agg_value item;
3375 item.offset = aglat->offset;
3376 item.value = aglat->values->value;
3377 res.safe_push (item);
3378 }
3379 return res;
3380 }
3381
3382 /* Grow vectors in AVALS and fill them with information about values of
3383 parameters that are known to be independent of the context. Only calculate
3384 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3385 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3386 parameters will be stored in it.
3387
3388 TODO: Also grow context independent value range vectors. */
3389
3390 static bool
3391 gather_context_independent_values (class ipa_node_params *info,
3392 ipa_auto_call_arg_values *avals,
3393 bool calculate_aggs,
3394 int *removable_params_cost)
3395 {
3396 int i, count = ipa_get_param_count (info);
3397 bool ret = false;
3398
3399 avals->m_known_vals.safe_grow_cleared (count, true);
3400 avals->m_known_contexts.safe_grow_cleared (count, true);
3401 if (calculate_aggs)
3402 avals->m_known_aggs.safe_grow_cleared (count, true);
3403
3404 if (removable_params_cost)
3405 *removable_params_cost = 0;
3406
3407 for (i = 0; i < count; i++)
3408 {
3409 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3410 ipcp_lattice<tree> *lat = &plats->itself;
3411
3412 if (lat->is_single_const ())
3413 {
3414 ipcp_value<tree> *val = lat->values;
3415 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3416 avals->m_known_vals[i] = val->value;
3417 if (removable_params_cost)
3418 *removable_params_cost
3419 += estimate_move_cost (TREE_TYPE (val->value), false);
3420 ret = true;
3421 }
3422 else if (removable_params_cost
3423 && !ipa_is_param_used (info, i))
3424 *removable_params_cost
3425 += ipa_get_param_move_cost (info, i);
3426
3427 if (!ipa_is_param_used (info, i))
3428 continue;
3429
3430 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3431 /* Do not account known context as reason for cloning. We can see
3432 if it permits devirtualization. */
3433 if (ctxlat->is_single_const ())
3434 avals->m_known_contexts[i] = ctxlat->values->value;
3435
3436 if (calculate_aggs)
3437 {
3438 vec<ipa_agg_value> agg_items;
3439 struct ipa_agg_value_set *agg;
3440
3441 agg_items = context_independent_aggregate_values (plats);
3442 agg = &avals->m_known_aggs[i];
3443 agg->items = agg_items;
3444 agg->by_ref = plats->aggs_by_ref;
3445 ret |= !agg_items.is_empty ();
3446 }
3447 }
3448
3449 return ret;
3450 }
3451
3452 /* Perform time and size measurement of NODE with the context given in AVALS,
3453 calculate the benefit compared to the node without specialization and store
3454 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3455 context-independent or unused removable parameters and EST_MOVE_COST, the
3456 estimated movement of the considered parameter. */
3457
3458 static void
3459 perform_estimation_of_a_value (cgraph_node *node,
3460 ipa_auto_call_arg_values *avals,
3461 int removable_params_cost, int est_move_cost,
3462 ipcp_value_base *val)
3463 {
3464 sreal time_benefit;
3465 ipa_call_estimates estimates;
3466
3467 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3468
3469 /* Extern inline functions have no cloning local time benefits because they
3470 will be inlined anyway. The only reason to clone them is if it enables
3471 optimization in any of the functions they call. */
3472 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3473 time_benefit = 0;
3474 else
3475 time_benefit = (estimates.nonspecialized_time - estimates.time)
3476 + (devirtualization_time_bonus (node, avals)
3477 + hint_time_bonus (node, estimates)
3478 + removable_params_cost + est_move_cost);
3479
3480 int size = estimates.size;
3481 gcc_checking_assert (size >=0);
3482 /* The inliner-heuristics based estimates may think that in certain
3483 contexts some functions do not have any size at all but we want
3484 all specializations to have at least a tiny cost, not least not to
3485 divide by zero. */
3486 if (size == 0)
3487 size = 1;
3488
3489 val->local_time_benefit = time_benefit;
3490 val->local_size_cost = size;
3491 }
3492
3493 /* Get the overall limit oof growth based on parameters extracted from growth.
3494 it does not really make sense to mix functions with different overall growth
3495 limits but it is possible and if it happens, we do not want to select one
3496 limit at random. */
3497
3498 static long
3499 get_max_overall_size (cgraph_node *node)
3500 {
3501 long max_new_size = orig_overall_size;
3502 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3503 if (max_new_size < large_unit)
3504 max_new_size = large_unit;
3505 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3506 max_new_size += max_new_size * unit_growth / 100 + 1;
3507 return max_new_size;
3508 }
3509
3510 /* Iterate over known values of parameters of NODE and estimate the local
3511 effects in terms of time and size they have. */
3512
3513 static void
3514 estimate_local_effects (struct cgraph_node *node)
3515 {
3516 ipa_node_params *info = ipa_node_params_sum->get (node);
3517 int i, count = ipa_get_param_count (info);
3518 bool always_const;
3519 int removable_params_cost;
3520
3521 if (!count || !ipcp_versionable_function_p (node))
3522 return;
3523
3524 if (dump_file && (dump_flags & TDF_DETAILS))
3525 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3526
3527 ipa_auto_call_arg_values avals;
3528 always_const = gather_context_independent_values (info, &avals, true,
3529 &removable_params_cost);
3530 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3531 if (always_const || devirt_bonus
3532 || (removable_params_cost && node->can_change_signature))
3533 {
3534 struct caller_statistics stats;
3535 ipa_call_estimates estimates;
3536
3537 init_caller_stats (&stats);
3538 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3539 false);
3540 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3541 sreal time = estimates.nonspecialized_time - estimates.time;
3542 time += devirt_bonus;
3543 time += hint_time_bonus (node, estimates);
3544 time += removable_params_cost;
3545 int size = estimates.size - stats.n_calls * removable_params_cost;
3546
3547 if (dump_file)
3548 fprintf (dump_file, " - context independent values, size: %i, "
3549 "time_benefit: %f\n", size, (time).to_double ());
3550
3551 if (size <= 0 || node->local)
3552 {
3553 info->do_clone_for_all_contexts = true;
3554
3555 if (dump_file)
3556 fprintf (dump_file, " Decided to specialize for all "
3557 "known contexts, code not going to grow.\n");
3558 }
3559 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3560 stats.count_sum, size))
3561 {
3562 if (size + overall_size <= get_max_overall_size (node))
3563 {
3564 info->do_clone_for_all_contexts = true;
3565 overall_size += size;
3566
3567 if (dump_file)
3568 fprintf (dump_file, " Decided to specialize for all "
3569 "known contexts, growth (to %li) deemed "
3570 "beneficial.\n", overall_size);
3571 }
3572 else if (dump_file && (dump_flags & TDF_DETAILS))
3573 fprintf (dump_file, " Not cloning for all contexts because "
3574 "maximum unit size would be reached with %li.\n",
3575 size + overall_size);
3576 }
3577 else if (dump_file && (dump_flags & TDF_DETAILS))
3578 fprintf (dump_file, " Not cloning for all contexts because "
3579 "!good_cloning_opportunity_p.\n");
3580
3581 }
3582
3583 for (i = 0; i < count; i++)
3584 {
3585 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3586 ipcp_lattice<tree> *lat = &plats->itself;
3587 ipcp_value<tree> *val;
3588
3589 if (lat->bottom
3590 || !lat->values
3591 || avals.m_known_vals[i])
3592 continue;
3593
3594 for (val = lat->values; val; val = val->next)
3595 {
3596 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3597 avals.m_known_vals[i] = val->value;
3598
3599 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3600 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3601 emc, val);
3602
3603 if (dump_file && (dump_flags & TDF_DETAILS))
3604 {
3605 fprintf (dump_file, " - estimates for value ");
3606 print_ipcp_constant_value (dump_file, val->value);
3607 fprintf (dump_file, " for ");
3608 ipa_dump_param (dump_file, info, i);
3609 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3610 val->local_time_benefit.to_double (),
3611 val->local_size_cost);
3612 }
3613 }
3614 avals.m_known_vals[i] = NULL_TREE;
3615 }
3616
3617 for (i = 0; i < count; i++)
3618 {
3619 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3620
3621 if (!plats->virt_call)
3622 continue;
3623
3624 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3625 ipcp_value<ipa_polymorphic_call_context> *val;
3626
3627 if (ctxlat->bottom
3628 || !ctxlat->values
3629 || !avals.m_known_contexts[i].useless_p ())
3630 continue;
3631
3632 for (val = ctxlat->values; val; val = val->next)
3633 {
3634 avals.m_known_contexts[i] = val->value;
3635 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3636 0, val);
3637
3638 if (dump_file && (dump_flags & TDF_DETAILS))
3639 {
3640 fprintf (dump_file, " - estimates for polymorphic context ");
3641 print_ipcp_constant_value (dump_file, val->value);
3642 fprintf (dump_file, " for ");
3643 ipa_dump_param (dump_file, info, i);
3644 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3645 val->local_time_benefit.to_double (),
3646 val->local_size_cost);
3647 }
3648 }
3649 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3650 }
3651
3652 for (i = 0; i < count; i++)
3653 {
3654 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3655
3656 if (plats->aggs_bottom || !plats->aggs)
3657 continue;
3658
3659 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3660 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3661 {
3662 ipcp_value<tree> *val;
3663 if (aglat->bottom || !aglat->values
3664 /* If the following is true, the one value is in known_aggs. */
3665 || (!plats->aggs_contain_variable
3666 && aglat->is_single_const ()))
3667 continue;
3668
3669 for (val = aglat->values; val; val = val->next)
3670 {
3671 struct ipa_agg_value item;
3672
3673 item.offset = aglat->offset;
3674 item.value = val->value;
3675 agg->items.safe_push (item);
3676
3677 perform_estimation_of_a_value (node, &avals,
3678 removable_params_cost, 0, val);
3679
3680 if (dump_file && (dump_flags & TDF_DETAILS))
3681 {
3682 fprintf (dump_file, " - estimates for value ");
3683 print_ipcp_constant_value (dump_file, val->value);
3684 fprintf (dump_file, " for ");
3685 ipa_dump_param (dump_file, info, i);
3686 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3687 "]: time_benefit: %g, size: %i\n",
3688 plats->aggs_by_ref ? "ref " : "",
3689 aglat->offset,
3690 val->local_time_benefit.to_double (),
3691 val->local_size_cost);
3692 }
3693
3694 agg->items.pop ();
3695 }
3696 }
3697 }
3698 }
3699
3700
3701 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3702 topological sort of values. */
3703
3704 template <typename valtype>
3705 void
3706 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3707 {
3708 ipcp_value_source<valtype> *src;
3709
3710 if (cur_val->dfs)
3711 return;
3712
3713 dfs_counter++;
3714 cur_val->dfs = dfs_counter;
3715 cur_val->low_link = dfs_counter;
3716
3717 cur_val->topo_next = stack;
3718 stack = cur_val;
3719 cur_val->on_stack = true;
3720
3721 for (src = cur_val->sources; src; src = src->next)
3722 if (src->val)
3723 {
3724 if (src->val->dfs == 0)
3725 {
3726 add_val (src->val);
3727 if (src->val->low_link < cur_val->low_link)
3728 cur_val->low_link = src->val->low_link;
3729 }
3730 else if (src->val->on_stack
3731 && src->val->dfs < cur_val->low_link)
3732 cur_val->low_link = src->val->dfs;
3733 }
3734
3735 if (cur_val->dfs == cur_val->low_link)
3736 {
3737 ipcp_value<valtype> *v, *scc_list = NULL;
3738
3739 do
3740 {
3741 v = stack;
3742 stack = v->topo_next;
3743 v->on_stack = false;
3744 v->scc_no = cur_val->dfs;
3745
3746 v->scc_next = scc_list;
3747 scc_list = v;
3748 }
3749 while (v != cur_val);
3750
3751 cur_val->topo_next = values_topo;
3752 values_topo = cur_val;
3753 }
3754 }
3755
3756 /* Add all values in lattices associated with NODE to the topological sort if
3757 they are not there yet. */
3758
3759 static void
3760 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3761 {
3762 ipa_node_params *info = ipa_node_params_sum->get (node);
3763 int i, count = ipa_get_param_count (info);
3764
3765 for (i = 0; i < count; i++)
3766 {
3767 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3768 ipcp_lattice<tree> *lat = &plats->itself;
3769 struct ipcp_agg_lattice *aglat;
3770
3771 if (!lat->bottom)
3772 {
3773 ipcp_value<tree> *val;
3774 for (val = lat->values; val; val = val->next)
3775 topo->constants.add_val (val);
3776 }
3777
3778 if (!plats->aggs_bottom)
3779 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3780 if (!aglat->bottom)
3781 {
3782 ipcp_value<tree> *val;
3783 for (val = aglat->values; val; val = val->next)
3784 topo->constants.add_val (val);
3785 }
3786
3787 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3788 if (!ctxlat->bottom)
3789 {
3790 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3791 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3792 topo->contexts.add_val (ctxval);
3793 }
3794 }
3795 }
3796
3797 /* One pass of constants propagation along the call graph edges, from callers
3798 to callees (requires topological ordering in TOPO), iterate over strongly
3799 connected components. */
3800
3801 static void
3802 propagate_constants_topo (class ipa_topo_info *topo)
3803 {
3804 int i;
3805
3806 for (i = topo->nnodes - 1; i >= 0; i--)
3807 {
3808 unsigned j;
3809 struct cgraph_node *v, *node = topo->order[i];
3810 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3811
3812 /* First, iteratively propagate within the strongly connected component
3813 until all lattices stabilize. */
3814 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3815 if (v->has_gimple_body_p ())
3816 {
3817 if (opt_for_fn (v->decl, flag_ipa_cp)
3818 && opt_for_fn (v->decl, optimize))
3819 push_node_to_stack (topo, v);
3820 /* When V is not optimized, we can not push it to stack, but
3821 still we need to set all its callees lattices to bottom. */
3822 else
3823 {
3824 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3825 propagate_constants_across_call (cs);
3826 }
3827 }
3828
3829 v = pop_node_from_stack (topo);
3830 while (v)
3831 {
3832 struct cgraph_edge *cs;
3833 class ipa_node_params *info = NULL;
3834 bool self_scc = true;
3835
3836 for (cs = v->callees; cs; cs = cs->next_callee)
3837 if (ipa_edge_within_scc (cs))
3838 {
3839 cgraph_node *callee = cs->callee->function_symbol ();
3840
3841 if (v != callee)
3842 self_scc = false;
3843
3844 if (!info)
3845 {
3846 info = ipa_node_params_sum->get (v);
3847 info->node_within_scc = true;
3848 }
3849
3850 if (propagate_constants_across_call (cs))
3851 push_node_to_stack (topo, callee);
3852 }
3853
3854 if (info)
3855 info->node_is_self_scc = self_scc;
3856
3857 v = pop_node_from_stack (topo);
3858 }
3859
3860 /* Afterwards, propagate along edges leading out of the SCC, calculates
3861 the local effects of the discovered constants and all valid values to
3862 their topological sort. */
3863 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3864 if (v->has_gimple_body_p ()
3865 && opt_for_fn (v->decl, flag_ipa_cp)
3866 && opt_for_fn (v->decl, optimize))
3867 {
3868 struct cgraph_edge *cs;
3869
3870 estimate_local_effects (v);
3871 add_all_node_vals_to_toposort (v, topo);
3872 for (cs = v->callees; cs; cs = cs->next_callee)
3873 if (!ipa_edge_within_scc (cs))
3874 propagate_constants_across_call (cs);
3875 }
3876 cycle_nodes.release ();
3877 }
3878 }
3879
3880 /* Propagate the estimated effects of individual values along the topological
3881 from the dependent values to those they depend on. */
3882
3883 template <typename valtype>
3884 void
3885 value_topo_info<valtype>::propagate_effects ()
3886 {
3887 ipcp_value<valtype> *base;
3888 hash_set<ipcp_value<valtype> *> processed_srcvals;
3889
3890 for (base = values_topo; base; base = base->topo_next)
3891 {
3892 ipcp_value_source<valtype> *src;
3893 ipcp_value<valtype> *val;
3894 sreal time = 0;
3895 HOST_WIDE_INT size = 0;
3896
3897 for (val = base; val; val = val->scc_next)
3898 {
3899 time = time + val->local_time_benefit + val->prop_time_benefit;
3900 size = size + val->local_size_cost + val->prop_size_cost;
3901 }
3902
3903 for (val = base; val; val = val->scc_next)
3904 {
3905 processed_srcvals.empty ();
3906 for (src = val->sources; src; src = src->next)
3907 if (src->val
3908 && src->cs->maybe_hot_p ())
3909 {
3910 if (!processed_srcvals.add (src->val))
3911 {
3912 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3913 if (prop_size < INT_MAX)
3914 src->val->prop_size_cost = prop_size;
3915 else
3916 continue;
3917 }
3918
3919 int special_factor = 1;
3920 if (val->same_scc (src->val))
3921 special_factor
3922 = opt_for_fn(src->cs->caller->decl,
3923 param_ipa_cp_recursive_freq_factor);
3924 else if (val->self_recursion_generated_p ()
3925 && (src->cs->callee->function_symbol ()
3926 == src->cs->caller))
3927 {
3928 int max_recur_gen_depth
3929 = opt_for_fn(src->cs->caller->decl,
3930 param_ipa_cp_max_recursive_depth);
3931 special_factor = max_recur_gen_depth
3932 - val->self_recursion_generated_level + 1;
3933 }
3934
3935 src->val->prop_time_benefit
3936 += time * special_factor * src->cs->sreal_frequency ();
3937 }
3938
3939 if (size < INT_MAX)
3940 {
3941 val->prop_time_benefit = time;
3942 val->prop_size_cost = size;
3943 }
3944 else
3945 {
3946 val->prop_time_benefit = 0;
3947 val->prop_size_cost = 0;
3948 }
3949 }
3950 }
3951 }
3952
3953 /* Callback for qsort to sort counts of all edges. */
3954
3955 static int
3956 compare_edge_profile_counts (const void *a, const void *b)
3957 {
3958 const profile_count *cnt1 = (const profile_count *) a;
3959 const profile_count *cnt2 = (const profile_count *) b;
3960
3961 if (*cnt1 < *cnt2)
3962 return 1;
3963 if (*cnt1 > *cnt2)
3964 return -1;
3965 return 0;
3966 }
3967
3968
3969 /* Propagate constants, polymorphic contexts and their effects from the
3970 summaries interprocedurally. */
3971
3972 static void
3973 ipcp_propagate_stage (class ipa_topo_info *topo)
3974 {
3975 struct cgraph_node *node;
3976
3977 if (dump_file)
3978 fprintf (dump_file, "\n Propagating constants:\n\n");
3979
3980 base_count = profile_count::uninitialized ();
3981
3982 bool compute_count_base = false;
3983 unsigned base_count_pos_percent = 0;
3984 FOR_EACH_DEFINED_FUNCTION (node)
3985 {
3986 if (node->has_gimple_body_p ()
3987 && opt_for_fn (node->decl, flag_ipa_cp)
3988 && opt_for_fn (node->decl, optimize))
3989 {
3990 ipa_node_params *info = ipa_node_params_sum->get (node);
3991 determine_versionability (node, info);
3992
3993 unsigned nlattices = ipa_get_param_count (info);
3994 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
3995 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
3996 initialize_node_lattices (node);
3997 }
3998 ipa_size_summary *s = ipa_size_summaries->get (node);
3999 if (node->definition && !node->alias && s != NULL)
4000 overall_size += s->self_size;
4001 if (node->count.ipa ().initialized_p ())
4002 {
4003 compute_count_base = true;
4004 unsigned pos_percent = opt_for_fn (node->decl,
4005 param_ipa_cp_profile_count_base);
4006 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4007 }
4008 }
4009
4010 if (compute_count_base)
4011 {
4012 auto_vec<profile_count> all_edge_counts;
4013 all_edge_counts.reserve_exact (symtab->edges_count);
4014 FOR_EACH_DEFINED_FUNCTION (node)
4015 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4016 {
4017 profile_count count = cs->count.ipa ();
4018 if (!(count > profile_count::zero ()))
4019 continue;
4020
4021 enum availability avail;
4022 cgraph_node *tgt
4023 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4024 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4025 if (info && info->versionable)
4026 all_edge_counts.quick_push (count);
4027 }
4028
4029 if (!all_edge_counts.is_empty ())
4030 {
4031 gcc_assert (base_count_pos_percent <= 100);
4032 all_edge_counts.qsort (compare_edge_profile_counts);
4033
4034 unsigned base_count_pos
4035 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4036 base_count = all_edge_counts[base_count_pos];
4037
4038 if (dump_file)
4039 {
4040 fprintf (dump_file, "\nSelected base_count from %u edges at "
4041 "position %u, arriving at: ", all_edge_counts.length (),
4042 base_count_pos);
4043 base_count.dump (dump_file);
4044 fprintf (dump_file, "\n");
4045 }
4046 }
4047 else if (dump_file)
4048 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4049 "continuing as if without profile feedback.\n");
4050 }
4051
4052 orig_overall_size = overall_size;
4053
4054 if (dump_file)
4055 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4056
4057 propagate_constants_topo (topo);
4058 if (flag_checking)
4059 ipcp_verify_propagated_values ();
4060 topo->constants.propagate_effects ();
4061 topo->contexts.propagate_effects ();
4062
4063 if (dump_file)
4064 {
4065 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4066 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4067 }
4068 }
4069
4070 /* Discover newly direct outgoing edges from NODE which is a new clone with
4071 known KNOWN_CSTS and make them direct. */
4072
4073 static void
4074 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4075 vec<tree> known_csts,
4076 vec<ipa_polymorphic_call_context>
4077 known_contexts,
4078 struct ipa_agg_replacement_value *aggvals)
4079 {
4080 struct cgraph_edge *ie, *next_ie;
4081 bool found = false;
4082
4083 for (ie = node->indirect_calls; ie; ie = next_ie)
4084 {
4085 tree target;
4086 bool speculative;
4087
4088 next_ie = ie->next_callee;
4089 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4090 vNULL, aggvals, &speculative);
4091 if (target)
4092 {
4093 bool agg_contents = ie->indirect_info->agg_contents;
4094 bool polymorphic = ie->indirect_info->polymorphic;
4095 int param_index = ie->indirect_info->param_index;
4096 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4097 speculative);
4098 found = true;
4099
4100 if (cs && !agg_contents && !polymorphic)
4101 {
4102 ipa_node_params *info = ipa_node_params_sum->get (node);
4103 int c = ipa_get_controlled_uses (info, param_index);
4104 if (c != IPA_UNDESCRIBED_USE
4105 && !ipa_get_param_load_dereferenced (info, param_index))
4106 {
4107 struct ipa_ref *to_del;
4108
4109 c--;
4110 ipa_set_controlled_uses (info, param_index, c);
4111 if (dump_file && (dump_flags & TDF_DETAILS))
4112 fprintf (dump_file, " controlled uses count of param "
4113 "%i bumped down to %i\n", param_index, c);
4114 if (c == 0
4115 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4116 {
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4118 fprintf (dump_file, " and even removing its "
4119 "cloning-created reference\n");
4120 to_del->remove_reference ();
4121 }
4122 }
4123 }
4124 }
4125 }
4126 /* Turning calls to direct calls will improve overall summary. */
4127 if (found)
4128 ipa_update_overall_fn_summary (node);
4129 }
4130
4131 class edge_clone_summary;
4132 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4133
4134 /* Edge clone summary. */
4135
4136 class edge_clone_summary
4137 {
4138 public:
4139 /* Default constructor. */
4140 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4141
4142 /* Default destructor. */
4143 ~edge_clone_summary ()
4144 {
4145 if (prev_clone)
4146 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4147 if (next_clone)
4148 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4149 }
4150
4151 cgraph_edge *prev_clone;
4152 cgraph_edge *next_clone;
4153 };
4154
4155 class edge_clone_summary_t:
4156 public call_summary <edge_clone_summary *>
4157 {
4158 public:
4159 edge_clone_summary_t (symbol_table *symtab):
4160 call_summary <edge_clone_summary *> (symtab)
4161 {
4162 m_initialize_when_cloning = true;
4163 }
4164
4165 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4166 edge_clone_summary *src_data,
4167 edge_clone_summary *dst_data);
4168 };
4169
4170 /* Edge duplication hook. */
4171
4172 void
4173 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4174 edge_clone_summary *src_data,
4175 edge_clone_summary *dst_data)
4176 {
4177 if (src_data->next_clone)
4178 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4179 dst_data->prev_clone = src_edge;
4180 dst_data->next_clone = src_data->next_clone;
4181 src_data->next_clone = dst_edge;
4182 }
4183
4184 /* Return true is CS calls DEST or its clone for all contexts. When
4185 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4186 edges from/to an all-context clone. */
4187
4188 static bool
4189 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4190 bool allow_recursion_to_clone)
4191 {
4192 enum availability availability;
4193 cgraph_node *callee = cs->callee->function_symbol (&availability);
4194
4195 if (availability <= AVAIL_INTERPOSABLE)
4196 return false;
4197 if (callee == dest)
4198 return true;
4199 if (!allow_recursion_to_clone && cs->caller == callee)
4200 return false;
4201
4202 ipa_node_params *info = ipa_node_params_sum->get (callee);
4203 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4204 }
4205
4206 /* Return true if edge CS does bring about the value described by SRC to
4207 DEST_VAL of node DEST or its clone for all contexts. */
4208
4209 static bool
4210 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4211 cgraph_node *dest, ipcp_value<tree> *dest_val)
4212 {
4213 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4214
4215 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4216 || caller_info->node_dead)
4217 return false;
4218
4219 if (!src->val)
4220 return true;
4221
4222 if (caller_info->ipcp_orig_node)
4223 {
4224 tree t;
4225 if (src->offset == -1)
4226 t = caller_info->known_csts[src->index];
4227 else
4228 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4229 return (t != NULL_TREE
4230 && values_equal_for_ipcp_p (src->val->value, t));
4231 }
4232 else
4233 {
4234 if (src->val == dest_val)
4235 return true;
4236
4237 struct ipcp_agg_lattice *aglat;
4238 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4239 src->index);
4240 if (src->offset == -1)
4241 return (plats->itself.is_single_const ()
4242 && values_equal_for_ipcp_p (src->val->value,
4243 plats->itself.values->value));
4244 else
4245 {
4246 if (plats->aggs_bottom || plats->aggs_contain_variable)
4247 return false;
4248 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4249 if (aglat->offset == src->offset)
4250 return (aglat->is_single_const ()
4251 && values_equal_for_ipcp_p (src->val->value,
4252 aglat->values->value));
4253 }
4254 return false;
4255 }
4256 }
4257
4258 /* Return true if edge CS does bring about the value described by SRC to
4259 DST_VAL of node DEST or its clone for all contexts. */
4260
4261 static bool
4262 cgraph_edge_brings_value_p (cgraph_edge *cs,
4263 ipcp_value_source<ipa_polymorphic_call_context> *src,
4264 cgraph_node *dest,
4265 ipcp_value<ipa_polymorphic_call_context> *)
4266 {
4267 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4268
4269 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4270 || caller_info->node_dead)
4271 return false;
4272 if (!src->val)
4273 return true;
4274
4275 if (caller_info->ipcp_orig_node)
4276 return (caller_info->known_contexts.length () > (unsigned) src->index)
4277 && values_equal_for_ipcp_p (src->val->value,
4278 caller_info->known_contexts[src->index]);
4279
4280 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4281 src->index);
4282 return plats->ctxlat.is_single_const ()
4283 && values_equal_for_ipcp_p (src->val->value,
4284 plats->ctxlat.values->value);
4285 }
4286
4287 /* Get the next clone in the linked list of clones of an edge. */
4288
4289 static inline struct cgraph_edge *
4290 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4291 {
4292 edge_clone_summary *s = edge_clone_summaries->get (cs);
4293 return s != NULL ? s->next_clone : NULL;
4294 }
4295
4296 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4297 of them is viable and hot, return true. In that case, for those that still
4298 hold, add their edge frequency and their number and cumulative profile
4299 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4300 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4301
4302 template <typename valtype>
4303 static bool
4304 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4305 sreal *freq_sum, int *caller_count,
4306 profile_count *rec_count_sum,
4307 profile_count *nonrec_count_sum)
4308 {
4309 ipcp_value_source<valtype> *src;
4310 sreal freq = 0;
4311 int count = 0;
4312 profile_count rec_cnt = profile_count::zero ();
4313 profile_count nonrec_cnt = profile_count::zero ();
4314 bool hot = false;
4315 bool non_self_recursive = false;
4316
4317 for (src = val->sources; src; src = src->next)
4318 {
4319 struct cgraph_edge *cs = src->cs;
4320 while (cs)
4321 {
4322 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4323 {
4324 count++;
4325 freq += cs->sreal_frequency ();
4326 hot |= cs->maybe_hot_p ();
4327 if (cs->caller != dest)
4328 {
4329 non_self_recursive = true;
4330 if (cs->count.ipa ().initialized_p ())
4331 rec_cnt += cs->count.ipa ();
4332 }
4333 else if (cs->count.ipa ().initialized_p ())
4334 nonrec_cnt += cs->count.ipa ();
4335 }
4336 cs = get_next_cgraph_edge_clone (cs);
4337 }
4338 }
4339
4340 /* If the only edges bringing a value are self-recursive ones, do not bother
4341 evaluating it. */
4342 if (!non_self_recursive)
4343 return false;
4344
4345 *freq_sum = freq;
4346 *caller_count = count;
4347 *rec_count_sum = rec_cnt;
4348 *nonrec_count_sum = nonrec_cnt;
4349
4350 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4351 {
4352 struct cgraph_edge *cs;
4353
4354 /* Cold non-SCC source edge could trigger hot recursive execution of
4355 function. Consider the case as hot and rely on following cost model
4356 computation to further select right one. */
4357 for (cs = dest->callers; cs; cs = cs->next_caller)
4358 if (cs->caller == dest && cs->maybe_hot_p ())
4359 return true;
4360 }
4361
4362 return hot;
4363 }
4364
4365 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4366 to let a non-self-recursive caller be the first element. Thus, we can
4367 simplify intersecting operations on values that arrive from all of these
4368 callers, especially when there exists self-recursive call. Return true if
4369 this kind of adjustment is possible. */
4370
4371 static bool
4372 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4373 cgraph_node *node)
4374 {
4375 for (unsigned i = 0; i < callers.length (); i++)
4376 {
4377 cgraph_edge *cs = callers[i];
4378
4379 if (cs->caller != node)
4380 {
4381 if (i > 0)
4382 {
4383 callers[i] = callers[0];
4384 callers[0] = cs;
4385 }
4386 return true;
4387 }
4388 }
4389 return false;
4390 }
4391
4392 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4393 is assumed their number is known and equal to CALLER_COUNT. */
4394
4395 template <typename valtype>
4396 static vec<cgraph_edge *>
4397 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4398 int caller_count)
4399 {
4400 ipcp_value_source<valtype> *src;
4401 vec<cgraph_edge *> ret;
4402
4403 ret.create (caller_count);
4404 for (src = val->sources; src; src = src->next)
4405 {
4406 struct cgraph_edge *cs = src->cs;
4407 while (cs)
4408 {
4409 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4410 ret.quick_push (cs);
4411 cs = get_next_cgraph_edge_clone (cs);
4412 }
4413 }
4414
4415 if (caller_count > 1)
4416 adjust_callers_for_value_intersection (ret, dest);
4417
4418 return ret;
4419 }
4420
4421 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4422 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4423 should be set to true when the reference created for the constant should be
4424 a load one and not an address one because the corresponding parameter p is
4425 only used as *p. */
4426
4427 static struct ipa_replace_map *
4428 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4429 bool force_load_ref)
4430 {
4431 struct ipa_replace_map *replace_map;
4432
4433 replace_map = ggc_alloc<ipa_replace_map> ();
4434 if (dump_file)
4435 {
4436 fprintf (dump_file, " replacing ");
4437 ipa_dump_param (dump_file, info, parm_num);
4438
4439 fprintf (dump_file, " with const ");
4440 print_generic_expr (dump_file, value);
4441
4442 if (force_load_ref)
4443 fprintf (dump_file, " - forcing load reference\n");
4444 else
4445 fprintf (dump_file, "\n");
4446 }
4447 replace_map->parm_num = parm_num;
4448 replace_map->new_tree = value;
4449 replace_map->force_load_ref = force_load_ref;
4450 return replace_map;
4451 }
4452
4453 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4454 one, otherwise it will be referred to as the original node. */
4455
4456 static void
4457 dump_profile_updates (cgraph_node *node, bool spec)
4458 {
4459 if (spec)
4460 fprintf (dump_file, " setting count of the specialized node %s to ",
4461 node->dump_name ());
4462 else
4463 fprintf (dump_file, " setting count of the original node %s to ",
4464 node->dump_name ());
4465
4466 node->count.dump (dump_file);
4467 fprintf (dump_file, "\n");
4468 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4469 {
4470 fprintf (dump_file, " edge to %s has count ",
4471 cs->callee->dump_name ());
4472 cs->count.dump (dump_file);
4473 fprintf (dump_file, "\n");
4474 }
4475 }
4476
4477 /* With partial train run we do not want to assume that original's count is
4478 zero whenever we redurect all executed edges to clone. Simply drop profile
4479 to local one in this case. In eany case, return the new value. ORIG_NODE
4480 is the original node and its count has not been updaed yet. */
4481
4482 profile_count
4483 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4484 {
4485 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4486 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4487 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4488 remainder = remainder.guessed_local ();
4489
4490 return remainder;
4491 }
4492
4493 /* Structure to sum counts coming from nodes other than the original node and
4494 its clones. */
4495
4496 struct gather_other_count_struct
4497 {
4498 cgraph_node *orig;
4499 profile_count other_count;
4500 };
4501
4502 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4503 counts that come from non-self-recursive calls.. */
4504
4505 static bool
4506 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4507 {
4508 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4509 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4510 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4511 desc->other_count += cs->count.ipa ();
4512 return false;
4513 }
4514
4515 /* Structure to help analyze if we need to boost counts of some clones of some
4516 non-recursive edges to match the new callee count. */
4517
4518 struct desc_incoming_count_struct
4519 {
4520 cgraph_node *orig;
4521 hash_set <cgraph_edge *> *processed_edges;
4522 profile_count count;
4523 unsigned unproc_orig_rec_edges;
4524 };
4525
4526 /* Go over edges calling NODE and its thunks and gather information about
4527 incoming counts so that we know if we need to make any adjustments. */
4528
4529 static void
4530 analyze_clone_icoming_counts (cgraph_node *node,
4531 desc_incoming_count_struct *desc)
4532 {
4533 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4534 if (cs->caller->thunk)
4535 {
4536 analyze_clone_icoming_counts (cs->caller, desc);
4537 continue;
4538 }
4539 else
4540 {
4541 if (cs->count.initialized_p ())
4542 desc->count += cs->count.ipa ();
4543 if (!desc->processed_edges->contains (cs)
4544 && cs->caller->clone_of == desc->orig)
4545 desc->unproc_orig_rec_edges++;
4546 }
4547 }
4548
4549 /* If caller edge counts of a clone created for a self-recursive arithmetic
4550 jump function must be adjusted because it is coming from a the "seed" clone
4551 for the first value and so has been excessively scaled back as if it was not
4552 a recursive call, adjust it so that the incoming counts of NODE match its
4553 count. NODE is the node or its thunk. */
4554
4555 static void
4556 adjust_clone_incoming_counts (cgraph_node *node,
4557 desc_incoming_count_struct *desc)
4558 {
4559 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4560 if (cs->caller->thunk)
4561 {
4562 adjust_clone_incoming_counts (cs->caller, desc);
4563 profile_count sum = profile_count::zero ();
4564 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4565 if (e->count.initialized_p ())
4566 sum += e->count.ipa ();
4567 cs->count = cs->count.combine_with_ipa_count (sum);
4568 }
4569 else if (!desc->processed_edges->contains (cs)
4570 && cs->caller->clone_of == desc->orig)
4571 {
4572 cs->count += desc->count;
4573 if (dump_file)
4574 {
4575 fprintf (dump_file, " Adjusted count of an incoming edge of "
4576 "a clone %s -> %s to ", cs->caller->dump_name (),
4577 cs->callee->dump_name ());
4578 cs->count.dump (dump_file);
4579 fprintf (dump_file, "\n");
4580 }
4581 }
4582 }
4583
4584 /* When ORIG_NODE has been cloned for values which have been generated fora
4585 self-recursive call as a result of an arithmetic pass-through
4586 jump-functions, adjust its count together with counts of all such clones in
4587 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4588
4589 The function sums the counts of the original node and all its clones that
4590 cannot be attributed to a specific clone because it comes from a
4591 non-recursive edge. This sum is then evenly divided between the clones and
4592 on top of that each one gets all the counts which can be attributed directly
4593 to it. */
4594
4595 static void
4596 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4597 const vec<cgraph_node *> &self_gen_clones)
4598 {
4599 profile_count redist_sum = orig_node->count.ipa ();
4600 if (!(redist_sum > profile_count::zero ()))
4601 return;
4602
4603 if (dump_file)
4604 fprintf (dump_file, " Updating profile of self recursive clone "
4605 "series\n");
4606
4607 gather_other_count_struct gocs;
4608 gocs.orig = orig_node;
4609 gocs.other_count = profile_count::zero ();
4610
4611 auto_vec <profile_count, 8> other_edges_count;
4612 for (cgraph_node *n : self_gen_clones)
4613 {
4614 gocs.other_count = profile_count::zero ();
4615 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4616 &gocs, false);
4617 other_edges_count.safe_push (gocs.other_count);
4618 redist_sum -= gocs.other_count;
4619 }
4620
4621 hash_set<cgraph_edge *> processed_edges;
4622 unsigned i = 0;
4623 for (cgraph_node *n : self_gen_clones)
4624 {
4625 profile_count orig_count = n->count;
4626 profile_count new_count
4627 = (redist_sum.apply_scale (1, self_gen_clones.length ())
4628 + other_edges_count[i]);
4629 new_count = lenient_count_portion_handling (new_count, orig_node);
4630 n->count = new_count;
4631 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4632 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4633 {
4634 cs->count = cs->count.apply_scale (new_count, orig_count);
4635 processed_edges.add (cs);
4636 }
4637 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4638 cs->count = cs->count.apply_scale (new_count, orig_count);
4639
4640 i++;
4641 }
4642
4643 /* There are still going to be edges to ORIG_NODE that have one or more
4644 clones coming from another node clone in SELF_GEN_CLONES and which we
4645 scaled by the same amount, which means that the total incoming sum of
4646 counts to ORIG_NODE will be too high, scale such edges back. */
4647 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4648 {
4649 if (cs->callee->ultimate_alias_target () == orig_node)
4650 {
4651 unsigned den = 0;
4652 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4653 if (e->callee->ultimate_alias_target () == orig_node
4654 && processed_edges.contains (e))
4655 den++;
4656 if (den > 0)
4657 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4658 if (e->callee->ultimate_alias_target () == orig_node
4659 && processed_edges.contains (e))
4660 e->count = e->count.apply_scale (1, den);
4661 }
4662 }
4663
4664 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4665 along self-recursive edges are likely to have fairly low count and so
4666 edges from them to nodes in the self_gen_clones do not correspond to the
4667 artificially distributed count of the nodes, the total sum of incoming
4668 edges to some clones might be too low. Detect this situation and correct
4669 it. */
4670 for (cgraph_node *n : self_gen_clones)
4671 {
4672 if (!(n->count.ipa () > profile_count::zero ()))
4673 continue;
4674
4675 desc_incoming_count_struct desc;
4676 desc.orig = orig_node;
4677 desc.processed_edges = &processed_edges;
4678 desc.count = profile_count::zero ();
4679 desc.unproc_orig_rec_edges = 0;
4680 analyze_clone_icoming_counts (n, &desc);
4681
4682 if (n->count.differs_from_p (desc.count))
4683 {
4684 if (n->count > desc.count
4685 && desc.unproc_orig_rec_edges > 0)
4686 {
4687 desc.count = n->count - desc.count;
4688 desc.count
4689 = desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
4690 adjust_clone_incoming_counts (n, &desc);
4691 }
4692 else if (dump_file)
4693 fprintf (dump_file,
4694 " Unable to fix up incoming counts for %s.\n",
4695 n->dump_name ());
4696 }
4697 }
4698
4699 if (dump_file)
4700 for (cgraph_node *n : self_gen_clones)
4701 dump_profile_updates (n, n != orig_node);
4702 return;
4703 }
4704
4705 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4706 their profile information to reflect this. This function should not be used
4707 for clones generated for arithmetic pass-through jump functions on a
4708 self-recursive call graph edge, that situation is handled by
4709 update_counts_for_self_gen_clones. */
4710
4711 static void
4712 update_profiling_info (struct cgraph_node *orig_node,
4713 struct cgraph_node *new_node)
4714 {
4715 struct caller_statistics stats;
4716 profile_count new_sum;
4717 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4718
4719 if (!(orig_node_count > profile_count::zero ()))
4720 return;
4721
4722 if (dump_file)
4723 {
4724 fprintf (dump_file, " Updating profile from original count: ");
4725 orig_node_count.dump (dump_file);
4726 fprintf (dump_file, "\n");
4727 }
4728
4729 init_caller_stats (&stats, new_node);
4730 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4731 false);
4732 new_sum = stats.count_sum;
4733
4734 if (new_sum > orig_node_count)
4735 {
4736 /* TODO: Perhaps this should be gcc_unreachable ()? */
4737 remainder = profile_count::zero ().guessed_local ();
4738 }
4739 else if (stats.rec_count_sum.nonzero_p ())
4740 {
4741 int new_nonrec_calls = stats.n_nonrec_calls;
4742 /* There are self-recursive edges which are likely to bring in the
4743 majority of calls but which we must divide in between the original and
4744 new node. */
4745 init_caller_stats (&stats, orig_node);
4746 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4747 &stats, false);
4748 int orig_nonrec_calls = stats.n_nonrec_calls;
4749 profile_count orig_nonrec_call_count = stats.count_sum;
4750
4751 if (orig_node->local)
4752 {
4753 if (!orig_nonrec_call_count.nonzero_p ())
4754 {
4755 if (dump_file)
4756 fprintf (dump_file, " The original is local and the only "
4757 "incoming edges from non-dead callers with nonzero "
4758 "counts are self-recursive, assuming it is cold.\n");
4759 /* The NEW_NODE count and counts of all its outgoing edges
4760 are still unmodified copies of ORIG_NODE's. Just clear
4761 the latter and bail out. */
4762 profile_count zero;
4763 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4764 zero = profile_count::zero ().guessed_local ();
4765 else
4766 zero = profile_count::adjusted_zero ();
4767 orig_node->count = zero;
4768 for (cgraph_edge *cs = orig_node->callees;
4769 cs;
4770 cs = cs->next_callee)
4771 cs->count = zero;
4772 for (cgraph_edge *cs = orig_node->indirect_calls;
4773 cs;
4774 cs = cs->next_callee)
4775 cs->count = zero;
4776 return;
4777 }
4778 }
4779 else
4780 {
4781 /* Let's behave as if there was another caller that accounts for all
4782 the calls that were either indirect or from other compilation
4783 units. */
4784 orig_nonrec_calls++;
4785 profile_count pretend_caller_count
4786 = (orig_node_count - new_sum - orig_nonrec_call_count
4787 - stats.rec_count_sum);
4788 orig_nonrec_call_count += pretend_caller_count;
4789 }
4790
4791 /* Divide all "unexplained" counts roughly proportionally to sums of
4792 counts of non-recursive calls.
4793
4794 We put rather arbitrary limits on how many counts we claim because the
4795 number of non-self-recursive incoming count is only a rough guideline
4796 and there are cases (such as mcf) where using it blindly just takes
4797 too many. And if lattices are considered in the opposite order we
4798 could also take too few. */
4799 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4800
4801 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4802 profile_count new_part
4803 = MAX(MIN (unexp.apply_scale (new_sum,
4804 new_sum + orig_nonrec_call_count),
4805 unexp.apply_scale (limit_den - 1, limit_den)),
4806 unexp.apply_scale (new_nonrec_calls, limit_den));
4807 if (dump_file)
4808 {
4809 fprintf (dump_file, " Claiming ");
4810 new_part.dump (dump_file);
4811 fprintf (dump_file, " of unexplained ");
4812 unexp.dump (dump_file);
4813 fprintf (dump_file, " counts because of self-recursive "
4814 "calls\n");
4815 }
4816 new_sum += new_part;
4817 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4818 orig_node);
4819 }
4820 else
4821 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4822 orig_node);
4823
4824 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4825 new_node->count = new_sum;
4826 orig_node->count = remainder;
4827
4828 profile_count orig_new_node_count = orig_node_count;
4829 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4830 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4831 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4832 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4833 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4834
4835 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4836 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4837 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4838 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4839 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4840
4841 if (dump_file)
4842 {
4843 dump_profile_updates (new_node, true);
4844 dump_profile_updates (orig_node, false);
4845 }
4846 }
4847
4848 /* Update the respective profile of specialized NEW_NODE and the original
4849 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4850 have been redirected to the specialized version. */
4851
4852 static void
4853 update_specialized_profile (struct cgraph_node *new_node,
4854 struct cgraph_node *orig_node,
4855 profile_count redirected_sum)
4856 {
4857 struct cgraph_edge *cs;
4858 profile_count new_node_count, orig_node_count = orig_node->count;
4859
4860 if (dump_file)
4861 {
4862 fprintf (dump_file, " the sum of counts of redirected edges is ");
4863 redirected_sum.dump (dump_file);
4864 fprintf (dump_file, "\n");
4865 }
4866 if (!(orig_node_count > profile_count::zero ()))
4867 return;
4868
4869 gcc_assert (orig_node_count >= redirected_sum);
4870
4871 new_node_count = new_node->count;
4872 new_node->count += redirected_sum;
4873 orig_node->count -= redirected_sum;
4874
4875 for (cs = new_node->callees; cs; cs = cs->next_callee)
4876 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4877
4878 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4879 {
4880 profile_count dec = cs->count.apply_scale (redirected_sum,
4881 orig_node_count);
4882 cs->count -= dec;
4883 }
4884
4885 if (dump_file)
4886 {
4887 dump_profile_updates (new_node, true);
4888 dump_profile_updates (orig_node, false);
4889 }
4890 }
4891
4892 static void adjust_references_in_caller (cgraph_edge *cs,
4893 symtab_node *symbol, int index);
4894
4895 /* Simple structure to pass a symbol and index (with same meaning as parameters
4896 of adjust_references_in_caller) through a void* parameter of a
4897 call_for_symbol_thunks_and_aliases callback. */
4898 struct symbol_and_index_together
4899 {
4900 symtab_node *symbol;
4901 int index;
4902 };
4903
4904 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4905 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4906 static bool
4907 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4908 {
4909 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4910 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4911 if (!cs->caller->thunk)
4912 adjust_references_in_caller (cs, pack->symbol, pack->index);
4913 return false;
4914 }
4915
4916 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4917 variable which is only dereferenced and which is represented by SYMBOL. See
4918 if we can remove ADDR reference in callers assosiated witht the call. */
4919
4920 static void
4921 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4922 {
4923 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4924 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4925 if (jfunc->type == IPA_JF_CONST)
4926 {
4927 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4928 cs->lto_stmt_uid);
4929 if (!to_del)
4930 return;
4931 to_del->remove_reference ();
4932 if (dump_file)
4933 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4934 cs->caller->dump_name (), symbol->dump_name ());
4935 return;
4936 }
4937
4938 if (jfunc->type != IPA_JF_PASS_THROUGH
4939 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
4940 return;
4941
4942 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4943 cgraph_node *caller = cs->caller;
4944 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4945 /* TODO: This consistency check may be too big and not really
4946 that useful. Consider removing it. */
4947 tree cst;
4948 if (caller_info->ipcp_orig_node)
4949 cst = caller_info->known_csts[fidx];
4950 else
4951 {
4952 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4953 gcc_assert (lat->is_single_const ());
4954 cst = lat->values->value;
4955 }
4956 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4957 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4958 == symbol));
4959
4960 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4961 if (cuses == IPA_UNDESCRIBED_USE)
4962 return;
4963 gcc_assert (cuses > 0);
4964 cuses--;
4965 ipa_set_controlled_uses (caller_info, fidx, cuses);
4966 if (cuses)
4967 return;
4968
4969 if (caller_info->ipcp_orig_node)
4970 {
4971 /* Cloning machinery has created a reference here, we need to either
4972 remove it or change it to a read one. */
4973 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
4974 if (to_del && to_del->use == IPA_REF_ADDR)
4975 {
4976 to_del->remove_reference ();
4977 if (dump_file)
4978 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4979 cs->caller->dump_name (), symbol->dump_name ());
4980 if (ipa_get_param_load_dereferenced (caller_info, fidx))
4981 {
4982 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
4983 if (dump_file)
4984 fprintf (dump_file,
4985 " ...and replaced it with LOAD one.\n");
4986 }
4987 }
4988 }
4989
4990 symbol_and_index_together pack;
4991 pack.symbol = symbol;
4992 pack.index = fidx;
4993 if (caller->can_change_signature)
4994 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
4995 &pack, true);
4996 }
4997
4998
4999 /* Return true if we would like to remove a parameter from NODE when cloning it
5000 with KNOWN_CSTS scalar constants. */
5001
5002 static bool
5003 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5004 {
5005 auto_vec<bool, 16> surviving;
5006 bool filled_vec = false;
5007 ipa_node_params *info = ipa_node_params_sum->get (node);
5008 int i, count = ipa_get_param_count (info);
5009
5010 for (i = 0; i < count; i++)
5011 {
5012 if (!known_csts[i] && ipa_is_param_used (info, i))
5013 continue;
5014
5015 if (!filled_vec)
5016 {
5017 clone_info *info = clone_info::get (node);
5018 if (!info || !info->param_adjustments)
5019 return true;
5020 info->param_adjustments->get_surviving_params (&surviving);
5021 filled_vec = true;
5022 }
5023 if (surviving.length() < (unsigned) i && surviving[i])
5024 return true;
5025 }
5026 return false;
5027 }
5028
5029 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5030 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5031 redirect all edges in CALLERS to it. */
5032
5033 static struct cgraph_node *
5034 create_specialized_node (struct cgraph_node *node,
5035 vec<tree> known_csts,
5036 vec<ipa_polymorphic_call_context> known_contexts,
5037 struct ipa_agg_replacement_value *aggvals,
5038 vec<cgraph_edge *> &callers)
5039 {
5040 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5041 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5042 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5043 struct ipa_agg_replacement_value *av;
5044 struct cgraph_node *new_node;
5045 int i, count = ipa_get_param_count (info);
5046 clone_info *cinfo = clone_info::get (node);
5047 ipa_param_adjustments *old_adjustments = cinfo
5048 ? cinfo->param_adjustments : NULL;
5049 ipa_param_adjustments *new_adjustments;
5050 gcc_assert (!info->ipcp_orig_node);
5051 gcc_assert (node->can_change_signature
5052 || !old_adjustments);
5053
5054 if (old_adjustments)
5055 {
5056 /* At the moment all IPA optimizations should use the number of
5057 parameters of the prevailing decl as the m_always_copy_start.
5058 Handling any other value would complicate the code below, so for the
5059 time bing let's only assert it is so. */
5060 gcc_assert (old_adjustments->m_always_copy_start == count
5061 || old_adjustments->m_always_copy_start < 0);
5062 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5063 for (i = 0; i < old_adj_count; i++)
5064 {
5065 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5066 if (!node->can_change_signature
5067 || old_adj->op != IPA_PARAM_OP_COPY
5068 || (!known_csts[old_adj->base_index]
5069 && ipa_is_param_used (info, old_adj->base_index)))
5070 {
5071 ipa_adjusted_param new_adj = *old_adj;
5072
5073 new_adj.prev_clone_adjustment = true;
5074 new_adj.prev_clone_index = i;
5075 vec_safe_push (new_params, new_adj);
5076 }
5077 }
5078 bool skip_return = old_adjustments->m_skip_return;
5079 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5080 ipa_param_adjustments (new_params, count,
5081 skip_return));
5082 }
5083 else if (node->can_change_signature
5084 && want_remove_some_param_p (node, known_csts))
5085 {
5086 ipa_adjusted_param adj;
5087 memset (&adj, 0, sizeof (adj));
5088 adj.op = IPA_PARAM_OP_COPY;
5089 for (i = 0; i < count; i++)
5090 if (!known_csts[i] && ipa_is_param_used (info, i))
5091 {
5092 adj.base_index = i;
5093 adj.prev_clone_index = i;
5094 vec_safe_push (new_params, adj);
5095 }
5096 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5097 ipa_param_adjustments (new_params, count, false));
5098 }
5099 else
5100 new_adjustments = NULL;
5101
5102 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5103 for (i = 0; i < count; i++)
5104 {
5105 tree t = known_csts[i];
5106 if (!t)
5107 continue;
5108
5109 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5110
5111 bool load_ref = false;
5112 symtab_node *ref_symbol;
5113 if (TREE_CODE (t) == ADDR_EXPR)
5114 {
5115 tree base = get_base_address (TREE_OPERAND (t, 0));
5116 if (TREE_CODE (base) == VAR_DECL
5117 && ipa_get_controlled_uses (info, i) == 0
5118 && ipa_get_param_load_dereferenced (info, i)
5119 && (ref_symbol = symtab_node::get (base)))
5120 {
5121 load_ref = true;
5122 if (node->can_change_signature)
5123 for (cgraph_edge *caller : callers)
5124 adjust_references_in_caller (caller, ref_symbol, i);
5125 }
5126 }
5127
5128 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5129 if (replace_map)
5130 vec_safe_push (replace_trees, replace_map);
5131 }
5132 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5133 for (i = callers.length () - 1; i >= 0; i--)
5134 {
5135 cgraph_edge *cs = callers[i];
5136 if (cs->caller == node)
5137 {
5138 self_recursive_calls.safe_push (cs);
5139 callers.unordered_remove (i);
5140 }
5141 }
5142
5143 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5144 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5145 node->decl)));
5146 new_node = node->create_virtual_clone (callers, replace_trees,
5147 new_adjustments, "constprop",
5148 suffix_counter);
5149 suffix_counter++;
5150
5151 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5152 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5153 {
5154 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5155 /* Cloned edges can disappear during cloning as speculation can be
5156 resolved, check that we have one and that it comes from the last
5157 cloning. */
5158 if (cs && cs->caller == new_node)
5159 cs->redirect_callee_duplicating_thunks (new_node);
5160 /* Any future code that would make more than one clone of an outgoing
5161 edge would confuse this mechanism, so let's check that does not
5162 happen. */
5163 gcc_checking_assert (!cs
5164 || !get_next_cgraph_edge_clone (cs)
5165 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5166 }
5167 if (have_self_recursive_calls)
5168 new_node->expand_all_artificial_thunks ();
5169
5170 ipa_set_node_agg_value_chain (new_node, aggvals);
5171 for (av = aggvals; av; av = av->next)
5172 new_node->maybe_create_reference (av->value, NULL);
5173
5174 if (dump_file && (dump_flags & TDF_DETAILS))
5175 {
5176 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5177 if (known_contexts.exists ())
5178 {
5179 for (i = 0; i < count; i++)
5180 if (!known_contexts[i].useless_p ())
5181 {
5182 fprintf (dump_file, " known ctx %i is ", i);
5183 known_contexts[i].dump (dump_file);
5184 }
5185 }
5186 if (aggvals)
5187 ipa_dump_agg_replacement_values (dump_file, aggvals);
5188 }
5189
5190 new_info = ipa_node_params_sum->get (new_node);
5191 new_info->ipcp_orig_node = node;
5192 new_node->ipcp_clone = true;
5193 new_info->known_csts = known_csts;
5194 new_info->known_contexts = known_contexts;
5195
5196 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5197
5198 return new_node;
5199 }
5200
5201 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5202 pass-through function to itself when the cgraph_node involved is not an
5203 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5204 no-operation pass-through. */
5205
5206 static bool
5207 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5208 bool simple = true)
5209 {
5210 enum availability availability;
5211 if (cs->caller == cs->callee->function_symbol (&availability)
5212 && availability > AVAIL_INTERPOSABLE
5213 && jfunc->type == IPA_JF_PASS_THROUGH
5214 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5215 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5216 && ipa_node_params_sum->get (cs->caller)
5217 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5218 return true;
5219 return false;
5220 }
5221
5222 /* Return true if JFUNC, which describes a part of an aggregate represented or
5223 pointed to by the i-th parameter of call CS, is a pass-through function to
5224 itself when the cgraph_node involved is not an IPA-CP clone.. When
5225 SIMPLE is true, further check if JFUNC is a simple no-operation
5226 pass-through. */
5227
5228 static bool
5229 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5230 int i, bool simple = true)
5231 {
5232 enum availability availability;
5233 if (cs->caller == cs->callee->function_symbol (&availability)
5234 && availability > AVAIL_INTERPOSABLE
5235 && jfunc->jftype == IPA_JF_LOAD_AGG
5236 && jfunc->offset == jfunc->value.load_agg.offset
5237 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5238 && jfunc->value.pass_through.formal_id == i
5239 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5240 && ipa_node_params_sum->get (cs->caller)
5241 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5242 return true;
5243 return false;
5244 }
5245
5246 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5247 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5248
5249 static void
5250 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5251 vec<tree> &known_csts,
5252 const vec<cgraph_edge *> &callers)
5253 {
5254 ipa_node_params *info = ipa_node_params_sum->get (node);
5255 int i, count = ipa_get_param_count (info);
5256
5257 for (i = 0; i < count; i++)
5258 {
5259 struct cgraph_edge *cs;
5260 tree newval = NULL_TREE;
5261 int j;
5262 bool first = true;
5263 tree type = ipa_get_type (info, i);
5264
5265 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5266 continue;
5267
5268 FOR_EACH_VEC_ELT (callers, j, cs)
5269 {
5270 struct ipa_jump_func *jump_func;
5271 tree t;
5272
5273 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5274 if (!args
5275 || i >= ipa_get_cs_argument_count (args)
5276 || (i == 0
5277 && call_passes_through_thunk (cs)))
5278 {
5279 newval = NULL_TREE;
5280 break;
5281 }
5282 jump_func = ipa_get_ith_jump_func (args, i);
5283
5284 /* Besides simple pass-through jump function, arithmetic jump
5285 function could also introduce argument-direct-pass-through for
5286 self-feeding recursive call. For example,
5287
5288 fn (int i)
5289 {
5290 fn (i & 1);
5291 }
5292
5293 Given that i is 0, recursive propagation via (i & 1) also gets
5294 0. */
5295 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5296 {
5297 gcc_assert (newval);
5298 t = ipa_get_jf_arith_result (
5299 ipa_get_jf_pass_through_operation (jump_func),
5300 newval,
5301 ipa_get_jf_pass_through_operand (jump_func),
5302 type);
5303 }
5304 else
5305 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5306 jump_func, type);
5307 if (!t
5308 || (newval
5309 && !values_equal_for_ipcp_p (t, newval))
5310 || (!first && !newval))
5311 {
5312 newval = NULL_TREE;
5313 break;
5314 }
5315 else
5316 newval = t;
5317 first = false;
5318 }
5319
5320 if (newval)
5321 {
5322 if (dump_file && (dump_flags & TDF_DETAILS))
5323 {
5324 fprintf (dump_file, " adding an extra known scalar value ");
5325 print_ipcp_constant_value (dump_file, newval);
5326 fprintf (dump_file, " for ");
5327 ipa_dump_param (dump_file, info, i);
5328 fprintf (dump_file, "\n");
5329 }
5330
5331 known_csts[i] = newval;
5332 }
5333 }
5334 }
5335
5336 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5337 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5338 CALLERS. */
5339
5340 static void
5341 find_more_contexts_for_caller_subset (cgraph_node *node,
5342 vec<ipa_polymorphic_call_context>
5343 *known_contexts,
5344 const vec<cgraph_edge *> &callers)
5345 {
5346 ipa_node_params *info = ipa_node_params_sum->get (node);
5347 int i, count = ipa_get_param_count (info);
5348
5349 for (i = 0; i < count; i++)
5350 {
5351 cgraph_edge *cs;
5352
5353 if (ipa_get_poly_ctx_lat (info, i)->bottom
5354 || (known_contexts->exists ()
5355 && !(*known_contexts)[i].useless_p ()))
5356 continue;
5357
5358 ipa_polymorphic_call_context newval;
5359 bool first = true;
5360 int j;
5361
5362 FOR_EACH_VEC_ELT (callers, j, cs)
5363 {
5364 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5365 if (!args
5366 || i >= ipa_get_cs_argument_count (args))
5367 return;
5368 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5369 ipa_polymorphic_call_context ctx;
5370 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5371 cs, i, jfunc);
5372 if (first)
5373 {
5374 newval = ctx;
5375 first = false;
5376 }
5377 else
5378 newval.meet_with (ctx);
5379 if (newval.useless_p ())
5380 break;
5381 }
5382
5383 if (!newval.useless_p ())
5384 {
5385 if (dump_file && (dump_flags & TDF_DETAILS))
5386 {
5387 fprintf (dump_file, " adding an extra known polymorphic "
5388 "context ");
5389 print_ipcp_constant_value (dump_file, newval);
5390 fprintf (dump_file, " for ");
5391 ipa_dump_param (dump_file, info, i);
5392 fprintf (dump_file, "\n");
5393 }
5394
5395 if (!known_contexts->exists ())
5396 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5397 true);
5398 (*known_contexts)[i] = newval;
5399 }
5400
5401 }
5402 }
5403
5404 /* Go through PLATS and create a vector of values consisting of values and
5405 offsets (minus OFFSET) of lattices that contain only a single value. */
5406
5407 static vec<ipa_agg_value>
5408 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5409 {
5410 vec<ipa_agg_value> res = vNULL;
5411
5412 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5413 return vNULL;
5414
5415 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5416 if (aglat->is_single_const ())
5417 {
5418 struct ipa_agg_value ti;
5419 ti.offset = aglat->offset - offset;
5420 ti.value = aglat->values->value;
5421 res.safe_push (ti);
5422 }
5423 return res;
5424 }
5425
5426 /* Intersect all values in INTER with single value lattices in PLATS (while
5427 subtracting OFFSET). */
5428
5429 static void
5430 intersect_with_plats (class ipcp_param_lattices *plats,
5431 vec<ipa_agg_value> *inter,
5432 HOST_WIDE_INT offset)
5433 {
5434 struct ipcp_agg_lattice *aglat;
5435 struct ipa_agg_value *item;
5436 int k;
5437
5438 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5439 {
5440 inter->release ();
5441 return;
5442 }
5443
5444 aglat = plats->aggs;
5445 FOR_EACH_VEC_ELT (*inter, k, item)
5446 {
5447 bool found = false;
5448 if (!item->value)
5449 continue;
5450 while (aglat)
5451 {
5452 if (aglat->offset - offset > item->offset)
5453 break;
5454 if (aglat->offset - offset == item->offset)
5455 {
5456 if (aglat->is_single_const ())
5457 {
5458 tree value = aglat->values->value;
5459
5460 if (values_equal_for_ipcp_p (item->value, value))
5461 found = true;
5462 }
5463 break;
5464 }
5465 aglat = aglat->next;
5466 }
5467 if (!found)
5468 item->value = NULL_TREE;
5469 }
5470 }
5471
5472 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5473 vector result while subtracting OFFSET from the individual value offsets. */
5474
5475 static vec<ipa_agg_value>
5476 agg_replacements_to_vector (struct cgraph_node *node, int index,
5477 HOST_WIDE_INT offset)
5478 {
5479 struct ipa_agg_replacement_value *av;
5480 vec<ipa_agg_value> res = vNULL;
5481
5482 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5483 if (av->index == index
5484 && (av->offset - offset) >= 0)
5485 {
5486 struct ipa_agg_value item;
5487 gcc_checking_assert (av->value);
5488 item.offset = av->offset - offset;
5489 item.value = av->value;
5490 res.safe_push (item);
5491 }
5492
5493 return res;
5494 }
5495
5496 /* Intersect all values in INTER with those that we have already scheduled to
5497 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5498 (while subtracting OFFSET). */
5499
5500 static void
5501 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5502 vec<ipa_agg_value> *inter,
5503 HOST_WIDE_INT offset)
5504 {
5505 struct ipa_agg_replacement_value *srcvals;
5506 struct ipa_agg_value *item;
5507 int i;
5508
5509 srcvals = ipa_get_agg_replacements_for_node (node);
5510 if (!srcvals)
5511 {
5512 inter->release ();
5513 return;
5514 }
5515
5516 FOR_EACH_VEC_ELT (*inter, i, item)
5517 {
5518 struct ipa_agg_replacement_value *av;
5519 bool found = false;
5520 if (!item->value)
5521 continue;
5522 for (av = srcvals; av; av = av->next)
5523 {
5524 gcc_checking_assert (av->value);
5525 if (av->index == index
5526 && av->offset - offset == item->offset)
5527 {
5528 if (values_equal_for_ipcp_p (item->value, av->value))
5529 found = true;
5530 break;
5531 }
5532 }
5533 if (!found)
5534 item->value = NULL_TREE;
5535 }
5536 }
5537
5538 /* Intersect values in INTER with aggregate values that come along edge CS to
5539 parameter number INDEX and return it. If INTER does not actually exist yet,
5540 copy all incoming values to it. If we determine we ended up with no values
5541 whatsoever, return a released vector. */
5542
5543 static vec<ipa_agg_value>
5544 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5545 vec<ipa_agg_value> inter)
5546 {
5547 struct ipa_jump_func *jfunc;
5548 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5549 if (jfunc->type == IPA_JF_PASS_THROUGH
5550 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5551 {
5552 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5553 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5554
5555 if (caller_info->ipcp_orig_node)
5556 {
5557 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5558 class ipcp_param_lattices *orig_plats;
5559 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5560 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5561 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5562 {
5563 if (!inter.exists ())
5564 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5565 else
5566 intersect_with_agg_replacements (cs->caller, src_idx,
5567 &inter, 0);
5568 return inter;
5569 }
5570 }
5571 else
5572 {
5573 class ipcp_param_lattices *src_plats;
5574 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5575 if (agg_pass_through_permissible_p (src_plats, jfunc))
5576 {
5577 /* Currently we do not produce clobber aggregate jump
5578 functions, adjust when we do. */
5579 gcc_checking_assert (!jfunc->agg.items);
5580 if (!inter.exists ())
5581 inter = copy_plats_to_inter (src_plats, 0);
5582 else
5583 intersect_with_plats (src_plats, &inter, 0);
5584 return inter;
5585 }
5586 }
5587 }
5588 else if (jfunc->type == IPA_JF_ANCESTOR
5589 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5590 {
5591 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5592 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5593 class ipcp_param_lattices *src_plats;
5594 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5595
5596 if (caller_info->ipcp_orig_node)
5597 {
5598 if (!inter.exists ())
5599 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5600 else
5601 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5602 delta);
5603 }
5604 else
5605 {
5606 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5607 /* Currently we do not produce clobber aggregate jump
5608 functions, adjust when we do. */
5609 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5610 if (!inter.exists ())
5611 inter = copy_plats_to_inter (src_plats, delta);
5612 else
5613 intersect_with_plats (src_plats, &inter, delta);
5614 }
5615 return inter;
5616 }
5617
5618 if (jfunc->agg.items)
5619 {
5620 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5621 struct ipa_agg_value *item;
5622 int k;
5623
5624 if (!inter.exists ())
5625 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5626 {
5627 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5628 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5629 agg_item);
5630 if (value)
5631 {
5632 struct ipa_agg_value agg_value;
5633
5634 agg_value.value = value;
5635 agg_value.offset = agg_item->offset;
5636 inter.safe_push (agg_value);
5637 }
5638 }
5639 else
5640 FOR_EACH_VEC_ELT (inter, k, item)
5641 {
5642 int l = 0;
5643 bool found = false;
5644
5645 if (!item->value)
5646 continue;
5647
5648 while ((unsigned) l < jfunc->agg.items->length ())
5649 {
5650 struct ipa_agg_jf_item *ti;
5651 ti = &(*jfunc->agg.items)[l];
5652 if (ti->offset > item->offset)
5653 break;
5654 if (ti->offset == item->offset)
5655 {
5656 tree value;
5657
5658 /* Besides simple pass-through aggregate jump function,
5659 arithmetic aggregate jump function could also bring
5660 same aggregate value as parameter passed-in for
5661 self-feeding recursive call. For example,
5662
5663 fn (int *i)
5664 {
5665 int j = *i & 1;
5666 fn (&j);
5667 }
5668
5669 Given that *i is 0, recursive propagation via (*i & 1)
5670 also gets 0. */
5671 if (self_recursive_agg_pass_through_p (cs, ti, index,
5672 false))
5673 value = ipa_get_jf_arith_result (
5674 ti->value.pass_through.operation,
5675 item->value,
5676 ti->value.pass_through.operand,
5677 ti->type);
5678 else
5679 value = ipa_agg_value_from_node (caller_info,
5680 cs->caller, ti);
5681
5682 if (value && values_equal_for_ipcp_p (item->value, value))
5683 found = true;
5684 break;
5685 }
5686 l++;
5687 }
5688 if (!found)
5689 item->value = NULL;
5690 }
5691 }
5692 else
5693 {
5694 inter.release ();
5695 return vNULL;
5696 }
5697 return inter;
5698 }
5699
5700 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5701 from all of them. */
5702
5703 static struct ipa_agg_replacement_value *
5704 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5705 const vec<cgraph_edge *> &callers)
5706 {
5707 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5708 struct ipa_agg_replacement_value *res;
5709 struct ipa_agg_replacement_value **tail = &res;
5710 struct cgraph_edge *cs;
5711 int i, j, count = ipa_get_param_count (dest_info);
5712
5713 FOR_EACH_VEC_ELT (callers, j, cs)
5714 {
5715 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5716 if (!args)
5717 {
5718 count = 0;
5719 break;
5720 }
5721 int c = ipa_get_cs_argument_count (args);
5722 if (c < count)
5723 count = c;
5724 }
5725
5726 for (i = 0; i < count; i++)
5727 {
5728 struct cgraph_edge *cs;
5729 vec<ipa_agg_value> inter = vNULL;
5730 struct ipa_agg_value *item;
5731 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5732 int j;
5733
5734 /* Among other things, the following check should deal with all by_ref
5735 mismatches. */
5736 if (plats->aggs_bottom)
5737 continue;
5738
5739 FOR_EACH_VEC_ELT (callers, j, cs)
5740 {
5741 struct ipa_jump_func *jfunc
5742 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5743 if (self_recursive_pass_through_p (cs, jfunc, i)
5744 && (!plats->aggs_by_ref
5745 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5746 continue;
5747 inter = intersect_aggregates_with_edge (cs, i, inter);
5748
5749 if (!inter.exists ())
5750 goto next_param;
5751 }
5752
5753 FOR_EACH_VEC_ELT (inter, j, item)
5754 {
5755 struct ipa_agg_replacement_value *v;
5756
5757 if (!item->value)
5758 continue;
5759
5760 v = ggc_alloc<ipa_agg_replacement_value> ();
5761 v->index = i;
5762 v->offset = item->offset;
5763 v->value = item->value;
5764 v->by_ref = plats->aggs_by_ref;
5765 *tail = v;
5766 tail = &v->next;
5767 }
5768
5769 next_param:
5770 if (inter.exists ())
5771 inter.release ();
5772 }
5773 *tail = NULL;
5774 return res;
5775 }
5776
5777 /* Determine whether CS also brings all scalar values that the NODE is
5778 specialized for. */
5779
5780 static bool
5781 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5782 struct cgraph_node *node)
5783 {
5784 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5785 int count = ipa_get_param_count (dest_info);
5786 class ipa_node_params *caller_info;
5787 class ipa_edge_args *args;
5788 int i;
5789
5790 caller_info = ipa_node_params_sum->get (cs->caller);
5791 args = ipa_edge_args_sum->get (cs);
5792 for (i = 0; i < count; i++)
5793 {
5794 struct ipa_jump_func *jump_func;
5795 tree val, t;
5796
5797 val = dest_info->known_csts[i];
5798 if (!val)
5799 continue;
5800
5801 if (i >= ipa_get_cs_argument_count (args))
5802 return false;
5803 jump_func = ipa_get_ith_jump_func (args, i);
5804 t = ipa_value_from_jfunc (caller_info, jump_func,
5805 ipa_get_type (dest_info, i));
5806 if (!t || !values_equal_for_ipcp_p (val, t))
5807 return false;
5808 }
5809 return true;
5810 }
5811
5812 /* Determine whether CS also brings all aggregate values that NODE is
5813 specialized for. */
5814 static bool
5815 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5816 struct cgraph_node *node)
5817 {
5818 struct ipa_agg_replacement_value *aggval;
5819 int i, ec, count;
5820
5821 aggval = ipa_get_agg_replacements_for_node (node);
5822 if (!aggval)
5823 return true;
5824
5825 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5826 count = ipa_get_param_count (clone_node_info);
5827 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5828 if (ec < count)
5829 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5830 if (aggval->index >= ec)
5831 return false;
5832
5833 ipa_node_params *orig_node_info
5834 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5835
5836 for (i = 0; i < count; i++)
5837 {
5838 class ipcp_param_lattices *plats;
5839 bool interesting = false;
5840 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5841 if (aggval->index == i)
5842 {
5843 interesting = true;
5844 break;
5845 }
5846 if (!interesting)
5847 continue;
5848
5849 plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
5850 if (plats->aggs_bottom)
5851 return false;
5852
5853 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5854 if (!values.exists ())
5855 return false;
5856
5857 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5858 if (aggval->index == i)
5859 {
5860 struct ipa_agg_value *item;
5861 int j;
5862 bool found = false;
5863 FOR_EACH_VEC_ELT (values, j, item)
5864 if (item->value
5865 && item->offset == av->offset
5866 && values_equal_for_ipcp_p (item->value, av->value))
5867 {
5868 found = true;
5869 break;
5870 }
5871 if (!found)
5872 {
5873 values.release ();
5874 return false;
5875 }
5876 }
5877 values.release ();
5878 }
5879 return true;
5880 }
5881
5882 /* Given an original NODE and a VAL for which we have already created a
5883 specialized clone, look whether there are incoming edges that still lead
5884 into the old node but now also bring the requested value and also conform to
5885 all other criteria such that they can be redirected the special node.
5886 This function can therefore redirect the final edge in a SCC. */
5887
5888 template <typename valtype>
5889 static void
5890 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5891 {
5892 ipcp_value_source<valtype> *src;
5893 profile_count redirected_sum = profile_count::zero ();
5894
5895 for (src = val->sources; src; src = src->next)
5896 {
5897 struct cgraph_edge *cs = src->cs;
5898 while (cs)
5899 {
5900 if (cgraph_edge_brings_value_p (cs, src, node, val)
5901 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5902 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5903 {
5904 if (dump_file)
5905 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5906 cs->caller->dump_name (),
5907 val->spec_node->dump_name ());
5908
5909 cs->redirect_callee_duplicating_thunks (val->spec_node);
5910 val->spec_node->expand_all_artificial_thunks ();
5911 if (cs->count.ipa ().initialized_p ())
5912 redirected_sum = redirected_sum + cs->count.ipa ();
5913 }
5914 cs = get_next_cgraph_edge_clone (cs);
5915 }
5916 }
5917
5918 if (redirected_sum.nonzero_p ())
5919 update_specialized_profile (val->spec_node, node, redirected_sum);
5920 }
5921
5922 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5923
5924 static bool
5925 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5926 {
5927 ipa_polymorphic_call_context *ctx;
5928 int i;
5929
5930 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5931 if (!ctx->useless_p ())
5932 return true;
5933 return false;
5934 }
5935
5936 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5937
5938 static vec<ipa_polymorphic_call_context>
5939 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5940 {
5941 if (known_contexts_useful_p (known_contexts))
5942 return known_contexts.copy ();
5943 else
5944 return vNULL;
5945 }
5946
5947 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5948 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5949 copy too. */
5950
5951 static void
5952 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5953 vec<tree> *known_csts,
5954 vec<ipa_polymorphic_call_context> *known_contexts,
5955 ipcp_value<tree> *val, int index)
5956 {
5957 *known_csts = avals->m_known_vals.copy ();
5958 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5959 (*known_csts)[index] = val->value;
5960 }
5961
5962 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5963 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5964 INDEX. */
5965
5966 static void
5967 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5968 vec<tree> *known_csts,
5969 vec<ipa_polymorphic_call_context> *known_contexts,
5970 ipcp_value<ipa_polymorphic_call_context> *val,
5971 int index)
5972 {
5973 *known_csts = avals->m_known_vals.copy ();
5974 *known_contexts = avals->m_known_contexts.copy ();
5975 (*known_contexts)[index] = val->value;
5976 }
5977
5978 /* Return true if OFFSET indicates this was not an aggregate value or there is
5979 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5980 AGGVALS list. */
5981
5982 DEBUG_FUNCTION bool
5983 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
5984 int index, HOST_WIDE_INT offset, tree value)
5985 {
5986 if (offset == -1)
5987 return true;
5988
5989 while (aggvals)
5990 {
5991 if (aggvals->index == index
5992 && aggvals->offset == offset
5993 && values_equal_for_ipcp_p (aggvals->value, value))
5994 return true;
5995 aggvals = aggvals->next;
5996 }
5997 return false;
5998 }
5999
6000 /* Return true if offset is minus one because source of a polymorphic context
6001 cannot be an aggregate value. */
6002
6003 DEBUG_FUNCTION bool
6004 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6005 int , HOST_WIDE_INT offset,
6006 ipa_polymorphic_call_context)
6007 {
6008 return offset == -1;
6009 }
6010
6011 /* Decide whether to create a special version of NODE for value VAL of
6012 parameter at the given INDEX. If OFFSET is -1, the value is for the
6013 parameter itself, otherwise it is stored at the given OFFSET of the
6014 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6015 is a vector which contains clones created for self-recursive calls with an
6016 arithmetic pass-through jump function. */
6017
6018 template <typename valtype>
6019 static bool
6020 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6021 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6022 vec<cgraph_node *> *self_gen_clones)
6023 {
6024 struct ipa_agg_replacement_value *aggvals;
6025 int caller_count;
6026 sreal freq_sum;
6027 profile_count count_sum, rec_count_sum;
6028 vec<cgraph_edge *> callers;
6029
6030 if (val->spec_node)
6031 {
6032 perhaps_add_new_callers (node, val);
6033 return false;
6034 }
6035 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6036 {
6037 if (dump_file && (dump_flags & TDF_DETAILS))
6038 fprintf (dump_file, " Ignoring candidate value because "
6039 "maximum unit size would be reached with %li.\n",
6040 val->local_size_cost + overall_size);
6041 return false;
6042 }
6043 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6044 &rec_count_sum, &count_sum))
6045 return false;
6046
6047 if (!dbg_cnt (ipa_cp_values))
6048 return false;
6049
6050 if (val->self_recursion_generated_p ())
6051 {
6052 /* The edge counts in this case might not have been adjusted yet.
6053 Nevertleless, even if they were it would be only a guesswork which we
6054 can do now. The recursive part of the counts can be derived from the
6055 count of the original node anyway. */
6056 if (node->count.ipa ().nonzero_p ())
6057 {
6058 unsigned dem = self_gen_clones->length () + 1;
6059 rec_count_sum = node->count.ipa ().apply_scale (1, dem);
6060 }
6061 else
6062 rec_count_sum = profile_count::zero ();
6063 }
6064
6065 /* get_info_about_necessary_edges only sums up ipa counts. */
6066 count_sum += rec_count_sum;
6067
6068 if (dump_file && (dump_flags & TDF_DETAILS))
6069 {
6070 fprintf (dump_file, " - considering value ");
6071 print_ipcp_constant_value (dump_file, val->value);
6072 fprintf (dump_file, " for ");
6073 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6074 if (offset != -1)
6075 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6076 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6077 }
6078
6079 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6080 freq_sum, count_sum,
6081 val->local_size_cost)
6082 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6083 freq_sum, count_sum, val->prop_size_cost))
6084 return false;
6085
6086 if (dump_file)
6087 fprintf (dump_file, " Creating a specialized node of %s.\n",
6088 node->dump_name ());
6089
6090 vec<tree> known_csts;
6091 vec<ipa_polymorphic_call_context> known_contexts;
6092
6093 callers = gather_edges_for_value (val, node, caller_count);
6094 if (offset == -1)
6095 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6096 else
6097 {
6098 known_csts = avals->m_known_vals.copy ();
6099 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6100 }
6101 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6102 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6103 aggvals = find_aggregate_values_for_callers_subset (node, callers);
6104 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6105 offset, val->value));
6106 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6107 aggvals, callers);
6108
6109 if (val->self_recursion_generated_p ())
6110 self_gen_clones->safe_push (val->spec_node);
6111 else
6112 update_profiling_info (node, val->spec_node);
6113
6114 callers.release ();
6115 overall_size += val->local_size_cost;
6116 if (dump_file && (dump_flags & TDF_DETAILS))
6117 fprintf (dump_file, " overall size reached %li\n",
6118 overall_size);
6119
6120 /* TODO: If for some lattice there is only one other known value
6121 left, make a special node for it too. */
6122
6123 return true;
6124 }
6125
6126 /* Decide whether and what specialized clones of NODE should be created. */
6127
6128 static bool
6129 decide_whether_version_node (struct cgraph_node *node)
6130 {
6131 ipa_node_params *info = ipa_node_params_sum->get (node);
6132 int i, count = ipa_get_param_count (info);
6133 bool ret = false;
6134
6135 if (count == 0)
6136 return false;
6137
6138 if (dump_file && (dump_flags & TDF_DETAILS))
6139 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6140 node->dump_name ());
6141
6142 auto_vec <cgraph_node *, 9> self_gen_clones;
6143 ipa_auto_call_arg_values avals;
6144 gather_context_independent_values (info, &avals, false, NULL);
6145
6146 for (i = 0; i < count;i++)
6147 {
6148 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6149 ipcp_lattice<tree> *lat = &plats->itself;
6150 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6151
6152 if (!lat->bottom
6153 && !avals.m_known_vals[i])
6154 {
6155 ipcp_value<tree> *val;
6156 for (val = lat->values; val; val = val->next)
6157 ret |= decide_about_value (node, i, -1, val, &avals,
6158 &self_gen_clones);
6159 }
6160
6161 if (!plats->aggs_bottom)
6162 {
6163 struct ipcp_agg_lattice *aglat;
6164 ipcp_value<tree> *val;
6165 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6166 if (!aglat->bottom && aglat->values
6167 /* If the following is false, the one value has been considered
6168 for cloning for all contexts. */
6169 && (plats->aggs_contain_variable
6170 || !aglat->is_single_const ()))
6171 for (val = aglat->values; val; val = val->next)
6172 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6173 &self_gen_clones);
6174 }
6175
6176 if (!ctxlat->bottom
6177 && avals.m_known_contexts[i].useless_p ())
6178 {
6179 ipcp_value<ipa_polymorphic_call_context> *val;
6180 for (val = ctxlat->values; val; val = val->next)
6181 ret |= decide_about_value (node, i, -1, val, &avals,
6182 &self_gen_clones);
6183 }
6184 }
6185
6186 if (!self_gen_clones.is_empty ())
6187 {
6188 self_gen_clones.safe_push (node);
6189 update_counts_for_self_gen_clones (node, self_gen_clones);
6190 }
6191
6192 if (info->do_clone_for_all_contexts)
6193 {
6194 if (!dbg_cnt (ipa_cp_values))
6195 {
6196 info->do_clone_for_all_contexts = false;
6197 return ret;
6198 }
6199
6200 struct cgraph_node *clone;
6201 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6202
6203 for (int i = callers.length () - 1; i >= 0; i--)
6204 {
6205 cgraph_edge *cs = callers[i];
6206 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6207
6208 if (caller_info && caller_info->node_dead)
6209 callers.unordered_remove (i);
6210 }
6211
6212 if (!adjust_callers_for_value_intersection (callers, node))
6213 {
6214 /* If node is not called by anyone, or all its caller edges are
6215 self-recursive, the node is not really in use, no need to do
6216 cloning. */
6217 info->do_clone_for_all_contexts = false;
6218 return ret;
6219 }
6220
6221 if (dump_file)
6222 fprintf (dump_file, " - Creating a specialized node of %s "
6223 "for all known contexts.\n", node->dump_name ());
6224
6225 vec<tree> known_csts = avals.m_known_vals.copy ();
6226 vec<ipa_polymorphic_call_context> known_contexts
6227 = copy_useful_known_contexts (avals.m_known_contexts);
6228 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6229 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6230 ipa_agg_replacement_value *aggvals
6231 = find_aggregate_values_for_callers_subset (node, callers);
6232
6233 if (!known_contexts_useful_p (known_contexts))
6234 {
6235 known_contexts.release ();
6236 known_contexts = vNULL;
6237 }
6238 clone = create_specialized_node (node, known_csts, known_contexts,
6239 aggvals, callers);
6240 info->do_clone_for_all_contexts = false;
6241 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6242 ret = true;
6243 }
6244
6245 return ret;
6246 }
6247
6248 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6249
6250 static void
6251 spread_undeadness (struct cgraph_node *node)
6252 {
6253 struct cgraph_edge *cs;
6254
6255 for (cs = node->callees; cs; cs = cs->next_callee)
6256 if (ipa_edge_within_scc (cs))
6257 {
6258 struct cgraph_node *callee;
6259 class ipa_node_params *info;
6260
6261 callee = cs->callee->function_symbol (NULL);
6262 info = ipa_node_params_sum->get (callee);
6263
6264 if (info && info->node_dead)
6265 {
6266 info->node_dead = 0;
6267 spread_undeadness (callee);
6268 }
6269 }
6270 }
6271
6272 /* Return true if NODE has a caller from outside of its SCC that is not
6273 dead. Worker callback for cgraph_for_node_and_aliases. */
6274
6275 static bool
6276 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6277 void *data ATTRIBUTE_UNUSED)
6278 {
6279 struct cgraph_edge *cs;
6280
6281 for (cs = node->callers; cs; cs = cs->next_caller)
6282 if (cs->caller->thunk
6283 && cs->caller->call_for_symbol_thunks_and_aliases
6284 (has_undead_caller_from_outside_scc_p, NULL, true))
6285 return true;
6286 else if (!ipa_edge_within_scc (cs))
6287 {
6288 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6289 if (!caller_info /* Unoptimized caller are like dead ones. */
6290 || !caller_info->node_dead)
6291 return true;
6292 }
6293 return false;
6294 }
6295
6296
6297 /* Identify nodes within the same SCC as NODE which are no longer needed
6298 because of new clones and will be removed as unreachable. */
6299
6300 static void
6301 identify_dead_nodes (struct cgraph_node *node)
6302 {
6303 struct cgraph_node *v;
6304 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6305 if (v->local)
6306 {
6307 ipa_node_params *info = ipa_node_params_sum->get (v);
6308 if (info
6309 && !v->call_for_symbol_thunks_and_aliases
6310 (has_undead_caller_from_outside_scc_p, NULL, true))
6311 info->node_dead = 1;
6312 }
6313
6314 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6315 {
6316 ipa_node_params *info = ipa_node_params_sum->get (v);
6317 if (info && !info->node_dead)
6318 spread_undeadness (v);
6319 }
6320
6321 if (dump_file && (dump_flags & TDF_DETAILS))
6322 {
6323 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6324 if (ipa_node_params_sum->get (v)
6325 && ipa_node_params_sum->get (v)->node_dead)
6326 fprintf (dump_file, " Marking node as dead: %s.\n",
6327 v->dump_name ());
6328 }
6329 }
6330
6331 /* The decision stage. Iterate over the topological order of call graph nodes
6332 TOPO and make specialized clones if deemed beneficial. */
6333
6334 static void
6335 ipcp_decision_stage (class ipa_topo_info *topo)
6336 {
6337 int i;
6338
6339 if (dump_file)
6340 fprintf (dump_file, "\nIPA decision stage:\n\n");
6341
6342 for (i = topo->nnodes - 1; i >= 0; i--)
6343 {
6344 struct cgraph_node *node = topo->order[i];
6345 bool change = false, iterate = true;
6346
6347 while (iterate)
6348 {
6349 struct cgraph_node *v;
6350 iterate = false;
6351 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6352 if (v->has_gimple_body_p ()
6353 && ipcp_versionable_function_p (v))
6354 iterate |= decide_whether_version_node (v);
6355
6356 change |= iterate;
6357 }
6358 if (change)
6359 identify_dead_nodes (node);
6360 }
6361 }
6362
6363 /* Look up all the bits information that we have discovered and copy it over
6364 to the transformation summary. */
6365
6366 static void
6367 ipcp_store_bits_results (void)
6368 {
6369 cgraph_node *node;
6370
6371 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6372 {
6373 ipa_node_params *info = ipa_node_params_sum->get (node);
6374 bool dumped_sth = false;
6375 bool found_useful_result = false;
6376
6377 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6378 {
6379 if (dump_file)
6380 fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
6381 "; -fipa-bit-cp: disabled.\n",
6382 node->dump_name ());
6383 continue;
6384 }
6385
6386 if (info->ipcp_orig_node)
6387 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6388 if (!info->lattices)
6389 /* Newly expanded artificial thunks do not have lattices. */
6390 continue;
6391
6392 unsigned count = ipa_get_param_count (info);
6393 for (unsigned i = 0; i < count; i++)
6394 {
6395 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6396 if (plats->bits_lattice.constant_p ())
6397 {
6398 found_useful_result = true;
6399 break;
6400 }
6401 }
6402
6403 if (!found_useful_result)
6404 continue;
6405
6406 ipcp_transformation_initialize ();
6407 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6408 vec_safe_reserve_exact (ts->bits, count);
6409
6410 for (unsigned i = 0; i < count; i++)
6411 {
6412 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6413 ipa_bits *jfbits;
6414
6415 if (plats->bits_lattice.constant_p ())
6416 {
6417 jfbits
6418 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6419 plats->bits_lattice.get_mask ());
6420 if (!dbg_cnt (ipa_cp_bits))
6421 jfbits = NULL;
6422 }
6423 else
6424 jfbits = NULL;
6425
6426 ts->bits->quick_push (jfbits);
6427 if (!dump_file || !jfbits)
6428 continue;
6429 if (!dumped_sth)
6430 {
6431 fprintf (dump_file, "Propagated bits info for function %s:\n",
6432 node->dump_name ());
6433 dumped_sth = true;
6434 }
6435 fprintf (dump_file, " param %i: value = ", i);
6436 print_hex (jfbits->value, dump_file);
6437 fprintf (dump_file, ", mask = ");
6438 print_hex (jfbits->mask, dump_file);
6439 fprintf (dump_file, "\n");
6440 }
6441 }
6442 }
6443
6444 /* Look up all VR information that we have discovered and copy it over
6445 to the transformation summary. */
6446
6447 static void
6448 ipcp_store_vr_results (void)
6449 {
6450 cgraph_node *node;
6451
6452 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6453 {
6454 ipa_node_params *info = ipa_node_params_sum->get (node);
6455 bool found_useful_result = false;
6456
6457 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6458 {
6459 if (dump_file)
6460 fprintf (dump_file, "Not considering %s for VR discovery "
6461 "and propagate; -fipa-ipa-vrp: disabled.\n",
6462 node->dump_name ());
6463 continue;
6464 }
6465
6466 if (info->ipcp_orig_node)
6467 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6468 if (!info->lattices)
6469 /* Newly expanded artificial thunks do not have lattices. */
6470 continue;
6471
6472 unsigned count = ipa_get_param_count (info);
6473 for (unsigned i = 0; i < count; i++)
6474 {
6475 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6476 if (!plats->m_value_range.bottom_p ()
6477 && !plats->m_value_range.top_p ())
6478 {
6479 found_useful_result = true;
6480 break;
6481 }
6482 }
6483 if (!found_useful_result)
6484 continue;
6485
6486 ipcp_transformation_initialize ();
6487 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6488 vec_safe_reserve_exact (ts->m_vr, count);
6489
6490 for (unsigned i = 0; i < count; i++)
6491 {
6492 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6493 ipa_vr vr;
6494
6495 if (!plats->m_value_range.bottom_p ()
6496 && !plats->m_value_range.top_p ()
6497 && dbg_cnt (ipa_cp_vr))
6498 {
6499 vr.known = true;
6500 vr.type = plats->m_value_range.m_vr.kind ();
6501 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6502 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6503 }
6504 else
6505 {
6506 vr.known = false;
6507 vr.type = VR_VARYING;
6508 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6509 }
6510 ts->m_vr->quick_push (vr);
6511 }
6512 }
6513 }
6514
6515 /* The IPCP driver. */
6516
6517 static unsigned int
6518 ipcp_driver (void)
6519 {
6520 class ipa_topo_info topo;
6521
6522 if (edge_clone_summaries == NULL)
6523 edge_clone_summaries = new edge_clone_summary_t (symtab);
6524
6525 ipa_check_create_node_params ();
6526 ipa_check_create_edge_args ();
6527 clone_num_suffixes = new hash_map<const char *, unsigned>;
6528
6529 if (dump_file)
6530 {
6531 fprintf (dump_file, "\nIPA structures before propagation:\n");
6532 if (dump_flags & TDF_DETAILS)
6533 ipa_print_all_params (dump_file);
6534 ipa_print_all_jump_functions (dump_file);
6535 }
6536
6537 /* Topological sort. */
6538 build_toporder_info (&topo);
6539 /* Do the interprocedural propagation. */
6540 ipcp_propagate_stage (&topo);
6541 /* Decide what constant propagation and cloning should be performed. */
6542 ipcp_decision_stage (&topo);
6543 /* Store results of bits propagation. */
6544 ipcp_store_bits_results ();
6545 /* Store results of value range propagation. */
6546 ipcp_store_vr_results ();
6547
6548 /* Free all IPCP structures. */
6549 delete clone_num_suffixes;
6550 free_toporder_info (&topo);
6551 delete edge_clone_summaries;
6552 edge_clone_summaries = NULL;
6553 ipa_free_all_structures_after_ipa_cp ();
6554 if (dump_file)
6555 fprintf (dump_file, "\nIPA constant propagation end\n");
6556 return 0;
6557 }
6558
6559 /* Initialization and computation of IPCP data structures. This is the initial
6560 intraprocedural analysis of functions, which gathers information to be
6561 propagated later on. */
6562
6563 static void
6564 ipcp_generate_summary (void)
6565 {
6566 struct cgraph_node *node;
6567
6568 if (dump_file)
6569 fprintf (dump_file, "\nIPA constant propagation start:\n");
6570 ipa_register_cgraph_hooks ();
6571
6572 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6573 ipa_analyze_node (node);
6574 }
6575
6576 namespace {
6577
6578 const pass_data pass_data_ipa_cp =
6579 {
6580 IPA_PASS, /* type */
6581 "cp", /* name */
6582 OPTGROUP_NONE, /* optinfo_flags */
6583 TV_IPA_CONSTANT_PROP, /* tv_id */
6584 0, /* properties_required */
6585 0, /* properties_provided */
6586 0, /* properties_destroyed */
6587 0, /* todo_flags_start */
6588 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6589 };
6590
6591 class pass_ipa_cp : public ipa_opt_pass_d
6592 {
6593 public:
6594 pass_ipa_cp (gcc::context *ctxt)
6595 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6596 ipcp_generate_summary, /* generate_summary */
6597 NULL, /* write_summary */
6598 NULL, /* read_summary */
6599 ipcp_write_transformation_summaries, /*
6600 write_optimization_summary */
6601 ipcp_read_transformation_summaries, /*
6602 read_optimization_summary */
6603 NULL, /* stmt_fixup */
6604 0, /* function_transform_todo_flags_start */
6605 ipcp_transform_function, /* function_transform */
6606 NULL) /* variable_transform */
6607 {}
6608
6609 /* opt_pass methods: */
6610 virtual bool gate (function *)
6611 {
6612 /* FIXME: We should remove the optimize check after we ensure we never run
6613 IPA passes when not optimizing. */
6614 return (flag_ipa_cp && optimize) || in_lto_p;
6615 }
6616
6617 virtual unsigned int execute (function *) { return ipcp_driver (); }
6618
6619 }; // class pass_ipa_cp
6620
6621 } // anon namespace
6622
6623 ipa_opt_pass_d *
6624 make_pass_ipa_cp (gcc::context *ctxt)
6625 {
6626 return new pass_ipa_cp (ctxt);
6627 }
6628
6629 /* Reset all state within ipa-cp.c so that we can rerun the compiler
6630 within the same process. For use by toplev::finalize. */
6631
6632 void
6633 ipa_cp_c_finalize (void)
6634 {
6635 base_count = profile_count::uninitialized ();
6636 overall_size = 0;
6637 orig_overall_size = 0;
6638 ipcp_free_transformation_sum ();
6639 }