]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/auto-profile.c
auto-profile.c (afdo_calculate_branch_prob): Break once has_sample is true.
[thirdparty/gcc.git] / gcc / auto-profile.c
1 /* Read and annotate call graph profile from the auto profile data file.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Dehao Chen (dehao@google.com)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23
24 #include <string.h>
25 #include <map>
26 #include <set>
27
28 #include "coretypes.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "options.h"
37 #include "wide-int.h"
38 #include "inchash.h"
39 #include "tree.h"
40 #include "fold-const.h"
41 #include "tree-pass.h"
42 #include "flags.h"
43 #include "predict.h"
44 #include "vec.h"
45 #include "hashtab.h"
46 #include "hash-set.h"
47 #include "machmode.h"
48 #include "tm.h"
49 #include "hard-reg-set.h"
50 #include "input.h"
51 #include "function.h"
52 #include "dominance.h"
53 #include "cfg.h"
54 #include "basic-block.h"
55 #include "diagnostic-core.h"
56 #include "gcov-io.h"
57 #include "profile.h"
58 #include "langhooks.h"
59 #include "opts.h"
60 #include "tree-pass.h"
61 #include "cfgloop.h"
62 #include "tree-ssa-alias.h"
63 #include "tree-cfg.h"
64 #include "tree-cfgcleanup.h"
65 #include "tree-ssa-operands.h"
66 #include "tree-into-ssa.h"
67 #include "internal-fn.h"
68 #include "is-a.h"
69 #include "gimple-expr.h"
70 #include "gimple.h"
71 #include "gimple-iterator.h"
72 #include "gimple-ssa.h"
73 #include "hash-map.h"
74 #include "plugin-api.h"
75 #include "ipa-ref.h"
76 #include "cgraph.h"
77 #include "value-prof.h"
78 #include "coverage.h"
79 #include "params.h"
80 #include "alloc-pool.h"
81 #include "symbol-summary.h"
82 #include "ipa-prop.h"
83 #include "ipa-inline.h"
84 #include "tree-inline.h"
85 #include "stringpool.h"
86 #include "auto-profile.h"
87
88 /* The following routines implements AutoFDO optimization.
89
90 This optimization uses sampling profiles to annotate basic block counts
91 and uses heuristics to estimate branch probabilities.
92
93 There are three phases in AutoFDO:
94
95 Phase 1: Read profile from the profile data file.
96 The following info is read from the profile datafile:
97 * string_table: a map between function name and its index.
98 * autofdo_source_profile: a map from function_instance name to
99 function_instance. This is represented as a forest of
100 function_instances.
101 * WorkingSet: a histogram of how many instructions are covered for a
102 given percentage of total cycles. This is describing the binary
103 level information (not source level). This info is used to help
104 decide if we want aggressive optimizations that could increase
105 code footprint (e.g. loop unroll etc.)
106 A function instance is an instance of function that could either be a
107 standalone symbol, or a clone of a function that is inlined into another
108 function.
109
110 Phase 2: Early inline + value profile transformation.
111 Early inline uses autofdo_source_profile to find if a callsite is:
112 * inlined in the profiled binary.
113 * callee body is hot in the profiling run.
114 If both condition satisfies, early inline will inline the callsite
115 regardless of the code growth.
116 Phase 2 is an iterative process. During each iteration, we also check
117 if an indirect callsite is promoted and inlined in the profiling run.
118 If yes, vpt will happen to force promote it and in the next iteration,
119 einline will inline the promoted callsite in the next iteration.
120
121 Phase 3: Annotate control flow graph.
122 AutoFDO uses a separate pass to:
123 * Annotate basic block count
124 * Estimate branch probability
125
126 After the above 3 phases, all profile is readily annotated on the GCC IR.
127 AutoFDO tries to reuse all FDO infrastructure as much as possible to make
128 use of the profile. E.g. it uses existing mechanism to calculate the basic
129 block/edge frequency, as well as the cgraph node/edge count.
130 */
131
132 #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
133 #define AUTO_PROFILE_VERSION 1
134
135 namespace autofdo
136 {
137
138 /* Represent a source location: (function_decl, lineno). */
139 typedef std::pair<tree, unsigned> decl_lineno;
140
141 /* Represent an inline stack. vector[0] is the leaf node. */
142 typedef auto_vec<decl_lineno> inline_stack;
143
144 /* String array that stores function names. */
145 typedef auto_vec<char *> string_vector;
146
147 /* Map from function name's index in string_table to target's
148 execution count. */
149 typedef std::map<unsigned, gcov_type> icall_target_map;
150
151 /* Set of gimple stmts. Used to track if the stmt has already been promoted
152 to direct call. */
153 typedef std::set<gimple> stmt_set;
154
155 /* Represent count info of an inline stack. */
156 struct count_info
157 {
158 /* Sampled count of the inline stack. */
159 gcov_type count;
160
161 /* Map from indirect call target to its sample count. */
162 icall_target_map targets;
163
164 /* Whether this inline stack is already used in annotation.
165
166 Each inline stack should only be used to annotate IR once.
167 This will be enforced when instruction-level discriminator
168 is supported. */
169 bool annotated;
170 };
171
172 /* operator< for "const char *". */
173 struct string_compare
174 {
175 bool operator()(const char *a, const char *b) const
176 {
177 return strcmp (a, b) < 0;
178 }
179 };
180
181 /* Store a string array, indexed by string position in the array. */
182 class string_table
183 {
184 public:
185 string_table ()
186 {}
187
188 ~string_table ();
189
190 /* For a given string, returns its index. */
191 int get_index (const char *name) const;
192
193 /* For a given decl, returns the index of the decl name. */
194 int get_index_by_decl (tree decl) const;
195
196 /* For a given index, returns the string. */
197 const char *get_name (int index) const;
198
199 /* Read profile, return TRUE on success. */
200 bool read ();
201
202 private:
203 typedef std::map<const char *, unsigned, string_compare> string_index_map;
204 string_vector vector_;
205 string_index_map map_;
206 };
207
208 /* Profile of a function instance:
209 1. total_count of the function.
210 2. head_count (entry basic block count) of the function (only valid when
211 function is a top-level function_instance, i.e. it is the original copy
212 instead of the inlined copy).
213 3. map from source location (decl_lineno) to profile (count_info).
214 4. map from callsite to callee function_instance. */
215 class function_instance
216 {
217 public:
218 typedef auto_vec<function_instance *> function_instance_stack;
219
220 /* Read the profile and return a function_instance with head count as
221 HEAD_COUNT. Recursively read callsites to create nested function_instances
222 too. STACK is used to track the recursive creation process. */
223 static function_instance *
224 read_function_instance (function_instance_stack *stack,
225 gcov_type head_count);
226
227 /* Recursively deallocate all callsites (nested function_instances). */
228 ~function_instance ();
229
230 /* Accessors. */
231 int
232 name () const
233 {
234 return name_;
235 }
236 gcov_type
237 total_count () const
238 {
239 return total_count_;
240 }
241 gcov_type
242 head_count () const
243 {
244 return head_count_;
245 }
246
247 /* Traverse callsites of the current function_instance to find one at the
248 location of LINENO and callee name represented in DECL. */
249 function_instance *get_function_instance_by_decl (unsigned lineno,
250 tree decl) const;
251
252 /* Store the profile info for LOC in INFO. Return TRUE if profile info
253 is found. */
254 bool get_count_info (location_t loc, count_info *info) const;
255
256 /* Read the inlined indirect call target profile for STMT and store it in
257 MAP, return the total count for all inlined indirect calls. */
258 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
259
260 /* Sum of counts that is used during annotation. */
261 gcov_type total_annotated_count () const;
262
263 /* Mark LOC as annotated. */
264 void mark_annotated (location_t loc);
265
266 private:
267 /* Callsite, represented as (decl_lineno, callee_function_name_index). */
268 typedef std::pair<unsigned, unsigned> callsite;
269
270 /* Map from callsite to callee function_instance. */
271 typedef std::map<callsite, function_instance *> callsite_map;
272
273 function_instance (unsigned name, gcov_type head_count)
274 : name_ (name), total_count_ (0), head_count_ (head_count)
275 {
276 }
277
278 /* Map from source location (decl_lineno) to profile (count_info). */
279 typedef std::map<unsigned, count_info> position_count_map;
280
281 /* function_instance name index in the string_table. */
282 unsigned name_;
283
284 /* Total sample count. */
285 gcov_type total_count_;
286
287 /* Entry BB's sample count. */
288 gcov_type head_count_;
289
290 /* Map from callsite location to callee function_instance. */
291 callsite_map callsites;
292
293 /* Map from source location to count_info. */
294 position_count_map pos_counts;
295 };
296
297 /* Profile for all functions. */
298 class autofdo_source_profile
299 {
300 public:
301 static autofdo_source_profile *
302 create ()
303 {
304 autofdo_source_profile *map = new autofdo_source_profile ();
305
306 if (map->read ())
307 return map;
308 delete map;
309 return NULL;
310 }
311
312 ~autofdo_source_profile ();
313
314 /* For a given DECL, returns the top-level function_instance. */
315 function_instance *get_function_instance_by_decl (tree decl) const;
316
317 /* Find count_info for a given gimple STMT. If found, store the count_info
318 in INFO and return true; otherwise return false. */
319 bool get_count_info (gimple stmt, count_info *info) const;
320
321 /* Find total count of the callee of EDGE. */
322 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
323
324 /* Update value profile INFO for STMT from the inlined indirect callsite.
325 Return true if INFO is updated. */
326 bool update_inlined_ind_target (gcall *stmt, count_info *info);
327
328 /* Mark LOC as annotated. */
329 void mark_annotated (location_t loc);
330
331 private:
332 /* Map from function_instance name index (in string_table) to
333 function_instance. */
334 typedef std::map<unsigned, function_instance *> name_function_instance_map;
335
336 autofdo_source_profile () {}
337
338 /* Read AutoFDO profile and returns TRUE on success. */
339 bool read ();
340
341 /* Return the function_instance in the profile that correspond to the
342 inline STACK. */
343 function_instance *
344 get_function_instance_by_inline_stack (const inline_stack &stack) const;
345
346 name_function_instance_map map_;
347 };
348
349 /* Store the strings read from the profile data file. */
350 static string_table *afdo_string_table;
351
352 /* Store the AutoFDO source profile. */
353 static autofdo_source_profile *afdo_source_profile;
354
355 /* gcov_ctr_summary structure to store the profile_info. */
356 static struct gcov_ctr_summary *afdo_profile_info;
357
358 /* Helper functions. */
359
360 /* Return the original name of NAME: strip the suffix that starts
361 with '.' Caller is responsible for freeing RET. */
362
363 static char *
364 get_original_name (const char *name)
365 {
366 char *ret = xstrdup (name);
367 char *find = strchr (ret, '.');
368 if (find != NULL)
369 *find = 0;
370 return ret;
371 }
372
373 /* Return the combined location, which is a 32bit integer in which
374 higher 16 bits stores the line offset of LOC to the start lineno
375 of DECL, The lower 16 bits stores the discriminator. */
376
377 static unsigned
378 get_combined_location (location_t loc, tree decl)
379 {
380 /* TODO: allow more bits for line and less bits for discriminator. */
381 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
382 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
383 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
384 }
385
386 /* Return the function decl of a given lexical BLOCK. */
387
388 static tree
389 get_function_decl_from_block (tree block)
390 {
391 tree decl;
392
393 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
394 return NULL_TREE;
395
396 for (decl = BLOCK_ABSTRACT_ORIGIN (block);
397 decl && (TREE_CODE (decl) == BLOCK);
398 decl = BLOCK_ABSTRACT_ORIGIN (decl))
399 if (TREE_CODE (decl) == FUNCTION_DECL)
400 break;
401 return decl;
402 }
403
404 /* Store inline stack for STMT in STACK. */
405
406 static void
407 get_inline_stack (location_t locus, inline_stack *stack)
408 {
409 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
410 return;
411
412 tree block = LOCATION_BLOCK (locus);
413 if (block && TREE_CODE (block) == BLOCK)
414 {
415 int level = 0;
416 for (block = BLOCK_SUPERCONTEXT (block);
417 block && (TREE_CODE (block) == BLOCK);
418 block = BLOCK_SUPERCONTEXT (block))
419 {
420 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
421 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
422 continue;
423
424 tree decl = get_function_decl_from_block (block);
425 stack->safe_push (
426 std::make_pair (decl, get_combined_location (locus, decl)));
427 locus = tmp_locus;
428 level++;
429 }
430 }
431 stack->safe_push (
432 std::make_pair (current_function_decl,
433 get_combined_location (locus, current_function_decl)));
434 }
435
436 /* Return STMT's combined location, which is a 32bit integer in which
437 higher 16 bits stores the line offset of LOC to the start lineno
438 of DECL, The lower 16 bits stores the discriminator. */
439
440 static unsigned
441 get_relative_location_for_stmt (gimple stmt)
442 {
443 location_t locus = gimple_location (stmt);
444 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
445 return UNKNOWN_LOCATION;
446
447 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
448 block = BLOCK_SUPERCONTEXT (block))
449 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
450 return get_combined_location (locus,
451 get_function_decl_from_block (block));
452 return get_combined_location (locus, current_function_decl);
453 }
454
455 /* Return true if BB contains indirect call. */
456
457 static bool
458 has_indirect_call (basic_block bb)
459 {
460 gimple_stmt_iterator gsi;
461
462 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
463 {
464 gimple stmt = gsi_stmt (gsi);
465 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
466 && (gimple_call_fn (stmt) == NULL
467 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
468 return true;
469 }
470 return false;
471 }
472
473 /* Member functions for string_table. */
474
475 /* Deconstructor. */
476
477 string_table::~string_table ()
478 {
479 for (unsigned i = 0; i < vector_.length (); i++)
480 free (vector_[i]);
481 }
482
483
484 /* Return the index of a given function NAME. Return -1 if NAME is not
485 found in string table. */
486
487 int
488 string_table::get_index (const char *name) const
489 {
490 if (name == NULL)
491 return -1;
492 string_index_map::const_iterator iter = map_.find (name);
493 if (iter == map_.end ())
494 return -1;
495
496 return iter->second;
497 }
498
499 /* Return the index of a given function DECL. Return -1 if DECL is not
500 found in string table. */
501
502 int
503 string_table::get_index_by_decl (tree decl) const
504 {
505 char *name
506 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
507 int ret = get_index (name);
508 free (name);
509 if (ret != -1)
510 return ret;
511 ret = get_index (lang_hooks.dwarf_name (decl, 0));
512 if (ret != -1)
513 return ret;
514 if (DECL_ABSTRACT_ORIGIN (decl))
515 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
516
517 return -1;
518 }
519
520 /* Return the function name of a given INDEX. */
521
522 const char *
523 string_table::get_name (int index) const
524 {
525 gcc_assert (index > 0 && index < (int)vector_.length ());
526 return vector_[index];
527 }
528
529 /* Read the string table. Return TRUE if reading is successful. */
530
531 bool
532 string_table::read ()
533 {
534 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
535 return false;
536 /* Skip the length of the section. */
537 gcov_read_unsigned ();
538 /* Read in the file name table. */
539 unsigned string_num = gcov_read_unsigned ();
540 for (unsigned i = 0; i < string_num; i++)
541 {
542 vector_.safe_push (get_original_name (gcov_read_string ()));
543 map_[vector_.last ()] = i;
544 }
545 return true;
546 }
547
548 /* Member functions for function_instance. */
549
550 function_instance::~function_instance ()
551 {
552 for (callsite_map::iterator iter = callsites.begin ();
553 iter != callsites.end (); ++iter)
554 delete iter->second;
555 }
556
557 /* Traverse callsites of the current function_instance to find one at the
558 location of LINENO and callee name represented in DECL. */
559
560 function_instance *
561 function_instance::get_function_instance_by_decl (unsigned lineno,
562 tree decl) const
563 {
564 int func_name_idx = afdo_string_table->get_index_by_decl (decl);
565 if (func_name_idx != -1)
566 {
567 callsite_map::const_iterator ret
568 = callsites.find (std::make_pair (lineno, func_name_idx));
569 if (ret != callsites.end ())
570 return ret->second;
571 }
572 func_name_idx
573 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
574 if (func_name_idx != -1)
575 {
576 callsite_map::const_iterator ret
577 = callsites.find (std::make_pair (lineno, func_name_idx));
578 if (ret != callsites.end ())
579 return ret->second;
580 }
581 if (DECL_ABSTRACT_ORIGIN (decl))
582 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
583
584 return NULL;
585 }
586
587 /* Store the profile info for LOC in INFO. Return TRUE if profile info
588 is found. */
589
590 bool
591 function_instance::get_count_info (location_t loc, count_info *info) const
592 {
593 position_count_map::const_iterator iter = pos_counts.find (loc);
594 if (iter == pos_counts.end ())
595 return false;
596 *info = iter->second;
597 return true;
598 }
599
600 /* Mark LOC as annotated. */
601
602 void
603 function_instance::mark_annotated (location_t loc)
604 {
605 position_count_map::iterator iter = pos_counts.find (loc);
606 if (iter == pos_counts.end ())
607 return;
608 iter->second.annotated = true;
609 }
610
611 /* Read the inlined indirect call target profile for STMT and store it in
612 MAP, return the total count for all inlined indirect calls. */
613
614 gcov_type
615 function_instance::find_icall_target_map (gcall *stmt,
616 icall_target_map *map) const
617 {
618 gcov_type ret = 0;
619 unsigned stmt_offset = get_relative_location_for_stmt (stmt);
620
621 for (callsite_map::const_iterator iter = callsites.begin ();
622 iter != callsites.end (); ++iter)
623 {
624 unsigned callee = iter->second->name ();
625 /* Check if callsite location match the stmt. */
626 if (iter->first.first != stmt_offset)
627 continue;
628 struct cgraph_node *node = cgraph_node::get_for_asmname (
629 get_identifier (afdo_string_table->get_name (callee)));
630 if (node == NULL)
631 continue;
632 if (!check_ic_target (stmt, node))
633 continue;
634 (*map)[callee] = iter->second->total_count ();
635 ret += iter->second->total_count ();
636 }
637 return ret;
638 }
639
640 /* Read the profile and create a function_instance with head count as
641 HEAD_COUNT. Recursively read callsites to create nested function_instances
642 too. STACK is used to track the recursive creation process. */
643
644 /* function instance profile format:
645
646 ENTRY_COUNT: 8 bytes
647 NAME_INDEX: 4 bytes
648 NUM_POS_COUNTS: 4 bytes
649 NUM_CALLSITES: 4 byte
650 POS_COUNT_1:
651 POS_1_OFFSET: 4 bytes
652 NUM_TARGETS: 4 bytes
653 COUNT: 8 bytes
654 TARGET_1:
655 VALUE_PROFILE_TYPE: 4 bytes
656 TARGET_IDX: 8 bytes
657 COUNT: 8 bytes
658 TARGET_2
659 ...
660 TARGET_n
661 POS_COUNT_2
662 ...
663 POS_COUNT_N
664 CALLSITE_1:
665 CALLSITE_1_OFFSET: 4 bytes
666 FUNCTION_INSTANCE_PROFILE (nested)
667 CALLSITE_2
668 ...
669 CALLSITE_n. */
670
671 function_instance *
672 function_instance::read_function_instance (function_instance_stack *stack,
673 gcov_type head_count)
674 {
675 unsigned name = gcov_read_unsigned ();
676 unsigned num_pos_counts = gcov_read_unsigned ();
677 unsigned num_callsites = gcov_read_unsigned ();
678 function_instance *s = new function_instance (name, head_count);
679 stack->safe_push (s);
680
681 for (unsigned i = 0; i < num_pos_counts; i++)
682 {
683 unsigned offset = gcov_read_unsigned () & 0xffff0000;
684 unsigned num_targets = gcov_read_unsigned ();
685 gcov_type count = gcov_read_counter ();
686 s->pos_counts[offset].count = count;
687 for (unsigned j = 0; j < stack->length (); j++)
688 (*stack)[j]->total_count_ += count;
689 for (unsigned j = 0; j < num_targets; j++)
690 {
691 /* Only indirect call target histogram is supported now. */
692 gcov_read_unsigned ();
693 gcov_type target_idx = gcov_read_counter ();
694 s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
695 }
696 }
697 for (unsigned i = 0; i < num_callsites; i++)
698 {
699 unsigned offset = gcov_read_unsigned ();
700 function_instance *callee_function_instance
701 = read_function_instance (stack, 0);
702 s->callsites[std::make_pair (offset, callee_function_instance->name ())]
703 = callee_function_instance;
704 }
705 stack->pop ();
706 return s;
707 }
708
709 /* Sum of counts that is used during annotation. */
710
711 gcov_type
712 function_instance::total_annotated_count () const
713 {
714 gcov_type ret = 0;
715 for (callsite_map::const_iterator iter = callsites.begin ();
716 iter != callsites.end (); ++iter)
717 ret += iter->second->total_annotated_count ();
718 for (position_count_map::const_iterator iter = pos_counts.begin ();
719 iter != pos_counts.end (); ++iter)
720 if (iter->second.annotated)
721 ret += iter->second.count;
722 return ret;
723 }
724
725 /* Member functions for autofdo_source_profile. */
726
727 autofdo_source_profile::~autofdo_source_profile ()
728 {
729 for (name_function_instance_map::const_iterator iter = map_.begin ();
730 iter != map_.end (); ++iter)
731 delete iter->second;
732 }
733
734 /* For a given DECL, returns the top-level function_instance. */
735
736 function_instance *
737 autofdo_source_profile::get_function_instance_by_decl (tree decl) const
738 {
739 int index = afdo_string_table->get_index_by_decl (decl);
740 if (index == -1)
741 return NULL;
742 name_function_instance_map::const_iterator ret = map_.find (index);
743 return ret == map_.end () ? NULL : ret->second;
744 }
745
746 /* Find count_info for a given gimple STMT. If found, store the count_info
747 in INFO and return true; otherwise return false. */
748
749 bool
750 autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
751 {
752 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
753 return false;
754
755 inline_stack stack;
756 get_inline_stack (gimple_location (stmt), &stack);
757 if (stack.length () == 0)
758 return false;
759 function_instance *s = get_function_instance_by_inline_stack (stack);
760 if (s == NULL)
761 return false;
762 return s->get_count_info (stack[0].second, info);
763 }
764
765 /* Mark LOC as annotated. */
766
767 void
768 autofdo_source_profile::mark_annotated (location_t loc)
769 {
770 inline_stack stack;
771 get_inline_stack (loc, &stack);
772 if (stack.length () == 0)
773 return;
774 function_instance *s = get_function_instance_by_inline_stack (stack);
775 if (s == NULL)
776 return;
777 s->mark_annotated (stack[0].second);
778 }
779
780 /* Update value profile INFO for STMT from the inlined indirect callsite.
781 Return true if INFO is updated. */
782
783 bool
784 autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
785 count_info *info)
786 {
787 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
788 return false;
789
790 count_info old_info;
791 get_count_info (stmt, &old_info);
792 gcov_type total = 0;
793 for (icall_target_map::const_iterator iter = old_info.targets.begin ();
794 iter != old_info.targets.end (); ++iter)
795 total += iter->second;
796
797 /* Program behavior changed, original promoted (and inlined) target is not
798 hot any more. Will avoid promote the original target.
799
800 To check if original promoted target is still hot, we check the total
801 count of the unpromoted targets (stored in old_info). If it is no less
802 than half of the callsite count (stored in INFO), the original promoted
803 target is considered not hot any more. */
804 if (total >= info->count / 2)
805 return false;
806
807 inline_stack stack;
808 get_inline_stack (gimple_location (stmt), &stack);
809 if (stack.length () == 0)
810 return false;
811 function_instance *s = get_function_instance_by_inline_stack (stack);
812 if (s == NULL)
813 return false;
814 icall_target_map map;
815 if (s->find_icall_target_map (stmt, &map) == 0)
816 return false;
817 for (icall_target_map::const_iterator iter = map.begin ();
818 iter != map.end (); ++iter)
819 info->targets[iter->first] = iter->second;
820 return true;
821 }
822
823 /* Find total count of the callee of EDGE. */
824
825 gcov_type
826 autofdo_source_profile::get_callsite_total_count (
827 struct cgraph_edge *edge) const
828 {
829 inline_stack stack;
830 stack.safe_push (std::make_pair (edge->callee->decl, 0));
831 get_inline_stack (gimple_location (edge->call_stmt), &stack);
832
833 function_instance *s = get_function_instance_by_inline_stack (stack);
834 if (s == NULL
835 || afdo_string_table->get_index (IDENTIFIER_POINTER (
836 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
837 return 0;
838
839 return s->total_count ();
840 }
841
842 /* Read AutoFDO profile and returns TRUE on success. */
843
844 /* source profile format:
845
846 GCOV_TAG_AFDO_FUNCTION: 4 bytes
847 LENGTH: 4 bytes
848 NUM_FUNCTIONS: 4 bytes
849 FUNCTION_INSTANCE_1
850 FUNCTION_INSTANCE_2
851 ...
852 FUNCTION_INSTANCE_N. */
853
854 bool
855 autofdo_source_profile::read ()
856 {
857 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
858 {
859 inform (0, "Not expected TAG.");
860 return false;
861 }
862
863 /* Skip the length of the section. */
864 gcov_read_unsigned ();
865
866 /* Read in the function/callsite profile, and store it in local
867 data structure. */
868 unsigned function_num = gcov_read_unsigned ();
869 for (unsigned i = 0; i < function_num; i++)
870 {
871 function_instance::function_instance_stack stack;
872 function_instance *s = function_instance::read_function_instance (
873 &stack, gcov_read_counter ());
874 afdo_profile_info->sum_all += s->total_count ();
875 map_[s->name ()] = s;
876 }
877 return true;
878 }
879
880 /* Return the function_instance in the profile that correspond to the
881 inline STACK. */
882
883 function_instance *
884 autofdo_source_profile::get_function_instance_by_inline_stack (
885 const inline_stack &stack) const
886 {
887 name_function_instance_map::const_iterator iter = map_.find (
888 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
889 if (iter == map_.end())
890 return NULL;
891 function_instance *s = iter->second;
892 for (unsigned i = stack.length() - 1; i > 0; i--)
893 {
894 s = s->get_function_instance_by_decl (
895 stack[i].second, stack[i - 1].first);
896 if (s == NULL)
897 return NULL;
898 }
899 return s;
900 }
901
902 /* Module profile is only used by LIPO. Here we simply ignore it. */
903
904 static void
905 fake_read_autofdo_module_profile ()
906 {
907 /* Read in the module info. */
908 gcov_read_unsigned ();
909
910 /* Skip the length of the section. */
911 gcov_read_unsigned ();
912
913 /* Read in the file name table. */
914 unsigned total_module_num = gcov_read_unsigned ();
915 gcc_assert (total_module_num == 0);
916 }
917
918 /* Read data from profile data file. */
919
920 static void
921 read_profile (void)
922 {
923 if (gcov_open (auto_profile_file, 1) == 0)
924 error ("Cannot open profile file %s.", auto_profile_file);
925
926 if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
927 error ("AutoFDO profile magic number does not mathch.");
928
929 /* Skip the version number. */
930 unsigned version = gcov_read_unsigned ();
931 if (version != AUTO_PROFILE_VERSION)
932 error ("AutoFDO profile version %u does match %u.",
933 version, AUTO_PROFILE_VERSION);
934
935 /* Skip the empty integer. */
936 gcov_read_unsigned ();
937
938 /* string_table. */
939 afdo_string_table = new string_table ();
940 if (!afdo_string_table->read())
941 error ("Cannot read string table from %s.", auto_profile_file);
942
943 /* autofdo_source_profile. */
944 afdo_source_profile = autofdo_source_profile::create ();
945 if (afdo_source_profile == NULL)
946 error ("Cannot read function profile from %s.", auto_profile_file);
947
948 /* autofdo_module_profile. */
949 fake_read_autofdo_module_profile ();
950
951 /* Read in the working set. */
952 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
953 error ("Cannot read working set from %s.", auto_profile_file);
954
955 /* Skip the length of the section. */
956 gcov_read_unsigned ();
957 gcov_working_set_t set[128];
958 for (unsigned i = 0; i < 128; i++)
959 {
960 set[i].num_counters = gcov_read_unsigned ();
961 set[i].min_counter = gcov_read_counter ();
962 }
963 add_working_set (set);
964 }
965
966 /* From AutoFDO profiles, find values inside STMT for that we want to measure
967 histograms for indirect-call optimization.
968
969 This function is actually served for 2 purposes:
970 * before annotation, we need to mark histogram, promote and inline
971 * after annotation, we just need to mark, and let follow-up logic to
972 decide if it needs to promote and inline. */
973
974 static void
975 afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
976 bool transform)
977 {
978 gimple gs = gsi_stmt (*gsi);
979 tree callee;
980
981 if (map.size () == 0)
982 return;
983 gcall *stmt = dyn_cast <gcall *> (gs);
984 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE)
985 return;
986
987 callee = gimple_call_fn (stmt);
988
989 histogram_value hist = gimple_alloc_histogram_value (
990 cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
991 hist->n_counters = 3;
992 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
993 gimple_add_histogram_value (cfun, stmt, hist);
994
995 gcov_type total = 0;
996 icall_target_map::const_iterator max_iter = map.end ();
997
998 for (icall_target_map::const_iterator iter = map.begin ();
999 iter != map.end (); ++iter)
1000 {
1001 total += iter->second;
1002 if (max_iter == map.end () || max_iter->second < iter->second)
1003 max_iter = iter;
1004 }
1005
1006 hist->hvalue.counters[0]
1007 = (unsigned long long)afdo_string_table->get_name (max_iter->first);
1008 hist->hvalue.counters[1] = max_iter->second;
1009 hist->hvalue.counters[2] = total;
1010
1011 if (!transform)
1012 return;
1013
1014 struct cgraph_edge *indirect_edge
1015 = cgraph_node::get (current_function_decl)->get_edge (stmt);
1016 struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1017 get_identifier ((const char *) hist->hvalue.counters[0]));
1018
1019 if (direct_call == NULL || !check_ic_target (stmt, direct_call))
1020 return;
1021 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1022 return;
1023 struct cgraph_edge *new_edge
1024 = indirect_edge->make_speculative (direct_call, 0, 0);
1025 new_edge->redirect_call_stmt_to_callee ();
1026 gimple_remove_histogram_value (cfun, stmt, hist);
1027 inline_call (new_edge, true, NULL, NULL, false);
1028 }
1029
1030 /* From AutoFDO profiles, find values inside STMT for that we want to measure
1031 histograms and adds them to list VALUES. */
1032
1033 static void
1034 afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1035 bool transform)
1036 {
1037 afdo_indirect_call (gsi, map, transform);
1038 }
1039
1040 typedef std::set<basic_block> bb_set;
1041 typedef std::set<edge> edge_set;
1042
1043 static bool
1044 is_bb_annotated (const basic_block bb, const bb_set &annotated)
1045 {
1046 return annotated.find (bb) != annotated.end ();
1047 }
1048
1049 static void
1050 set_bb_annotated (basic_block bb, bb_set *annotated)
1051 {
1052 annotated->insert (bb);
1053 }
1054
1055 static bool
1056 is_edge_annotated (const edge e, const edge_set &annotated)
1057 {
1058 return annotated.find (e) != annotated.end ();
1059 }
1060
1061 static void
1062 set_edge_annotated (edge e, edge_set *annotated)
1063 {
1064 annotated->insert (e);
1065 }
1066
1067 /* For a given BB, set its execution count. Attach value profile if a stmt
1068 is not in PROMOTED, because we only want to promote an indirect call once.
1069 Return TRUE if BB is annotated. */
1070
1071 static bool
1072 afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1073 {
1074 gimple_stmt_iterator gsi;
1075 edge e;
1076 edge_iterator ei;
1077 gcov_type max_count = 0;
1078 bool has_annotated = false;
1079
1080 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1081 {
1082 count_info info;
1083 gimple stmt = gsi_stmt (gsi);
1084 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1085 continue;
1086 if (afdo_source_profile->get_count_info (stmt, &info))
1087 {
1088 if (info.count > max_count)
1089 max_count = info.count;
1090 has_annotated = true;
1091 if (info.targets.size () > 0
1092 && promoted.find (stmt) == promoted.end ())
1093 afdo_vpt (&gsi, info.targets, false);
1094 }
1095 }
1096
1097 if (!has_annotated)
1098 return false;
1099
1100 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1101 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1102 for (gphi_iterator gpi = gsi_start_phis (bb);
1103 !gsi_end_p (gpi);
1104 gsi_next (&gpi))
1105 {
1106 gphi *phi = gpi.phi ();
1107 size_t i;
1108 for (i = 0; i < gimple_phi_num_args (phi); i++)
1109 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1110 }
1111 FOR_EACH_EDGE (e, ei, bb->succs)
1112 afdo_source_profile->mark_annotated (e->goto_locus);
1113
1114 bb->count = max_count;
1115 return true;
1116 }
1117
1118 /* BB1 and BB2 are in an equivalent class iff:
1119 1. BB1 dominates BB2.
1120 2. BB2 post-dominates BB1.
1121 3. BB1 and BB2 are in the same loop nest.
1122 This function finds the equivalent class for each basic block, and
1123 stores a pointer to the first BB in its equivalent class. Meanwhile,
1124 set bb counts for the same equivalent class to be idenical. Update
1125 ANNOTATED_BB for the first BB in its equivalent class. */
1126
1127 static void
1128 afdo_find_equiv_class (bb_set *annotated_bb)
1129 {
1130 basic_block bb;
1131
1132 FOR_ALL_BB_FN (bb, cfun)
1133 bb->aux = NULL;
1134
1135 FOR_ALL_BB_FN (bb, cfun)
1136 {
1137 vec<basic_block> dom_bbs;
1138 basic_block bb1;
1139 int i;
1140
1141 if (bb->aux != NULL)
1142 continue;
1143 bb->aux = bb;
1144 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1145 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1146 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1147 && bb1->loop_father == bb->loop_father)
1148 {
1149 bb1->aux = bb;
1150 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1151 {
1152 bb->count = bb1->count;
1153 set_bb_annotated (bb, annotated_bb);
1154 }
1155 }
1156 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
1157 FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
1158 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1159 && bb1->loop_father == bb->loop_father)
1160 {
1161 bb1->aux = bb;
1162 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1163 {
1164 bb->count = bb1->count;
1165 set_bb_annotated (bb, annotated_bb);
1166 }
1167 }
1168 }
1169 }
1170
1171 /* If a basic block's count is known, and only one of its in/out edges' count
1172 is unknown, its count can be calculated. Meanwhile, if all of the in/out
1173 edges' counts are known, then the basic block's unknown count can also be
1174 calculated.
1175 IS_SUCC is true if out edges of a basic blocks are examined.
1176 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
1177 Return TRUE if any basic block/edge count is changed. */
1178
1179 static bool
1180 afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
1181 edge_set *annotated_edge)
1182 {
1183 basic_block bb;
1184 bool changed = false;
1185
1186 FOR_EACH_BB_FN (bb, cfun)
1187 {
1188 edge e, unknown_edge = NULL;
1189 edge_iterator ei;
1190 int num_unknown_edge = 0;
1191 gcov_type total_known_count = 0;
1192
1193 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1194 if (!is_edge_annotated (e, *annotated_edge))
1195 num_unknown_edge++, unknown_edge = e;
1196 else
1197 total_known_count += e->count;
1198
1199 if (num_unknown_edge == 0)
1200 {
1201 if (total_known_count > bb->count)
1202 {
1203 bb->count = total_known_count;
1204 changed = true;
1205 }
1206 if (!is_bb_annotated (bb, *annotated_bb))
1207 {
1208 set_bb_annotated (bb, annotated_bb);
1209 changed = true;
1210 }
1211 }
1212 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1213 {
1214 if (bb->count >= total_known_count)
1215 unknown_edge->count = bb->count - total_known_count;
1216 else
1217 unknown_edge->count = 0;
1218 set_edge_annotated (unknown_edge, annotated_edge);
1219 changed = true;
1220 }
1221 }
1222 return changed;
1223 }
1224
1225 /* Special propagation for circuit expressions. Because GCC translates
1226 control flow into data flow for circuit expressions. E.g.
1227 BB1:
1228 if (a && b)
1229 BB2
1230 else
1231 BB3
1232
1233 will be translated into:
1234
1235 BB1:
1236 if (a)
1237 goto BB.t1
1238 else
1239 goto BB.t3
1240 BB.t1:
1241 if (b)
1242 goto BB.t2
1243 else
1244 goto BB.t3
1245 BB.t2:
1246 goto BB.t3
1247 BB.t3:
1248 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1249 if (tmp)
1250 goto BB2
1251 else
1252 goto BB3
1253
1254 In this case, we need to propagate through PHI to determine the edge
1255 count of BB1->BB.t1, BB.t1->BB.t2.
1256 Update ANNOTATED_EDGE accordingly. */
1257
1258 static void
1259 afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
1260 {
1261 basic_block bb;
1262 FOR_ALL_BB_FN (bb, cfun)
1263 {
1264 gimple def_stmt;
1265 tree cmp_rhs, cmp_lhs;
1266 gimple cmp_stmt = last_stmt (bb);
1267 edge e;
1268 edge_iterator ei;
1269
1270 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1271 continue;
1272 cmp_rhs = gimple_cond_rhs (cmp_stmt);
1273 cmp_lhs = gimple_cond_lhs (cmp_stmt);
1274 if (!TREE_CONSTANT (cmp_rhs)
1275 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1276 continue;
1277 if (TREE_CODE (cmp_lhs) != SSA_NAME)
1278 continue;
1279 if (!is_bb_annotated (bb, annotated_bb))
1280 continue;
1281 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1282 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1283 && gimple_assign_single_p (def_stmt)
1284 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1285 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1286 if (!def_stmt)
1287 continue;
1288 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1289 if (!phi_stmt)
1290 continue;
1291 FOR_EACH_EDGE (e, ei, bb->succs)
1292 {
1293 unsigned i, total = 0;
1294 edge only_one;
1295 bool check_value_one = (((integer_onep (cmp_rhs))
1296 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1297 ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1298 if (!is_edge_annotated (e, *annotated_edge))
1299 continue;
1300 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1301 {
1302 tree val = gimple_phi_arg_def (phi_stmt, i);
1303 edge ep = gimple_phi_arg_edge (phi_stmt, i);
1304
1305 if (!TREE_CONSTANT (val)
1306 || !(integer_zerop (val) || integer_onep (val)))
1307 continue;
1308 if (check_value_one ^ integer_onep (val))
1309 continue;
1310 total++;
1311 only_one = ep;
1312 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
1313 {
1314 ep->probability = 0;
1315 ep->count = 0;
1316 set_edge_annotated (ep, annotated_edge);
1317 }
1318 }
1319 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
1320 {
1321 only_one->probability = e->probability;
1322 only_one->count = e->count;
1323 set_edge_annotated (only_one, annotated_edge);
1324 }
1325 }
1326 }
1327 }
1328
1329 /* Propagate the basic block count and edge count on the control flow
1330 graph. We do the propagation iteratively until stablize. */
1331
1332 static void
1333 afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
1334 {
1335 basic_block bb;
1336 bool changed = true;
1337 int i = 0;
1338
1339 FOR_ALL_BB_FN (bb, cfun)
1340 {
1341 bb->count = ((basic_block)bb->aux)->count;
1342 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
1343 set_bb_annotated (bb, annotated_bb);
1344 }
1345
1346 while (changed && i++ < 10)
1347 {
1348 changed = false;
1349
1350 if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
1351 changed = true;
1352 if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
1353 changed = true;
1354 afdo_propagate_circuit (*annotated_bb, annotated_edge);
1355 }
1356 }
1357
1358 /* Propagate counts on control flow graph and calculate branch
1359 probabilities. */
1360
1361 static void
1362 afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
1363 {
1364 basic_block bb;
1365 bool has_sample = false;
1366
1367 FOR_EACH_BB_FN (bb, cfun)
1368 {
1369 if (bb->count > 0)
1370 {
1371 has_sample = true;
1372 break;
1373 }
1374 }
1375
1376 if (!has_sample)
1377 return;
1378
1379 calculate_dominance_info (CDI_POST_DOMINATORS);
1380 calculate_dominance_info (CDI_DOMINATORS);
1381 loop_optimizer_init (0);
1382
1383 afdo_find_equiv_class (annotated_bb);
1384 afdo_propagate (annotated_bb, annotated_edge);
1385
1386 FOR_EACH_BB_FN (bb, cfun)
1387 {
1388 edge e;
1389 edge_iterator ei;
1390 int num_unknown_succ = 0;
1391 gcov_type total_count = 0;
1392
1393 FOR_EACH_EDGE (e, ei, bb->succs)
1394 {
1395 if (!is_edge_annotated (e, *annotated_edge))
1396 num_unknown_succ++;
1397 else
1398 total_count += e->count;
1399 }
1400 if (num_unknown_succ == 0 && total_count > 0)
1401 {
1402 FOR_EACH_EDGE (e, ei, bb->succs)
1403 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
1404 }
1405 }
1406 FOR_ALL_BB_FN (bb, cfun)
1407 {
1408 edge e;
1409 edge_iterator ei;
1410
1411 FOR_EACH_EDGE (e, ei, bb->succs)
1412 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
1413 bb->aux = NULL;
1414 }
1415
1416 loop_optimizer_finalize ();
1417 free_dominance_info (CDI_DOMINATORS);
1418 free_dominance_info (CDI_POST_DOMINATORS);
1419 }
1420
1421 /* Perform value profile transformation using AutoFDO profile. Add the
1422 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1423 indirect call promoted. */
1424
1425 static bool
1426 afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1427 {
1428 basic_block bb;
1429 if (afdo_source_profile->get_function_instance_by_decl (
1430 current_function_decl) == NULL)
1431 return false;
1432
1433 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1434
1435 bool has_vpt = false;
1436 FOR_EACH_BB_FN (bb, cfun)
1437 {
1438 if (!has_indirect_call (bb))
1439 continue;
1440 gimple_stmt_iterator gsi;
1441
1442 gcov_type bb_count = 0;
1443 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1444 {
1445 count_info info;
1446 gimple stmt = gsi_stmt (gsi);
1447 if (afdo_source_profile->get_count_info (stmt, &info))
1448 bb_count = MAX (bb_count, info.count);
1449 }
1450
1451 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1452 {
1453 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1454 /* IC_promotion and early_inline_2 is done in multiple iterations.
1455 No need to promoted the stmt if its in promoted_stmts (means
1456 it is already been promoted in the previous iterations). */
1457 if ((!stmt) || gimple_call_fn (stmt) == NULL
1458 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1459 || promoted_stmts->find (stmt) != promoted_stmts->end ())
1460 continue;
1461
1462 count_info info;
1463 afdo_source_profile->get_count_info (stmt, &info);
1464 info.count = bb_count;
1465 if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1466 {
1467 /* Promote the indirect call and update the promoted_stmts. */
1468 promoted_stmts->insert (stmt);
1469 afdo_vpt (&gsi, info.targets, true);
1470 has_vpt = true;
1471 }
1472 }
1473 }
1474
1475 if (has_vpt)
1476 {
1477 optimize_inline_calls (current_function_decl);
1478 return true;
1479 }
1480
1481 return false;
1482 }
1483
1484 /* Annotate auto profile to the control flow graph. Do not annotate value
1485 profile for stmts in PROMOTED_STMTS. */
1486
1487 static void
1488 afdo_annotate_cfg (const stmt_set &promoted_stmts)
1489 {
1490 basic_block bb;
1491 bb_set annotated_bb;
1492 edge_set annotated_edge;
1493 const function_instance *s
1494 = afdo_source_profile->get_function_instance_by_decl (
1495 current_function_decl);
1496
1497 if (s == NULL)
1498 return;
1499 cgraph_node::get (current_function_decl)->count = s->head_count ();
1500 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
1501 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1502
1503 FOR_EACH_BB_FN (bb, cfun)
1504 {
1505 edge e;
1506 edge_iterator ei;
1507
1508 bb->count = 0;
1509 FOR_EACH_EDGE (e, ei, bb->succs)
1510 e->count = 0;
1511
1512 if (afdo_set_bb_count (bb, promoted_stmts))
1513 set_bb_annotated (bb, &annotated_bb);
1514 if (bb->count > max_count)
1515 max_count = bb->count;
1516 }
1517 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1518 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1519 {
1520 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1521 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1522 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1523 }
1524 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1525 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1526 {
1527 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1528 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1529 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1530 }
1531 afdo_source_profile->mark_annotated (
1532 DECL_SOURCE_LOCATION (current_function_decl));
1533 afdo_source_profile->mark_annotated (cfun->function_start_locus);
1534 afdo_source_profile->mark_annotated (cfun->function_end_locus);
1535 if (max_count > 0)
1536 {
1537 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
1538 counts_to_freqs ();
1539 profile_status_for_fn (cfun) = PROFILE_READ;
1540 }
1541 if (flag_value_profile_transformations)
1542 {
1543 gimple_value_profile_transformations ();
1544 free_dominance_info (CDI_DOMINATORS);
1545 free_dominance_info (CDI_POST_DOMINATORS);
1546 update_ssa (TODO_update_ssa);
1547 }
1548 }
1549
1550 /* Wrapper function to invoke early inliner. */
1551
1552 static void
1553 early_inline ()
1554 {
1555 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1556 unsigned todo = early_inliner (cfun);
1557 if (todo & TODO_update_ssa_any)
1558 update_ssa (TODO_update_ssa);
1559 }
1560
1561 /* Use AutoFDO profile to annoate the control flow graph.
1562 Return the todo flag. */
1563
1564 static unsigned int
1565 auto_profile (void)
1566 {
1567 struct cgraph_node *node;
1568
1569 if (symtab->state == FINISHED)
1570 return 0;
1571
1572 init_node_map (true);
1573 profile_info = autofdo::afdo_profile_info;
1574
1575 FOR_EACH_FUNCTION (node)
1576 {
1577 if (!gimple_has_body_p (node->decl))
1578 continue;
1579
1580 /* Don't profile functions produced for builtin stuff. */
1581 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1582 continue;
1583
1584 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1585
1586 /* First do indirect call promotion and early inline to make the
1587 IR match the profiled binary before actual annotation.
1588
1589 This is needed because an indirect call might have been promoted
1590 and inlined in the profiled binary. If we do not promote and
1591 inline these indirect calls before annotation, the profile for
1592 these promoted functions will be lost.
1593
1594 e.g. foo() --indirect_call--> bar()
1595 In profiled binary, the callsite is promoted and inlined, making
1596 the profile look like:
1597
1598 foo: {
1599 loc_foo_1: count_1
1600 bar@loc_foo_2: {
1601 loc_bar_1: count_2
1602 loc_bar_2: count_3
1603 }
1604 }
1605
1606 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1607 If we perform annotation on it, the profile inside bar@loc_foo2
1608 will be wasted.
1609
1610 To avoid this, we promote loc_foo_2 and inline the promoted bar
1611 function before annotation, so the profile inside bar@loc_foo2
1612 will be useful. */
1613 autofdo::stmt_set promoted_stmts;
1614 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
1615 {
1616 if (!flag_value_profile_transformations
1617 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1618 break;
1619 early_inline ();
1620 }
1621
1622 early_inline ();
1623 autofdo::afdo_annotate_cfg (promoted_stmts);
1624 compute_function_frequency ();
1625
1626 /* Local pure-const may imply need to fixup the cfg. */
1627 if (execute_fixup_cfg () & TODO_cleanup_cfg)
1628 cleanup_tree_cfg ();
1629
1630 free_dominance_info (CDI_DOMINATORS);
1631 free_dominance_info (CDI_POST_DOMINATORS);
1632 cgraph_edge::rebuild_edges ();
1633 compute_inline_parameters (cgraph_node::get (current_function_decl), true);
1634 pop_cfun ();
1635 }
1636
1637 return TODO_rebuild_cgraph_edges;
1638 }
1639 } /* namespace autofdo. */
1640
1641 /* Read the profile from the profile data file. */
1642
1643 void
1644 read_autofdo_file (void)
1645 {
1646 if (auto_profile_file == NULL)
1647 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1648
1649 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
1650 1, sizeof (struct gcov_ctr_summary));
1651 autofdo::afdo_profile_info->runs = 1;
1652 autofdo::afdo_profile_info->sum_max = 0;
1653 autofdo::afdo_profile_info->sum_all = 0;
1654
1655 /* Read the profile from the profile file. */
1656 autofdo::read_profile ();
1657 }
1658
1659 /* Free the resources. */
1660
1661 void
1662 end_auto_profile (void)
1663 {
1664 delete autofdo::afdo_source_profile;
1665 delete autofdo::afdo_string_table;
1666 profile_info = NULL;
1667 }
1668
1669 /* Returns TRUE if EDGE is hot enough to be inlined early. */
1670
1671 bool
1672 afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1673 {
1674 gcov_type count
1675 = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1676
1677 if (count > 0)
1678 {
1679 bool is_hot;
1680 const struct gcov_ctr_summary *saved_profile_info = profile_info;
1681 /* At early inline stage, profile_info is not set yet. We need to
1682 temporarily set it to afdo_profile_info to calculate hotness. */
1683 profile_info = autofdo::afdo_profile_info;
1684 is_hot = maybe_hot_count_p (NULL, count);
1685 profile_info = saved_profile_info;
1686 return is_hot;
1687 }
1688
1689 return false;
1690 }
1691
1692 namespace
1693 {
1694
1695 const pass_data pass_data_ipa_auto_profile = {
1696 SIMPLE_IPA_PASS, "afdo", /* name */
1697 OPTGROUP_NONE, /* optinfo_flags */
1698 TV_IPA_AUTOFDO, /* tv_id */
1699 0, /* properties_required */
1700 0, /* properties_provided */
1701 0, /* properties_destroyed */
1702 0, /* todo_flags_start */
1703 0, /* todo_flags_finish */
1704 };
1705
1706 class pass_ipa_auto_profile : public simple_ipa_opt_pass
1707 {
1708 public:
1709 pass_ipa_auto_profile (gcc::context *ctxt)
1710 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1711 {
1712 }
1713
1714 /* opt_pass methods: */
1715 virtual bool
1716 gate (function *)
1717 {
1718 return flag_auto_profile;
1719 }
1720 virtual unsigned int
1721 execute (function *)
1722 {
1723 return autofdo::auto_profile ();
1724 }
1725 }; // class pass_ipa_auto_profile
1726
1727 } // anon namespace
1728
1729 simple_ipa_opt_pass *
1730 make_pass_ipa_auto_profile (gcc::context *ctxt)
1731 {
1732 return new pass_ipa_auto_profile (ctxt);
1733 }