]>
Commit | Line | Data |
---|---|---|
be3c16c4 | 1 | /* Read and annotate call graph profile from the auto profile data file. |
7adcbafe | 2 | Copyright (C) 2014-2022 Free Software Foundation, Inc. |
be3c16c4 DC |
3 | Contributed by Dehao Chen (dehao@google.com) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
3d790fd8 | 21 | #include "config.h" |
2c384ad8 SN |
22 | #define INCLUDE_MAP |
23 | #define INCLUDE_SET | |
3d790fd8 | 24 | #include "system.h" |
957060b5 AM |
25 | #include "coretypes.h" |
26 | #include "backend.h" | |
957060b5 AM |
27 | #include "tree.h" |
28 | #include "gimple.h" | |
29 | #include "predict.h" | |
30 | #include "alloc-pool.h" | |
31 | #include "tree-pass.h" | |
32 | #include "ssa.h" | |
33 | #include "cgraph.h" | |
34 | #include "gcov-io.h" | |
35 | #include "diagnostic-core.h" | |
be3c16c4 DC |
36 | #include "profile.h" |
37 | #include "langhooks.h" | |
be3c16c4 | 38 | #include "cfgloop.h" |
be3c16c4 DC |
39 | #include "tree-cfg.h" |
40 | #include "tree-cfgcleanup.h" | |
be3c16c4 | 41 | #include "tree-into-ssa.h" |
be3c16c4 | 42 | #include "gimple-iterator.h" |
be3c16c4 | 43 | #include "value-prof.h" |
dd912cb8 | 44 | #include "symbol-summary.h" |
c582198b | 45 | #include "ipa-prop.h" |
27d020cf | 46 | #include "ipa-fnsummary.h" |
be3c16c4 DC |
47 | #include "ipa-inline.h" |
48 | #include "tree-inline.h" | |
be3c16c4 | 49 | #include "auto-profile.h" |
0ea221ef AK |
50 | #include "tree-pretty-print.h" |
51 | #include "gimple-pretty-print.h" | |
be3c16c4 DC |
52 | |
53 | /* The following routines implements AutoFDO optimization. | |
54 | ||
55 | This optimization uses sampling profiles to annotate basic block counts | |
56 | and uses heuristics to estimate branch probabilities. | |
57 | ||
58 | There are three phases in AutoFDO: | |
59 | ||
60 | Phase 1: Read profile from the profile data file. | |
61 | The following info is read from the profile datafile: | |
62 | * string_table: a map between function name and its index. | |
63 | * autofdo_source_profile: a map from function_instance name to | |
64 | function_instance. This is represented as a forest of | |
65 | function_instances. | |
66 | * WorkingSet: a histogram of how many instructions are covered for a | |
67 | given percentage of total cycles. This is describing the binary | |
68 | level information (not source level). This info is used to help | |
69 | decide if we want aggressive optimizations that could increase | |
70 | code footprint (e.g. loop unroll etc.) | |
71 | A function instance is an instance of function that could either be a | |
72 | standalone symbol, or a clone of a function that is inlined into another | |
73 | function. | |
74 | ||
1500c66f | 75 | Phase 2: Early inline + value profile transformation. |
be3c16c4 DC |
76 | Early inline uses autofdo_source_profile to find if a callsite is: |
77 | * inlined in the profiled binary. | |
78 | * callee body is hot in the profiling run. | |
79 | If both condition satisfies, early inline will inline the callsite | |
80 | regardless of the code growth. | |
81 | Phase 2 is an iterative process. During each iteration, we also check | |
82 | if an indirect callsite is promoted and inlined in the profiling run. | |
83 | If yes, vpt will happen to force promote it and in the next iteration, | |
84 | einline will inline the promoted callsite in the next iteration. | |
85 | ||
86 | Phase 3: Annotate control flow graph. | |
87 | AutoFDO uses a separate pass to: | |
88 | * Annotate basic block count | |
89 | * Estimate branch probability | |
90 | ||
91 | After the above 3 phases, all profile is readily annotated on the GCC IR. | |
92 | AutoFDO tries to reuse all FDO infrastructure as much as possible to make | |
93 | use of the profile. E.g. it uses existing mechanism to calculate the basic | |
94 | block/edge frequency, as well as the cgraph node/edge count. | |
95 | */ | |
96 | ||
97 | #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" | |
8819c82c | 98 | #define AUTO_PROFILE_VERSION 2 |
be3c16c4 DC |
99 | |
100 | namespace autofdo | |
101 | { | |
102 | ||
47b4c53f BC |
103 | /* Intermediate edge info used when propagating AutoFDO profile information. |
104 | We can't edge->count() directly since it's computed from edge's probability | |
105 | while probability is yet not decided during propagation. */ | |
99b1c316 | 106 | #define AFDO_EINFO(e) ((class edge_info *) e->aux) |
47b4c53f BC |
107 | class edge_info |
108 | { | |
109 | public: | |
110 | edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {} | |
111 | bool is_annotated () const { return annotated_; } | |
112 | void set_annotated () { annotated_ = true; } | |
113 | profile_count get_count () const { return count_; } | |
114 | void set_count (profile_count count) { count_ = count; } | |
115 | private: | |
116 | profile_count count_; | |
117 | bool annotated_; | |
118 | }; | |
119 | ||
be3c16c4 DC |
120 | /* Represent a source location: (function_decl, lineno). */ |
121 | typedef std::pair<tree, unsigned> decl_lineno; | |
122 | ||
123 | /* Represent an inline stack. vector[0] is the leaf node. */ | |
124 | typedef auto_vec<decl_lineno> inline_stack; | |
125 | ||
126 | /* String array that stores function names. */ | |
127 | typedef auto_vec<char *> string_vector; | |
128 | ||
129 | /* Map from function name's index in string_table to target's | |
130 | execution count. */ | |
131 | typedef std::map<unsigned, gcov_type> icall_target_map; | |
132 | ||
133 | /* Set of gimple stmts. Used to track if the stmt has already been promoted | |
134 | to direct call. */ | |
355fe088 | 135 | typedef std::set<gimple *> stmt_set; |
be3c16c4 DC |
136 | |
137 | /* Represent count info of an inline stack. */ | |
6c1dae73 | 138 | class count_info |
be3c16c4 | 139 | { |
6c1dae73 | 140 | public: |
be3c16c4 DC |
141 | /* Sampled count of the inline stack. */ |
142 | gcov_type count; | |
143 | ||
144 | /* Map from indirect call target to its sample count. */ | |
145 | icall_target_map targets; | |
146 | ||
147 | /* Whether this inline stack is already used in annotation. | |
148 | ||
149 | Each inline stack should only be used to annotate IR once. | |
150 | This will be enforced when instruction-level discriminator | |
151 | is supported. */ | |
152 | bool annotated; | |
153 | }; | |
154 | ||
155 | /* operator< for "const char *". */ | |
156 | struct string_compare | |
157 | { | |
158 | bool operator()(const char *a, const char *b) const | |
159 | { | |
160 | return strcmp (a, b) < 0; | |
161 | } | |
162 | }; | |
163 | ||
164 | /* Store a string array, indexed by string position in the array. */ | |
165 | class string_table | |
166 | { | |
167 | public: | |
168 | string_table () | |
169 | {} | |
170 | ||
171 | ~string_table (); | |
172 | ||
173 | /* For a given string, returns its index. */ | |
174 | int get_index (const char *name) const; | |
175 | ||
176 | /* For a given decl, returns the index of the decl name. */ | |
177 | int get_index_by_decl (tree decl) const; | |
178 | ||
179 | /* For a given index, returns the string. */ | |
180 | const char *get_name (int index) const; | |
181 | ||
182 | /* Read profile, return TRUE on success. */ | |
183 | bool read (); | |
184 | ||
185 | private: | |
186 | typedef std::map<const char *, unsigned, string_compare> string_index_map; | |
187 | string_vector vector_; | |
188 | string_index_map map_; | |
189 | }; | |
190 | ||
191 | /* Profile of a function instance: | |
192 | 1. total_count of the function. | |
193 | 2. head_count (entry basic block count) of the function (only valid when | |
194 | function is a top-level function_instance, i.e. it is the original copy | |
195 | instead of the inlined copy). | |
196 | 3. map from source location (decl_lineno) to profile (count_info). | |
197 | 4. map from callsite to callee function_instance. */ | |
198 | class function_instance | |
199 | { | |
200 | public: | |
201 | typedef auto_vec<function_instance *> function_instance_stack; | |
202 | ||
203 | /* Read the profile and return a function_instance with head count as | |
204 | HEAD_COUNT. Recursively read callsites to create nested function_instances | |
205 | too. STACK is used to track the recursive creation process. */ | |
206 | static function_instance * | |
207 | read_function_instance (function_instance_stack *stack, | |
208 | gcov_type head_count); | |
209 | ||
210 | /* Recursively deallocate all callsites (nested function_instances). */ | |
211 | ~function_instance (); | |
212 | ||
213 | /* Accessors. */ | |
214 | int | |
215 | name () const | |
216 | { | |
217 | return name_; | |
218 | } | |
219 | gcov_type | |
220 | total_count () const | |
221 | { | |
222 | return total_count_; | |
223 | } | |
224 | gcov_type | |
225 | head_count () const | |
226 | { | |
227 | return head_count_; | |
228 | } | |
229 | ||
230 | /* Traverse callsites of the current function_instance to find one at the | |
231 | location of LINENO and callee name represented in DECL. */ | |
232 | function_instance *get_function_instance_by_decl (unsigned lineno, | |
233 | tree decl) const; | |
234 | ||
235 | /* Store the profile info for LOC in INFO. Return TRUE if profile info | |
236 | is found. */ | |
237 | bool get_count_info (location_t loc, count_info *info) const; | |
238 | ||
239 | /* Read the inlined indirect call target profile for STMT and store it in | |
240 | MAP, return the total count for all inlined indirect calls. */ | |
538dd0b7 | 241 | gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; |
be3c16c4 DC |
242 | |
243 | /* Sum of counts that is used during annotation. */ | |
244 | gcov_type total_annotated_count () const; | |
245 | ||
246 | /* Mark LOC as annotated. */ | |
247 | void mark_annotated (location_t loc); | |
248 | ||
249 | private: | |
250 | /* Callsite, represented as (decl_lineno, callee_function_name_index). */ | |
251 | typedef std::pair<unsigned, unsigned> callsite; | |
252 | ||
253 | /* Map from callsite to callee function_instance. */ | |
254 | typedef std::map<callsite, function_instance *> callsite_map; | |
255 | ||
256 | function_instance (unsigned name, gcov_type head_count) | |
257 | : name_ (name), total_count_ (0), head_count_ (head_count) | |
258 | { | |
259 | } | |
260 | ||
261 | /* Map from source location (decl_lineno) to profile (count_info). */ | |
262 | typedef std::map<unsigned, count_info> position_count_map; | |
263 | ||
264 | /* function_instance name index in the string_table. */ | |
265 | unsigned name_; | |
266 | ||
267 | /* Total sample count. */ | |
268 | gcov_type total_count_; | |
269 | ||
270 | /* Entry BB's sample count. */ | |
271 | gcov_type head_count_; | |
272 | ||
273 | /* Map from callsite location to callee function_instance. */ | |
274 | callsite_map callsites; | |
275 | ||
276 | /* Map from source location to count_info. */ | |
277 | position_count_map pos_counts; | |
278 | }; | |
279 | ||
280 | /* Profile for all functions. */ | |
281 | class autofdo_source_profile | |
282 | { | |
283 | public: | |
284 | static autofdo_source_profile * | |
285 | create () | |
286 | { | |
287 | autofdo_source_profile *map = new autofdo_source_profile (); | |
288 | ||
289 | if (map->read ()) | |
290 | return map; | |
291 | delete map; | |
292 | return NULL; | |
293 | } | |
294 | ||
295 | ~autofdo_source_profile (); | |
296 | ||
297 | /* For a given DECL, returns the top-level function_instance. */ | |
298 | function_instance *get_function_instance_by_decl (tree decl) const; | |
299 | ||
300 | /* Find count_info for a given gimple STMT. If found, store the count_info | |
301 | in INFO and return true; otherwise return false. */ | |
355fe088 | 302 | bool get_count_info (gimple *stmt, count_info *info) const; |
be3c16c4 DC |
303 | |
304 | /* Find total count of the callee of EDGE. */ | |
305 | gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; | |
306 | ||
307 | /* Update value profile INFO for STMT from the inlined indirect callsite. | |
308 | Return true if INFO is updated. */ | |
538dd0b7 | 309 | bool update_inlined_ind_target (gcall *stmt, count_info *info); |
be3c16c4 DC |
310 | |
311 | /* Mark LOC as annotated. */ | |
312 | void mark_annotated (location_t loc); | |
313 | ||
314 | private: | |
315 | /* Map from function_instance name index (in string_table) to | |
316 | function_instance. */ | |
317 | typedef std::map<unsigned, function_instance *> name_function_instance_map; | |
318 | ||
319 | autofdo_source_profile () {} | |
320 | ||
321 | /* Read AutoFDO profile and returns TRUE on success. */ | |
322 | bool read (); | |
323 | ||
324 | /* Return the function_instance in the profile that correspond to the | |
325 | inline STACK. */ | |
326 | function_instance * | |
327 | get_function_instance_by_inline_stack (const inline_stack &stack) const; | |
328 | ||
329 | name_function_instance_map map_; | |
330 | }; | |
331 | ||
332 | /* Store the strings read from the profile data file. */ | |
333 | static string_table *afdo_string_table; | |
334 | ||
335 | /* Store the AutoFDO source profile. */ | |
336 | static autofdo_source_profile *afdo_source_profile; | |
337 | ||
7f3577f5 ML |
338 | /* gcov_summary structure to store the profile_info. */ |
339 | static gcov_summary *afdo_profile_info; | |
be3c16c4 DC |
340 | |
341 | /* Helper functions. */ | |
342 | ||
343 | /* Return the original name of NAME: strip the suffix that starts | |
344 | with '.' Caller is responsible for freeing RET. */ | |
345 | ||
346 | static char * | |
347 | get_original_name (const char *name) | |
348 | { | |
349 | char *ret = xstrdup (name); | |
350 | char *find = strchr (ret, '.'); | |
351 | if (find != NULL) | |
352 | *find = 0; | |
353 | return ret; | |
354 | } | |
355 | ||
356 | /* Return the combined location, which is a 32bit integer in which | |
357 | higher 16 bits stores the line offset of LOC to the start lineno | |
1500c66f | 358 | of DECL, The lower 16 bits stores the discriminator. */ |
be3c16c4 DC |
359 | |
360 | static unsigned | |
361 | get_combined_location (location_t loc, tree decl) | |
362 | { | |
363 | /* TODO: allow more bits for line and less bits for discriminator. */ | |
364 | if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) | |
90aaff2c | 365 | warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes"); |
be3c16c4 DC |
366 | return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16); |
367 | } | |
368 | ||
369 | /* Return the function decl of a given lexical BLOCK. */ | |
370 | ||
371 | static tree | |
372 | get_function_decl_from_block (tree block) | |
373 | { | |
dc16b007 | 374 | if (!inlined_function_outer_scope_p (block)) |
be3c16c4 DC |
375 | return NULL_TREE; |
376 | ||
dc16b007 | 377 | return BLOCK_ABSTRACT_ORIGIN (block); |
be3c16c4 DC |
378 | } |
379 | ||
380 | /* Store inline stack for STMT in STACK. */ | |
381 | ||
382 | static void | |
383 | get_inline_stack (location_t locus, inline_stack *stack) | |
384 | { | |
385 | if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) | |
386 | return; | |
387 | ||
388 | tree block = LOCATION_BLOCK (locus); | |
389 | if (block && TREE_CODE (block) == BLOCK) | |
390 | { | |
be3c16c4 DC |
391 | for (block = BLOCK_SUPERCONTEXT (block); |
392 | block && (TREE_CODE (block) == BLOCK); | |
393 | block = BLOCK_SUPERCONTEXT (block)) | |
394 | { | |
395 | location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); | |
396 | if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) | |
397 | continue; | |
398 | ||
399 | tree decl = get_function_decl_from_block (block); | |
400 | stack->safe_push ( | |
401 | std::make_pair (decl, get_combined_location (locus, decl))); | |
402 | locus = tmp_locus; | |
be3c16c4 DC |
403 | } |
404 | } | |
405 | stack->safe_push ( | |
406 | std::make_pair (current_function_decl, | |
407 | get_combined_location (locus, current_function_decl))); | |
408 | } | |
409 | ||
410 | /* Return STMT's combined location, which is a 32bit integer in which | |
411 | higher 16 bits stores the line offset of LOC to the start lineno | |
1500c66f | 412 | of DECL, The lower 16 bits stores the discriminator. */ |
be3c16c4 DC |
413 | |
414 | static unsigned | |
355fe088 | 415 | get_relative_location_for_stmt (gimple *stmt) |
be3c16c4 DC |
416 | { |
417 | location_t locus = gimple_location (stmt); | |
418 | if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) | |
419 | return UNKNOWN_LOCATION; | |
420 | ||
421 | for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); | |
422 | block = BLOCK_SUPERCONTEXT (block)) | |
423 | if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) | |
424 | return get_combined_location (locus, | |
425 | get_function_decl_from_block (block)); | |
426 | return get_combined_location (locus, current_function_decl); | |
427 | } | |
428 | ||
429 | /* Return true if BB contains indirect call. */ | |
430 | ||
431 | static bool | |
432 | has_indirect_call (basic_block bb) | |
433 | { | |
434 | gimple_stmt_iterator gsi; | |
435 | ||
436 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
437 | { | |
355fe088 | 438 | gimple *stmt = gsi_stmt (gsi); |
be3c16c4 DC |
439 | if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) |
440 | && (gimple_call_fn (stmt) == NULL | |
441 | || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) | |
442 | return true; | |
443 | } | |
444 | return false; | |
445 | } | |
446 | ||
447 | /* Member functions for string_table. */ | |
448 | ||
449 | /* Deconstructor. */ | |
450 | ||
451 | string_table::~string_table () | |
452 | { | |
453 | for (unsigned i = 0; i < vector_.length (); i++) | |
454 | free (vector_[i]); | |
455 | } | |
456 | ||
457 | ||
458 | /* Return the index of a given function NAME. Return -1 if NAME is not | |
459 | found in string table. */ | |
460 | ||
461 | int | |
462 | string_table::get_index (const char *name) const | |
463 | { | |
464 | if (name == NULL) | |
465 | return -1; | |
466 | string_index_map::const_iterator iter = map_.find (name); | |
467 | if (iter == map_.end ()) | |
468 | return -1; | |
1500c66f FY |
469 | |
470 | return iter->second; | |
be3c16c4 DC |
471 | } |
472 | ||
473 | /* Return the index of a given function DECL. Return -1 if DECL is not | |
474 | found in string table. */ | |
475 | ||
476 | int | |
477 | string_table::get_index_by_decl (tree decl) const | |
478 | { | |
479 | char *name | |
480 | = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); | |
481 | int ret = get_index (name); | |
482 | free (name); | |
483 | if (ret != -1) | |
484 | return ret; | |
485 | ret = get_index (lang_hooks.dwarf_name (decl, 0)); | |
486 | if (ret != -1) | |
487 | return ret; | |
395bc8ad | 488 | if (DECL_FROM_INLINE (decl)) |
be3c16c4 | 489 | return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); |
1500c66f FY |
490 | |
491 | return -1; | |
be3c16c4 DC |
492 | } |
493 | ||
494 | /* Return the function name of a given INDEX. */ | |
495 | ||
496 | const char * | |
497 | string_table::get_name (int index) const | |
498 | { | |
499 | gcc_assert (index > 0 && index < (int)vector_.length ()); | |
500 | return vector_[index]; | |
501 | } | |
502 | ||
503 | /* Read the string table. Return TRUE if reading is successful. */ | |
504 | ||
505 | bool | |
506 | string_table::read () | |
507 | { | |
508 | if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) | |
509 | return false; | |
510 | /* Skip the length of the section. */ | |
511 | gcov_read_unsigned (); | |
512 | /* Read in the file name table. */ | |
513 | unsigned string_num = gcov_read_unsigned (); | |
514 | for (unsigned i = 0; i < string_num; i++) | |
515 | { | |
516 | vector_.safe_push (get_original_name (gcov_read_string ())); | |
517 | map_[vector_.last ()] = i; | |
518 | } | |
519 | return true; | |
520 | } | |
521 | ||
522 | /* Member functions for function_instance. */ | |
523 | ||
524 | function_instance::~function_instance () | |
525 | { | |
526 | for (callsite_map::iterator iter = callsites.begin (); | |
527 | iter != callsites.end (); ++iter) | |
528 | delete iter->second; | |
529 | } | |
530 | ||
531 | /* Traverse callsites of the current function_instance to find one at the | |
532 | location of LINENO and callee name represented in DECL. */ | |
533 | ||
534 | function_instance * | |
535 | function_instance::get_function_instance_by_decl (unsigned lineno, | |
536 | tree decl) const | |
537 | { | |
538 | int func_name_idx = afdo_string_table->get_index_by_decl (decl); | |
539 | if (func_name_idx != -1) | |
540 | { | |
541 | callsite_map::const_iterator ret | |
542 | = callsites.find (std::make_pair (lineno, func_name_idx)); | |
543 | if (ret != callsites.end ()) | |
544 | return ret->second; | |
545 | } | |
546 | func_name_idx | |
547 | = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); | |
548 | if (func_name_idx != -1) | |
549 | { | |
550 | callsite_map::const_iterator ret | |
551 | = callsites.find (std::make_pair (lineno, func_name_idx)); | |
552 | if (ret != callsites.end ()) | |
553 | return ret->second; | |
554 | } | |
395bc8ad | 555 | if (DECL_FROM_INLINE (decl)) |
be3c16c4 | 556 | return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); |
1500c66f FY |
557 | |
558 | return NULL; | |
be3c16c4 DC |
559 | } |
560 | ||
561 | /* Store the profile info for LOC in INFO. Return TRUE if profile info | |
562 | is found. */ | |
563 | ||
564 | bool | |
565 | function_instance::get_count_info (location_t loc, count_info *info) const | |
566 | { | |
567 | position_count_map::const_iterator iter = pos_counts.find (loc); | |
568 | if (iter == pos_counts.end ()) | |
569 | return false; | |
570 | *info = iter->second; | |
571 | return true; | |
572 | } | |
573 | ||
574 | /* Mark LOC as annotated. */ | |
575 | ||
576 | void | |
577 | function_instance::mark_annotated (location_t loc) | |
578 | { | |
579 | position_count_map::iterator iter = pos_counts.find (loc); | |
580 | if (iter == pos_counts.end ()) | |
581 | return; | |
582 | iter->second.annotated = true; | |
583 | } | |
584 | ||
1500c66f | 585 | /* Read the inlined indirect call target profile for STMT and store it in |
be3c16c4 DC |
586 | MAP, return the total count for all inlined indirect calls. */ |
587 | ||
588 | gcov_type | |
538dd0b7 | 589 | function_instance::find_icall_target_map (gcall *stmt, |
be3c16c4 DC |
590 | icall_target_map *map) const |
591 | { | |
592 | gcov_type ret = 0; | |
593 | unsigned stmt_offset = get_relative_location_for_stmt (stmt); | |
594 | ||
595 | for (callsite_map::const_iterator iter = callsites.begin (); | |
596 | iter != callsites.end (); ++iter) | |
597 | { | |
598 | unsigned callee = iter->second->name (); | |
599 | /* Check if callsite location match the stmt. */ | |
600 | if (iter->first.first != stmt_offset) | |
601 | continue; | |
602 | struct cgraph_node *node = cgraph_node::get_for_asmname ( | |
603 | get_identifier (afdo_string_table->get_name (callee))); | |
604 | if (node == NULL) | |
605 | continue; | |
be3c16c4 DC |
606 | (*map)[callee] = iter->second->total_count (); |
607 | ret += iter->second->total_count (); | |
608 | } | |
609 | return ret; | |
610 | } | |
611 | ||
612 | /* Read the profile and create a function_instance with head count as | |
613 | HEAD_COUNT. Recursively read callsites to create nested function_instances | |
614 | too. STACK is used to track the recursive creation process. */ | |
615 | ||
616 | /* function instance profile format: | |
617 | ||
618 | ENTRY_COUNT: 8 bytes | |
619 | NAME_INDEX: 4 bytes | |
620 | NUM_POS_COUNTS: 4 bytes | |
621 | NUM_CALLSITES: 4 byte | |
622 | POS_COUNT_1: | |
623 | POS_1_OFFSET: 4 bytes | |
624 | NUM_TARGETS: 4 bytes | |
625 | COUNT: 8 bytes | |
626 | TARGET_1: | |
627 | VALUE_PROFILE_TYPE: 4 bytes | |
628 | TARGET_IDX: 8 bytes | |
629 | COUNT: 8 bytes | |
630 | TARGET_2 | |
631 | ... | |
632 | TARGET_n | |
633 | POS_COUNT_2 | |
634 | ... | |
635 | POS_COUNT_N | |
636 | CALLSITE_1: | |
637 | CALLSITE_1_OFFSET: 4 bytes | |
638 | FUNCTION_INSTANCE_PROFILE (nested) | |
639 | CALLSITE_2 | |
640 | ... | |
641 | CALLSITE_n. */ | |
642 | ||
643 | function_instance * | |
644 | function_instance::read_function_instance (function_instance_stack *stack, | |
645 | gcov_type head_count) | |
646 | { | |
647 | unsigned name = gcov_read_unsigned (); | |
648 | unsigned num_pos_counts = gcov_read_unsigned (); | |
649 | unsigned num_callsites = gcov_read_unsigned (); | |
650 | function_instance *s = new function_instance (name, head_count); | |
651 | stack->safe_push (s); | |
652 | ||
653 | for (unsigned i = 0; i < num_pos_counts; i++) | |
654 | { | |
655 | unsigned offset = gcov_read_unsigned () & 0xffff0000; | |
656 | unsigned num_targets = gcov_read_unsigned (); | |
657 | gcov_type count = gcov_read_counter (); | |
658 | s->pos_counts[offset].count = count; | |
659 | for (unsigned j = 0; j < stack->length (); j++) | |
660 | (*stack)[j]->total_count_ += count; | |
661 | for (unsigned j = 0; j < num_targets; j++) | |
662 | { | |
663 | /* Only indirect call target histogram is supported now. */ | |
664 | gcov_read_unsigned (); | |
665 | gcov_type target_idx = gcov_read_counter (); | |
666 | s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); | |
667 | } | |
668 | } | |
669 | for (unsigned i = 0; i < num_callsites; i++) | |
670 | { | |
671 | unsigned offset = gcov_read_unsigned (); | |
672 | function_instance *callee_function_instance | |
673 | = read_function_instance (stack, 0); | |
674 | s->callsites[std::make_pair (offset, callee_function_instance->name ())] | |
675 | = callee_function_instance; | |
676 | } | |
677 | stack->pop (); | |
678 | return s; | |
679 | } | |
680 | ||
681 | /* Sum of counts that is used during annotation. */ | |
682 | ||
683 | gcov_type | |
684 | function_instance::total_annotated_count () const | |
685 | { | |
686 | gcov_type ret = 0; | |
687 | for (callsite_map::const_iterator iter = callsites.begin (); | |
688 | iter != callsites.end (); ++iter) | |
689 | ret += iter->second->total_annotated_count (); | |
690 | for (position_count_map::const_iterator iter = pos_counts.begin (); | |
691 | iter != pos_counts.end (); ++iter) | |
692 | if (iter->second.annotated) | |
693 | ret += iter->second.count; | |
694 | return ret; | |
695 | } | |
696 | ||
697 | /* Member functions for autofdo_source_profile. */ | |
698 | ||
699 | autofdo_source_profile::~autofdo_source_profile () | |
700 | { | |
701 | for (name_function_instance_map::const_iterator iter = map_.begin (); | |
702 | iter != map_.end (); ++iter) | |
703 | delete iter->second; | |
704 | } | |
705 | ||
706 | /* For a given DECL, returns the top-level function_instance. */ | |
707 | ||
708 | function_instance * | |
709 | autofdo_source_profile::get_function_instance_by_decl (tree decl) const | |
710 | { | |
711 | int index = afdo_string_table->get_index_by_decl (decl); | |
712 | if (index == -1) | |
713 | return NULL; | |
714 | name_function_instance_map::const_iterator ret = map_.find (index); | |
715 | return ret == map_.end () ? NULL : ret->second; | |
716 | } | |
717 | ||
718 | /* Find count_info for a given gimple STMT. If found, store the count_info | |
719 | in INFO and return true; otherwise return false. */ | |
720 | ||
721 | bool | |
355fe088 | 722 | autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const |
be3c16c4 DC |
723 | { |
724 | if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) | |
725 | return false; | |
726 | ||
727 | inline_stack stack; | |
728 | get_inline_stack (gimple_location (stmt), &stack); | |
729 | if (stack.length () == 0) | |
730 | return false; | |
731 | function_instance *s = get_function_instance_by_inline_stack (stack); | |
732 | if (s == NULL) | |
733 | return false; | |
734 | return s->get_count_info (stack[0].second, info); | |
735 | } | |
736 | ||
737 | /* Mark LOC as annotated. */ | |
738 | ||
739 | void | |
740 | autofdo_source_profile::mark_annotated (location_t loc) | |
741 | { | |
742 | inline_stack stack; | |
743 | get_inline_stack (loc, &stack); | |
744 | if (stack.length () == 0) | |
745 | return; | |
746 | function_instance *s = get_function_instance_by_inline_stack (stack); | |
747 | if (s == NULL) | |
748 | return; | |
749 | s->mark_annotated (stack[0].second); | |
750 | } | |
751 | ||
752 | /* Update value profile INFO for STMT from the inlined indirect callsite. | |
753 | Return true if INFO is updated. */ | |
754 | ||
755 | bool | |
538dd0b7 | 756 | autofdo_source_profile::update_inlined_ind_target (gcall *stmt, |
be3c16c4 DC |
757 | count_info *info) |
758 | { | |
0ea221ef AK |
759 | if (dump_file) |
760 | { | |
761 | fprintf (dump_file, "Checking indirect call -> direct call "); | |
762 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); | |
763 | } | |
764 | ||
be3c16c4 | 765 | if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) |
0ea221ef AK |
766 | { |
767 | if (dump_file) | |
768 | fprintf (dump_file, " good locus\n"); | |
769 | return false; | |
770 | } | |
be3c16c4 DC |
771 | |
772 | count_info old_info; | |
773 | get_count_info (stmt, &old_info); | |
774 | gcov_type total = 0; | |
775 | for (icall_target_map::const_iterator iter = old_info.targets.begin (); | |
776 | iter != old_info.targets.end (); ++iter) | |
777 | total += iter->second; | |
778 | ||
779 | /* Program behavior changed, original promoted (and inlined) target is not | |
780 | hot any more. Will avoid promote the original target. | |
781 | ||
782 | To check if original promoted target is still hot, we check the total | |
89722cf7 ML |
783 | count of the unpromoted targets (stored in TOTAL). If a callsite count |
784 | (stored in INFO) is smaller than half of the total count, the original | |
785 | promoted target is considered not hot any more. */ | |
786 | if (info->count < total / 2) | |
0ea221ef AK |
787 | { |
788 | if (dump_file) | |
89722cf7 ML |
789 | fprintf (dump_file, " not hot anymore %ld < %ld", |
790 | (long)info->count, | |
791 | (long)total /2); | |
0ea221ef AK |
792 | return false; |
793 | } | |
be3c16c4 DC |
794 | |
795 | inline_stack stack; | |
796 | get_inline_stack (gimple_location (stmt), &stack); | |
797 | if (stack.length () == 0) | |
0ea221ef AK |
798 | { |
799 | if (dump_file) | |
800 | fprintf (dump_file, " no inline stack\n"); | |
801 | return false; | |
802 | } | |
be3c16c4 DC |
803 | function_instance *s = get_function_instance_by_inline_stack (stack); |
804 | if (s == NULL) | |
0ea221ef AK |
805 | { |
806 | if (dump_file) | |
807 | fprintf (dump_file, " function not found in inline stack\n"); | |
808 | return false; | |
809 | } | |
be3c16c4 DC |
810 | icall_target_map map; |
811 | if (s->find_icall_target_map (stmt, &map) == 0) | |
0ea221ef AK |
812 | { |
813 | if (dump_file) | |
814 | fprintf (dump_file, " no target map\n"); | |
815 | return false; | |
816 | } | |
be3c16c4 DC |
817 | for (icall_target_map::const_iterator iter = map.begin (); |
818 | iter != map.end (); ++iter) | |
819 | info->targets[iter->first] = iter->second; | |
0ea221ef AK |
820 | if (dump_file) |
821 | fprintf (dump_file, " looks good\n"); | |
be3c16c4 DC |
822 | return true; |
823 | } | |
824 | ||
825 | /* Find total count of the callee of EDGE. */ | |
826 | ||
827 | gcov_type | |
828 | autofdo_source_profile::get_callsite_total_count ( | |
829 | struct cgraph_edge *edge) const | |
830 | { | |
831 | inline_stack stack; | |
832 | stack.safe_push (std::make_pair (edge->callee->decl, 0)); | |
833 | get_inline_stack (gimple_location (edge->call_stmt), &stack); | |
834 | ||
835 | function_instance *s = get_function_instance_by_inline_stack (stack); | |
836 | if (s == NULL | |
837 | || afdo_string_table->get_index (IDENTIFIER_POINTER ( | |
838 | DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) | |
839 | return 0; | |
1500c66f FY |
840 | |
841 | return s->total_count (); | |
be3c16c4 DC |
842 | } |
843 | ||
844 | /* Read AutoFDO profile and returns TRUE on success. */ | |
845 | ||
846 | /* source profile format: | |
847 | ||
848 | GCOV_TAG_AFDO_FUNCTION: 4 bytes | |
849 | LENGTH: 4 bytes | |
850 | NUM_FUNCTIONS: 4 bytes | |
851 | FUNCTION_INSTANCE_1 | |
852 | FUNCTION_INSTANCE_2 | |
853 | ... | |
854 | FUNCTION_INSTANCE_N. */ | |
855 | ||
856 | bool | |
857 | autofdo_source_profile::read () | |
858 | { | |
859 | if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) | |
860 | { | |
64a5912c | 861 | inform (UNKNOWN_LOCATION, "Not expected TAG."); |
be3c16c4 DC |
862 | return false; |
863 | } | |
864 | ||
865 | /* Skip the length of the section. */ | |
866 | gcov_read_unsigned (); | |
867 | ||
868 | /* Read in the function/callsite profile, and store it in local | |
869 | data structure. */ | |
870 | unsigned function_num = gcov_read_unsigned (); | |
871 | for (unsigned i = 0; i < function_num; i++) | |
872 | { | |
873 | function_instance::function_instance_stack stack; | |
874 | function_instance *s = function_instance::read_function_instance ( | |
875 | &stack, gcov_read_counter ()); | |
be3c16c4 DC |
876 | map_[s->name ()] = s; |
877 | } | |
878 | return true; | |
879 | } | |
880 | ||
881 | /* Return the function_instance in the profile that correspond to the | |
882 | inline STACK. */ | |
883 | ||
884 | function_instance * | |
885 | autofdo_source_profile::get_function_instance_by_inline_stack ( | |
886 | const inline_stack &stack) const | |
887 | { | |
888 | name_function_instance_map::const_iterator iter = map_.find ( | |
889 | afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); | |
890 | if (iter == map_.end()) | |
891 | return NULL; | |
892 | function_instance *s = iter->second; | |
893 | for (unsigned i = stack.length() - 1; i > 0; i--) | |
894 | { | |
895 | s = s->get_function_instance_by_decl ( | |
896 | stack[i].second, stack[i - 1].first); | |
897 | if (s == NULL) | |
898 | return NULL; | |
899 | } | |
900 | return s; | |
901 | } | |
902 | ||
903 | /* Module profile is only used by LIPO. Here we simply ignore it. */ | |
904 | ||
905 | static void | |
906 | fake_read_autofdo_module_profile () | |
907 | { | |
908 | /* Read in the module info. */ | |
909 | gcov_read_unsigned (); | |
910 | ||
911 | /* Skip the length of the section. */ | |
912 | gcov_read_unsigned (); | |
913 | ||
914 | /* Read in the file name table. */ | |
915 | unsigned total_module_num = gcov_read_unsigned (); | |
916 | gcc_assert (total_module_num == 0); | |
917 | } | |
918 | ||
919 | /* Read data from profile data file. */ | |
920 | ||
921 | static void | |
922 | read_profile (void) | |
923 | { | |
924 | if (gcov_open (auto_profile_file, 1) == 0) | |
1bba63a7 | 925 | { |
90aaff2c | 926 | error ("cannot open profile file %s", auto_profile_file); |
1bba63a7 AK |
927 | return; |
928 | } | |
be3c16c4 DC |
929 | |
930 | if (gcov_read_unsigned () != GCOV_DATA_MAGIC) | |
1bba63a7 | 931 | { |
90aaff2c | 932 | error ("AutoFDO profile magic number does not match"); |
1bba63a7 AK |
933 | return; |
934 | } | |
be3c16c4 DC |
935 | |
936 | /* Skip the version number. */ | |
937 | unsigned version = gcov_read_unsigned (); | |
938 | if (version != AUTO_PROFILE_VERSION) | |
1bba63a7 | 939 | { |
23691ddd | 940 | error ("AutoFDO profile version %u does not match %u", |
1bba63a7 AK |
941 | version, AUTO_PROFILE_VERSION); |
942 | return; | |
943 | } | |
be3c16c4 DC |
944 | |
945 | /* Skip the empty integer. */ | |
946 | gcov_read_unsigned (); | |
947 | ||
948 | /* string_table. */ | |
949 | afdo_string_table = new string_table (); | |
950 | if (!afdo_string_table->read()) | |
1bba63a7 | 951 | { |
90aaff2c | 952 | error ("cannot read string table from %s", auto_profile_file); |
1bba63a7 AK |
953 | return; |
954 | } | |
be3c16c4 DC |
955 | |
956 | /* autofdo_source_profile. */ | |
957 | afdo_source_profile = autofdo_source_profile::create (); | |
958 | if (afdo_source_profile == NULL) | |
1bba63a7 | 959 | { |
90aaff2c | 960 | error ("cannot read function profile from %s", auto_profile_file); |
1bba63a7 AK |
961 | return; |
962 | } | |
be3c16c4 DC |
963 | |
964 | /* autofdo_module_profile. */ | |
965 | fake_read_autofdo_module_profile (); | |
be3c16c4 DC |
966 | } |
967 | ||
968 | /* From AutoFDO profiles, find values inside STMT for that we want to measure | |
969 | histograms for indirect-call optimization. | |
970 | ||
971 | This function is actually served for 2 purposes: | |
1500c66f FY |
972 | * before annotation, we need to mark histogram, promote and inline |
973 | * after annotation, we just need to mark, and let follow-up logic to | |
974 | decide if it needs to promote and inline. */ | |
be3c16c4 | 975 | |
ba125745 | 976 | static bool |
be3c16c4 DC |
977 | afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, |
978 | bool transform) | |
979 | { | |
355fe088 | 980 | gimple *gs = gsi_stmt (*gsi); |
be3c16c4 DC |
981 | tree callee; |
982 | ||
538dd0b7 | 983 | if (map.size () == 0) |
ba125745 | 984 | return false; |
538dd0b7 | 985 | gcall *stmt = dyn_cast <gcall *> (gs); |
7f3477fb BC |
986 | if (!stmt |
987 | || gimple_call_internal_p (stmt) | |
988 | || gimple_call_fndecl (stmt) != NULL_TREE) | |
ba125745 | 989 | return false; |
be3c16c4 | 990 | |
be3c16c4 DC |
991 | gcov_type total = 0; |
992 | icall_target_map::const_iterator max_iter = map.end (); | |
993 | ||
994 | for (icall_target_map::const_iterator iter = map.begin (); | |
995 | iter != map.end (); ++iter) | |
996 | { | |
997 | total += iter->second; | |
998 | if (max_iter == map.end () || max_iter->second < iter->second) | |
999 | max_iter = iter; | |
1000 | } | |
4469188c BC |
1001 | struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( |
1002 | get_identifier (afdo_string_table->get_name (max_iter->first))); | |
1003 | if (direct_call == NULL || !direct_call->profile_id) | |
ba125745 | 1004 | return false; |
be3c16c4 | 1005 | |
4469188c BC |
1006 | callee = gimple_call_fn (stmt); |
1007 | ||
1008 | histogram_value hist = gimple_alloc_histogram_value ( | |
1009 | cfun, HIST_TYPE_INDIR_CALL, stmt, callee); | |
285aa689 | 1010 | hist->n_counters = 4; |
4469188c BC |
1011 | hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); |
1012 | gimple_add_histogram_value (cfun, stmt, hist); | |
1013 | ||
ba125745 | 1014 | /* Total counter */ |
285aa689 | 1015 | hist->hvalue.counters[0] = total; |
ba125745 | 1016 | /* Number of value/counter pairs */ |
285aa689 | 1017 | hist->hvalue.counters[1] = 1; |
ba125745 | 1018 | /* Value */ |
285aa689 | 1019 | hist->hvalue.counters[2] = direct_call->profile_id; |
ba125745 | 1020 | /* Counter */ |
285aa689 | 1021 | hist->hvalue.counters[3] = max_iter->second; |
be3c16c4 DC |
1022 | |
1023 | if (!transform) | |
ba125745 ER |
1024 | return false; |
1025 | ||
1026 | cgraph_node* current_function_node = cgraph_node::get (current_function_decl); | |
1027 | ||
1028 | /* If the direct call is a recursive call, don't promote it since | |
1029 | we are not set up to inline recursive calls at this stage. */ | |
1030 | if (direct_call == current_function_node) | |
1031 | return false; | |
be3c16c4 DC |
1032 | |
1033 | struct cgraph_edge *indirect_edge | |
ba125745 | 1034 | = current_function_node->get_edge (stmt); |
be3c16c4 | 1035 | |
0ea221ef AK |
1036 | if (dump_file) |
1037 | { | |
1038 | fprintf (dump_file, "Indirect call -> direct call "); | |
1039 | print_generic_expr (dump_file, callee, TDF_SLIM); | |
1040 | fprintf (dump_file, " => "); | |
1041 | print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); | |
1042 | } | |
1043 | ||
73136074 | 1044 | if (direct_call == NULL) |
0ea221ef AK |
1045 | { |
1046 | if (dump_file) | |
1047 | fprintf (dump_file, " not transforming\n"); | |
ba125745 | 1048 | return false; |
0ea221ef | 1049 | } |
be3c16c4 | 1050 | if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) |
0ea221ef AK |
1051 | { |
1052 | if (dump_file) | |
1053 | fprintf (dump_file, " no declaration\n"); | |
ba125745 | 1054 | return false; |
0ea221ef AK |
1055 | } |
1056 | ||
1057 | if (dump_file) | |
1058 | { | |
1059 | fprintf (dump_file, " transformation on insn "); | |
1060 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); | |
1061 | fprintf (dump_file, "\n"); | |
1062 | } | |
1063 | ||
3995f3a2 | 1064 | /* FIXME: Count should be initialized. */ |
be3c16c4 | 1065 | struct cgraph_edge *new_edge |
3995f3a2 | 1066 | = indirect_edge->make_speculative (direct_call, |
1bad9c18 | 1067 | profile_count::uninitialized ()); |
27c5a177 | 1068 | cgraph_edge::redirect_call_stmt_to_callee (new_edge); |
be3c16c4 DC |
1069 | gimple_remove_histogram_value (cfun, stmt, hist); |
1070 | inline_call (new_edge, true, NULL, NULL, false); | |
ba125745 | 1071 | return true; |
be3c16c4 DC |
1072 | } |
1073 | ||
1074 | /* From AutoFDO profiles, find values inside STMT for that we want to measure | |
1075 | histograms and adds them to list VALUES. */ | |
1076 | ||
ba125745 | 1077 | static bool |
be3c16c4 DC |
1078 | afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, |
1079 | bool transform) | |
1080 | { | |
ba125745 | 1081 | return afdo_indirect_call (gsi, map, transform); |
be3c16c4 DC |
1082 | } |
1083 | ||
1084 | typedef std::set<basic_block> bb_set; | |
1085 | typedef std::set<edge> edge_set; | |
1086 | ||
1087 | static bool | |
1088 | is_bb_annotated (const basic_block bb, const bb_set &annotated) | |
1089 | { | |
1090 | return annotated.find (bb) != annotated.end (); | |
1091 | } | |
1092 | ||
1093 | static void | |
1094 | set_bb_annotated (basic_block bb, bb_set *annotated) | |
1095 | { | |
1096 | annotated->insert (bb); | |
1097 | } | |
1098 | ||
be3c16c4 | 1099 | /* For a given BB, set its execution count. Attach value profile if a stmt |
1500c66f | 1100 | is not in PROMOTED, because we only want to promote an indirect call once. |
be3c16c4 DC |
1101 | Return TRUE if BB is annotated. */ |
1102 | ||
1103 | static bool | |
1104 | afdo_set_bb_count (basic_block bb, const stmt_set &promoted) | |
1105 | { | |
1106 | gimple_stmt_iterator gsi; | |
1107 | edge e; | |
1108 | edge_iterator ei; | |
1109 | gcov_type max_count = 0; | |
1110 | bool has_annotated = false; | |
1111 | ||
1112 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1113 | { | |
1114 | count_info info; | |
355fe088 | 1115 | gimple *stmt = gsi_stmt (gsi); |
be3c16c4 DC |
1116 | if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) |
1117 | continue; | |
1118 | if (afdo_source_profile->get_count_info (stmt, &info)) | |
1119 | { | |
1120 | if (info.count > max_count) | |
1121 | max_count = info.count; | |
1122 | has_annotated = true; | |
1123 | if (info.targets.size () > 0 | |
1124 | && promoted.find (stmt) == promoted.end ()) | |
1125 | afdo_vpt (&gsi, info.targets, false); | |
1126 | } | |
1127 | } | |
1128 | ||
1129 | if (!has_annotated) | |
1130 | return false; | |
1131 | ||
1132 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1133 | afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); | |
538dd0b7 DM |
1134 | for (gphi_iterator gpi = gsi_start_phis (bb); |
1135 | !gsi_end_p (gpi); | |
1136 | gsi_next (&gpi)) | |
be3c16c4 | 1137 | { |
538dd0b7 | 1138 | gphi *phi = gpi.phi (); |
be3c16c4 DC |
1139 | size_t i; |
1140 | for (i = 0; i < gimple_phi_num_args (phi); i++) | |
1141 | afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); | |
1142 | } | |
1143 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1144 | afdo_source_profile->mark_annotated (e->goto_locus); | |
1145 | ||
c69f704c | 1146 | bb->count = profile_count::from_gcov_type (max_count).afdo (); |
be3c16c4 DC |
1147 | return true; |
1148 | } | |
1149 | ||
1150 | /* BB1 and BB2 are in an equivalent class iff: | |
1151 | 1. BB1 dominates BB2. | |
1152 | 2. BB2 post-dominates BB1. | |
1153 | 3. BB1 and BB2 are in the same loop nest. | |
1154 | This function finds the equivalent class for each basic block, and | |
1155 | stores a pointer to the first BB in its equivalent class. Meanwhile, | |
1156 | set bb counts for the same equivalent class to be idenical. Update | |
1157 | ANNOTATED_BB for the first BB in its equivalent class. */ | |
1158 | ||
1159 | static void | |
1160 | afdo_find_equiv_class (bb_set *annotated_bb) | |
1161 | { | |
1162 | basic_block bb; | |
1163 | ||
1164 | FOR_ALL_BB_FN (bb, cfun) | |
1165 | bb->aux = NULL; | |
1166 | ||
1167 | FOR_ALL_BB_FN (bb, cfun) | |
1168 | { | |
be3c16c4 DC |
1169 | if (bb->aux != NULL) |
1170 | continue; | |
1171 | bb->aux = bb; | |
4f899c42 | 1172 | for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb)) |
21c0a521 DM |
1173 | if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) |
1174 | && bb1->loop_father == bb->loop_father) | |
1175 | { | |
1176 | bb1->aux = bb; | |
1177 | if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) | |
1178 | { | |
1179 | bb->count = bb1->count; | |
1180 | set_bb_annotated (bb, annotated_bb); | |
1181 | } | |
1182 | } | |
4f899c42 TS |
1183 | |
1184 | for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb)) | |
21c0a521 DM |
1185 | if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) |
1186 | && bb1->loop_father == bb->loop_father) | |
1187 | { | |
1188 | bb1->aux = bb; | |
1189 | if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) | |
1190 | { | |
1191 | bb->count = bb1->count; | |
1192 | set_bb_annotated (bb, annotated_bb); | |
1193 | } | |
1194 | } | |
be3c16c4 DC |
1195 | } |
1196 | } | |
1197 | ||
1198 | /* If a basic block's count is known, and only one of its in/out edges' count | |
1199 | is unknown, its count can be calculated. Meanwhile, if all of the in/out | |
1200 | edges' counts are known, then the basic block's unknown count can also be | |
3d9e6767 ER |
1201 | calculated. Also, if a block has a single predecessor or successor, the block's |
1202 | count can be propagated to that predecessor or successor. | |
be3c16c4 | 1203 | IS_SUCC is true if out edges of a basic blocks are examined. |
47b4c53f | 1204 | Update ANNOTATED_BB accordingly. |
be3c16c4 DC |
1205 | Return TRUE if any basic block/edge count is changed. */ |
1206 | ||
1207 | static bool | |
47b4c53f | 1208 | afdo_propagate_edge (bool is_succ, bb_set *annotated_bb) |
be3c16c4 DC |
1209 | { |
1210 | basic_block bb; | |
1211 | bool changed = false; | |
1212 | ||
1213 | FOR_EACH_BB_FN (bb, cfun) | |
1214 | { | |
1215 | edge e, unknown_edge = NULL; | |
1216 | edge_iterator ei; | |
1217 | int num_unknown_edge = 0; | |
3d9e6767 | 1218 | int num_edge = 0; |
c69f704c | 1219 | profile_count total_known_count = profile_count::zero ().afdo (); |
be3c16c4 DC |
1220 | |
1221 | FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) | |
47b4c53f BC |
1222 | { |
1223 | gcc_assert (AFDO_EINFO (e) != NULL); | |
1224 | if (! AFDO_EINFO (e)->is_annotated ()) | |
1225 | num_unknown_edge++, unknown_edge = e; | |
1226 | else | |
1227 | total_known_count += AFDO_EINFO (e)->get_count (); | |
3d9e6767 | 1228 | num_edge++; |
47b4c53f | 1229 | } |
be3c16c4 | 1230 | |
47b4c53f BC |
1231 | /* Be careful not to annotate block with no successor in special cases. */ |
1232 | if (num_unknown_edge == 0 && total_known_count > bb->count) | |
be3c16c4 | 1233 | { |
47b4c53f BC |
1234 | bb->count = total_known_count; |
1235 | if (!is_bb_annotated (bb, *annotated_bb)) | |
1236 | set_bb_annotated (bb, annotated_bb); | |
1237 | changed = true; | |
be3c16c4 DC |
1238 | } |
1239 | else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) | |
1240 | { | |
47b4c53f | 1241 | if (bb->count > total_known_count) |
3d9e6767 ER |
1242 | { |
1243 | profile_count new_count = bb->count - total_known_count; | |
1244 | AFDO_EINFO(unknown_edge)->set_count(new_count); | |
1245 | if (num_edge == 1) | |
1246 | { | |
1247 | basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : unknown_edge->src; | |
1248 | if (new_count > succ_or_pred_bb->count) | |
1249 | { | |
1250 | succ_or_pred_bb->count = new_count; | |
1251 | if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb)) | |
1252 | set_bb_annotated (succ_or_pred_bb, annotated_bb); | |
1253 | } | |
1254 | } | |
1255 | } | |
47b4c53f BC |
1256 | else |
1257 | AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ()); | |
1258 | AFDO_EINFO (unknown_edge)->set_annotated (); | |
1259 | changed = true; | |
be3c16c4 DC |
1260 | } |
1261 | } | |
1262 | return changed; | |
1263 | } | |
1264 | ||
1265 | /* Special propagation for circuit expressions. Because GCC translates | |
1266 | control flow into data flow for circuit expressions. E.g. | |
1267 | BB1: | |
1268 | if (a && b) | |
1269 | BB2 | |
1270 | else | |
1271 | BB3 | |
1272 | ||
1273 | will be translated into: | |
1274 | ||
1275 | BB1: | |
1276 | if (a) | |
1277 | goto BB.t1 | |
1278 | else | |
1279 | goto BB.t3 | |
1280 | BB.t1: | |
1281 | if (b) | |
1282 | goto BB.t2 | |
1283 | else | |
1284 | goto BB.t3 | |
1285 | BB.t2: | |
1286 | goto BB.t3 | |
1287 | BB.t3: | |
1288 | tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) | |
1289 | if (tmp) | |
1290 | goto BB2 | |
1291 | else | |
1292 | goto BB3 | |
1293 | ||
1294 | In this case, we need to propagate through PHI to determine the edge | |
47b4c53f | 1295 | count of BB1->BB.t1, BB.t1->BB.t2. */ |
be3c16c4 DC |
1296 | |
1297 | static void | |
47b4c53f | 1298 | afdo_propagate_circuit (const bb_set &annotated_bb) |
be3c16c4 DC |
1299 | { |
1300 | basic_block bb; | |
1301 | FOR_ALL_BB_FN (bb, cfun) | |
1302 | { | |
355fe088 | 1303 | gimple *def_stmt; |
be3c16c4 | 1304 | tree cmp_rhs, cmp_lhs; |
355fe088 | 1305 | gimple *cmp_stmt = last_stmt (bb); |
be3c16c4 DC |
1306 | edge e; |
1307 | edge_iterator ei; | |
1308 | ||
1309 | if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) | |
1310 | continue; | |
1311 | cmp_rhs = gimple_cond_rhs (cmp_stmt); | |
1312 | cmp_lhs = gimple_cond_lhs (cmp_stmt); | |
1313 | if (!TREE_CONSTANT (cmp_rhs) | |
1314 | || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) | |
1315 | continue; | |
1316 | if (TREE_CODE (cmp_lhs) != SSA_NAME) | |
1317 | continue; | |
1318 | if (!is_bb_annotated (bb, annotated_bb)) | |
1319 | continue; | |
538dd0b7 DM |
1320 | def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); |
1321 | while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN | |
1322 | && gimple_assign_single_p (def_stmt) | |
1323 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) | |
1324 | def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)); | |
1325 | if (!def_stmt) | |
1326 | continue; | |
1327 | gphi *phi_stmt = dyn_cast <gphi *> (def_stmt); | |
1328 | if (!phi_stmt) | |
be3c16c4 DC |
1329 | continue; |
1330 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1331 | { | |
1332 | unsigned i, total = 0; | |
1333 | edge only_one; | |
1334 | bool check_value_one = (((integer_onep (cmp_rhs)) | |
1335 | ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) | |
1336 | ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); | |
47b4c53f | 1337 | if (! AFDO_EINFO (e)->is_annotated ()) |
be3c16c4 DC |
1338 | continue; |
1339 | for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) | |
1340 | { | |
1341 | tree val = gimple_phi_arg_def (phi_stmt, i); | |
1342 | edge ep = gimple_phi_arg_edge (phi_stmt, i); | |
1343 | ||
1344 | if (!TREE_CONSTANT (val) | |
1345 | || !(integer_zerop (val) || integer_onep (val))) | |
1346 | continue; | |
1347 | if (check_value_one ^ integer_onep (val)) | |
1348 | continue; | |
1349 | total++; | |
1350 | only_one = ep; | |
47b4c53f BC |
1351 | if (! (AFDO_EINFO (e)->get_count ()).nonzero_p () |
1352 | && ! AFDO_EINFO (ep)->is_annotated ()) | |
1353 | { | |
1354 | AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ()); | |
1355 | AFDO_EINFO (ep)->set_annotated (); | |
1356 | } | |
1357 | } | |
1358 | if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ()) | |
1359 | { | |
1360 | AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ()); | |
1361 | AFDO_EINFO (only_one)->set_annotated (); | |
1362 | } | |
be3c16c4 DC |
1363 | } |
1364 | } | |
1365 | } | |
1366 | ||
1367 | /* Propagate the basic block count and edge count on the control flow | |
1368 | graph. We do the propagation iteratively until stablize. */ | |
1369 | ||
1370 | static void | |
47b4c53f | 1371 | afdo_propagate (bb_set *annotated_bb) |
be3c16c4 DC |
1372 | { |
1373 | basic_block bb; | |
1374 | bool changed = true; | |
1375 | int i = 0; | |
1376 | ||
1377 | FOR_ALL_BB_FN (bb, cfun) | |
1378 | { | |
1379 | bb->count = ((basic_block)bb->aux)->count; | |
d65adabb | 1380 | if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb)) |
be3c16c4 DC |
1381 | set_bb_annotated (bb, annotated_bb); |
1382 | } | |
1383 | ||
1384 | while (changed && i++ < 10) | |
1385 | { | |
1386 | changed = false; | |
1387 | ||
47b4c53f | 1388 | if (afdo_propagate_edge (true, annotated_bb)) |
be3c16c4 | 1389 | changed = true; |
47b4c53f | 1390 | if (afdo_propagate_edge (false, annotated_bb)) |
be3c16c4 | 1391 | changed = true; |
47b4c53f | 1392 | afdo_propagate_circuit (*annotated_bb); |
be3c16c4 DC |
1393 | } |
1394 | } | |
1395 | ||
1396 | /* Propagate counts on control flow graph and calculate branch | |
1397 | probabilities. */ | |
1398 | ||
1399 | static void | |
47b4c53f | 1400 | afdo_calculate_branch_prob (bb_set *annotated_bb) |
be3c16c4 | 1401 | { |
47b4c53f BC |
1402 | edge e; |
1403 | edge_iterator ei; | |
be3c16c4 | 1404 | basic_block bb; |
be3c16c4 DC |
1405 | |
1406 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
1407 | calculate_dominance_info (CDI_DOMINATORS); | |
1408 | loop_optimizer_init (0); | |
1409 | ||
47b4c53f BC |
1410 | FOR_ALL_BB_FN (bb, cfun) |
1411 | { | |
1412 | gcc_assert (bb->aux == NULL); | |
1413 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1414 | { | |
1415 | gcc_assert (e->aux == NULL); | |
1416 | e->aux = new edge_info (); | |
1417 | } | |
1418 | } | |
1419 | ||
be3c16c4 | 1420 | afdo_find_equiv_class (annotated_bb); |
47b4c53f | 1421 | afdo_propagate (annotated_bb); |
be3c16c4 DC |
1422 | |
1423 | FOR_EACH_BB_FN (bb, cfun) | |
1424 | { | |
be3c16c4 | 1425 | int num_unknown_succ = 0; |
1dc7836c | 1426 | profile_count total_count = profile_count::zero ().afdo (); |
be3c16c4 DC |
1427 | |
1428 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1429 | { | |
47b4c53f BC |
1430 | gcc_assert (AFDO_EINFO (e) != NULL); |
1431 | if (! AFDO_EINFO (e)->is_annotated ()) | |
be3c16c4 DC |
1432 | num_unknown_succ++; |
1433 | else | |
47b4c53f | 1434 | total_count += AFDO_EINFO (e)->get_count (); |
be3c16c4 | 1435 | } |
3995f3a2 | 1436 | if (num_unknown_succ == 0 && total_count > profile_count::zero ()) |
be3c16c4 | 1437 | { |
47b4c53f BC |
1438 | FOR_EACH_EDGE (e, ei, bb->succs) |
1439 | e->probability | |
1440 | = AFDO_EINFO (e)->get_count ().probability_in (total_count); | |
be3c16c4 DC |
1441 | } |
1442 | } | |
1443 | FOR_ALL_BB_FN (bb, cfun) | |
47b4c53f BC |
1444 | { |
1445 | bb->aux = NULL; | |
1446 | FOR_EACH_EDGE (e, ei, bb->succs) | |
1447 | if (AFDO_EINFO (e) != NULL) | |
1448 | { | |
1449 | delete AFDO_EINFO (e); | |
1450 | e->aux = NULL; | |
1451 | } | |
1452 | } | |
be3c16c4 DC |
1453 | |
1454 | loop_optimizer_finalize (); | |
1455 | free_dominance_info (CDI_DOMINATORS); | |
1456 | free_dominance_info (CDI_POST_DOMINATORS); | |
1457 | } | |
1458 | ||
1459 | /* Perform value profile transformation using AutoFDO profile. Add the | |
1460 | promoted stmts to PROMOTED_STMTS. Return TRUE if there is any | |
1461 | indirect call promoted. */ | |
1462 | ||
1463 | static bool | |
1464 | afdo_vpt_for_early_inline (stmt_set *promoted_stmts) | |
1465 | { | |
1466 | basic_block bb; | |
1467 | if (afdo_source_profile->get_function_instance_by_decl ( | |
1468 | current_function_decl) == NULL) | |
1469 | return false; | |
1470 | ||
0bceb671 | 1471 | compute_fn_summary (cgraph_node::get (current_function_decl), true); |
be3c16c4 DC |
1472 | |
1473 | bool has_vpt = false; | |
1474 | FOR_EACH_BB_FN (bb, cfun) | |
1475 | { | |
1476 | if (!has_indirect_call (bb)) | |
1477 | continue; | |
1478 | gimple_stmt_iterator gsi; | |
1479 | ||
1480 | gcov_type bb_count = 0; | |
1481 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1482 | { | |
1483 | count_info info; | |
355fe088 | 1484 | gimple *stmt = gsi_stmt (gsi); |
be3c16c4 DC |
1485 | if (afdo_source_profile->get_count_info (stmt, &info)) |
1486 | bb_count = MAX (bb_count, info.count); | |
1487 | } | |
1488 | ||
1489 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1490 | { | |
538dd0b7 | 1491 | gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi)); |
be3c16c4 DC |
1492 | /* IC_promotion and early_inline_2 is done in multiple iterations. |
1493 | No need to promoted the stmt if its in promoted_stmts (means | |
1494 | it is already been promoted in the previous iterations). */ | |
538dd0b7 | 1495 | if ((!stmt) || gimple_call_fn (stmt) == NULL |
be3c16c4 DC |
1496 | || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL |
1497 | || promoted_stmts->find (stmt) != promoted_stmts->end ()) | |
1498 | continue; | |
1499 | ||
1500 | count_info info; | |
1501 | afdo_source_profile->get_count_info (stmt, &info); | |
1502 | info.count = bb_count; | |
1503 | if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) | |
1504 | { | |
1505 | /* Promote the indirect call and update the promoted_stmts. */ | |
1506 | promoted_stmts->insert (stmt); | |
ba125745 ER |
1507 | if (afdo_vpt (&gsi, info.targets, true)) |
1508 | has_vpt = true; | |
be3c16c4 DC |
1509 | } |
1510 | } | |
1511 | } | |
1500c66f | 1512 | |
be3c16c4 DC |
1513 | if (has_vpt) |
1514 | { | |
664306b9 RB |
1515 | unsigned todo = optimize_inline_calls (current_function_decl); |
1516 | if (todo & TODO_update_ssa_any) | |
1517 | update_ssa (TODO_update_ssa); | |
be3c16c4 DC |
1518 | return true; |
1519 | } | |
1500c66f FY |
1520 | |
1521 | return false; | |
be3c16c4 DC |
1522 | } |
1523 | ||
1524 | /* Annotate auto profile to the control flow graph. Do not annotate value | |
1525 | profile for stmts in PROMOTED_STMTS. */ | |
1526 | ||
1527 | static void | |
1528 | afdo_annotate_cfg (const stmt_set &promoted_stmts) | |
1529 | { | |
1530 | basic_block bb; | |
1531 | bb_set annotated_bb; | |
be3c16c4 DC |
1532 | const function_instance *s |
1533 | = afdo_source_profile->get_function_instance_by_decl ( | |
1534 | current_function_decl); | |
1535 | ||
1536 | if (s == NULL) | |
1537 | return; | |
3995f3a2 | 1538 | cgraph_node::get (current_function_decl)->count |
c69f704c | 1539 | = profile_count::from_gcov_type (s->head_count ()).afdo (); |
3995f3a2 | 1540 | ENTRY_BLOCK_PTR_FOR_FN (cfun)->count |
c69f704c | 1541 | = profile_count::from_gcov_type (s->head_count ()).afdo (); |
1dc7836c | 1542 | EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo (); |
3995f3a2 | 1543 | profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
be3c16c4 DC |
1544 | |
1545 | FOR_EACH_BB_FN (bb, cfun) | |
47b4c53f BC |
1546 | { |
1547 | /* As autoFDO uses sampling approach, we have to assume that all | |
1548 | counters are zero when not seen by autoFDO. */ | |
1549 | bb->count = profile_count::zero ().afdo (); | |
1550 | if (afdo_set_bb_count (bb, promoted_stmts)) | |
1551 | set_bb_annotated (bb, &annotated_bb); | |
1552 | if (bb->count > max_count) | |
1553 | max_count = bb->count; | |
1554 | } | |
be3c16c4 DC |
1555 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count |
1556 | > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) | |
1557 | { | |
1558 | ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count | |
1559 | = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; | |
1560 | set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); | |
1561 | } | |
1562 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count | |
1563 | > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) | |
1564 | { | |
1565 | EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count | |
1566 | = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; | |
1567 | set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); | |
1568 | } | |
1569 | afdo_source_profile->mark_annotated ( | |
1570 | DECL_SOURCE_LOCATION (current_function_decl)); | |
1571 | afdo_source_profile->mark_annotated (cfun->function_start_locus); | |
1572 | afdo_source_profile->mark_annotated (cfun->function_end_locus); | |
3995f3a2 | 1573 | if (max_count > profile_count::zero ()) |
be3c16c4 | 1574 | { |
47b4c53f BC |
1575 | /* Calculate, propagate count and probability information on CFG. */ |
1576 | afdo_calculate_branch_prob (&annotated_bb); | |
be3c16c4 | 1577 | } |
b30bde10 BC |
1578 | update_max_bb_count (); |
1579 | profile_status_for_fn (cfun) = PROFILE_READ; | |
be3c16c4 | 1580 | if (flag_value_profile_transformations) |
29ca9bfb DC |
1581 | { |
1582 | gimple_value_profile_transformations (); | |
1583 | free_dominance_info (CDI_DOMINATORS); | |
1584 | free_dominance_info (CDI_POST_DOMINATORS); | |
1585 | update_ssa (TODO_update_ssa); | |
1586 | } | |
be3c16c4 DC |
1587 | } |
1588 | ||
1589 | /* Wrapper function to invoke early inliner. */ | |
1590 | ||
1591 | static void | |
1592 | early_inline () | |
1593 | { | |
0bceb671 | 1594 | compute_fn_summary (cgraph_node::get (current_function_decl), true); |
be3c16c4 DC |
1595 | unsigned todo = early_inliner (cfun); |
1596 | if (todo & TODO_update_ssa_any) | |
1597 | update_ssa (TODO_update_ssa); | |
1598 | } | |
1599 | ||
1600 | /* Use AutoFDO profile to annoate the control flow graph. | |
1601 | Return the todo flag. */ | |
1602 | ||
1603 | static unsigned int | |
1604 | auto_profile (void) | |
1605 | { | |
1606 | struct cgraph_node *node; | |
1607 | ||
1608 | if (symtab->state == FINISHED) | |
1609 | return 0; | |
1610 | ||
1611 | init_node_map (true); | |
1612 | profile_info = autofdo::afdo_profile_info; | |
1613 | ||
1614 | FOR_EACH_FUNCTION (node) | |
1615 | { | |
1616 | if (!gimple_has_body_p (node->decl)) | |
1617 | continue; | |
1618 | ||
1619 | /* Don't profile functions produced for builtin stuff. */ | |
1620 | if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) | |
1621 | continue; | |
1622 | ||
1623 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); | |
1624 | ||
1625 | /* First do indirect call promotion and early inline to make the | |
1626 | IR match the profiled binary before actual annotation. | |
1627 | ||
1628 | This is needed because an indirect call might have been promoted | |
1629 | and inlined in the profiled binary. If we do not promote and | |
1630 | inline these indirect calls before annotation, the profile for | |
1631 | these promoted functions will be lost. | |
1632 | ||
1633 | e.g. foo() --indirect_call--> bar() | |
1634 | In profiled binary, the callsite is promoted and inlined, making | |
1635 | the profile look like: | |
1636 | ||
1637 | foo: { | |
1638 | loc_foo_1: count_1 | |
1639 | bar@loc_foo_2: { | |
1640 | loc_bar_1: count_2 | |
1641 | loc_bar_2: count_3 | |
1642 | } | |
1643 | } | |
1644 | ||
1645 | Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. | |
1646 | If we perform annotation on it, the profile inside bar@loc_foo2 | |
1647 | will be wasted. | |
1648 | ||
1649 | To avoid this, we promote loc_foo_2 and inline the promoted bar | |
1650 | function before annotation, so the profile inside bar@loc_foo2 | |
1651 | will be useful. */ | |
1652 | autofdo::stmt_set promoted_stmts; | |
c17975d8 | 1653 | for (int i = 0; i < 10; i++) |
be3c16c4 DC |
1654 | { |
1655 | if (!flag_value_profile_transformations | |
1656 | || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) | |
1657 | break; | |
1658 | early_inline (); | |
1659 | } | |
1660 | ||
1661 | early_inline (); | |
1662 | autofdo::afdo_annotate_cfg (promoted_stmts); | |
1663 | compute_function_frequency (); | |
be3c16c4 DC |
1664 | |
1665 | /* Local pure-const may imply need to fixup the cfg. */ | |
1666 | if (execute_fixup_cfg () & TODO_cleanup_cfg) | |
1667 | cleanup_tree_cfg (); | |
1668 | ||
1669 | free_dominance_info (CDI_DOMINATORS); | |
1670 | free_dominance_info (CDI_POST_DOMINATORS); | |
1671 | cgraph_edge::rebuild_edges (); | |
0bceb671 | 1672 | compute_fn_summary (cgraph_node::get (current_function_decl), true); |
be3c16c4 DC |
1673 | pop_cfun (); |
1674 | } | |
1675 | ||
1676 | return TODO_rebuild_cgraph_edges; | |
1677 | } | |
1678 | } /* namespace autofdo. */ | |
1679 | ||
1680 | /* Read the profile from the profile data file. */ | |
1681 | ||
1682 | void | |
1683 | read_autofdo_file (void) | |
1684 | { | |
1685 | if (auto_profile_file == NULL) | |
1686 | auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; | |
1687 | ||
7f3577f5 | 1688 | autofdo::afdo_profile_info = XNEW (gcov_summary); |
be3c16c4 DC |
1689 | autofdo::afdo_profile_info->runs = 1; |
1690 | autofdo::afdo_profile_info->sum_max = 0; | |
be3c16c4 DC |
1691 | |
1692 | /* Read the profile from the profile file. */ | |
1693 | autofdo::read_profile (); | |
1694 | } | |
1695 | ||
1696 | /* Free the resources. */ | |
1697 | ||
1698 | void | |
1699 | end_auto_profile (void) | |
1700 | { | |
1701 | delete autofdo::afdo_source_profile; | |
1702 | delete autofdo::afdo_string_table; | |
1703 | profile_info = NULL; | |
1704 | } | |
1705 | ||
1706 | /* Returns TRUE if EDGE is hot enough to be inlined early. */ | |
1707 | ||
1708 | bool | |
1709 | afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) | |
1710 | { | |
1711 | gcov_type count | |
1712 | = autofdo::afdo_source_profile->get_callsite_total_count (edge); | |
1500c66f | 1713 | |
be3c16c4 DC |
1714 | if (count > 0) |
1715 | { | |
1716 | bool is_hot; | |
1dc7836c | 1717 | profile_count pcount = profile_count::from_gcov_type (count).afdo (); |
512cc015 | 1718 | gcov_summary *saved_profile_info = profile_info; |
1500c66f | 1719 | /* At early inline stage, profile_info is not set yet. We need to |
be3c16c4 DC |
1720 | temporarily set it to afdo_profile_info to calculate hotness. */ |
1721 | profile_info = autofdo::afdo_profile_info; | |
1dc7836c | 1722 | is_hot = maybe_hot_count_p (NULL, pcount); |
be3c16c4 DC |
1723 | profile_info = saved_profile_info; |
1724 | return is_hot; | |
1725 | } | |
1500c66f FY |
1726 | |
1727 | return false; | |
be3c16c4 DC |
1728 | } |
1729 | ||
1730 | namespace | |
1731 | { | |
1732 | ||
1733 | const pass_data pass_data_ipa_auto_profile = { | |
1734 | SIMPLE_IPA_PASS, "afdo", /* name */ | |
1735 | OPTGROUP_NONE, /* optinfo_flags */ | |
1736 | TV_IPA_AUTOFDO, /* tv_id */ | |
1737 | 0, /* properties_required */ | |
1738 | 0, /* properties_provided */ | |
1739 | 0, /* properties_destroyed */ | |
1740 | 0, /* todo_flags_start */ | |
1741 | 0, /* todo_flags_finish */ | |
1742 | }; | |
1743 | ||
1744 | class pass_ipa_auto_profile : public simple_ipa_opt_pass | |
1745 | { | |
1746 | public: | |
1747 | pass_ipa_auto_profile (gcc::context *ctxt) | |
1748 | : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) | |
1749 | { | |
1750 | } | |
1751 | ||
1752 | /* opt_pass methods: */ | |
725793af DM |
1753 | bool |
1754 | gate (function *) final override | |
be3c16c4 DC |
1755 | { |
1756 | return flag_auto_profile; | |
1757 | } | |
725793af DM |
1758 | unsigned int |
1759 | execute (function *) final override | |
be3c16c4 DC |
1760 | { |
1761 | return autofdo::auto_profile (); | |
1762 | } | |
1763 | }; // class pass_ipa_auto_profile | |
1764 | ||
1765 | } // anon namespace | |
1766 | ||
1767 | simple_ipa_opt_pass * | |
1768 | make_pass_ipa_auto_profile (gcc::context *ctxt) | |
1769 | { | |
1770 | return new pass_ipa_auto_profile (ctxt); | |
1771 | } |