]>
Commit | Line | Data |
---|---|---|
fa99ab3d | 1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
3ddccd57 | 3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. |
fbd26352 | 4 | Copyright (C) 2001-2019 Free Software Foundation, Inc. |
fa99ab3d | 5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
8c4c00c1 | 10 | the Free Software Foundation; either version 3, or (at your option) |
fa99ab3d | 11 | any later version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
16 | License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
fa99ab3d | 21 | |
22 | /* This pass performs the tail duplication needed for superblock formation. | |
23 | For more information see: | |
24 | ||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |
27 | Parallelism 3 (2000) 1-25 | |
28 | ||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |
30 | probably a better job can be done by a special pass and we don't | |
31 | need to worry too much about the code size implications as the tail | |
32 | duplicates are crossjumped again if optimizations are not | |
33 | performed. */ | |
34 | ||
35 | ||
36 | #include "config.h" | |
37 | #include "system.h" | |
805e22b2 | 38 | #include "coretypes.h" |
9ef16211 | 39 | #include "backend.h" |
7c29e30e | 40 | #include "rtl.h" |
fa99ab3d | 41 | #include "tree.h" |
9ef16211 | 42 | #include "gimple.h" |
7c29e30e | 43 | #include "cfghooks.h" |
44 | #include "tree-pass.h" | |
886c1262 | 45 | #include "profile.h" |
94ea8568 | 46 | #include "cfganal.h" |
fa99ab3d | 47 | #include "params.h" |
dcf1a1ec | 48 | #include "gimple-iterator.h" |
073c1fd5 | 49 | #include "tree-cfg.h" |
69ee5dbb | 50 | #include "tree-ssa.h" |
3ddccd57 | 51 | #include "tree-inline.h" |
f889f544 | 52 | #include "cfgloop.h" |
2966eae4 | 53 | #include "fibonacci_heap.h" |
b0e3fe96 | 54 | #include "tracer.h" |
fa99ab3d | 55 | |
/* Forward declarations of the file-local helpers defined below.  */
static int count_insns (basic_block);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
static int find_trace (basic_block, basic_block *);
fa99ab3d | 61 | |
/* Minimal outgoing edge probability considered for superblock formation.
   Expressed in REG_BR_PROB_BASE units; set by tail_duplicate.  */
static int probability_cutoff;

/* Minimal ratio between the frequency of the best incoming edge and the
   frequency of its destination block for that edge to be followed
   backwards.  Expressed in REG_BR_PROB_BASE units; set by
   tail_duplicate.  */
static int branch_ratio_cutoff;

/* A bit BB->index is set if BB has already been seen, i.e. it is
   connected to some trace already.  */
static sbitmap bb_seen;
fa99ab3d | 69 | |
3ddccd57 | 70 | static inline void |
71 | mark_bb_seen (basic_block bb) | |
72 | { | |
2808fda8 | 73 | unsigned int size = SBITMAP_SIZE (bb_seen); |
3ddccd57 | 74 | |
75 | if ((unsigned int)bb->index >= size) | |
76 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); | |
77 | ||
08b7917c | 78 | bitmap_set_bit (bb_seen, bb->index); |
3ddccd57 | 79 | } |
80 | ||
81 | static inline bool | |
82 | bb_seen_p (basic_block bb) | |
83 | { | |
08b7917c | 84 | return bitmap_bit_p (bb_seen, bb->index); |
3ddccd57 | 85 | } |
fa99ab3d | 86 | |
7299020b | 87 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
b0e3fe96 | 88 | bool |
7ecb5bb2 | 89 | ignore_bb_p (const_basic_block bb) |
fa99ab3d | 90 | { |
4d2e5d52 | 91 | if (bb->index < NUM_FIXED_BLOCKS) |
fa99ab3d | 92 | return true; |
0bfd8d5c | 93 | if (optimize_bb_for_size_p (bb)) |
fa99ab3d | 94 | return true; |
09a1f31f | 95 | |
6c5c88f6 | 96 | if (gimple *g = last_stmt (CONST_CAST_BB (bb))) |
97 | { | |
98 | /* A transaction is a single entry multiple exit region. It | |
99 | must be duplicated in its entirety or not at all. */ | |
100 | if (gimple_code (g) == GIMPLE_TRANSACTION) | |
101 | return true; | |
102 | ||
103 | /* An IFN_UNIQUE call must be duplicated as part of its group, | |
104 | or not at all. */ | |
105 | if (is_gimple_call (g) | |
106 | && gimple_call_internal_p (g) | |
107 | && gimple_call_internal_unique_p (g)) | |
108 | return true; | |
109 | } | |
09a1f31f | 110 | |
fa99ab3d | 111 | return false; |
112 | } | |
113 | ||
114 | /* Return number of instructions in the block. */ | |
115 | ||
116 | static int | |
3ddccd57 | 117 | count_insns (basic_block bb) |
fa99ab3d | 118 | { |
75a70cf9 | 119 | gimple_stmt_iterator gsi; |
42acab1c | 120 | gimple *stmt; |
fa99ab3d | 121 | int n = 0; |
122 | ||
75a70cf9 | 123 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
3ddccd57 | 124 | { |
75a70cf9 | 125 | stmt = gsi_stmt (gsi); |
3ddccd57 | 126 | n += estimate_num_insns (stmt, &eni_size_weights); |
127 | } | |
fa99ab3d | 128 | return n; |
129 | } | |
130 | ||
131 | /* Return true if E1 is more frequent than E2. */ | |
132 | static bool | |
7ecb5bb2 | 133 | better_p (const_edge e1, const_edge e2) |
fa99ab3d | 134 | { |
8e4236fb | 135 | if ((e1->count () > e2->count ()) || (e1->count () < e2->count ())) |
ea5d3981 | 136 | return e1->count () > e2->count (); |
fa99ab3d | 137 | /* This is needed to avoid changes in the decision after |
138 | CFG is modified. */ | |
139 | if (e1->src != e2->src) | |
140 | return e1->src->index > e2->src->index; | |
141 | return e1->dest->index > e2->dest->index; | |
142 | } | |
143 | ||
144 | /* Return most frequent successor of basic block BB. */ | |
145 | ||
146 | static edge | |
60b8c5b3 | 147 | find_best_successor (basic_block bb) |
fa99ab3d | 148 | { |
149 | edge e; | |
150 | edge best = NULL; | |
cd665a06 | 151 | edge_iterator ei; |
fa99ab3d | 152 | |
cd665a06 | 153 | FOR_EACH_EDGE (e, ei, bb->succs) |
8e4236fb | 154 | { |
155 | if (!e->count ().initialized_p ()) | |
156 | return NULL; | |
157 | if (!best || better_p (e, best)) | |
158 | best = e; | |
159 | } | |
fa99ab3d | 160 | if (!best || ignore_bb_p (best->dest)) |
161 | return NULL; | |
e646d25a | 162 | if (!best->probability.initialized_p () |
163 | || best->probability.to_reg_br_prob_base () <= probability_cutoff) | |
fa99ab3d | 164 | return NULL; |
165 | return best; | |
166 | } | |
167 | ||
168 | /* Return most frequent predecessor of basic block BB. */ | |
169 | ||
170 | static edge | |
60b8c5b3 | 171 | find_best_predecessor (basic_block bb) |
fa99ab3d | 172 | { |
173 | edge e; | |
174 | edge best = NULL; | |
cd665a06 | 175 | edge_iterator ei; |
fa99ab3d | 176 | |
cd665a06 | 177 | FOR_EACH_EDGE (e, ei, bb->preds) |
8e4236fb | 178 | { |
179 | if (!e->count ().initialized_p ()) | |
180 | return NULL; | |
181 | if (!best || better_p (e, best)) | |
182 | best = e; | |
183 | } | |
fa99ab3d | 184 | if (!best || ignore_bb_p (best->src)) |
185 | return NULL; | |
8e4236fb | 186 | if (bb->count.initialized_p () |
187 | && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE | |
188 | < bb->count.to_frequency (cfun) * branch_ratio_cutoff)) | |
fa99ab3d | 189 | return NULL; |
190 | return best; | |
191 | } | |
192 | ||
193 | /* Find the trace using bb and record it in the TRACE array. | |
194 | Return number of basic blocks recorded. */ | |
195 | ||
196 | static int | |
60b8c5b3 | 197 | find_trace (basic_block bb, basic_block *trace) |
fa99ab3d | 198 | { |
199 | int i = 0; | |
200 | edge e; | |
201 | ||
450d042a | 202 | if (dump_file) |
205ce1aa | 203 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun)); |
fa99ab3d | 204 | |
205 | while ((e = find_best_predecessor (bb)) != NULL) | |
206 | { | |
207 | basic_block bb2 = e->src; | |
3ddccd57 | 208 | if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
fa99ab3d | 209 | || find_best_successor (bb2) != e) |
210 | break; | |
450d042a | 211 | if (dump_file) |
205ce1aa | 212 | fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); |
fa99ab3d | 213 | bb = bb2; |
214 | } | |
450d042a | 215 | if (dump_file) |
205ce1aa | 216 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun)); |
fa99ab3d | 217 | trace[i++] = bb; |
218 | ||
219 | /* Follow the trace in forward direction. */ | |
220 | while ((e = find_best_successor (bb)) != NULL) | |
221 | { | |
222 | bb = e->dest; | |
3ddccd57 | 223 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
fa99ab3d | 224 | || find_best_predecessor (bb) != e) |
225 | break; | |
450d042a | 226 | if (dump_file) |
205ce1aa | 227 | fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); |
fa99ab3d | 228 | trace[i++] = bb; |
229 | } | |
450d042a | 230 | if (dump_file) |
231 | fprintf (dump_file, "\n"); | |
fa99ab3d | 232 | return i; |
233 | } | |
234 | ||
b0e3fe96 | 235 | /* Duplicate block BB2, placing it after BB in the CFG. Return the |
236 | newly created block. */ | |
237 | basic_block | |
238 | transform_duplicate (basic_block bb, basic_block bb2) | |
239 | { | |
240 | edge e; | |
241 | basic_block copy; | |
242 | ||
243 | e = find_edge (bb, bb2); | |
244 | ||
245 | copy = duplicate_block (bb2, e, bb); | |
246 | flush_pending_stmts (e); | |
247 | ||
248 | add_phi_args_after_copy (©, 1, NULL); | |
249 | ||
250 | return (copy); | |
251 | } | |
252 | ||
fa99ab3d | 253 | /* Look for basic blocks in frequency order, construct traces and tail duplicate |
254 | if profitable. */ | |
255 | ||
35c67c83 | 256 | static bool |
60b8c5b3 | 257 | tail_duplicate (void) |
fa99ab3d | 258 | { |
2966eae4 | 259 | auto_vec<fibonacci_node<long, basic_block_def>*> blocks; |
260 | blocks.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
261 | ||
a28770e1 | 262 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
fe672ac0 | 263 | int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun)); |
fa99ab3d | 264 | int ninsns = 0, nduplicated = 0; |
265 | gcov_type weighted_insns = 0, traced_insns = 0; | |
2966eae4 | 266 | fibonacci_heap<long, basic_block_def> heap (LONG_MIN); |
fa99ab3d | 267 | gcov_type cover_insns; |
268 | int max_dup_insns; | |
269 | basic_block bb; | |
35c67c83 | 270 | bool changed = false; |
fa99ab3d | 271 | |
3ddccd57 | 272 | /* Create an oversized sbitmap to reduce the chance that we need to |
273 | resize it. */ | |
fe672ac0 | 274 | bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2); |
53c5d9d4 | 275 | bitmap_clear (bb_seen); |
3ddccd57 | 276 | initialize_original_copy_tables (); |
277 | ||
a74a34e6 | 278 | if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) |
fa99ab3d | 279 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
280 | else | |
281 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
282 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
283 | ||
284 | branch_ratio_cutoff = | |
285 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
286 | ||
fc00614f | 287 | FOR_EACH_BB_FN (bb, cfun) |
fa99ab3d | 288 | { |
289 | int n = count_insns (bb); | |
290 | if (!ignore_bb_p (bb)) | |
205ce1aa | 291 | blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); |
fa99ab3d | 292 | |
293 | counts [bb->index] = n; | |
294 | ninsns += n; | |
205ce1aa | 295 | weighted_insns += n * bb->count.to_frequency (cfun); |
fa99ab3d | 296 | } |
297 | ||
a74a34e6 | 298 | if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) |
fa99ab3d | 299 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
300 | else | |
301 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
302 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
303 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
304 | ||
305 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
2966eae4 | 306 | && !heap.empty ()) |
fa99ab3d | 307 | { |
2966eae4 | 308 | basic_block bb = heap.extract_min (); |
fa99ab3d | 309 | int n, pos; |
310 | ||
311 | if (!bb) | |
312 | break; | |
313 | ||
314 | blocks[bb->index] = NULL; | |
315 | ||
316 | if (ignore_bb_p (bb)) | |
317 | continue; | |
3ddccd57 | 318 | gcc_assert (!bb_seen_p (bb)); |
fa99ab3d | 319 | |
320 | n = find_trace (bb, trace); | |
321 | ||
322 | bb = trace[0]; | |
205ce1aa | 323 | traced_insns += bb->count.to_frequency (cfun) * counts [bb->index]; |
fa99ab3d | 324 | if (blocks[bb->index]) |
325 | { | |
2966eae4 | 326 | heap.delete_node (blocks[bb->index]); |
fa99ab3d | 327 | blocks[bb->index] = NULL; |
328 | } | |
329 | ||
330 | for (pos = 1; pos < n; pos++) | |
331 | { | |
332 | basic_block bb2 = trace[pos]; | |
333 | ||
334 | if (blocks[bb2->index]) | |
335 | { | |
2966eae4 | 336 | heap.delete_node (blocks[bb2->index]); |
fa99ab3d | 337 | blocks[bb2->index] = NULL; |
338 | } | |
205ce1aa | 339 | traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index]; |
cd665a06 | 340 | if (EDGE_COUNT (bb2->preds) > 1 |
f889f544 | 341 | && can_duplicate_block_p (bb2) |
342 | /* We have the tendency to duplicate the loop header | |
343 | of all do { } while loops. Do not do that - it is | |
344 | not profitable and it might create a loop with multiple | |
345 | entries or at least rotate the loop. */ | |
b3083327 | 346 | && bb2->loop_father->header != bb2) |
fa99ab3d | 347 | { |
3ddccd57 | 348 | nduplicated += counts [bb2->index]; |
b0e3fe96 | 349 | basic_block copy = transform_duplicate (bb, bb2); |
fa99ab3d | 350 | |
351 | /* Reconsider the original copy of block we've duplicated. | |
de132707 | 352 | Removing the most common predecessor may make it to be |
fa99ab3d | 353 | head. */ |
205ce1aa | 354 | blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2); |
fa99ab3d | 355 | |
450d042a | 356 | if (dump_file) |
357 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |
205ce1aa | 358 | bb2->index, copy->index, copy->count.to_frequency (cfun)); |
3ddccd57 | 359 | |
360 | bb2 = copy; | |
35c67c83 | 361 | changed = true; |
fa99ab3d | 362 | } |
3ddccd57 | 363 | mark_bb_seen (bb2); |
fa99ab3d | 364 | bb = bb2; |
365 | /* In case the trace became infrequent, stop duplicating. */ | |
366 | if (ignore_bb_p (bb)) | |
367 | break; | |
368 | } | |
450d042a | 369 | if (dump_file) |
370 | fprintf (dump_file, " covered now %.1f\n\n", | |
fa99ab3d | 371 | traced_insns * 100.0 / weighted_insns); |
372 | } | |
450d042a | 373 | if (dump_file) |
374 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
fa99ab3d | 375 | nduplicated * 100 / ninsns); |
376 | ||
3ddccd57 | 377 | free_original_copy_tables (); |
378 | sbitmap_free (bb_seen); | |
fa99ab3d | 379 | free (trace); |
380 | free (counts); | |
35c67c83 | 381 | |
382 | return changed; | |
fa99ab3d | 383 | } |
77fce4cd | 384 | \f |
cbe8bda8 | 385 | namespace { |
386 | ||
387 | const pass_data pass_data_tracer = | |
77fce4cd | 388 | { |
cbe8bda8 | 389 | GIMPLE_PASS, /* type */ |
390 | "tracer", /* name */ | |
391 | OPTGROUP_NONE, /* optinfo_flags */ | |
cbe8bda8 | 392 | TV_TRACER, /* tv_id */ |
393 | 0, /* properties_required */ | |
394 | 0, /* properties_provided */ | |
395 | 0, /* properties_destroyed */ | |
396 | 0, /* todo_flags_start */ | |
8b88439e | 397 | TODO_update_ssa, /* todo_flags_finish */ |
77fce4cd | 398 | }; |
cbe8bda8 | 399 | |
400 | class pass_tracer : public gimple_opt_pass | |
401 | { | |
402 | public: | |
9af5ce0c | 403 | pass_tracer (gcc::context *ctxt) |
404 | : gimple_opt_pass (pass_data_tracer, ctxt) | |
cbe8bda8 | 405 | {} |
406 | ||
407 | /* opt_pass methods: */ | |
31315c24 | 408 | virtual bool gate (function *) |
409 | { | |
410 | return (optimize > 0 && flag_tracer && flag_reorder_blocks); | |
411 | } | |
412 | ||
65b0537f | 413 | virtual unsigned int execute (function *); |
cbe8bda8 | 414 | |
415 | }; // class pass_tracer | |
416 | ||
65b0537f | 417 | unsigned int |
418 | pass_tracer::execute (function *fun) | |
419 | { | |
420 | bool changed; | |
421 | ||
422 | if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1) | |
423 | return 0; | |
424 | ||
425 | mark_dfs_back_edges (); | |
426 | if (dump_file) | |
427 | brief_dump_cfg (dump_file, dump_flags); | |
428 | ||
429 | /* Trace formation is done on the fly inside tail_duplicate */ | |
430 | changed = tail_duplicate (); | |
431 | if (changed) | |
432 | { | |
433 | free_dominance_info (CDI_DOMINATORS); | |
434 | /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ | |
b3083327 | 435 | loops_state_set (LOOPS_NEED_FIXUP); |
65b0537f | 436 | } |
437 | ||
438 | if (dump_file) | |
439 | brief_dump_cfg (dump_file, dump_flags); | |
440 | ||
441 | return changed ? TODO_cleanup_cfg : 0; | |
442 | } | |
cbe8bda8 | 443 | } // anon namespace |
444 | ||
445 | gimple_opt_pass * | |
446 | make_pass_tracer (gcc::context *ctxt) | |
447 | { | |
448 | return new pass_tracer (ctxt); | |
449 | } |