]>
Commit | Line | Data |
---|---|---|
fa99ab3d | 1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
3ddccd57 | 3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. |
711789cc | 4 | Copyright (C) 2001-2013 Free Software Foundation, Inc. |
fa99ab3d | 5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
8c4c00c1 | 10 | the Free Software Foundation; either version 3, or (at your option) |
fa99ab3d | 11 | any later version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
16 | License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
8c4c00c1 | 19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
fa99ab3d | 21 | |
22 | /* This pass performs the tail duplication needed for superblock formation. | |
23 | For more information see: | |
24 | ||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |
27 | Parallelism 3 (2000) 1-25 | |
28 | ||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |
30 | probably a better job can be done by a special pass and we don't | |
31 | need to worry too much about the code size implications as the tail | |
32 | duplicates are crossjumped again if optimizations are not | |
33 | performed. */ | |
34 | ||
35 | ||
36 | #include "config.h" | |
37 | #include "system.h" | |
805e22b2 | 38 | #include "coretypes.h" |
39 | #include "tm.h" | |
fa99ab3d | 40 | #include "tree.h" |
41 | #include "rtl.h" | |
42 | #include "hard-reg-set.h" | |
43 | #include "basic-block.h" | |
fa99ab3d | 44 | #include "fibheap.h" |
45 | #include "flags.h" | |
46 | #include "params.h" | |
44359ced | 47 | #include "coverage.h" |
77fce4cd | 48 | #include "tree-pass.h" |
bc61cadb | 49 | #include "tree-ssa-alias.h" |
50 | #include "internal-fn.h" | |
51 | #include "gimple-expr.h" | |
52 | #include "is-a.h" | |
073c1fd5 | 53 | #include "gimple.h" |
dcf1a1ec | 54 | #include "gimple-iterator.h" |
073c1fd5 | 55 | #include "tree-cfg.h" |
69ee5dbb | 56 | #include "tree-ssa.h" |
3ddccd57 | 57 | #include "tree-inline.h" |
f889f544 | 58 | #include "cfgloop.h" |
fa99ab3d | 59 | |
/* Forward declarations of the file-local helpers defined below.  */
static int count_insns (basic_block);
static bool ignore_bb_p (const_basic_block);
static bool better_p (const_edge, const_edge);
static edge find_best_successor (basic_block);
static edge find_best_predecessor (basic_block);
static int find_trace (basic_block, basic_block *);

/* Minimal outgoing edge probability considered for superblock formation.
   Set by tail_duplicate from the TRACER_MIN_BRANCH_PROBABILITY* params,
   scaled to REG_BR_PROB_BASE.  */
static int probability_cutoff;
/* Minimal ratio, scaled to REG_BR_PROB_BASE, between the frequency of the
   best incoming edge and the frequency of its destination block.
   find_best_predecessor rejects edges that fall below it.  Set by
   tail_duplicate from the TRACER_MIN_BRANCH_RATIO param.  */
static int branch_ratio_cutoff;

/* A bit BB->index is set if BB has already been seen, i.e. it is
   connected to some trace already.  Allocated and freed by
   tail_duplicate.  */
sbitmap bb_seen;
fa99ab3d | 74 | |
3ddccd57 | 75 | static inline void |
76 | mark_bb_seen (basic_block bb) | |
77 | { | |
2808fda8 | 78 | unsigned int size = SBITMAP_SIZE (bb_seen); |
3ddccd57 | 79 | |
80 | if ((unsigned int)bb->index >= size) | |
81 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); | |
82 | ||
08b7917c | 83 | bitmap_set_bit (bb_seen, bb->index); |
3ddccd57 | 84 | } |
85 | ||
86 | static inline bool | |
87 | bb_seen_p (basic_block bb) | |
88 | { | |
08b7917c | 89 | return bitmap_bit_p (bb_seen, bb->index); |
3ddccd57 | 90 | } |
fa99ab3d | 91 | |
7299020b | 92 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
fa99ab3d | 93 | static bool |
7ecb5bb2 | 94 | ignore_bb_p (const_basic_block bb) |
fa99ab3d | 95 | { |
09a1f31f | 96 | gimple g; |
97 | ||
4d2e5d52 | 98 | if (bb->index < NUM_FIXED_BLOCKS) |
fa99ab3d | 99 | return true; |
0bfd8d5c | 100 | if (optimize_bb_for_size_p (bb)) |
fa99ab3d | 101 | return true; |
09a1f31f | 102 | |
103 | /* A transaction is a single entry multiple exit region. It must be | |
104 | duplicated in its entirety or not at all. */ | |
105 | g = last_stmt (CONST_CAST_BB (bb)); | |
106 | if (g && gimple_code (g) == GIMPLE_TRANSACTION) | |
107 | return true; | |
108 | ||
fa99ab3d | 109 | return false; |
110 | } | |
111 | ||
112 | /* Return number of instructions in the block. */ | |
113 | ||
114 | static int | |
3ddccd57 | 115 | count_insns (basic_block bb) |
fa99ab3d | 116 | { |
75a70cf9 | 117 | gimple_stmt_iterator gsi; |
118 | gimple stmt; | |
fa99ab3d | 119 | int n = 0; |
120 | ||
75a70cf9 | 121 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
3ddccd57 | 122 | { |
75a70cf9 | 123 | stmt = gsi_stmt (gsi); |
3ddccd57 | 124 | n += estimate_num_insns (stmt, &eni_size_weights); |
125 | } | |
fa99ab3d | 126 | return n; |
127 | } | |
128 | ||
129 | /* Return true if E1 is more frequent than E2. */ | |
130 | static bool | |
7ecb5bb2 | 131 | better_p (const_edge e1, const_edge e2) |
fa99ab3d | 132 | { |
133 | if (e1->count != e2->count) | |
134 | return e1->count > e2->count; | |
135 | if (e1->src->frequency * e1->probability != | |
136 | e2->src->frequency * e2->probability) | |
137 | return (e1->src->frequency * e1->probability | |
138 | > e2->src->frequency * e2->probability); | |
139 | /* This is needed to avoid changes in the decision after | |
140 | CFG is modified. */ | |
141 | if (e1->src != e2->src) | |
142 | return e1->src->index > e2->src->index; | |
143 | return e1->dest->index > e2->dest->index; | |
144 | } | |
145 | ||
146 | /* Return most frequent successor of basic block BB. */ | |
147 | ||
148 | static edge | |
60b8c5b3 | 149 | find_best_successor (basic_block bb) |
fa99ab3d | 150 | { |
151 | edge e; | |
152 | edge best = NULL; | |
cd665a06 | 153 | edge_iterator ei; |
fa99ab3d | 154 | |
cd665a06 | 155 | FOR_EACH_EDGE (e, ei, bb->succs) |
fa99ab3d | 156 | if (!best || better_p (e, best)) |
157 | best = e; | |
158 | if (!best || ignore_bb_p (best->dest)) | |
159 | return NULL; | |
160 | if (best->probability <= probability_cutoff) | |
161 | return NULL; | |
162 | return best; | |
163 | } | |
164 | ||
165 | /* Return most frequent predecessor of basic block BB. */ | |
166 | ||
167 | static edge | |
60b8c5b3 | 168 | find_best_predecessor (basic_block bb) |
fa99ab3d | 169 | { |
170 | edge e; | |
171 | edge best = NULL; | |
cd665a06 | 172 | edge_iterator ei; |
fa99ab3d | 173 | |
cd665a06 | 174 | FOR_EACH_EDGE (e, ei, bb->preds) |
fa99ab3d | 175 | if (!best || better_p (e, best)) |
176 | best = e; | |
177 | if (!best || ignore_bb_p (best->src)) | |
178 | return NULL; | |
179 | if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE | |
180 | < bb->frequency * branch_ratio_cutoff) | |
181 | return NULL; | |
182 | return best; | |
183 | } | |
184 | ||
185 | /* Find the trace using bb and record it in the TRACE array. | |
186 | Return number of basic blocks recorded. */ | |
187 | ||
188 | static int | |
60b8c5b3 | 189 | find_trace (basic_block bb, basic_block *trace) |
fa99ab3d | 190 | { |
191 | int i = 0; | |
192 | edge e; | |
193 | ||
450d042a | 194 | if (dump_file) |
195 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); | |
fa99ab3d | 196 | |
197 | while ((e = find_best_predecessor (bb)) != NULL) | |
198 | { | |
199 | basic_block bb2 = e->src; | |
3ddccd57 | 200 | if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
fa99ab3d | 201 | || find_best_successor (bb2) != e) |
202 | break; | |
450d042a | 203 | if (dump_file) |
204 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
fa99ab3d | 205 | bb = bb2; |
206 | } | |
450d042a | 207 | if (dump_file) |
208 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); | |
fa99ab3d | 209 | trace[i++] = bb; |
210 | ||
211 | /* Follow the trace in forward direction. */ | |
212 | while ((e = find_best_successor (bb)) != NULL) | |
213 | { | |
214 | bb = e->dest; | |
3ddccd57 | 215 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
fa99ab3d | 216 | || find_best_predecessor (bb) != e) |
217 | break; | |
450d042a | 218 | if (dump_file) |
219 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
fa99ab3d | 220 | trace[i++] = bb; |
221 | } | |
450d042a | 222 | if (dump_file) |
223 | fprintf (dump_file, "\n"); | |
fa99ab3d | 224 | return i; |
225 | } | |
226 | ||
227 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |
228 | if profitable. */ | |
229 | ||
35c67c83 | 230 | static bool |
60b8c5b3 | 231 | tail_duplicate (void) |
fa99ab3d | 232 | { |
fe672ac0 | 233 | fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block_for_fn (cfun)); |
a28770e1 | 234 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
fe672ac0 | 235 | int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun)); |
fa99ab3d | 236 | int ninsns = 0, nduplicated = 0; |
237 | gcov_type weighted_insns = 0, traced_insns = 0; | |
238 | fibheap_t heap = fibheap_new (); | |
239 | gcov_type cover_insns; | |
240 | int max_dup_insns; | |
241 | basic_block bb; | |
35c67c83 | 242 | bool changed = false; |
fa99ab3d | 243 | |
3ddccd57 | 244 | /* Create an oversized sbitmap to reduce the chance that we need to |
245 | resize it. */ | |
fe672ac0 | 246 | bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2); |
53c5d9d4 | 247 | bitmap_clear (bb_seen); |
3ddccd57 | 248 | initialize_original_copy_tables (); |
249 | ||
ab6a34f2 | 250 | if (profile_info && flag_branch_probabilities) |
fa99ab3d | 251 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
252 | else | |
253 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
254 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
255 | ||
256 | branch_ratio_cutoff = | |
257 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
258 | ||
fc00614f | 259 | FOR_EACH_BB_FN (bb, cfun) |
fa99ab3d | 260 | { |
261 | int n = count_insns (bb); | |
262 | if (!ignore_bb_p (bb)) | |
263 | blocks[bb->index] = fibheap_insert (heap, -bb->frequency, | |
264 | bb); | |
265 | ||
266 | counts [bb->index] = n; | |
267 | ninsns += n; | |
268 | weighted_insns += n * bb->frequency; | |
269 | } | |
270 | ||
ab6a34f2 | 271 | if (profile_info && flag_branch_probabilities) |
fa99ab3d | 272 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
273 | else | |
274 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
275 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
276 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
277 | ||
278 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
279 | && !fibheap_empty (heap)) | |
280 | { | |
45ba1503 | 281 | basic_block bb = (basic_block) fibheap_extract_min (heap); |
fa99ab3d | 282 | int n, pos; |
283 | ||
284 | if (!bb) | |
285 | break; | |
286 | ||
287 | blocks[bb->index] = NULL; | |
288 | ||
289 | if (ignore_bb_p (bb)) | |
290 | continue; | |
3ddccd57 | 291 | gcc_assert (!bb_seen_p (bb)); |
fa99ab3d | 292 | |
293 | n = find_trace (bb, trace); | |
294 | ||
295 | bb = trace[0]; | |
296 | traced_insns += bb->frequency * counts [bb->index]; | |
297 | if (blocks[bb->index]) | |
298 | { | |
299 | fibheap_delete_node (heap, blocks[bb->index]); | |
300 | blocks[bb->index] = NULL; | |
301 | } | |
302 | ||
303 | for (pos = 1; pos < n; pos++) | |
304 | { | |
305 | basic_block bb2 = trace[pos]; | |
306 | ||
307 | if (blocks[bb2->index]) | |
308 | { | |
309 | fibheap_delete_node (heap, blocks[bb2->index]); | |
310 | blocks[bb2->index] = NULL; | |
311 | } | |
312 | traced_insns += bb2->frequency * counts [bb2->index]; | |
cd665a06 | 313 | if (EDGE_COUNT (bb2->preds) > 1 |
f889f544 | 314 | && can_duplicate_block_p (bb2) |
315 | /* We have the tendency to duplicate the loop header | |
316 | of all do { } while loops. Do not do that - it is | |
317 | not profitable and it might create a loop with multiple | |
318 | entries or at least rotate the loop. */ | |
319 | && (!current_loops | |
320 | || bb2->loop_father->header != bb2)) | |
fa99ab3d | 321 | { |
cd665a06 | 322 | edge e; |
3ddccd57 | 323 | basic_block copy; |
324 | ||
325 | nduplicated += counts [bb2->index]; | |
fa99ab3d | 326 | |
c6356c17 | 327 | e = find_edge (bb, bb2); |
48e1416a | 328 | |
3ddccd57 | 329 | copy = duplicate_block (bb2, e, bb); |
330 | flush_pending_stmts (e); | |
cd665a06 | 331 | |
28c92cbb | 332 | add_phi_args_after_copy (©, 1, NULL); |
fa99ab3d | 333 | |
334 | /* Reconsider the original copy of block we've duplicated. | |
de132707 | 335 | Removing the most common predecessor may make it to be |
fa99ab3d | 336 | head. */ |
3ddccd57 | 337 | blocks[bb2->index] = |
338 | fibheap_insert (heap, -bb2->frequency, bb2); | |
fa99ab3d | 339 | |
450d042a | 340 | if (dump_file) |
341 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |
3ddccd57 | 342 | bb2->index, copy->index, copy->frequency); |
343 | ||
344 | bb2 = copy; | |
35c67c83 | 345 | changed = true; |
fa99ab3d | 346 | } |
3ddccd57 | 347 | mark_bb_seen (bb2); |
fa99ab3d | 348 | bb = bb2; |
349 | /* In case the trace became infrequent, stop duplicating. */ | |
350 | if (ignore_bb_p (bb)) | |
351 | break; | |
352 | } | |
450d042a | 353 | if (dump_file) |
354 | fprintf (dump_file, " covered now %.1f\n\n", | |
fa99ab3d | 355 | traced_insns * 100.0 / weighted_insns); |
356 | } | |
450d042a | 357 | if (dump_file) |
358 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
fa99ab3d | 359 | nduplicated * 100 / ninsns); |
360 | ||
3ddccd57 | 361 | free_original_copy_tables (); |
362 | sbitmap_free (bb_seen); | |
fa99ab3d | 363 | free (blocks); |
364 | free (trace); | |
365 | free (counts); | |
366 | fibheap_delete (heap); | |
35c67c83 | 367 | |
368 | return changed; | |
fa99ab3d | 369 | } |
370 | ||
207c7ab2 | 371 | /* Main entry point to this file. */ |
fa99ab3d | 372 | |
3ddccd57 | 373 | static unsigned int |
207c7ab2 | 374 | tracer (void) |
fa99ab3d | 375 | { |
35c67c83 | 376 | bool changed; |
377 | ||
a28770e1 | 378 | if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1) |
3ddccd57 | 379 | return 0; |
e7f8f0eb | 380 | |
fa99ab3d | 381 | mark_dfs_back_edges (); |
450d042a | 382 | if (dump_file) |
bec2cf98 | 383 | brief_dump_cfg (dump_file, dump_flags); |
3ddccd57 | 384 | |
385 | /* Trace formation is done on the fly inside tail_duplicate */ | |
35c67c83 | 386 | changed = tail_duplicate (); |
387 | if (changed) | |
8ca4cf5b | 388 | { |
389 | free_dominance_info (CDI_DOMINATORS); | |
caa1ac55 | 390 | /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ |
8ca4cf5b | 391 | if (current_loops) |
caa1ac55 | 392 | loops_state_set (LOOPS_NEED_FIXUP); |
8ca4cf5b | 393 | } |
3ddccd57 | 394 | |
450d042a | 395 | if (dump_file) |
bec2cf98 | 396 | brief_dump_cfg (dump_file, dump_flags); |
e7f8f0eb | 397 | |
35c67c83 | 398 | return changed ? TODO_cleanup_cfg : 0; |
77fce4cd | 399 | } |
400 | \f | |
401 | static bool | |
3ddccd57 | 402 | gate_tracer (void) |
77fce4cd | 403 | { |
3ddccd57 | 404 | return (optimize > 0 && flag_tracer && flag_reorder_blocks); |
fa99ab3d | 405 | } |
77fce4cd | 406 | |
namespace {

/* Static description of the tracer pass handed to the pass manager.
   The pass has both a gate and an execute hook, and requests
   TODO_update_ssa and TODO_verify_ssa when it finishes.  */
const pass_data pass_data_tracer =
{
  GIMPLE_PASS, /* type */
  "tracer", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TRACER, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_update_ssa | TODO_verify_ssa ), /* todo_flags_finish */
};

/* Pass object wrapping the file-local gate_tracer and tracer
   functions.  */
class pass_tracer : public gimple_opt_pass
{
public:
  pass_tracer (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tracer, ctxt)
  {}

  /* opt_pass methods: */
  /* Forward to gate_tracer.  */
  bool gate () { return gate_tracer (); }
  /* Forward to tracer and return its TODO flags.  */
  unsigned int execute () { return tracer (); }

}; // class pass_tracer

} // anon namespace
438 | ||
439 | gimple_opt_pass * | |
440 | make_pass_tracer (gcc::context *ctxt) | |
441 | { | |
442 | return new pass_tracer (ctxt); | |
443 | } |