]>
Commit | Line | Data |
---|---|---|
5c856b23 JH |
1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
232abc3f | 3 | Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. |
5624e564 | 4 | Copyright (C) 2001-2015 Free Software Foundation, Inc. |
5c856b23 JH |
5 | |
6 | This file is part of GCC. | |
7 | ||
8 | GCC is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
9dcd6f09 | 10 | the Free Software Foundation; either version 3, or (at your option) |
5c856b23 JH |
11 | any later version. |
12 | ||
13 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
16 | License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
9dcd6f09 NC |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ | |
5c856b23 JH |
21 | |
22 | /* This pass performs the tail duplication needed for superblock formation. | |
23 | For more information see: | |
24 | ||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |
27 | Parallelism 3 (2000) 1-25 | |
28 | ||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |
30 | probably a better job can be done by a special pass and we don't | |
31 | need to worry too much about the code size implications as the tail | |
32 | duplicates are crossjumped again if optimizations are not | |
33 | performed. */ | |
34 | ||
35 | ||
36 | #include "config.h" | |
37 | #include "system.h" | |
4977bab6 | 38 | #include "coretypes.h" |
c7131fb2 | 39 | #include "backend.h" |
5c856b23 | 40 | #include "tree.h" |
c7131fb2 | 41 | #include "gimple.h" |
5c856b23 | 42 | #include "rtl.h" |
c7131fb2 AM |
43 | #include "alias.h" |
44 | #include "fold-const.h" | |
59f2e9d8 | 45 | #include "profile.h" |
60393bbc | 46 | #include "cfganal.h" |
5c856b23 JH |
47 | #include "flags.h" |
48 | #include "params.h" | |
ca29da43 | 49 | #include "coverage.h" |
ef330312 | 50 | #include "tree-pass.h" |
2fb9a547 | 51 | #include "internal-fn.h" |
5be5c238 | 52 | #include "gimple-iterator.h" |
442b4905 | 53 | #include "tree-cfg.h" |
7a300452 | 54 | #include "tree-ssa.h" |
232abc3f | 55 | #include "tree-inline.h" |
0b9066cf | 56 | #include "cfgloop.h" |
dd7bda5e | 57 | #include "fibonacci_heap.h" |
5c856b23 | 58 | |
232abc3f | 59 | static int count_insns (basic_block); |
ed7a4b4b KG |
60 | static bool ignore_bb_p (const_basic_block); |
61 | static bool better_p (const_edge, const_edge); | |
46c5ad27 AJ |
62 | static edge find_best_successor (basic_block); |
63 | static edge find_best_predecessor (basic_block); | |
64 | static int find_trace (basic_block, basic_block *); | |
5c856b23 JH |
65 | |
66 | /* Minimal outgoing edge probability considered for superblock formation. */ | |
67 | static int probability_cutoff; | |
68 | static int branch_ratio_cutoff; | |
69 | ||
232abc3f RK |
70 | /* A bit BB->index is set if BB has already been seen, i.e. it is |
71 | connected to some trace already. */ | |
72 | sbitmap bb_seen; | |
5c856b23 | 73 | |
232abc3f RK |
74 | static inline void |
75 | mark_bb_seen (basic_block bb) | |
76 | { | |
131db6b8 | 77 | unsigned int size = SBITMAP_SIZE (bb_seen); |
232abc3f RK |
78 | |
79 | if ((unsigned int)bb->index >= size) | |
80 | bb_seen = sbitmap_resize (bb_seen, size * 2, 0); | |
81 | ||
d7c028c0 | 82 | bitmap_set_bit (bb_seen, bb->index); |
232abc3f RK |
83 | } |
84 | ||
85 | static inline bool | |
86 | bb_seen_p (basic_block bb) | |
87 | { | |
d7c028c0 | 88 | return bitmap_bit_p (bb_seen, bb->index); |
232abc3f | 89 | } |
5c856b23 | 90 | |
4b7e68e7 | 91 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
5c856b23 | 92 | static bool |
ed7a4b4b | 93 | ignore_bb_p (const_basic_block bb) |
5c856b23 | 94 | { |
2e85d5e2 PM |
95 | gimple g; |
96 | ||
24bd1a0b | 97 | if (bb->index < NUM_FIXED_BLOCKS) |
5c856b23 | 98 | return true; |
efd8f750 | 99 | if (optimize_bb_for_size_p (bb)) |
5c856b23 | 100 | return true; |
2e85d5e2 PM |
101 | |
102 | /* A transaction is a single entry multiple exit region. It must be | |
103 | duplicated in its entirety or not at all. */ | |
104 | g = last_stmt (CONST_CAST_BB (bb)); | |
105 | if (g && gimple_code (g) == GIMPLE_TRANSACTION) | |
106 | return true; | |
107 | ||
5c856b23 JH |
108 | return false; |
109 | } | |
110 | ||
111 | /* Return number of instructions in the block. */ | |
112 | ||
113 | static int | |
232abc3f | 114 | count_insns (basic_block bb) |
5c856b23 | 115 | { |
726a989a RB |
116 | gimple_stmt_iterator gsi; |
117 | gimple stmt; | |
5c856b23 JH |
118 | int n = 0; |
119 | ||
726a989a | 120 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
232abc3f | 121 | { |
726a989a | 122 | stmt = gsi_stmt (gsi); |
232abc3f RK |
123 | n += estimate_num_insns (stmt, &eni_size_weights); |
124 | } | |
5c856b23 JH |
125 | return n; |
126 | } | |
127 | ||
128 | /* Return true if E1 is more frequent than E2. */ | |
129 | static bool | |
ed7a4b4b | 130 | better_p (const_edge e1, const_edge e2) |
5c856b23 JH |
131 | { |
132 | if (e1->count != e2->count) | |
133 | return e1->count > e2->count; | |
134 | if (e1->src->frequency * e1->probability != | |
135 | e2->src->frequency * e2->probability) | |
136 | return (e1->src->frequency * e1->probability | |
137 | > e2->src->frequency * e2->probability); | |
138 | /* This is needed to avoid changes in the decision after | |
139 | CFG is modified. */ | |
140 | if (e1->src != e2->src) | |
141 | return e1->src->index > e2->src->index; | |
142 | return e1->dest->index > e2->dest->index; | |
143 | } | |
144 | ||
145 | /* Return most frequent successor of basic block BB. */ | |
146 | ||
147 | static edge | |
46c5ad27 | 148 | find_best_successor (basic_block bb) |
5c856b23 JH |
149 | { |
150 | edge e; | |
151 | edge best = NULL; | |
628f6a4e | 152 | edge_iterator ei; |
5c856b23 | 153 | |
628f6a4e | 154 | FOR_EACH_EDGE (e, ei, bb->succs) |
5c856b23 JH |
155 | if (!best || better_p (e, best)) |
156 | best = e; | |
157 | if (!best || ignore_bb_p (best->dest)) | |
158 | return NULL; | |
159 | if (best->probability <= probability_cutoff) | |
160 | return NULL; | |
161 | return best; | |
162 | } | |
163 | ||
164 | /* Return most frequent predecessor of basic block BB. */ | |
165 | ||
166 | static edge | |
46c5ad27 | 167 | find_best_predecessor (basic_block bb) |
5c856b23 JH |
168 | { |
169 | edge e; | |
170 | edge best = NULL; | |
628f6a4e | 171 | edge_iterator ei; |
5c856b23 | 172 | |
628f6a4e | 173 | FOR_EACH_EDGE (e, ei, bb->preds) |
5c856b23 JH |
174 | if (!best || better_p (e, best)) |
175 | best = e; | |
176 | if (!best || ignore_bb_p (best->src)) | |
177 | return NULL; | |
178 | if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE | |
179 | < bb->frequency * branch_ratio_cutoff) | |
180 | return NULL; | |
181 | return best; | |
182 | } | |
183 | ||
184 | /* Find the trace using bb and record it in the TRACE array. | |
185 | Return number of basic blocks recorded. */ | |
186 | ||
187 | static int | |
46c5ad27 | 188 | find_trace (basic_block bb, basic_block *trace) |
5c856b23 JH |
189 | { |
190 | int i = 0; | |
191 | edge e; | |
192 | ||
c263766c RH |
193 | if (dump_file) |
194 | fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
195 | |
196 | while ((e = find_best_predecessor (bb)) != NULL) | |
197 | { | |
198 | basic_block bb2 = e->src; | |
232abc3f | 199 | if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
5c856b23 JH |
200 | || find_best_successor (bb2) != e) |
201 | break; | |
c263766c RH |
202 | if (dump_file) |
203 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
204 | bb = bb2; |
205 | } | |
c263766c RH |
206 | if (dump_file) |
207 | fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
208 | trace[i++] = bb; |
209 | ||
210 | /* Follow the trace in forward direction. */ | |
211 | while ((e = find_best_successor (bb)) != NULL) | |
212 | { | |
213 | bb = e->dest; | |
232abc3f | 214 | if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) |
5c856b23 JH |
215 | || find_best_predecessor (bb) != e) |
216 | break; | |
c263766c RH |
217 | if (dump_file) |
218 | fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); | |
5c856b23 JH |
219 | trace[i++] = bb; |
220 | } | |
c263766c RH |
221 | if (dump_file) |
222 | fprintf (dump_file, "\n"); | |
5c856b23 JH |
223 | return i; |
224 | } | |
225 | ||
226 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |
227 | if profitable. */ | |
228 | ||
07b1bf20 | 229 | static bool |
46c5ad27 | 230 | tail_duplicate (void) |
5c856b23 | 231 | { |
dd7bda5e ML |
232 | auto_vec<fibonacci_node<long, basic_block_def>*> blocks; |
233 | blocks.safe_grow_cleared (last_basic_block_for_fn (cfun)); | |
234 | ||
0cae8d31 | 235 | basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); |
8b1c6fd7 | 236 | int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun)); |
5c856b23 JH |
237 | int ninsns = 0, nduplicated = 0; |
238 | gcov_type weighted_insns = 0, traced_insns = 0; | |
dd7bda5e | 239 | fibonacci_heap<long, basic_block_def> heap (LONG_MIN); |
5c856b23 JH |
240 | gcov_type cover_insns; |
241 | int max_dup_insns; | |
242 | basic_block bb; | |
07b1bf20 | 243 | bool changed = false; |
5c856b23 | 244 | |
232abc3f RK |
245 | /* Create an oversized sbitmap to reduce the chance that we need to |
246 | resize it. */ | |
8b1c6fd7 | 247 | bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2); |
f61e445a | 248 | bitmap_clear (bb_seen); |
232abc3f RK |
249 | initialize_original_copy_tables (); |
250 | ||
cdb23767 | 251 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
252 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
253 | else | |
254 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
255 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
256 | ||
257 | branch_ratio_cutoff = | |
258 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
259 | ||
11cd3bed | 260 | FOR_EACH_BB_FN (bb, cfun) |
5c856b23 JH |
261 | { |
262 | int n = count_insns (bb); | |
263 | if (!ignore_bb_p (bb)) | |
dd7bda5e | 264 | blocks[bb->index] = heap.insert (-bb->frequency, bb); |
5c856b23 JH |
265 | |
266 | counts [bb->index] = n; | |
267 | ninsns += n; | |
268 | weighted_insns += n * bb->frequency; | |
269 | } | |
270 | ||
cdb23767 | 271 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
272 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
273 | else | |
274 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
275 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
276 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
277 | ||
278 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
dd7bda5e | 279 | && !heap.empty ()) |
5c856b23 | 280 | { |
dd7bda5e | 281 | basic_block bb = heap.extract_min (); |
5c856b23 JH |
282 | int n, pos; |
283 | ||
284 | if (!bb) | |
285 | break; | |
286 | ||
287 | blocks[bb->index] = NULL; | |
288 | ||
289 | if (ignore_bb_p (bb)) | |
290 | continue; | |
232abc3f | 291 | gcc_assert (!bb_seen_p (bb)); |
5c856b23 JH |
292 | |
293 | n = find_trace (bb, trace); | |
294 | ||
295 | bb = trace[0]; | |
296 | traced_insns += bb->frequency * counts [bb->index]; | |
297 | if (blocks[bb->index]) | |
298 | { | |
dd7bda5e | 299 | heap.delete_node (blocks[bb->index]); |
5c856b23 JH |
300 | blocks[bb->index] = NULL; |
301 | } | |
302 | ||
303 | for (pos = 1; pos < n; pos++) | |
304 | { | |
305 | basic_block bb2 = trace[pos]; | |
306 | ||
307 | if (blocks[bb2->index]) | |
308 | { | |
dd7bda5e | 309 | heap.delete_node (blocks[bb2->index]); |
5c856b23 JH |
310 | blocks[bb2->index] = NULL; |
311 | } | |
312 | traced_insns += bb2->frequency * counts [bb2->index]; | |
628f6a4e | 313 | if (EDGE_COUNT (bb2->preds) > 1 |
0b9066cf RG |
314 | && can_duplicate_block_p (bb2) |
315 | /* We have the tendency to duplicate the loop header | |
316 | of all do { } while loops. Do not do that - it is | |
317 | not profitable and it might create a loop with multiple | |
318 | entries or at least rotate the loop. */ | |
726338f4 | 319 | && bb2->loop_father->header != bb2) |
5c856b23 | 320 | { |
628f6a4e | 321 | edge e; |
232abc3f RK |
322 | basic_block copy; |
323 | ||
324 | nduplicated += counts [bb2->index]; | |
5c856b23 | 325 | |
9ff3d2de | 326 | e = find_edge (bb, bb2); |
b8698a0f | 327 | |
232abc3f RK |
328 | copy = duplicate_block (bb2, e, bb); |
329 | flush_pending_stmts (e); | |
628f6a4e | 330 | |
5f40b3cb | 331 | add_phi_args_after_copy (©, 1, NULL); |
5c856b23 JH |
332 | |
333 | /* Reconsider the original copy of block we've duplicated. | |
14b493d6 | 334 | Removing the most common predecessor may make it to be |
5c856b23 | 335 | head. */ |
dd7bda5e | 336 | blocks[bb2->index] = heap.insert (-bb2->frequency, bb2); |
5c856b23 | 337 | |
c263766c RH |
338 | if (dump_file) |
339 | fprintf (dump_file, "Duplicated %i as %i [%i]\n", | |
232abc3f RK |
340 | bb2->index, copy->index, copy->frequency); |
341 | ||
342 | bb2 = copy; | |
07b1bf20 | 343 | changed = true; |
5c856b23 | 344 | } |
232abc3f | 345 | mark_bb_seen (bb2); |
5c856b23 JH |
346 | bb = bb2; |
347 | /* In case the trace became infrequent, stop duplicating. */ | |
348 | if (ignore_bb_p (bb)) | |
349 | break; | |
350 | } | |
c263766c RH |
351 | if (dump_file) |
352 | fprintf (dump_file, " covered now %.1f\n\n", | |
5c856b23 JH |
353 | traced_insns * 100.0 / weighted_insns); |
354 | } | |
c263766c RH |
355 | if (dump_file) |
356 | fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
5c856b23 JH |
357 | nduplicated * 100 / ninsns); |
358 | ||
232abc3f RK |
359 | free_original_copy_tables (); |
360 | sbitmap_free (bb_seen); | |
5c856b23 JH |
361 | free (trace); |
362 | free (counts); | |
07b1bf20 RG |
363 | |
364 | return changed; | |
5c856b23 | 365 | } |
ef330312 | 366 | \f |
27a4cd48 DM |
367 | namespace { |
368 | ||
369 | const pass_data pass_data_tracer = | |
ef330312 | 370 | { |
27a4cd48 DM |
371 | GIMPLE_PASS, /* type */ |
372 | "tracer", /* name */ | |
373 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
374 | TV_TRACER, /* tv_id */ |
375 | 0, /* properties_required */ | |
376 | 0, /* properties_provided */ | |
377 | 0, /* properties_destroyed */ | |
378 | 0, /* todo_flags_start */ | |
3bea341f | 379 | TODO_update_ssa, /* todo_flags_finish */ |
ef330312 | 380 | }; |
27a4cd48 DM |
381 | |
382 | class pass_tracer : public gimple_opt_pass | |
383 | { | |
384 | public: | |
c3284718 RS |
385 | pass_tracer (gcc::context *ctxt) |
386 | : gimple_opt_pass (pass_data_tracer, ctxt) | |
27a4cd48 DM |
387 | {} |
388 | ||
389 | /* opt_pass methods: */ | |
1a3d085c TS |
390 | virtual bool gate (function *) |
391 | { | |
392 | return (optimize > 0 && flag_tracer && flag_reorder_blocks); | |
393 | } | |
394 | ||
be55bfe6 | 395 | virtual unsigned int execute (function *); |
27a4cd48 DM |
396 | |
397 | }; // class pass_tracer | |
398 | ||
be55bfe6 TS |
399 | unsigned int |
400 | pass_tracer::execute (function *fun) | |
401 | { | |
402 | bool changed; | |
403 | ||
404 | if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1) | |
405 | return 0; | |
406 | ||
407 | mark_dfs_back_edges (); | |
408 | if (dump_file) | |
409 | brief_dump_cfg (dump_file, dump_flags); | |
410 | ||
411 | /* Trace formation is done on the fly inside tail_duplicate */ | |
412 | changed = tail_duplicate (); | |
413 | if (changed) | |
414 | { | |
415 | free_dominance_info (CDI_DOMINATORS); | |
416 | /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ | |
726338f4 | 417 | loops_state_set (LOOPS_NEED_FIXUP); |
be55bfe6 TS |
418 | } |
419 | ||
420 | if (dump_file) | |
421 | brief_dump_cfg (dump_file, dump_flags); | |
422 | ||
423 | return changed ? TODO_cleanup_cfg : 0; | |
424 | } | |
27a4cd48 DM |
425 | } // anon namespace |
426 | ||
427 | gimple_opt_pass * | |
428 | make_pass_tracer (gcc::context *ctxt) | |
429 | { | |
430 | return new pass_tracer (ctxt); | |
431 | } |