]>
Commit | Line | Data |
---|---|---|
5c856b23 JH |
1 | /* The tracer pass for the GNU compiler. |
2 | Contributed by Jan Hubicka, SuSE Labs. | |
ca29da43 | 3 | Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. |
5c856b23 JH |
4 | |
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING. If not, write to the Free | |
19 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | 02111-1307, USA. */ | |
21 | ||
22 | /* This pass performs the tail duplication needed for superblock formation. | |
23 | For more information see: | |
24 | ||
25 | Design and Analysis of Profile-Based Optimization in Compaq's | |
26 | Compilation Tools for Alpha; Journal of Instruction-Level | |
27 | Parallelism 3 (2000) 1-25 | |
28 | ||
29 | Unlike Compaq's implementation we don't do the loop peeling as most | |
30 | probably a better job can be done by a special pass and we don't | |
31 | need to worry too much about the code size implications as the tail | |
32 | duplicates are crossjumped again if optimizations are not | |
33 | performed. */ | |
34 | ||
35 | ||
36 | #include "config.h" | |
37 | #include "system.h" | |
4977bab6 ZW |
38 | #include "coretypes.h" |
39 | #include "tm.h" | |
5c856b23 JH |
40 | #include "tree.h" |
41 | #include "rtl.h" | |
42 | #include "hard-reg-set.h" | |
43 | #include "basic-block.h" | |
44 | #include "output.h" | |
45 | #include "cfglayout.h" | |
46 | #include "fibheap.h" | |
47 | #include "flags.h" | |
48 | #include "params.h" | |
ca29da43 | 49 | #include "coverage.h" |
5c856b23 | 50 | |
46c5ad27 AJ |
51 | static int count_insns (basic_block); |
52 | static bool ignore_bb_p (basic_block); | |
53 | static bool better_p (edge, edge); | |
54 | static edge find_best_successor (basic_block); | |
55 | static edge find_best_predecessor (basic_block); | |
56 | static int find_trace (basic_block, basic_block *); | |
57 | static void tail_duplicate (void); | |
58 | static void layout_superblocks (void); | |
5c856b23 JH |
59 | |
60 | /* Minimal outgoing edge probability considered for superblock formation. */ | |
61 | static int probability_cutoff; | |
62 | static int branch_ratio_cutoff; | |
63 | ||
64 | /* Return true if BB has been seen - it is connected to some trace | |
65 | already. */ | |
66 | ||
bc35512f | 67 | #define seen(bb) (bb->rbi->visited || bb->rbi->next) |
5c856b23 | 68 | |
4b7e68e7 | 69 | /* Return true if we should ignore the basic block for purposes of tracing. */ |
5c856b23 | 70 | static bool |
46c5ad27 | 71 | ignore_bb_p (basic_block bb) |
5c856b23 JH |
72 | { |
73 | if (bb->index < 0) | |
74 | return true; | |
75 | if (!maybe_hot_bb_p (bb)) | |
76 | return true; | |
77 | return false; | |
78 | } | |
79 | ||
80 | /* Return number of instructions in the block. */ | |
81 | ||
82 | static int | |
46c5ad27 | 83 | count_insns (basic_block bb) |
5c856b23 JH |
84 | { |
85 | rtx insn; | |
86 | int n = 0; | |
87 | ||
a813c111 SB |
88 | for (insn = BB_HEAD (bb); |
89 | insn != NEXT_INSN (BB_END (bb)); | |
90 | insn = NEXT_INSN (insn)) | |
5c856b23 JH |
91 | if (active_insn_p (insn)) |
92 | n++; | |
93 | return n; | |
94 | } | |
95 | ||
96 | /* Return true if E1 is more frequent than E2. */ | |
97 | static bool | |
46c5ad27 | 98 | better_p (edge e1, edge e2) |
5c856b23 JH |
99 | { |
100 | if (e1->count != e2->count) | |
101 | return e1->count > e2->count; | |
102 | if (e1->src->frequency * e1->probability != | |
103 | e2->src->frequency * e2->probability) | |
104 | return (e1->src->frequency * e1->probability | |
105 | > e2->src->frequency * e2->probability); | |
106 | /* This is needed to avoid changes in the decision after | |
107 | CFG is modified. */ | |
108 | if (e1->src != e2->src) | |
109 | return e1->src->index > e2->src->index; | |
110 | return e1->dest->index > e2->dest->index; | |
111 | } | |
112 | ||
113 | /* Return most frequent successor of basic block BB. */ | |
114 | ||
115 | static edge | |
46c5ad27 | 116 | find_best_successor (basic_block bb) |
5c856b23 JH |
117 | { |
118 | edge e; | |
119 | edge best = NULL; | |
120 | ||
121 | for (e = bb->succ; e; e = e->succ_next) | |
122 | if (!best || better_p (e, best)) | |
123 | best = e; | |
124 | if (!best || ignore_bb_p (best->dest)) | |
125 | return NULL; | |
126 | if (best->probability <= probability_cutoff) | |
127 | return NULL; | |
128 | return best; | |
129 | } | |
130 | ||
131 | /* Return most frequent predecessor of basic block BB. */ | |
132 | ||
133 | static edge | |
46c5ad27 | 134 | find_best_predecessor (basic_block bb) |
5c856b23 JH |
135 | { |
136 | edge e; | |
137 | edge best = NULL; | |
138 | ||
139 | for (e = bb->pred; e; e = e->pred_next) | |
140 | if (!best || better_p (e, best)) | |
141 | best = e; | |
142 | if (!best || ignore_bb_p (best->src)) | |
143 | return NULL; | |
144 | if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE | |
145 | < bb->frequency * branch_ratio_cutoff) | |
146 | return NULL; | |
147 | return best; | |
148 | } | |
149 | ||
150 | /* Find the trace using bb and record it in the TRACE array. | |
151 | Return number of basic blocks recorded. */ | |
152 | ||
153 | static int | |
46c5ad27 | 154 | find_trace (basic_block bb, basic_block *trace) |
5c856b23 JH |
155 | { |
156 | int i = 0; | |
157 | edge e; | |
158 | ||
159 | if (rtl_dump_file) | |
160 | fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); | |
161 | ||
162 | while ((e = find_best_predecessor (bb)) != NULL) | |
163 | { | |
164 | basic_block bb2 = e->src; | |
165 | if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) | |
166 | || find_best_successor (bb2) != e) | |
167 | break; | |
168 | if (rtl_dump_file) | |
169 | fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); | |
170 | bb = bb2; | |
171 | } | |
172 | if (rtl_dump_file) | |
173 | fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency); | |
174 | trace[i++] = bb; | |
175 | ||
176 | /* Follow the trace in forward direction. */ | |
177 | while ((e = find_best_successor (bb)) != NULL) | |
178 | { | |
179 | bb = e->dest; | |
180 | if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) | |
181 | || find_best_predecessor (bb) != e) | |
182 | break; | |
183 | if (rtl_dump_file) | |
184 | fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); | |
185 | trace[i++] = bb; | |
186 | } | |
187 | if (rtl_dump_file) | |
188 | fprintf (rtl_dump_file, "\n"); | |
189 | return i; | |
190 | } | |
191 | ||
192 | /* Look for basic blocks in frequency order, construct traces and tail duplicate | |
193 | if profitable. */ | |
194 | ||
195 | static void | |
46c5ad27 | 196 | tail_duplicate (void) |
5c856b23 JH |
197 | { |
198 | fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t)); | |
199 | basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks); | |
200 | int *counts = xmalloc (sizeof (int) * last_basic_block); | |
201 | int ninsns = 0, nduplicated = 0; | |
202 | gcov_type weighted_insns = 0, traced_insns = 0; | |
203 | fibheap_t heap = fibheap_new (); | |
204 | gcov_type cover_insns; | |
205 | int max_dup_insns; | |
206 | basic_block bb; | |
207 | ||
cdb23767 | 208 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
209 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); |
210 | else | |
211 | probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
212 | probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
213 | ||
214 | branch_ratio_cutoff = | |
215 | (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); | |
216 | ||
217 | FOR_EACH_BB (bb) | |
218 | { | |
219 | int n = count_insns (bb); | |
220 | if (!ignore_bb_p (bb)) | |
221 | blocks[bb->index] = fibheap_insert (heap, -bb->frequency, | |
222 | bb); | |
223 | ||
224 | counts [bb->index] = n; | |
225 | ninsns += n; | |
226 | weighted_insns += n * bb->frequency; | |
227 | } | |
228 | ||
cdb23767 | 229 | if (profile_info && flag_branch_probabilities) |
5c856b23 JH |
230 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); |
231 | else | |
232 | cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); | |
233 | cover_insns = (weighted_insns * cover_insns + 50) / 100; | |
234 | max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; | |
235 | ||
236 | while (traced_insns < cover_insns && nduplicated < max_dup_insns | |
237 | && !fibheap_empty (heap)) | |
238 | { | |
239 | basic_block bb = fibheap_extract_min (heap); | |
240 | int n, pos; | |
241 | ||
242 | if (!bb) | |
243 | break; | |
244 | ||
245 | blocks[bb->index] = NULL; | |
246 | ||
247 | if (ignore_bb_p (bb)) | |
248 | continue; | |
249 | if (seen (bb)) | |
250 | abort (); | |
251 | ||
252 | n = find_trace (bb, trace); | |
253 | ||
254 | bb = trace[0]; | |
255 | traced_insns += bb->frequency * counts [bb->index]; | |
256 | if (blocks[bb->index]) | |
257 | { | |
258 | fibheap_delete_node (heap, blocks[bb->index]); | |
259 | blocks[bb->index] = NULL; | |
260 | } | |
261 | ||
262 | for (pos = 1; pos < n; pos++) | |
263 | { | |
264 | basic_block bb2 = trace[pos]; | |
265 | ||
266 | if (blocks[bb2->index]) | |
267 | { | |
268 | fibheap_delete_node (heap, blocks[bb2->index]); | |
269 | blocks[bb2->index] = NULL; | |
270 | } | |
271 | traced_insns += bb2->frequency * counts [bb2->index]; | |
272 | if (bb2->pred && bb2->pred->pred_next | |
273 | && cfg_layout_can_duplicate_bb_p (bb2)) | |
274 | { | |
275 | edge e = bb2->pred; | |
276 | basic_block old = bb2; | |
277 | ||
278 | while (e->src != bb) | |
279 | e = e->pred_next; | |
280 | nduplicated += counts [bb2->index]; | |
281 | bb2 = cfg_layout_duplicate_bb (bb2, e); | |
282 | ||
283 | /* Reconsider the original copy of block we've duplicated. | |
14b493d6 | 284 | Removing the most common predecessor may make it to be |
5c856b23 JH |
285 | head. */ |
286 | blocks[old->index] = | |
287 | fibheap_insert (heap, -old->frequency, old); | |
288 | ||
289 | if (rtl_dump_file) | |
290 | fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n", | |
291 | old->index, bb2->index, bb2->frequency); | |
292 | } | |
bc35512f JH |
293 | bb->rbi->next = bb2; |
294 | bb2->rbi->visited = 1; | |
5c856b23 JH |
295 | bb = bb2; |
296 | /* In case the trace became infrequent, stop duplicating. */ | |
297 | if (ignore_bb_p (bb)) | |
298 | break; | |
299 | } | |
300 | if (rtl_dump_file) | |
301 | fprintf (rtl_dump_file, " covered now %.1f\n\n", | |
302 | traced_insns * 100.0 / weighted_insns); | |
303 | } | |
304 | if (rtl_dump_file) | |
305 | fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, | |
306 | nduplicated * 100 / ninsns); | |
307 | ||
308 | free (blocks); | |
309 | free (trace); | |
310 | free (counts); | |
311 | fibheap_delete (heap); | |
312 | } | |
313 | ||
4d6922ee | 314 | /* Connect the superblocks into linear sequence. At the moment we attempt to keep |
5c856b23 JH |
315 | the original order as much as possible, but the algorithm may be made smarter |
316 | later if needed. BB reordering pass should void most of the benefits of such | |
317 | change though. */ | |
318 | ||
319 | static void | |
46c5ad27 | 320 | layout_superblocks (void) |
5c856b23 JH |
321 | { |
322 | basic_block end = ENTRY_BLOCK_PTR->succ->dest; | |
323 | basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb; | |
324 | ||
325 | while (bb != EXIT_BLOCK_PTR) | |
326 | { | |
327 | edge e, best = NULL; | |
bc35512f JH |
328 | while (end->rbi->next) |
329 | end = end->rbi->next; | |
5c856b23 JH |
330 | |
331 | for (e = end->succ; e; e = e->succ_next) | |
332 | if (e->dest != EXIT_BLOCK_PTR | |
333 | && e->dest != ENTRY_BLOCK_PTR->succ->dest | |
bc35512f | 334 | && !e->dest->rbi->visited |
5c856b23 JH |
335 | && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) |
336 | best = e; | |
337 | ||
338 | if (best) | |
339 | { | |
bc35512f JH |
340 | end->rbi->next = best->dest; |
341 | best->dest->rbi->visited = 1; | |
5c856b23 JH |
342 | } |
343 | else | |
6a87d634 | 344 | for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb) |
5c856b23 | 345 | { |
bc35512f | 346 | if (!bb->rbi->visited) |
5c856b23 | 347 | { |
bc35512f JH |
348 | end->rbi->next = bb; |
349 | bb->rbi->visited = 1; | |
5c856b23 JH |
350 | break; |
351 | } | |
352 | } | |
353 | } | |
354 | } | |
355 | ||
356 | /* Main entry point to this file. */ | |
357 | ||
358 | void | |
46c5ad27 | 359 | tracer (void) |
5c856b23 JH |
360 | { |
361 | if (n_basic_blocks <= 1) | |
362 | return; | |
bc35512f | 363 | cfg_layout_initialize (); |
5c856b23 JH |
364 | mark_dfs_back_edges (); |
365 | if (rtl_dump_file) | |
366 | dump_flow_info (rtl_dump_file); | |
367 | tail_duplicate (); | |
368 | layout_superblocks (); | |
369 | if (rtl_dump_file) | |
370 | dump_flow_info (rtl_dump_file); | |
371 | cfg_layout_finalize (); | |
372 | /* Merge basic blocks in duplicated traces. */ | |
373 | cleanup_cfg (CLEANUP_EXPENSIVE); | |
374 | } |