]>
Commit | Line | Data |
---|---|---|
86144b75 | 1 | /* Calculate branch probabilities, and basic block execution counts. |
9311a396 | 2 | Copyright (C) 1990, 91-94, 96-99, 2000 Free Software Foundation, Inc. |
86144b75 DE |
3 | Contributed by James E. Wilson, UC Berkeley/Cygnus Support; |
4 | based on some ideas from Dain Samples of UC Berkeley. | |
5 | Further mangling by Bob Manson, Cygnus Support. | |
6 | ||
7 | This file is part of GNU CC. | |
8 | ||
9 | GNU CC is free software; you can redistribute it and/or modify | |
10 | it under the terms of the GNU General Public License as published by | |
11 | the Free Software Foundation; either version 2, or (at your option) | |
12 | any later version. | |
13 | ||
14 | GNU CC is distributed in the hope that it will be useful, | |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | GNU General Public License for more details. | |
18 | ||
19 | You should have received a copy of the GNU General Public License | |
20 | along with GNU CC; see the file COPYING. If not, write to | |
5f38fdda JL |
21 | the Free Software Foundation, 59 Temple Place - Suite 330, |
22 | Boston, MA 02111-1307, USA. */ | |
86144b75 DE |
23 | |
24 | /* ??? Really should not put insns inside of LIBCALL sequences, when putting | |
25 | insns after a call, should look for the insn setting the retval, and | |
26 | insert the insns after that one. */ | |
27 | ||
28 | /* ??? Register allocation should use basic block execution counts to | |
29 | give preference to the most commonly executed blocks. */ | |
30 | ||
31 | /* ??? The .da files are not safe. Changing the program after creating .da | |
32 | files or using different options when compiling with -fbranch-probabilities | |
33 | can result the arc data not matching the program. Maybe add instrumented | |
34 | arc count to .bbg file? Maybe check whether PFG matches the .bbg file? */ | |
35 | ||
36 | /* ??? Should calculate branch probabilities before instrumenting code, since | |
37 | then we can use arc counts to help decide which arcs to instrument. */ | |
38 | ||
39 | /* ??? Rearrange code so that the most frequently executed arcs become from | |
40 | one block to the next block (i.e. a fall through), move seldom executed | |
41 | code outside of loops even at the expense of adding a few branches to | |
42 | achieve this, see Dain Sample's UC Berkeley thesis. */ | |
43 | ||
44 | #include "config.h" | |
670ee920 | 45 | #include "system.h" |
86144b75 | 46 | #include "rtl.h" |
49ad7cfa | 47 | #include "tree.h" |
86144b75 DE |
48 | #include "flags.h" |
49 | #include "insn-flags.h" | |
50 | #include "insn-config.h" | |
51 | #include "output.h" | |
e9a25f70 | 52 | #include "regs.h" |
49ad7cfa | 53 | #include "function.h" |
86144b75 | 54 | #include "gcov-io.h" |
10f0ad3d | 55 | #include "toplev.h" |
d7db6646 | 56 | #include "ggc.h" |
86144b75 | 57 | |
86144b75 DE |
58 | /* One of these is dynamically created whenever we identify an arc in the |
59 | function. */ | |
60 | ||
61 | struct adj_list | |
62 | { | |
63 | int source; | |
64 | int target; | |
65 | int arc_count; | |
66 | unsigned int count_valid : 1; | |
67 | unsigned int on_tree : 1; | |
68 | unsigned int fake : 1; | |
69 | unsigned int fall_through : 1; | |
70 | rtx branch_insn; | |
71 | struct adj_list *pred_next; | |
72 | struct adj_list *succ_next; | |
73 | }; | |
74 | ||
/* Accessors for the fields of an adj_list arc.  The argument is
   parenthesized so the macros expand correctly for any pointer-valued
   expression (e.g. `ARC_TARGET (p + 1)'), not just a bare identifier.  */

#define ARC_TARGET(ARCPTR) ((ARCPTR)->target)
#define ARC_SOURCE(ARCPTR) ((ARCPTR)->source)
#define ARC_COUNT(ARCPTR) ((ARCPTR)->arc_count)
78 | ||
/* Count the number of basic blocks, and create an array of these structures,
   one for each bb in the function.  */

struct bb_info
{
  struct adj_list *succ;	/* List of outgoing arcs (chained through
				   succ_next).  */
  struct adj_list *pred;	/* List of incoming arcs (chained through
				   pred_next).  */
  int succ_count;		/* Number of outgoing arcs whose count is
				   still unknown; decremented as counts
				   become valid.  */
  int pred_count;		/* Likewise for incoming arcs.  */
  int exec_count;		/* Number of times this block executed.  */
  unsigned int count_valid : 1;	/* Nonzero once exec_count is known.  */
  unsigned int on_tree : 1;	/* Block has been placed on the spanning
				   tree.  */
  rtx first_insn;		/* First insn of the block; may be null or
				   a non-insn for the fake entry/exit
				   blocks.  */
};
93 | ||
/* Indexed by label number, gives the basic block number containing that
   label.  */

static int *label_to_bb;

/* Number of valid entries in the label_to_bb array.  */

static int label_to_bb_size;

/* Indexed by block index, holds the basic block graph.  */

static struct bb_info *bb_graph;

/* Name and file pointer of the output file for the basic block graph.  */

static char *bbg_file_name;
static FILE *bbg_file;

/* Name and file pointer of the input file for the arc count data.  */

static char *da_file_name;
static FILE *da_file;

/* Pointer of the output file for the basic block/line number map.  */
static FILE *bb_file;

/* Last source file name written to bb_file.  Heap-allocated; freed and
   replaced whenever a line note from a different source file is seen.  */

static char *last_bb_file_name;

/* Indicates whether the next line number note should be output to
   bb_file or not.  Used to eliminate a redundant note after an
   expanded inline function call.  */

static int ignore_next_note;

/* Used by final, for allocating the proper amount of storage for the
   instrumented arc execution counts.  Non-static: read outside this file.  */

int count_instrumented_arcs;

/* Number of executions for the return label.  Non-static: read outside
   this file.  */

int return_label_execution_count;

/* Collect statistics on the performance of this pass for the entire source
   file.  These accumulate across every function compiled, one call of
   branch_prob per function.  */

static int total_num_blocks;
static int total_num_arcs;
static int total_num_arcs_instrumented;
static int total_num_blocks_created;
static int total_num_passes;
static int total_num_times_called;
/* 20-bucket histogram of branch probabilities, in 5% steps.  */
static int total_hist_br_prob[20];
static int total_num_never_executed;
static int total_num_branches;

/* Forward declarations.  */
static void init_arc PARAMS ((struct adj_list *, int, int, rtx));
static void find_spanning_tree PARAMS ((int));
static void expand_spanning_tree PARAMS ((int));
static void fill_spanning_tree PARAMS ((int));
static void init_arc_profiler PARAMS ((void));
static void output_arc_profiler PARAMS ((int, rtx));
static void instrument_arcs PARAMS ((rtx, int, FILE *));
static void output_gcov_string PARAMS ((const char *, long));
static int tablejump_entry_p PARAMS ((rtx, rtx));
static void compute_branch_probabilities PARAMS ((int, FILE *));

/* Fallback width for `long' when the target does not define it.  */
#ifndef LONG_TYPE_SIZE
#define LONG_TYPE_SIZE BITS_PER_WORD
#endif

/* If non-zero, we need to output a constructor to set up the
   per-object-file data.  */
static int need_func_profiler = 0;
172 | \f | |
/* Add arc instrumentation code to the entire insn chain.

   F is the first insn of the chain.
   NUM_BLOCKS is the number of basic blocks found in F.
   DUMP_FILE, if nonzero, is an rtl dump file we can write to.

   Every arc not on the spanning tree gets a counter bump emitted via
   output_arc_profiler; arcs that are the sole exit of their source or
   sole entrance of their target are instrumented in place, otherwise a
   new basic block is synthesized to hold the counting code.  */

static void
instrument_arcs (f, num_blocks, dump_file)
     rtx f;
     int num_blocks;
     FILE *dump_file;
{
  register int i;
  register struct adj_list *arcptr, *backptr;
  int num_arcs = 0;		/* New basic blocks created here.  */
  int num_instr_arcs = 0;	/* Counters emitted by this call.  */
  rtx insn;

  /* Instrument the program start.  */
  /* Handle block 0 specially, since it will always be instrumented,
     but it doesn't have a valid first_insn or branch_insn.  We must
     put the instructions before the NOTE_INSN_FUNCTION_BEG note, so
     that they don't clobber any of the parameters of the current
     function.  */
  for (insn = f; insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == NOTE
	&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_FUNCTION_BEG)
      break;
  insn = PREV_INSN (insn);
  need_func_profiler = 1;
  output_arc_profiler (total_num_arcs_instrumented + num_instr_arcs++, insn);

  /* Block 0 was handled above; walk the remaining blocks and instrument
     every arc that was left off the spanning tree.  */
  for (i = 1; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree)
	{
	  if (dump_file)
	    fprintf (dump_file, "Arc %d to %d instrumented\n", i,
		     ARC_TARGET (arcptr));

	  /* Check to see if this arc is the only exit from its source block,
	     or the only entrance to its target block.  In either case,
	     we don't need to create a new block to instrument the arc.  */

	  if (bb_graph[i].succ == arcptr && arcptr->succ_next == 0)
	    {
	      /* Instrument the source block.  */
	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   PREV_INSN (bb_graph[i].first_insn));
	    }
	  else if (arcptr == bb_graph[ARC_TARGET (arcptr)].pred
		   && arcptr->pred_next == 0)
	    {
	      /* Instrument the target block.  */
	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   PREV_INSN (bb_graph[ARC_TARGET (arcptr)].first_insn));
	    }
	  else if (arcptr->fall_through)
	    {
	      /* This is a fall-through; put the instrumentation code after
		 the branch that ends this block.  Find the sibling arc
		 that carries the branch insn.  */

	      for (backptr = bb_graph[i].succ; backptr;
		   backptr = backptr->succ_next)
		if (backptr != arcptr)
		  break;

	      output_arc_profiler (total_num_arcs_instrumented
				   + num_instr_arcs++,
				   backptr->branch_insn);
	    }
	  else
	    {
	      /* Must emit a new basic block to hold the arc counting code.  */
	      enum rtx_code code = GET_CODE (PATTERN (arcptr->branch_insn));

	      if (code == SET)
		{
		  /* Create the new basic block right after the branch.
		     Invert the branch so that it jumps past the end of the new
		     block.  The new block will consist of the instrumentation
		     code, and a jump to the target of this arc.  */
		  int this_is_simplejump = simplejump_p (arcptr->branch_insn);
		  rtx new_label = gen_label_rtx ();
		  rtx old_label, set_src;
		  rtx after = arcptr->branch_insn;

		  /* Simplejumps can't reach here.  */
		  /* NOTE(review): because of this abort, the two
		     `if (this_is_simplejump)' branches further down are
		     dead code — presumably left over from an earlier
		     version; confirm before removing.  */
		  if (this_is_simplejump)
		    abort ();

		  /* We can't use JUMP_LABEL, because it won't be set if we
		     are compiling without optimization.  */

		  set_src = SET_SRC (single_set (arcptr->branch_insn));
		  if (GET_CODE (set_src) == LABEL_REF)
		    old_label = set_src;
		  else if (GET_CODE (set_src) != IF_THEN_ELSE)
		    abort ();
		  else if (XEXP (set_src, 1) == pc_rtx)
		    /* Branch taken in the "else" arm.  */
		    old_label = XEXP (XEXP (set_src, 2), 0);
		  else
		    old_label = XEXP (XEXP (set_src, 1), 0);

		  /* Set the JUMP_LABEL so that redirect_jump will work.  */
		  JUMP_LABEL (arcptr->branch_insn) = old_label;

		  /* Add a use for OLD_LABEL that will be needed when we emit
		     the JUMP_INSN below.  If we don't do this here,
		     `invert_jump' might delete it for us.  We must add two
		     when not optimizing, because the NUSES is zero now,
		     but must be at least two to prevent the label from being
		     deleted.  */
		  LABEL_NUSES (old_label) += 2;

		  /* Emit the insns for the new block in reverse order,
		     since that is most convenient.  */

		  if (this_is_simplejump)
		    {
		      after = NEXT_INSN (arcptr->branch_insn);
		      if (! redirect_jump (arcptr->branch_insn, new_label))
			/* Don't know what to do if this branch won't
			   redirect.  */
			abort ();
		    }
		  else
		    {
		      if (! invert_jump (arcptr->branch_insn, new_label))
			/* Don't know what to do if this branch won't invert.  */
			abort ();

		      emit_label_after (new_label, after);
		      LABEL_NUSES (new_label)++;
		    }
		  emit_barrier_after (after);
		  emit_jump_insn_after (gen_jump (old_label), after);
		  JUMP_LABEL (NEXT_INSN (after)) = old_label;

		  /* Instrument the source arc.  */
		  output_arc_profiler (total_num_arcs_instrumented
				       + num_instr_arcs++,
				       after);
		  if (this_is_simplejump)
		    {
		      emit_label_after (new_label, after);
		      LABEL_NUSES (new_label)++;
		    }
		}
	      else if (code == ADDR_VEC || code == ADDR_DIFF_VEC)
		{
		  /* A table jump.  Create a new basic block immediately
		     after the table, by emitting a barrier, a label, a
		     counting note, and a jump to the old label.  Put the
		     new label in the table.  */

		  rtx new_label = gen_label_rtx ();
		  rtx old_lref, new_lref;
		  int index;

		  /* Must determine the old_label reference, do this
		     by counting the arcs after this one, which will
		     give the index of our label in the table.  */

		  index = 0;
		  for (backptr = arcptr->succ_next; backptr;
		       backptr = backptr->succ_next)
		    index++;

		  /* ADDR_DIFF_VEC keeps its labels in operand 1,
		     ADDR_VEC in operand 0.  */
		  old_lref = XVECEXP (PATTERN (arcptr->branch_insn),
				      (code == ADDR_DIFF_VEC), index);

		  /* Emit the insns for the new block in reverse order,
		     since that is most convenient.  */
		  emit_jump_insn_after (gen_jump (XEXP (old_lref, 0)),
					arcptr->branch_insn);
		  JUMP_LABEL (NEXT_INSN (arcptr->branch_insn))
		    = XEXP (old_lref, 0);

		  /* Instrument the source arc.  */
		  output_arc_profiler (total_num_arcs_instrumented
				       + num_instr_arcs++,
				       arcptr->branch_insn);

		  emit_label_after (new_label, arcptr->branch_insn);
		  LABEL_NUSES (NEXT_INSN (arcptr->branch_insn))++;
		  emit_barrier_after (arcptr->branch_insn);

		  /* Fix up the table jump.  */
		  new_lref = gen_rtx_LABEL_REF (Pmode, new_label);
		  XVECEXP (PATTERN (arcptr->branch_insn),
			   (code == ADDR_DIFF_VEC), index) = new_lref;
		}
	      else
		abort ();

	      num_arcs += 1;
	      if (dump_file)
		fprintf (dump_file,
			 "Arc %d to %d needed new basic block\n", i,
			 ARC_TARGET (arcptr));
	    }
	}

  /* Update the per-file statistics and publish the counter total for
     use by final.  */
  total_num_arcs_instrumented += num_instr_arcs;
  count_instrumented_arcs = total_num_arcs_instrumented;

  total_num_blocks_created += num_arcs;
  if (dump_file)
    {
      fprintf (dump_file, "%d arcs instrumented\n", num_instr_arcs);
      fprintf (dump_file, "%d extra basic blocks created\n", num_arcs);
    }
}
389 | ||
390 | /* Output STRING to bb_file, surrounded by DELIMITER. */ | |
391 | ||
392 | static void | |
393 | output_gcov_string (string, delimiter) | |
95d75019 | 394 | const char *string; |
86144b75 DE |
395 | long delimiter; |
396 | { | |
397 | long temp; | |
398 | ||
399 | /* Write a delimiter to indicate that a file name follows. */ | |
400 | __write_long (delimiter, bb_file, 4); | |
401 | ||
402 | /* Write the string. */ | |
403 | temp = strlen (string) + 1; | |
404 | fwrite (string, temp, 1, bb_file); | |
405 | ||
406 | /* Append a few zeros, to align the output to a 4 byte boundary. */ | |
407 | temp = temp & 0x3; | |
408 | if (temp) | |
409 | { | |
410 | char c[4]; | |
411 | ||
412 | c[0] = c[1] = c[2] = c[3] = 0; | |
413 | fwrite (c, sizeof (char), 4 - temp, bb_file); | |
414 | } | |
415 | ||
416 | /* Store another delimiter in the .bb file, just to make it easy to find the | |
417 | end of the file name. */ | |
418 | __write_long (delimiter, bb_file, 4); | |
419 | } | |
420 | \f | |
8ade1519 JW |
421 | /* Return TRUE if this insn must be a tablejump entry insn. This works for |
422 | the MIPS port, but may give false negatives for some targets. */ | |
423 | ||
95d75019 | 424 | static int |
8ade1519 JW |
425 | tablejump_entry_p (insn, label) |
426 | rtx insn, label; | |
427 | { | |
428 | rtx next = next_active_insn (insn); | |
429 | enum rtx_code code = GET_CODE (PATTERN (next)); | |
430 | ||
431 | if (code != ADDR_DIFF_VEC && code != ADDR_VEC) | |
432 | return 0; | |
433 | ||
434 | if (PREV_INSN (next) == XEXP (label, 0)) | |
435 | return 1; | |
436 | ||
437 | return 0; | |
438 | } | |
439 | ||
/* Compute the branch probabilities for the various branches.
   Annotate them accordingly.

   NUM_BLOCKS is the number of blocks in bb_graph; DUMP_FILE, if nonzero,
   receives diagnostics.  Reads instrumented arc counts from da_file (when
   open), solves the remaining counts by flow conservation, then attaches
   REG_BR_PROB and REG_EXEC_COUNT notes to the rtl.  */

static void
compute_branch_probabilities (num_blocks, dump_file)
     int num_blocks;
     FILE *dump_file;
{
  int i;
  int bad_counts = 0;		/* Set if any probability was out of range.  */
  int num_arcs;
  int changes;			/* Did the last solver pass make progress?  */
  int passes;
  int prob;
  int total;
  int num_branches;
  int num_never_executed;
  int hist_br_prob[20];		/* Probability histogram, 5% buckets.  */
  struct adj_list *arcptr;

  /* For each arc not on the spanning tree, set its execution count from
     the .da file.  */

  /* The first count in the .da file is the number of times that the function
     was entered.  This is the exec_count for block zero.  */

  num_arcs = 0;
  for (i = 0; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree)
	{
	  num_arcs++;
	  if (da_file)
	    {
	      long value;
	      __read_long (&value, da_file, 8);
	      ARC_COUNT (arcptr) = value;
	    }
	  else
	    /* No data file: treat every instrumented arc as unexecuted.  */
	    ARC_COUNT (arcptr) = 0;
	  arcptr->count_valid = 1;
	  bb_graph[i].succ_count--;
	  bb_graph[ARC_TARGET (arcptr)].pred_count--;
	}

  if (dump_file)
    fprintf (dump_file, "%d arc counts read\n", num_arcs);

  /* For every block in the file,
     - if every exit/entrance arc has a known count, then set the block count
     - if the block count is known, and every exit/entrance arc but one has
       a known execution count, then set the count of the remaining arc

     As arc counts are set, decrement the succ/pred count, but don't delete
     the arc, that way we can easily tell when all arcs are known, or only
     one arc is unknown.  */

  /* The order that the basic blocks are iterated through is important.
     Since the code that finds spanning trees starts with block 0, low numbered
     arcs are put on the spanning tree in preference to high numbered arcs.
     Hence, most instrumented arcs are at the end.  Graph solving works much
     faster if we propagate numbers from the end to the start.

     This takes an average of slightly more than 3 passes.  */

  changes = 1;
  passes = 0;
  while (changes)
    {
      passes++;
      changes = 0;

      for (i = num_blocks - 1; i >= 0; i--)
	{
	  struct bb_info *binfo = &bb_graph[i];
	  if (! binfo->count_valid)
	    {
	      /* All successor (resp. predecessor) counts known: the
		 block count is their sum.  */
	      if (binfo->succ_count == 0)
		{
		  total = 0;
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    total += ARC_COUNT (arcptr);
		  binfo->exec_count = total;
		  binfo->count_valid = 1;
		  changes = 1;
		}
	      else if (binfo->pred_count == 0)
		{
		  total = 0;
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    total += ARC_COUNT (arcptr);
		  binfo->exec_count = total;
		  binfo->count_valid = 1;
		  changes = 1;
		}
	    }
	  if (binfo->count_valid)
	    {
	      if (binfo->succ_count == 1)
		{
		  total = 0;
		  /* One of the counts will be invalid, but it is zero,
		     so adding it in also doesn't hurt.  */
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    total += ARC_COUNT (arcptr);
		  /* Calculate count for remaining arc by conservation.  */
		  total = binfo->exec_count - total;
		  /* Search for the invalid arc, and set its count.  */
		  for (arcptr = binfo->succ; arcptr;
		       arcptr = arcptr->succ_next)
		    if (! arcptr->count_valid)
		      break;
		  if (! arcptr)
		    abort ();
		  arcptr->count_valid = 1;
		  ARC_COUNT (arcptr) = total;
		  binfo->succ_count--;

		  bb_graph[ARC_TARGET (arcptr)].pred_count--;
		  changes = 1;
		}
	      if (binfo->pred_count == 1)
		{
		  total = 0;
		  /* One of the counts will be invalid, but it is zero,
		     so adding it in also doesn't hurt.  */
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    total += ARC_COUNT (arcptr);
		  /* Calculate count for remaining arc by conservation.  */
		  total = binfo->exec_count - total;
		  /* Search for the invalid arc, and set its count.  */
		  for (arcptr = binfo->pred; arcptr;
		       arcptr = arcptr->pred_next)
		    if (! arcptr->count_valid)
		      break;
		  if (! arcptr)
		    abort ();
		  arcptr->count_valid = 1;
		  ARC_COUNT (arcptr) = total;
		  binfo->pred_count--;

		  bb_graph[ARC_SOURCE (arcptr)].succ_count--;
		  changes = 1;
		}
	    }
	}
    }

  total_num_passes += passes;
  if (dump_file)
    fprintf (dump_file, "Graph solving took %d passes.\n\n", passes);

  /* If the graph has been correctly solved, every block will have a
     succ and pred count of zero.  */
  for (i = 0; i < num_blocks; i++)
    {
      struct bb_info *binfo = &bb_graph[i];
      if (binfo->succ_count || binfo->pred_count)
	abort ();
    }

  /* For every arc, calculate its branch probability and add a reg_note
     to the branch insn to indicate this.  */

  for (i = 0; i < 20; i++)
    hist_br_prob[i] = 0;
  num_never_executed = 0;
  num_branches = 0;

  for (i = 0; i < num_blocks; i++)
    {
      struct bb_info *binfo = &bb_graph[i];

      total = binfo->exec_count;
      for (arcptr = binfo->succ; arcptr; arcptr = arcptr->succ_next)
	{
	  if (arcptr->branch_insn)
	    {
	      /* This calculates the branch probability as an integer between
		 0 and REG_BR_PROB_BASE, properly rounded to the nearest
		 integer.  Perform the arithmetic in double to avoid
		 overflowing the range of ints.  */

	      if (total == 0)
		/* Block never executed: probability is unknown.  */
		prob = -1;
	      else
		{
		  rtx pat = PATTERN (arcptr->branch_insn);

		  prob = (((double)ARC_COUNT (arcptr) * REG_BR_PROB_BASE)
			  + (total >> 1)) / total;
		  if (prob < 0 || prob > REG_BR_PROB_BASE)
		    {
		      if (dump_file)
			fprintf (dump_file, "bad count: prob for %d-%d thought to be %d (forcibly normalized)\n",
				 ARC_SOURCE (arcptr), ARC_TARGET (arcptr),
				 prob);

		      bad_counts = 1;
		      prob = REG_BR_PROB_BASE / 2;
		    }

		  /* Match up probability with JUMP pattern.  */

		  if (GET_CODE (pat) == SET
		      && GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
		    {
		      if (ARC_TARGET (arcptr) == ARC_SOURCE (arcptr) + 1)
			{
			  /* A fall through arc should never have a
			     branch insn.  */
			  abort ();
			}
		      else
			{
			  /* This is the arc for the taken branch.  */
			  if (GET_CODE (XEXP (SET_SRC (pat), 2)) != PC)
			    prob = REG_BR_PROB_BASE - prob;
			}
		    }
		}

	      if (prob == -1)
		num_never_executed++;
	      else
		{
		  /* Bucket the probability into the 20-entry histogram.  */
		  int index = prob * 20 / REG_BR_PROB_BASE;
		  if (index == 20)
		    index = 19;
		  hist_br_prob[index]++;
		}
	      num_branches++;

	      REG_NOTES (arcptr->branch_insn)
		= gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
				     REG_NOTES (arcptr->branch_insn));
	    }
	}

      /* Add a REG_EXEC_COUNT note to the first instruction of this block.  */
      if (! binfo->first_insn
	  || GET_RTX_CLASS (GET_CODE (binfo->first_insn)) != 'i')
	{
	  /* Block 0 is a fake block representing function entry, and does
	     not have a real first insn.  The second last block might not
	     begin with a real insn.  */
	  if (i == num_blocks - 1)
	    return_label_execution_count = total;
	  else if (i != 0 && i != num_blocks - 2)
	    abort ();
	}
      else
	{
	  REG_NOTES (binfo->first_insn)
	    = gen_rtx_EXPR_LIST (REG_EXEC_COUNT, GEN_INT (total),
				 REG_NOTES (binfo->first_insn));
	  if (i == num_blocks - 1)
	    return_label_execution_count = total;
	}
    }

  /* This should never happen.  */
  if (bad_counts)
    warning ("Arc profiling: some arc counts were bad.");

  if (dump_file)
    {
      fprintf (dump_file, "%d branches\n", num_branches);
      fprintf (dump_file, "%d branches never executed\n",
	       num_never_executed);
      if (num_branches)
	/* Pair each bucket with its mirror so each output line covers
	   a probability range and its complement.  */
	for (i = 0; i < 10; i++)
	  fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
		   (hist_br_prob[i]+hist_br_prob[19-i])*100/num_branches,
		   5*i, 5*i+5);

      total_num_branches += num_branches;
      total_num_never_executed += num_never_executed;
      for (i = 0; i < 20; i++)
	total_hist_br_prob[i] += hist_br_prob[i];
    }
}
726 | ||
86144b75 DE |
727 | /* Instrument and/or analyze program behavior based on program flow graph. |
728 | In either case, this function builds a flow graph for the function being | |
729 | compiled. The flow graph is stored in BB_GRAPH. | |
730 | ||
731 | When FLAG_PROFILE_ARCS is nonzero, this function instruments the arcs in | |
732 | the flow graph that are needed to reconstruct the dynamic behavior of the | |
733 | flow graph. | |
734 | ||
956d6950 | 735 | When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary |
86144b75 DE |
736 | information from a data file containing arc count information from previous |
737 | executions of the function being compiled. In this case, the flow graph is | |
738 | annotated with actual execution counts, which are later propagated into the | |
739 | rtl for optimization purposes. | |
740 | ||
741 | Main entry point of this file. */ | |
742 | ||
743 | void | |
744 | branch_prob (f, dump_file) | |
745 | rtx f; | |
746 | FILE *dump_file; | |
747 | { | |
748 | int i, num_blocks; | |
86144b75 | 749 | struct adj_list *arcptr; |
4da896b2 | 750 | int num_arcs; |
86144b75 DE |
751 | |
752 | /* start of a function. */ | |
753 | if (flag_test_coverage) | |
754 | output_gcov_string (current_function_name, (long) -2); | |
755 | ||
756 | /* Execute this only if doing arc profiling or branch probabilities. */ | |
757 | if (! profile_arc_flag && ! flag_branch_probabilities | |
758 | && ! flag_test_coverage) | |
759 | abort (); | |
760 | ||
761 | total_num_times_called++; | |
762 | ||
763 | /* Create an array label_to_bb of ints of size max_label_num. */ | |
764 | label_to_bb_size = max_label_num (); | |
765 | label_to_bb = (int *) oballoc (label_to_bb_size * sizeof (int)); | |
766 | bzero ((char *) label_to_bb, label_to_bb_size * sizeof (int)); | |
767 | ||
768 | /* Scan the insns in the function, count the number of basic blocks | |
769 | present. When a code label is passed, set label_to_bb[label] = bb | |
770 | number. */ | |
771 | ||
772 | /* The first block found will be block 1, so that function entry can be | |
773 | block 0. */ | |
774 | ||
775 | { | |
776 | register RTX_CODE prev_code = JUMP_INSN; | |
777 | register RTX_CODE code; | |
778 | register rtx insn; | |
779 | register int i; | |
780 | int block_separator_emitted = 0; | |
781 | ||
782 | ignore_next_note = 0; | |
783 | ||
784 | for (insn = NEXT_INSN (f), i = 0; insn; insn = NEXT_INSN (insn)) | |
785 | { | |
786 | code = GET_CODE (insn); | |
787 | ||
788 | if (code == BARRIER) | |
789 | ; | |
790 | else if (code == CODE_LABEL) | |
791 | /* This label is part of the next block, but we can't increment | |
792 | block number yet since there might be multiple labels. */ | |
793 | label_to_bb[CODE_LABEL_NUMBER (insn)] = i + 1; | |
794 | /* We make NOTE_INSN_SETJMP notes into a block of their own, so that | |
795 | they can be the target of the fake arc for the setjmp call. | |
796 | This avoids creating cycles of fake arcs, which would happen if | |
797 | the block after the setjmp call contained a call insn. */ | |
798 | else if ((prev_code == JUMP_INSN || prev_code == CALL_INSN | |
799 | || prev_code == CODE_LABEL || prev_code == BARRIER) | |
800 | && (GET_RTX_CLASS (code) == 'i' | |
db3cf6fb MS |
801 | || (code == NOTE |
802 | && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP))) | |
86144b75 DE |
803 | { |
804 | i += 1; | |
805 | ||
806 | /* Emit the block separator if it hasn't already been emitted. */ | |
807 | if (flag_test_coverage && ! block_separator_emitted) | |
808 | { | |
809 | /* Output a zero to the .bb file to indicate that a new | |
810 | block list is starting. */ | |
811 | __write_long (0, bb_file, 4); | |
812 | } | |
813 | block_separator_emitted = 0; | |
814 | } | |
815 | /* If flag_test_coverage is true, then we must add an entry to the | |
816 | .bb file for every note. */ | |
817 | else if (code == NOTE && flag_test_coverage) | |
818 | { | |
819 | /* Must ignore the line number notes that immediately follow the | |
820 | end of an inline function to avoid counting it twice. There | |
821 | is a note before the call, and one after the call. */ | |
b3b42a4d | 822 | if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_REPEATED_LINE_NUMBER) |
86144b75 DE |
823 | ignore_next_note = 1; |
824 | else if (NOTE_LINE_NUMBER (insn) > 0) | |
825 | { | |
826 | if (ignore_next_note) | |
827 | ignore_next_note = 0; | |
828 | else | |
829 | { | |
830 | /* Emit a block separator here to ensure that a NOTE | |
831 | immediately following a JUMP_INSN or CALL_INSN will end | |
832 | up in the right basic block list. */ | |
833 | if ((prev_code == JUMP_INSN || prev_code == CALL_INSN | |
834 | || prev_code == CODE_LABEL || prev_code == BARRIER) | |
835 | && ! block_separator_emitted) | |
836 | { | |
837 | /* Output a zero to the .bb file to indicate that | |
838 | a new block list is starting. */ | |
839 | __write_long (0, bb_file, 4); | |
840 | ||
841 | block_separator_emitted = 1; | |
842 | } | |
843 | ||
844 | /* If this is a new source file, then output the file's | |
845 | name to the .bb file. */ | |
846 | if (! last_bb_file_name | |
847 | || strcmp (NOTE_SOURCE_FILE (insn), | |
848 | last_bb_file_name)) | |
849 | { | |
850 | if (last_bb_file_name) | |
851 | free (last_bb_file_name); | |
ad85216e | 852 | last_bb_file_name = xstrdup (NOTE_SOURCE_FILE (insn)); |
86144b75 DE |
853 | output_gcov_string (NOTE_SOURCE_FILE (insn), (long)-1); |
854 | } | |
855 | ||
856 | /* Output the line number to the .bb file. Must be done | |
857 | after the output_bb_profile_data() call, and after the | |
858 | file name is written, to ensure that it is correctly | |
859 | handled by gcov. */ | |
860 | __write_long (NOTE_LINE_NUMBER (insn), bb_file, 4); | |
861 | } | |
862 | } | |
863 | } | |
864 | ||
865 | if (code != NOTE) | |
866 | prev_code = code; | |
867 | else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP) | |
868 | prev_code = CALL_INSN; | |
869 | } | |
870 | ||
871 | /* Allocate last `normal' entry for bb_graph. */ | |
872 | ||
873 | /* The last insn was a jump, call, or label. In that case we have | |
874 | a block at the end of the function with no insns. */ | |
875 | if (prev_code == JUMP_INSN || prev_code == CALL_INSN | |
876 | || prev_code == CODE_LABEL || prev_code == BARRIER) | |
877 | { | |
878 | i++; | |
879 | ||
880 | /* Emit the block separator if it hasn't already been emitted. */ | |
881 | if (flag_test_coverage && ! block_separator_emitted) | |
882 | { | |
883 | /* Output a zero to the .bb file to indicate that a new | |
884 | block list is starting. */ | |
885 | __write_long (0, bb_file, 4); | |
886 | } | |
887 | } | |
888 | ||
889 | /* Create another block to stand for EXIT, and make all return insns, and | |
890 | the last basic block point here. Add one more to account for block | |
891 | zero. */ | |
892 | num_blocks = i + 2; | |
893 | } | |
894 | ||
895 | total_num_blocks += num_blocks; | |
896 | if (dump_file) | |
897 | fprintf (dump_file, "%d basic blocks\n", num_blocks); | |
898 | ||
899 | /* If we are only doing test coverage here, then return now. */ | |
900 | if (! profile_arc_flag && ! flag_branch_probabilities) | |
901 | return; | |
902 | ||
903 | /* Create and initialize the arrays that will hold bb_graph | |
904 | and execution count info. */ | |
905 | ||
4da896b2 MM |
906 | bb_graph = (struct bb_info *) xcalloc (num_blocks, |
907 | sizeof (struct bb_info)); | |
86144b75 DE |
908 | |
909 | { | |
910 | /* Scan the insns again: | |
911 | - at the entry to each basic block, increment the predecessor count | |
912 | (and successor of previous block) if it is a fall through entry, | |
913 | create adj_list entries for this and the previous block | |
914 | - at each jump insn, increment predecessor/successor counts for | |
915 | target/source basic blocks, add this insn to pred/succ lists. | |
916 | ||
917 | This also cannot be broken out as a separate subroutine | |
918 | because it uses `alloca'. */ | |
919 | ||
920 | register RTX_CODE prev_code = JUMP_INSN; | |
921 | register RTX_CODE code; | |
922 | register rtx insn; | |
923 | register int i; | |
924 | int fall_through = 0; | |
925 | struct adj_list *arcptr; | |
e5e809f4 | 926 | int dest = 0; |
86144b75 DE |
927 | |
928 | /* Block 0 always falls through to block 1. */ | |
929 | num_arcs = 0; | |
930 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
931 | init_arc (arcptr, 0, 1, 0); | |
932 | arcptr->fall_through = 1; | |
933 | num_arcs++; | |
934 | ||
935 | /* Add a fake fall through arc from the last block to block 0, to make the | |
936 | graph complete. */ | |
937 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
938 | init_arc (arcptr, num_blocks - 1, 0, 0); | |
939 | arcptr->fake = 1; | |
940 | num_arcs++; | |
941 | ||
942 | /* Exit must be one node of the graph, and all exits from the function | |
943 | must point there. When see a return branch, must point the arc to the | |
944 | exit node. */ | |
945 | ||
946 | /* Must start scan with second insn in function as above. */ | |
947 | for (insn = NEXT_INSN (f), i = 0; insn; insn = NEXT_INSN (insn)) | |
948 | { | |
949 | code = GET_CODE (insn); | |
950 | ||
951 | if (code == BARRIER) | |
952 | fall_through = 0; | |
953 | else if (code == CODE_LABEL) | |
954 | ; | |
955 | /* We make NOTE_INSN_SETJMP notes into a block of their own, so that | |
956 | they can be the target of the fake arc for the setjmp call. | |
957 | This avoids creating cycles of fake arcs, which would happen if | |
958 | the block after the setjmp call ended with a call. */ | |
959 | else if ((prev_code == JUMP_INSN || prev_code == CALL_INSN | |
960 | || prev_code == CODE_LABEL || prev_code == BARRIER) | |
961 | && (GET_RTX_CLASS (code) == 'i' | |
db3cf6fb MS |
962 | || (code == NOTE |
963 | && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP))) | |
86144b75 DE |
964 | { |
965 | /* This is the first insn of the block. */ | |
966 | i += 1; | |
967 | if (fall_through) | |
968 | { | |
969 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
970 | init_arc (arcptr, i - 1, i, 0); | |
971 | arcptr->fall_through = 1; | |
972 | ||
973 | num_arcs++; | |
974 | } | |
975 | fall_through = 1; | |
976 | bb_graph[i].first_insn = insn; | |
977 | } | |
978 | else if (code == NOTE) | |
9870475c | 979 | {;} |
86144b75 DE |
980 | |
981 | if (code == CALL_INSN) | |
982 | { | |
983 | /* In the normal case, the call returns, and this is just like | |
984 | a branch fall through. */ | |
985 | fall_through = 1; | |
986 | ||
987 | /* Setjmp may return more times than called, so to make the graph | |
988 | solvable, add a fake arc from the function entrance to the | |
989 | next block. | |
990 | ||
991 | All other functions may return fewer times than called (if | |
992 | a descendent call longjmp or exit), so to make the graph | |
993 | solvable, add a fake arc to the function exit from the | |
994 | current block. | |
995 | ||
996 | Distinguish the cases by checking for a SETJUMP note. | |
997 | A call_insn can be the last ins of a function, so must check | |
998 | to see if next insn actually exists. */ | |
999 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
1000 | if (NEXT_INSN (insn) | |
1001 | && GET_CODE (NEXT_INSN (insn)) == NOTE | |
1002 | && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP) | |
1003 | init_arc (arcptr, 0, i+1, insn); | |
1004 | else | |
1005 | init_arc (arcptr, i, num_blocks-1, insn); | |
1006 | arcptr->fake = 1; | |
1007 | num_arcs++; | |
1008 | } | |
1009 | else if (code == JUMP_INSN) | |
1010 | { | |
1011 | rtx tem, pattern = PATTERN (insn); | |
1012 | rtx tablejump = 0; | |
1013 | ||
1014 | /* If running without optimization, then jump label won't be valid, | |
1015 | so we must search for the destination label in that case. | |
1016 | We have to handle tablejumps and returns specially anyways, so | |
1017 | we don't check the JUMP_LABEL at all here. */ | |
1018 | ||
8ade1519 JW |
1019 | /* ??? This code should be rewritten. We need a more elegant way |
1020 | to find the LABEL_REF. We need a more elegant way to | |
1021 | differentiate tablejump entries from computed gotos. | |
1022 | We should perhaps reuse code from flow to compute the CFG | |
1023 | instead of trying to compute it here. | |
1024 | ||
1025 | We can't use current_function_has_computed_jump, because that | |
1026 | is calculated later by flow. We can't use computed_jump_p, | |
1027 | because that returns true for tablejump entry insns for some | |
1028 | targets, e.g. HPPA and MIPS. */ | |
1029 | ||
86144b75 DE |
1030 | if (GET_CODE (pattern) == PARALLEL) |
1031 | { | |
8ade1519 JW |
1032 | /* This assumes that PARALLEL jumps with a USE are |
1033 | tablejump entry jumps. The same assumption can be found | |
1034 | in computed_jump_p. */ | |
86144b75 DE |
1035 | /* Make an arc from this jump to the label of the |
1036 | jump table. This will instrument the number of | |
1037 | times the switch statement is executed. */ | |
1038 | if (GET_CODE (XVECEXP (pattern, 0, 1)) == USE) | |
1039 | { | |
1040 | tem = XEXP (XVECEXP (pattern, 0, 1), 0); | |
1041 | if (GET_CODE (tem) != LABEL_REF) | |
1042 | abort (); | |
1043 | dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (tem, 0))]; | |
1044 | } | |
1045 | else if (GET_CODE (XVECEXP (pattern, 0, 0)) == SET | |
1046 | && SET_DEST (XVECEXP (pattern, 0, 0)) == pc_rtx) | |
1047 | { | |
1048 | tem = SET_SRC (XVECEXP (pattern, 0, 0)); | |
1049 | if (GET_CODE (tem) == PLUS | |
1050 | && GET_CODE (XEXP (tem, 1)) == LABEL_REF) | |
1051 | { | |
1052 | tem = XEXP (tem, 1); | |
1053 | dest = label_to_bb [CODE_LABEL_NUMBER (XEXP (tem, 0))]; | |
1054 | } | |
1055 | } | |
1056 | else | |
1057 | abort (); | |
1058 | } | |
1059 | else if (GET_CODE (pattern) == ADDR_VEC | |
1060 | || GET_CODE (pattern) == ADDR_DIFF_VEC) | |
1061 | tablejump = pattern; | |
1062 | else if (GET_CODE (pattern) == RETURN) | |
1063 | dest = num_blocks - 1; | |
a7ebd547 JW |
1064 | else if (GET_CODE (pattern) != SET) |
1065 | abort (); | |
86144b75 DE |
1066 | else if ((tem = SET_SRC (pattern)) |
1067 | && GET_CODE (tem) == LABEL_REF) | |
1068 | dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (tem, 0))]; | |
a7ebd547 JW |
1069 | /* Recognize HPPA table jump entry. This code is similar to |
1070 | the code above in the PARALLEL case. */ | |
1071 | else if (GET_CODE (tem) == PLUS | |
1072 | && GET_CODE (XEXP (tem, 0)) == MEM | |
1073 | && GET_CODE (XEXP (XEXP (tem, 0), 0)) == PLUS | |
1074 | && GET_CODE (XEXP (XEXP (XEXP (tem, 0), 0), 0)) == PC | |
8ade1519 JW |
1075 | && GET_CODE (XEXP (tem, 1)) == LABEL_REF |
1076 | && tablejump_entry_p (insn, XEXP (tem, 1))) | |
1077 | dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (XEXP (tem, 1), 0))]; | |
1078 | /* Recognize the MIPS table jump entry. */ | |
1079 | else if (GET_CODE (tem) == PLUS | |
1080 | && GET_CODE (XEXP (tem, 0)) == REG | |
1081 | && GET_CODE (XEXP (tem, 1)) == LABEL_REF | |
1082 | && tablejump_entry_p (insn, XEXP (tem, 1))) | |
a7ebd547 | 1083 | dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (XEXP (tem, 1), 0))]; |
86144b75 DE |
1084 | else |
1085 | { | |
1086 | rtx label_ref; | |
1087 | ||
11a932c0 JW |
1088 | /* Must be an IF_THEN_ELSE branch. If it isn't, assume it |
1089 | is a computed goto, which aren't supported yet. */ | |
86144b75 | 1090 | if (GET_CODE (tem) != IF_THEN_ELSE) |
11a932c0 | 1091 | fatal ("-fprofile-arcs does not support computed gotos"); |
86144b75 DE |
1092 | if (XEXP (tem, 1) != pc_rtx) |
1093 | label_ref = XEXP (tem, 1); | |
1094 | else | |
1095 | label_ref = XEXP (tem, 2); | |
1096 | dest = label_to_bb[CODE_LABEL_NUMBER (XEXP (label_ref, 0))]; | |
1097 | } | |
1098 | ||
1099 | if (tablejump) | |
1100 | { | |
1101 | int diff_vec_p = GET_CODE (tablejump) == ADDR_DIFF_VEC; | |
1102 | int len = XVECLEN (tablejump, diff_vec_p); | |
1103 | int k; | |
1104 | ||
1105 | for (k = 0; k < len; k++) | |
1106 | { | |
1107 | rtx tem = XEXP (XVECEXP (tablejump, diff_vec_p, k), 0); | |
1108 | dest = label_to_bb[CODE_LABEL_NUMBER (tem)]; | |
1109 | ||
1110 | arcptr = (struct adj_list *) alloca (sizeof(struct adj_list)); | |
1111 | init_arc (arcptr, i, dest, insn); | |
1112 | ||
1113 | num_arcs++; | |
1114 | } | |
1115 | } | |
1116 | else | |
1117 | { | |
1118 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
1119 | init_arc (arcptr, i, dest, insn); | |
1120 | ||
1121 | num_arcs++; | |
1122 | } | |
1123 | ||
1124 | /* Determine whether or not this jump will fall through. | |
1125 | Unconditional jumps and returns are not always followed by | |
1126 | barriers. */ | |
1127 | pattern = PATTERN (insn); | |
1128 | if (GET_CODE (pattern) == PARALLEL | |
1129 | || GET_CODE (pattern) == RETURN) | |
1130 | fall_through = 0; | |
1131 | else if (GET_CODE (pattern) == ADDR_VEC | |
1132 | || GET_CODE (pattern) == ADDR_DIFF_VEC) | |
1133 | /* These aren't actually jump insns, but they never fall | |
1134 | through, so... */ | |
1135 | fall_through = 0; | |
1136 | else | |
1137 | { | |
1138 | if (GET_CODE (pattern) != SET || SET_DEST (pattern) != pc_rtx) | |
1139 | abort (); | |
1140 | if (GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE) | |
1141 | fall_through = 0; | |
1142 | } | |
1143 | } | |
1144 | ||
1145 | if (code != NOTE) | |
1146 | prev_code = code; | |
1147 | else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP) | |
1686f1a2 R |
1148 | { |
1149 | /* Make a fake insn to tag our notes on. */ | |
1150 | bb_graph[i].first_insn = insn | |
38a448ca | 1151 | = emit_insn_after (gen_rtx_USE (VOIDmode, stack_pointer_rtx), |
1686f1a2 R |
1152 | insn); |
1153 | prev_code = CALL_INSN; | |
1154 | } | |
86144b75 DE |
1155 | } |
1156 | ||
1157 | /* If the code at the end of the function would give a new block, then | |
1158 | do the following. */ | |
1159 | ||
1160 | if (prev_code == JUMP_INSN || prev_code == CALL_INSN | |
1161 | || prev_code == CODE_LABEL || prev_code == BARRIER) | |
1162 | { | |
1163 | if (fall_through) | |
1164 | { | |
1165 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
1166 | init_arc (arcptr, i, i + 1, 0); | |
1167 | arcptr->fall_through = 1; | |
1168 | ||
1169 | num_arcs++; | |
1170 | } | |
4da896b2 MM |
1171 | |
1172 | /* This may not be a real insn, but that should not cause a problem. */ | |
1173 | bb_graph[i+1].first_insn = get_last_insn (); | |
1174 | } | |
86144b75 | 1175 | |
4da896b2 MM |
1176 | /* There is always a fake arc from the last block of the function |
1177 | to the function exit block. */ | |
1178 | arcptr = (struct adj_list *) alloca (sizeof (struct adj_list)); | |
1179 | init_arc (arcptr, num_blocks-2, num_blocks-1, 0); | |
1180 | arcptr->fake = 1; | |
1181 | num_arcs++; | |
1182 | } | |
86144b75 | 1183 | |
4da896b2 | 1184 | total_num_arcs += num_arcs; |
86144b75 | 1185 | if (dump_file) |
4da896b2 | 1186 | fprintf (dump_file, "%d arcs\n", num_arcs); |
86144b75 | 1187 | |
4da896b2 MM |
1188 | /* Create spanning tree from basic block graph, mark each arc that is |
1189 | on the spanning tree. */ | |
86144b75 | 1190 | |
4da896b2 MM |
1191 | /* To reduce the instrumentation cost, make two passes over the tree. |
1192 | First, put as many must-split (crowded and fake) arcs on the tree as | |
1193 | possible, then on the second pass fill in the rest of the tree. | |
1194 | Note that the spanning tree is considered undirected, so that as many | |
1195 | must-split arcs as possible can be put on it. | |
86144b75 | 1196 | |
4da896b2 MM |
1197 | Fallthrough arcs which are crowded should not be chosen on the first |
1198 | pass, since they do not require creating a new basic block. These | |
1199 | arcs will have fall_through set. */ | |
86144b75 | 1200 | |
4da896b2 MM |
1201 | find_spanning_tree (num_blocks); |
1202 | ||
1203 | /* Create a .bbg file from which gcov can reconstruct the basic block | |
1204 | graph. First output the number of basic blocks, and then for every | |
1205 | arc output the source and target basic block numbers. | |
1206 | NOTE: The format of this file must be compatible with gcov. */ | |
1207 | ||
1208 | if (flag_test_coverage) | |
86144b75 | 1209 | { |
4da896b2 | 1210 | int flag_bits; |
86144b75 | 1211 | |
4da896b2 MM |
1212 | __write_long (num_blocks, bbg_file, 4); |
1213 | __write_long (num_arcs, bbg_file, 4); | |
86144b75 | 1214 | |
4da896b2 MM |
1215 | for (i = 0; i < num_blocks; i++) |
1216 | { | |
1217 | long count = 0; | |
1218 | for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next) | |
1219 | count++; | |
1220 | __write_long (count, bbg_file, 4); | |
86144b75 | 1221 | |
4da896b2 MM |
1222 | for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next) |
1223 | { | |
1224 | flag_bits = 0; | |
1225 | if (arcptr->on_tree) | |
1226 | flag_bits |= 0x1; | |
1227 | if (arcptr->fake) | |
1228 | flag_bits |= 0x2; | |
1229 | if (arcptr->fall_through) | |
1230 | flag_bits |= 0x4; | |
86144b75 | 1231 | |
4da896b2 MM |
1232 | __write_long (ARC_TARGET (arcptr), bbg_file, 4); |
1233 | __write_long (flag_bits, bbg_file, 4); | |
86144b75 DE |
1234 | } |
1235 | } | |
1236 | ||
4da896b2 MM |
1237 | /* Emit a -1 to separate the list of all arcs from the list of |
1238 | loop back edges that follows. */ | |
1239 | __write_long (-1, bbg_file, 4); | |
86144b75 | 1240 | } |
86144b75 | 1241 | |
4da896b2 | 1242 | /* For each arc not on the spanning tree, add counting code as rtl. */ |
86144b75 | 1243 | |
4da896b2 MM |
1244 | if (profile_arc_flag) |
1245 | { | |
1246 | instrument_arcs (f, num_blocks, dump_file); | |
1247 | allocate_reg_info (max_reg_num (), FALSE, FALSE); | |
86144b75 DE |
1248 | } |
1249 | ||
4da896b2 MM |
1250 | /* Execute the rest only if doing branch probabilities. */ |
1251 | if (flag_branch_probabilities) | |
1252 | compute_branch_probabilities (num_blocks, dump_file); | |
1253 | ||
1254 | /* Clean up. */ | |
1255 | free (bb_graph); | |
86144b75 DE |
1256 | } |
1257 | \f | |
1258 | /* Initialize a new arc. | |
1259 | ARCPTR is the empty adj_list this function fills in. | |
1260 | SOURCE is the block number of the source block. | |
1261 | TARGET is the block number of the target block. | |
1262 | INSN is the insn which transfers control from SOURCE to TARGET, | |
1263 | or zero if the transfer is implicit. */ | |
1264 | ||
1265 | static void | |
1266 | init_arc (arcptr, source, target, insn) | |
1267 | struct adj_list *arcptr; | |
1268 | int source, target; | |
1269 | rtx insn; | |
1270 | { | |
1271 | ARC_TARGET (arcptr) = target; | |
1272 | ARC_SOURCE (arcptr) = source; | |
1273 | ||
1274 | ARC_COUNT (arcptr) = 0; | |
1275 | arcptr->count_valid = 0; | |
1276 | arcptr->on_tree = 0; | |
1277 | arcptr->fake = 0; | |
1278 | arcptr->fall_through = 0; | |
1279 | arcptr->branch_insn = insn; | |
1280 | ||
1281 | arcptr->succ_next = bb_graph[source].succ; | |
1282 | bb_graph[source].succ = arcptr; | |
1283 | bb_graph[source].succ_count++; | |
1284 | ||
1285 | arcptr->pred_next = bb_graph[target].pred; | |
1286 | bb_graph[target].pred = arcptr; | |
1287 | bb_graph[target].pred_count++; | |
1288 | } | |
1289 | ||
/* This function searches all of the arcs in the program flow graph, and puts
   as many bad arcs as possible onto the spanning tree.  Bad arcs include
   fake arcs (needed for setjmp(), longjmp(), exit()) which MUST be on the
   spanning tree as they can't be instrumented.  Also, arcs which must be
   split when instrumented should be part of the spanning tree if possible.

   NUM_BLOCKS is the number of entries in the file-scope bb_graph array.
   On return, every arc selected for the tree has its on_tree bit set;
   block on_tree bits are left set as a side effect of the final fill.  */

static void
find_spanning_tree (num_blocks)
     int num_blocks;
{
  int i;
  struct adj_list *arcptr;
  struct bb_info *binfo = &bb_graph[0];

  /* Fake arcs must be part of the spanning tree, and are always safe to put
     on the spanning tree.  Fake arcs will either be a successor of node 0,
     a predecessor of the last node, or from the last node to node 0.
     During this pre-pass the block on_tree bits are used only to detect
     a cycle among the fake arcs; they are cleared again below.  */

  for (arcptr = bb_graph[0].succ; arcptr; arcptr = arcptr->succ_next)
    if (arcptr->fake)
      {
	/* Adding this arc should never cause a cycle.  This is a fatal
	   error if it would.  */
	if (bb_graph[ARC_TARGET (arcptr)].on_tree && binfo->on_tree)
	  abort();
	else
	  {
	    arcptr->on_tree = 1;
	    bb_graph[ARC_TARGET (arcptr)].on_tree = 1;
	    binfo->on_tree = 1;
	  }
      }

  binfo = &bb_graph[num_blocks-1];
  for (arcptr = binfo->pred; arcptr; arcptr = arcptr->pred_next)
    if (arcptr->fake)
      {
	/* Adding this arc should never cause a cycle.  This is a fatal
	   error if it would.  */
	if (bb_graph[ARC_SOURCE (arcptr)].on_tree && binfo->on_tree)
	  abort();
	else
	  {
	    arcptr->on_tree = 1;
	    bb_graph[ARC_SOURCE (arcptr)].on_tree = 1;
	    binfo->on_tree = 1;
	  }
      }
  /* The only entrance to node zero is a fake arc.  */
  bb_graph[0].pred->on_tree = 1;

  /* Arcs which are crowded at both the source and target should be put on
     the spanning tree if possible, except for fall_through arcs which never
     require adding a new block even if crowded, and arcs with the same source
     and dest which must always be instrumented.  */
  for (i = 0; i < num_blocks; i++)
    {
      binfo = &bb_graph[i];

      /* The first clause tests that ARCPTR is the sole successor of block I.
	 NOTE(review): the second clause tests that ARCPTR is the LAST entry
	 on the target's predecessor list (and that the list is non-empty),
	 not that it is the ONLY entry -- possibly intended to be
	 bb_graph[ARC_TARGET (arcptr)].pred == arcptr; confirm before
	 changing, as the pass is only a heuristic.  */
      for (arcptr = binfo->succ; arcptr; arcptr = arcptr->succ_next)
	if (! ((binfo->succ == arcptr && arcptr->succ_next == 0)
	       || (bb_graph[ARC_TARGET (arcptr)].pred
		   && arcptr->pred_next == 0))
	    && ! arcptr->fall_through
	    && ARC_TARGET (arcptr) != i)
	  {
	    /* This is a crowded arc at both source and target.  Try to put
	       in on the spanning tree.  Can do this if either the source or
	       target block is not yet on the tree.  */
	    if (! bb_graph[ARC_TARGET (arcptr)].on_tree || ! binfo->on_tree)
	      {
		arcptr->on_tree = 1;
		bb_graph[ARC_TARGET (arcptr)].on_tree = 1;
		binfo->on_tree = 1;
	      }
	  }
    }

  /* Clear all of the basic block on_tree bits, so that we can use them to
     create the spanning tree.  */
  for (i = 0; i < num_blocks; i++)
    bb_graph[i].on_tree = 0;

  /* Now fill in the spanning tree until every basic block is on it.
     Don't put the 0 to 1 fall through arc on the tree, since it is
     always cheap to instrument, so start filling the tree from node 1.  */

  for (i = 1; i < num_blocks; i++)
    for (arcptr = bb_graph[i].succ; arcptr; arcptr = arcptr->succ_next)
      if (! arcptr->on_tree
	  && ! bb_graph[ARC_TARGET (arcptr)].on_tree)
	{
	  fill_spanning_tree (i);
	  break;
	}
}
1386 | ||
1387 | /* Add arcs reached from BLOCK to the spanning tree if they are needed and | |
1388 | not already there. */ | |
1389 | ||
1390 | static void | |
1391 | fill_spanning_tree (block) | |
1392 | int block; | |
1393 | { | |
1394 | struct adj_list *arcptr; | |
1395 | ||
1396 | expand_spanning_tree (block); | |
1397 | ||
1398 | for (arcptr = bb_graph[block].succ; arcptr; arcptr = arcptr->succ_next) | |
1399 | if (! arcptr->on_tree | |
1400 | && ! bb_graph[ARC_TARGET (arcptr)].on_tree) | |
1401 | { | |
1402 | arcptr->on_tree = 1; | |
1403 | fill_spanning_tree (ARC_TARGET (arcptr)); | |
1404 | } | |
1405 | } | |
1406 | ||
1407 | /* When first visit a block, must add all blocks that are already connected | |
1408 | to this block via tree arcs to the spanning tree. */ | |
1409 | ||
1410 | static void | |
1411 | expand_spanning_tree (block) | |
1412 | int block; | |
1413 | { | |
1414 | struct adj_list *arcptr; | |
1415 | ||
1416 | bb_graph[block].on_tree = 1; | |
1417 | ||
1418 | for (arcptr = bb_graph[block].succ; arcptr; arcptr = arcptr->succ_next) | |
1419 | if (arcptr->on_tree && ! bb_graph[ARC_TARGET (arcptr)].on_tree) | |
1420 | expand_spanning_tree (ARC_TARGET (arcptr)); | |
1421 | ||
1422 | for (arcptr = bb_graph[block].pred; | |
1423 | arcptr; arcptr = arcptr->pred_next) | |
1424 | if (arcptr->on_tree && ! bb_graph[ARC_SOURCE (arcptr)].on_tree) | |
1425 | expand_spanning_tree (ARC_SOURCE (arcptr)); | |
1426 | } | |
1427 | \f | |
1428 | /* Perform file-level initialization for branch-prob processing. */ | |
1429 | ||
1430 | void | |
1431 | init_branch_prob (filename) | |
87e11268 | 1432 | const char *filename; |
86144b75 DE |
1433 | { |
1434 | long len; | |
1435 | int i; | |
1436 | ||
1437 | if (flag_test_coverage) | |
1438 | { | |
1439 | /* Open an output file for the basic block/line number map. */ | |
1440 | int len = strlen (filename); | |
1441 | char *data_file = (char *) alloca (len + 4); | |
1442 | strcpy (data_file, filename); | |
1443 | strip_off_ending (data_file, len); | |
1444 | strcat (data_file, ".bb"); | |
14a774a9 | 1445 | if ((bb_file = fopen (data_file, "wb")) == 0) |
86144b75 DE |
1446 | pfatal_with_name (data_file); |
1447 | ||
1448 | /* Open an output file for the program flow graph. */ | |
1449 | len = strlen (filename); | |
1450 | bbg_file_name = (char *) alloca (len + 5); | |
1451 | strcpy (bbg_file_name, filename); | |
1452 | strip_off_ending (bbg_file_name, len); | |
1453 | strcat (bbg_file_name, ".bbg"); | |
14a774a9 | 1454 | if ((bbg_file = fopen (bbg_file_name, "wb")) == 0) |
86144b75 DE |
1455 | pfatal_with_name (bbg_file_name); |
1456 | ||
1457 | /* Initialize to zero, to ensure that the first file name will be | |
1458 | written to the .bb file. */ | |
1459 | last_bb_file_name = 0; | |
1460 | } | |
1461 | ||
1462 | if (flag_branch_probabilities) | |
1463 | { | |
1464 | len = strlen (filename); | |
1465 | da_file_name = (char *) alloca (len + 4); | |
1466 | strcpy (da_file_name, filename); | |
1467 | strip_off_ending (da_file_name, len); | |
1468 | strcat (da_file_name, ".da"); | |
14a774a9 | 1469 | if ((da_file = fopen (da_file_name, "rb")) == 0) |
86144b75 DE |
1470 | warning ("file %s not found, execution counts assumed to be zero.", |
1471 | da_file_name); | |
1472 | ||
1473 | /* The first word in the .da file gives the number of instrumented arcs, | |
1474 | which is not needed for our purposes. */ | |
1475 | ||
1476 | if (da_file) | |
1477 | __read_long (&len, da_file, 8); | |
1478 | } | |
1479 | ||
1480 | if (profile_arc_flag) | |
1481 | init_arc_profiler (); | |
1482 | ||
1483 | total_num_blocks = 0; | |
1484 | total_num_arcs = 0; | |
1485 | total_num_arcs_instrumented = 0; | |
1486 | total_num_blocks_created = 0; | |
1487 | total_num_passes = 0; | |
1488 | total_num_times_called = 0; | |
1489 | total_num_branches = 0; | |
1490 | total_num_never_executed = 0; | |
1491 | for (i = 0; i < 20; i++) | |
1492 | total_hist_br_prob[i] = 0; | |
1493 | } | |
1494 | ||
1495 | /* Performs file-level cleanup after branch-prob processing | |
1496 | is completed. */ | |
1497 | ||
1498 | void | |
1499 | end_branch_prob (dump_file) | |
1500 | FILE *dump_file; | |
1501 | { | |
1502 | if (flag_test_coverage) | |
1503 | { | |
1504 | fclose (bb_file); | |
1505 | fclose (bbg_file); | |
1506 | } | |
1507 | ||
1508 | if (flag_branch_probabilities) | |
1509 | { | |
1510 | if (da_file) | |
1511 | { | |
1512 | long temp; | |
1513 | /* This seems slightly dangerous, as it presumes the EOF | |
1514 | flag will not be set until an attempt is made to read | |
1515 | past the end of the file. */ | |
1516 | if (feof (da_file)) | |
1517 | warning (".da file contents exhausted too early\n"); | |
1518 | /* Should be at end of file now. */ | |
1519 | if (__read_long (&temp, da_file, 8) == 0) | |
1520 | warning (".da file contents not exhausted\n"); | |
1521 | fclose (da_file); | |
1522 | } | |
1523 | } | |
1524 | ||
1525 | if (dump_file) | |
1526 | { | |
1527 | fprintf (dump_file, "\n"); | |
1528 | fprintf (dump_file, "Total number of blocks: %d\n", total_num_blocks); | |
1529 | fprintf (dump_file, "Total number of arcs: %d\n", total_num_arcs); | |
1530 | fprintf (dump_file, "Total number of instrumented arcs: %d\n", | |
1531 | total_num_arcs_instrumented); | |
1532 | fprintf (dump_file, "Total number of blocks created: %d\n", | |
1533 | total_num_blocks_created); | |
1534 | fprintf (dump_file, "Total number of graph solution passes: %d\n", | |
1535 | total_num_passes); | |
1536 | if (total_num_times_called != 0) | |
1537 | fprintf (dump_file, "Average number of graph solution passes: %d\n", | |
1538 | (total_num_passes + (total_num_times_called >> 1)) | |
1539 | / total_num_times_called); | |
1540 | fprintf (dump_file, "Total number of branches: %d\n", total_num_branches); | |
1541 | fprintf (dump_file, "Total number of branches never executed: %d\n", | |
1542 | total_num_never_executed); | |
1543 | if (total_num_branches) | |
1544 | { | |
1545 | int i; | |
1546 | ||
1547 | for (i = 0; i < 10; i++) | |
1548 | fprintf (dump_file, "%d%% branches in range %d-%d%%\n", | |
1549 | (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100 | |
1550 | / total_num_branches, 5*i, 5*i+5); | |
1551 | } | |
1552 | } | |
1553 | } | |
1554 | \f | |
/* The label used by the arc profiling code: a SYMBOL_REF for the counter
   table, created once by init_arc_profiler (which also registers it as a
   GC root) and shared by every sequence output_arc_profiler emits.  */

static rtx profiler_label;
1558 | ||
1559 | /* Initialize the profiler_label. */ | |
1560 | ||
1561 | static void | |
1562 | init_arc_profiler () | |
1563 | { | |
1564 | /* Generate and save a copy of this so it can be shared. */ | |
21a427cc | 1565 | char *name = ggc_alloc_string (NULL, 20); |
86144b75 | 1566 | ASM_GENERATE_INTERNAL_LABEL (name, "LPBX", 2); |
38a448ca | 1567 | profiler_label = gen_rtx_SYMBOL_REF (Pmode, name); |
d7db6646 | 1568 | ggc_add_rtx_root (&profiler_label, 1); |
86144b75 DE |
1569 | } |
1570 | ||
/* Output instructions as RTL to increment the arc execution count.
   ARCNO is the index of this arc's counter in the table addressed by
   profiler_label; INSERT_AFTER is the insn after which the load/add/store
   sequence is emitted (it may be advanced past a call's return-value
   copy, see below).  */

static void
output_arc_profiler (arcno, insert_after)
     int arcno;
     rtx insert_after;
{
  /* Address of this arc's slot: profiler_label plus the byte offset of
     counter ARCNO (counters are LONG_TYPE_SIZE bits wide).  */
  rtx profiler_target_addr
    = (arcno ? plus_constant (profiler_label,
			      LONG_TYPE_SIZE / BITS_PER_UNIT * arcno)
       : profiler_label);
  enum machine_mode mode = mode_for_size (LONG_TYPE_SIZE, MODE_INT, 0);
  rtx profiler_reg = gen_reg_rtx (mode);
  rtx address_reg = gen_reg_rtx (Pmode);
  rtx mem_ref, add_ref;
  rtx sequence;

  /* In this case, reload can use explicitly mentioned hard registers for
     reloads.  It is not safe to output profiling code between a call
     and the instruction that copies the result to a pseudo-reg.  This
     is because reload may allocate one of the profiling code pseudo-regs
     to the return value reg, thus clobbering the return value.  So we
     must check for calls here, and emit the profiling code after the
     instruction that uses the return value, if any.

     ??? The code here performs the same tests that reload does so hopefully
     all the bases are covered.  */

  if (SMALL_REGISTER_CLASSES
      && GET_CODE (insert_after) == CALL_INSN
      && (GET_CODE (PATTERN (insert_after)) == SET
	  || (GET_CODE (PATTERN (insert_after)) == PARALLEL
	      && GET_CODE (XVECEXP (PATTERN (insert_after), 0, 0)) == SET)))
    {
      rtx return_reg;
      rtx next_insert_after = next_nonnote_insn (insert_after);

      /* The first insn after the call may be a stack pop, skip it.  */
      if (next_insert_after
	  && GET_CODE (next_insert_after) == INSN
	  && GET_CODE (PATTERN (next_insert_after)) == SET
	  && SET_DEST (PATTERN (next_insert_after)) == stack_pointer_rtx)
	next_insert_after = next_nonnote_insn (next_insert_after);

      if (next_insert_after
	  && GET_CODE (next_insert_after) == INSN)
	{
	  /* The call's SET (or first SET of its PARALLEL) holds the
	     return-value register as its destination.  */
	  if (GET_CODE (PATTERN (insert_after)) == SET)
	    return_reg = SET_DEST (PATTERN (insert_after));
	  else
	    return_reg = SET_DEST (XVECEXP (PATTERN (insert_after), 0, 0));

	  /* Now, NEXT_INSERT_AFTER may be an instruction that uses the
	     return value.  However, it could also be something else,
	     like a CODE_LABEL, so check that the code is INSN.  */
	  if (next_insert_after != 0
	      && GET_RTX_CLASS (GET_CODE (next_insert_after)) == 'i'
	      && reg_referenced_p (return_reg, PATTERN (next_insert_after)))
	    insert_after = next_insert_after;
	}
    }

  /* Emit:  addr = &counter; tmp = *addr; tmp = tmp + 1; *addr = tmp;
     as a single SEQUENCE placed after INSERT_AFTER.  */
  start_sequence ();

  emit_move_insn (address_reg, profiler_target_addr);
  mem_ref = gen_rtx_MEM (mode, address_reg);
  emit_move_insn (profiler_reg, mem_ref);

  add_ref = gen_rtx_PLUS (mode, profiler_reg, GEN_INT (1));
  emit_move_insn (profiler_reg, add_ref);

  /* This is the same rtx as above, but it is not legal to share this rtx.  */
  mem_ref = gen_rtx_MEM (mode, address_reg);
  emit_move_insn (mem_ref, profiler_reg);

  sequence = gen_sequence ();
  end_sequence ();
  emit_insn_after (sequence, insert_after);
}
1650 | ||
1651 | /* Output code for a constructor that will invoke __bb_init_func, if | |
1652 | this has not already been done. */ | |
1653 | ||
/* Synthesize and compile a per-object-file constructor function that
   calls __bb_init_func with the address of this file's profile block
   (the LPBX0 table), then register it via assemble_constructor.
   Emits nothing if no instrumentation was generated (need_func_profiler
   is clear).  Idempotent: clears need_func_profiler on first use.  */

void
output_func_start_profiler ()
{
  tree fnname, fndecl;
  char *name, *cfnname;
  rtx table_address;
  /* Mode matching `long', the type __bb_init_func's table argument is
     counted in for the libcall below.  */
  enum machine_mode mode = mode_for_size (LONG_TYPE_SIZE, MODE_INT, 0);
  int save_flag_inline_functions = flag_inline_functions;

  /* It's either already been output, or we don't need it because we're
     not doing profile-arcs.  */
  if (! need_func_profiler)
    return;

  need_func_profiler = 0;

  /* Synthesize a constructor function to invoke __bb_init_func with a
     pointer to this object file's profile block.  */

  /* Try and make a unique name given the "file function name".

     And no, I don't like this either.  */

  /* Derive the constructor's name from the 'I' (static initializer)
     file-function name with a "GCOV" suffix.  strlen + 5 covers the
     four suffix characters plus the terminating NUL.  */
  fnname = get_file_function_name ('I');
  cfnname = IDENTIFIER_POINTER (fnname);
  name = xmalloc (strlen (cfnname) + 5);
  sprintf (name, "%sGCOV",cfnname);
  /* get_identifier copies the string, so the scratch buffer can be
     freed immediately.  */
  fnname = get_identifier (name);
  free (name);

  /* Build a public `void fn (void)' FUNCTION_DECL for the constructor.  */
  fndecl = build_decl (FUNCTION_DECL, fnname,
		       build_function_type (void_type_node, NULL_TREE));
  DECL_EXTERNAL (fndecl) = 0;
  TREE_PUBLIC (fndecl) = 1;
  DECL_ASSEMBLER_NAME (fndecl) = fnname;
  DECL_RESULT (fndecl) = build_decl (RESULT_DECL, NULL_TREE, void_type_node);

  /* Standard front-end-style compilation dance: declare the function,
     open its RTL generation context, and start expanding its body.
     The order of these calls mirrors how a parsed function definition
     is processed and must not be disturbed.  */
  fndecl = pushdecl (fndecl);
  rest_of_decl_compilation (fndecl, 0, 1, 0);
  announce_function (fndecl);
  current_function_decl = fndecl;
  /* error_mark_node marks the body as present-but-synthetic.  */
  DECL_INITIAL (fndecl) = error_mark_node;
  temporary_allocation ();
  make_function_rtl (fndecl);
  init_function_start (fndecl, input_filename, lineno);
  pushlevel (0);
  expand_function_start (fndecl, 0);

  /* Actually generate the code to call __bb_init_func.  */
  /* 20 bytes is room for the generated "*LPBX0"-style internal label
     (presumably ample for any target's label syntax — inherited sizing).  */
  name = ggc_alloc_string (NULL, 20);
  ASM_GENERATE_INTERNAL_LABEL (name, "LPBX", 0);
  table_address = force_reg (Pmode, gen_rtx_SYMBOL_REF (Pmode, name));
  /* Call __bb_init_func (&LPBX0); the name string must be GC-allocated
     because the SYMBOL_REF holds onto it.  */
  emit_library_call (gen_rtx_SYMBOL_REF
		     (Pmode, ggc_alloc_string ("__bb_init_func", 14)), 0,
		     mode, 1, table_address, Pmode);

  expand_function_end (input_filename, lineno, 0);
  poplevel (1, 0, 1);

  /* Since fndecl isn't in the list of globals, it would never be emitted
     when it's considered to be 'safe' for inlining, so turn off
     flag_inline_functions.  */
  flag_inline_functions = 0;

  rest_of_compilation (fndecl);

  /* Reset flag_inline_functions to its original value.  */
  flag_inline_functions = save_flag_inline_functions;

  /* Keep assembler output ordered with any diagnostics already printed.  */
  if (! quiet_flag)
    fflush (asm_out_file);
  current_function_decl = NULL_TREE;

  /* Register the new function so the linker runs it before main.  */
  assemble_constructor (IDENTIFIER_POINTER (DECL_NAME (fndecl)));
}