]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/value-prof.c
7be8db4f897f85784ba54681f39a517e09c22122
[thirdparty/gcc.git] / gcc / value-prof.c
1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36
37 /* In this file value profile based optimizations will be placed (none are
38 here just now, but they are hopefully coming soon).
39
40 Every such optimization should add its requirements for profiled values to
41 insn_values_to_profile function. This function is called from branch_prob
42 in profile.c and the requested values are instrumented by it in the first
43 compilation with -fprofile-arcs. The optimization may then read the
44 gathered data in the second compilation with -fbranch-probabilities.
45 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
46 insn. The argument to the note consists of an EXPR_LIST where its
47 members have the following meaning (from the first to the last):
48
49 -- type of information gathered (HIST_TYPE*)
50 -- the expression that is profiled
51 -- list of counters starting from the first one. */
52
53 static void insn_divmod_values_to_profile (rtx, unsigned *,
54 struct histogram_value **);
55 static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
56 static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
57 rtx, gcov_type);
58 static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
59 static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
60 int);
61 static bool divmod_fixed_value_transform (rtx insn);
62 static bool mod_pow2_value_transform (rtx);
63 static bool mod_subtract_transform (rtx);
64 \f
65 /* Release the list of VALUES of length N_VALUES for that we want to measure
66 histograms. */
67 void
68 free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
69 struct histogram_value *values)
70 {
71 free (values);
72 }
73
74 /* Find values inside INSN for that we want to measure histograms for
75 division/modulo optimization. */
76 static void
77 insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
78 struct histogram_value **values)
79 {
80 rtx set, set_src, op1, op2;
81 enum machine_mode mode;
82
83 if (!INSN_P (insn))
84 return;
85
86 set = single_set (insn);
87 if (!set)
88 return;
89
90 mode = GET_MODE (SET_DEST (set));
91 if (!INTEGRAL_MODE_P (mode))
92 return;
93
94 set_src = SET_SRC (set);
95 switch (GET_CODE (set_src))
96 {
97 case DIV:
98 case MOD:
99 case UDIV:
100 case UMOD:
101 op1 = XEXP (set_src, 0);
102 op2 = XEXP (set_src, 1);
103 if (side_effects_p (op2))
104 return;
105
106 /* Check for a special case where the divisor is power of 2. */
107 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
108 {
109 *values = xrealloc (*values,
110 (*n_values + 1)
111 * sizeof (struct histogram_value));
112 (*values)[*n_values].value = op2;
113 (*values)[*n_values].seq = NULL_RTX;
114 (*values)[*n_values].mode = mode;
115 (*values)[*n_values].insn = insn;
116 (*values)[*n_values].type = HIST_TYPE_POW2;
117 (*values)[*n_values].hdata.pow2.may_be_other = 1;
118 (*n_values)++;
119 }
120
121 /* Check whether the divisor is not in fact a constant. */
122 if (!CONSTANT_P (op2))
123 {
124 *values = xrealloc (*values,
125 (*n_values + 1)
126 * sizeof (struct histogram_value));
127 (*values)[*n_values].value = op2;
128 (*values)[*n_values].mode = mode;
129 (*values)[*n_values].seq = NULL_RTX;
130 (*values)[*n_values].insn = insn;
131 (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
132 (*n_values)++;
133 }
134
135 /* For mod, check whether it is not often a noop (or replaceable by
136 a few subtractions). */
137 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
138 {
139 rtx tmp;
140
141 *values = xrealloc (*values,
142 (*n_values + 1)
143 * sizeof (struct histogram_value));
144 start_sequence ();
145 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
146 (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
147 (*values)[*n_values].seq = get_insns ();
148 end_sequence ();
149 (*values)[*n_values].mode = mode;
150 (*values)[*n_values].insn = insn;
151 (*values)[*n_values].type = HIST_TYPE_INTERVAL;
152 (*values)[*n_values].hdata.intvl.int_start = 0;
153 (*values)[*n_values].hdata.intvl.steps = 2;
154 (*values)[*n_values].hdata.intvl.may_be_less = 1;
155 (*values)[*n_values].hdata.intvl.may_be_more = 1;
156 (*n_values)++;
157 }
158 return;
159
160 default:
161 return;
162 }
163 }
164
165 /* Find values inside INSN for that we want to measure histograms and adds
166 them to list VALUES (increasing the record of its length in N_VALUES). */
167 static void
168 insn_values_to_profile (rtx insn,
169 unsigned *n_values,
170 struct histogram_value **values)
171 {
172 if (flag_value_profile_transformations)
173 insn_divmod_values_to_profile (insn, n_values, values);
174 }
175
176 /* Find list of values for that we want to measure histograms. */
177 void
178 find_values_to_profile (unsigned *n_values, struct histogram_value **values)
179 {
180 rtx insn;
181 unsigned i;
182
183 *n_values = 0;
184 *values = NULL;
185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
186 insn_values_to_profile (insn, n_values, values);
187
188 for (i = 0; i < *n_values; i++)
189 {
190 switch ((*values)[i].type)
191 {
192 case HIST_TYPE_INTERVAL:
193 if (dump_file)
194 fprintf (dump_file,
195 "Interval counter for insn %d, range %d -- %d.\n",
196 INSN_UID ((*values)[i].insn),
197 (*values)[i].hdata.intvl.int_start,
198 ((*values)[i].hdata.intvl.int_start
199 + (*values)[i].hdata.intvl.steps - 1));
200 (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
201 ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
202 ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
203 break;
204
205 case HIST_TYPE_POW2:
206 if (dump_file)
207 fprintf (dump_file,
208 "Pow2 counter for insn %d.\n",
209 INSN_UID ((*values)[i].insn));
210 (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) +
211 ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
212 break;
213
214 case HIST_TYPE_SINGLE_VALUE:
215 if (dump_file)
216 fprintf (dump_file,
217 "Single value counter for insn %d.\n",
218 INSN_UID ((*values)[i].insn));
219 (*values)[i].n_counters = 3;
220 break;
221
222 case HIST_TYPE_CONST_DELTA:
223 if (dump_file)
224 fprintf (dump_file,
225 "Constant delta counter for insn %d.\n",
226 INSN_UID ((*values)[i].insn));
227 (*values)[i].n_counters = 4;
228 break;
229
230 default:
231 abort ();
232 }
233 }
234 }
235
236 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
237 them to identify and exploit properties of values that are hard to analyze
238 statically.
239
240 We do following transformations:
241
242 1)
243
244 x = a / b;
245
246 where b is almost always a constant N is transformed to
247
248 if (b == N)
249 x = a / N;
250 else
251 x = a / b;
252
253 Analogically with %
254
255 2)
256
257 x = a % b
258
259 where b is almost always a power of 2 and the division is unsigned
260 TODO -- handle signed case as well
261
262 if ((b & (b - 1)) == 0)
263 x = a & (b - 1);
264 else
265 x = x % b;
266
267 Note that when b = 0, no error will occur and x = a; this is correct,
268 as result of such operation is undefined.
269
270 3)
271
272 x = a % b
273
274 where a is almost always less then b and the division is unsigned
275 TODO -- handle signed case as well
276
277 x = a;
278 if (x >= b)
279 x %= b;
280
281 4)
282
283 x = a % b
284
285 where a is almost always less then 2 * b and the division is unsigned
286 TODO -- handle signed case as well
287
288 x = a;
289 if (x >= b)
290 x -= b;
291 if (x >= b)
292 x %= b;
293
294 It would be possible to continue analogically for K * b for other small
295 K's, but it is probably not useful.
296
297 TODO:
298
299 There are other useful cases that could be handled by a similar mechanism,
300 for example:
301
302 for (i = 0; i < n; i++)
303 ...
304
305 transform to (for constant N):
306
307 if (n == N)
308 for (i = 0; i < N; i++)
309 ...
310 else
311 for (i = 0; i < n; i++)
312 ...
313 making unroller happy. Since this may grow the code significantly,
314 we would have to be very careful here. */
315
316 bool
317 value_profile_transformations (void)
318 {
319 rtx insn, next;
320 int changed = false;
321
322 for (insn = get_insns (); insn; insn = next)
323 {
324 next = NEXT_INSN (insn);
325
326 if (!INSN_P (insn))
327 continue;
328
329 /* Scan for insn carrying a histogram. */
330 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
331 continue;
332
333 /* Ignore cold areas -- we are growing a code. */
334 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
335 continue;
336
337 if (dump_file)
338 {
339 fprintf (dump_file, "Trying transformations on insn %d\n",
340 INSN_UID (insn));
341 print_rtl_single (dump_file, insn);
342 }
343
344 /* Transformations: */
345 if (flag_value_profile_transformations
346 && (mod_subtract_transform (insn)
347 || divmod_fixed_value_transform (insn)
348 || mod_pow2_value_transform (insn)))
349 changed = true;
350 }
351
352 if (changed)
353 {
354 commit_edge_insertions ();
355 allocate_reg_info (max_reg_num (), FALSE, FALSE);
356 }
357
358 return changed;
359 }
360
361 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
362 and OP2 whose value is expected to be VALUE and result TARGET). */
363 static rtx
364 gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
365 rtx target, rtx op1, rtx op2, gcov_type value)
366 {
367 rtx tmp, tmp1;
368 rtx neq_label = gen_label_rtx ();
369 rtx end_label = gen_label_rtx ();
370 rtx sequence;
371
372 start_sequence ();
373
374 if (!REG_P (op2))
375 {
376 tmp = gen_reg_rtx (mode);
377 emit_move_insn (tmp, copy_rtx (op2));
378 }
379 else
380 tmp = op2;
381
382 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
383 NULL_RTX, neq_label);
384 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
385 tmp1 = force_operand (tmp1, target);
386 if (tmp1 != target)
387 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
388
389 emit_jump_insn (gen_jump (end_label));
390 emit_barrier ();
391
392 emit_label (neq_label);
393 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
394 tmp1 = force_operand (tmp1, target);
395 if (tmp1 != target)
396 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
397
398 emit_label (end_label);
399
400 sequence = get_insns ();
401 end_sequence ();
402 rebuild_jump_labels (sequence);
403 return sequence;
404 }
405
406 /* Do transform 1) on INSN if applicable. */
407 static bool
408 divmod_fixed_value_transform (rtx insn)
409 {
410 rtx set, set_src, set_dest, op1, op2, value, histogram;
411 enum rtx_code code;
412 enum machine_mode mode;
413 gcov_type val, count, all;
414 edge e;
415
416 set = single_set (insn);
417 if (!set)
418 return false;
419
420 set_src = SET_SRC (set);
421 set_dest = SET_DEST (set);
422 code = GET_CODE (set_src);
423 mode = GET_MODE (set_dest);
424
425 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
426 return false;
427 op1 = XEXP (set_src, false);
428 op2 = XEXP (set_src, 1);
429
430 for (histogram = REG_NOTES (insn);
431 histogram;
432 histogram = XEXP (histogram, 1))
433 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
434 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
435 break;
436
437 if (!histogram)
438 return false;
439
440 histogram = XEXP (XEXP (histogram, 0), 1);
441 value = XEXP (histogram, 0);
442 histogram = XEXP (histogram, 1);
443 val = INTVAL (XEXP (histogram, 0));
444 histogram = XEXP (histogram, 1);
445 count = INTVAL (XEXP (histogram, 0));
446 histogram = XEXP (histogram, 1);
447 all = INTVAL (XEXP (histogram, 0));
448
449 /* We require that count is at least half of all; this means
450 that for the transformation to fire the value must be constant
451 at least 50% of time (and 75% gives the guarantee of usage). */
452 if (!rtx_equal_p (op2, value) || 2 * count < all)
453 return false;
454
455 if (dump_file)
456 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
457 INSN_UID (insn));
458
459 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
460 delete_insn (insn);
461
462 insert_insn_on_edge (
463 gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
464
465 return true;
466 }
467
468 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
469 and OP2 and result TARGET). */
470 static rtx
471 gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
472 rtx op1, rtx op2)
473 {
474 rtx tmp, tmp1, tmp2, tmp3;
475 rtx neq_label = gen_label_rtx ();
476 rtx end_label = gen_label_rtx ();
477 rtx sequence;
478
479 start_sequence ();
480
481 if (!REG_P (op2))
482 {
483 tmp = gen_reg_rtx (mode);
484 emit_move_insn (tmp, copy_rtx (op2));
485 }
486 else
487 tmp = op2;
488
489 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
490 0, OPTAB_WIDEN);
491 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
492 0, OPTAB_WIDEN);
493 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
494 NULL_RTX, neq_label);
495 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
496 0, OPTAB_WIDEN);
497 if (tmp3 != target)
498 emit_move_insn (copy_rtx (target), tmp3);
499 emit_jump_insn (gen_jump (end_label));
500 emit_barrier ();
501
502 emit_label (neq_label);
503 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
504 tmp1 = force_operand (tmp1, target);
505 if (tmp1 != target)
506 emit_move_insn (target, tmp1);
507
508 emit_label (end_label);
509
510 sequence = get_insns ();
511 end_sequence ();
512 rebuild_jump_labels (sequence);
513 return sequence;
514 }
515
516 /* Do transform 2) on INSN if applicable. */
517 static bool
518 mod_pow2_value_transform (rtx insn)
519 {
520 rtx set, set_src, set_dest, op1, op2, value, histogram;
521 enum rtx_code code;
522 enum machine_mode mode;
523 gcov_type wrong_values, count;
524 edge e;
525 int i;
526
527 set = single_set (insn);
528 if (!set)
529 return false;
530
531 set_src = SET_SRC (set);
532 set_dest = SET_DEST (set);
533 code = GET_CODE (set_src);
534 mode = GET_MODE (set_dest);
535
536 if (code != UMOD)
537 return false;
538 op1 = XEXP (set_src, 0);
539 op2 = XEXP (set_src, 1);
540
541 for (histogram = REG_NOTES (insn);
542 histogram;
543 histogram = XEXP (histogram, 1))
544 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
545 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
546 break;
547
548 if (!histogram)
549 return false;
550
551 histogram = XEXP (XEXP (histogram, 0), 1);
552 value = XEXP (histogram, 0);
553 histogram = XEXP (histogram, 1);
554 wrong_values =INTVAL (XEXP (histogram, 0));
555 histogram = XEXP (histogram, 1);
556
557 count = 0;
558 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
559 {
560 count += INTVAL (XEXP (histogram, 0));
561 histogram = XEXP (histogram, 1);
562 }
563
564 if (!rtx_equal_p (op2, value))
565 return false;
566
567 /* We require that we hit a power of two at least half of all evaluations. */
568 if (count < wrong_values)
569 return false;
570
571 if (dump_file)
572 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
573 INSN_UID (insn));
574
575 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
576 delete_insn (insn);
577
578 insert_insn_on_edge (
579 gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
580
581 return true;
582 }
583
584 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
585 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
586 static rtx
587 gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
588 rtx target, rtx op1, rtx op2, int sub)
589 {
590 rtx tmp, tmp1;
591 rtx end_label = gen_label_rtx ();
592 rtx sequence;
593 int i;
594
595 start_sequence ();
596
597 if (!REG_P (op2))
598 {
599 tmp = gen_reg_rtx (mode);
600 emit_move_insn (tmp, copy_rtx (op2));
601 }
602 else
603 tmp = op2;
604
605 emit_move_insn (target, copy_rtx (op1));
606 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
607 NULL_RTX, end_label);
608
609
610 for (i = 0; i < sub; i++)
611 {
612 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
613 0, OPTAB_WIDEN);
614 if (tmp1 != target)
615 emit_move_insn (target, tmp1);
616 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
617 NULL_RTX, end_label);
618 }
619
620 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
621 tmp1 = force_operand (tmp1, target);
622 if (tmp1 != target)
623 emit_move_insn (target, tmp1);
624
625 emit_label (end_label);
626
627 sequence = get_insns ();
628 end_sequence ();
629 rebuild_jump_labels (sequence);
630 return sequence;
631 }
632
633 /* Do transforms 3) and 4) on INSN if applicable. */
634 static bool
635 mod_subtract_transform (rtx insn)
636 {
637 rtx set, set_src, set_dest, op1, op2, value, histogram;
638 enum rtx_code code;
639 enum machine_mode mode;
640 gcov_type wrong_values, counts[2], count, all;
641 edge e;
642 int i;
643
644 set = single_set (insn);
645 if (!set)
646 return false;
647
648 set_src = SET_SRC (set);
649 set_dest = SET_DEST (set);
650 code = GET_CODE (set_src);
651 mode = GET_MODE (set_dest);
652
653 if (code != UMOD)
654 return false;
655 op1 = XEXP (set_src, 0);
656 op2 = XEXP (set_src, 1);
657
658 for (histogram = REG_NOTES (insn);
659 histogram;
660 histogram = XEXP (histogram, 1))
661 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
662 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
663 break;
664
665 if (!histogram)
666 return false;
667
668 histogram = XEXP (XEXP (histogram, 0), 1);
669 value = XEXP (histogram, 0);
670 histogram = XEXP (histogram, 1);
671
672 all = 0;
673 for (i = 0; i < 2; i++)
674 {
675 counts[i] = INTVAL (XEXP (histogram, 0));
676 all += counts[i];
677 histogram = XEXP (histogram, 1);
678 }
679 wrong_values = INTVAL (XEXP (histogram, 0));
680 histogram = XEXP (histogram, 1);
681 wrong_values += INTVAL (XEXP (histogram, 0));
682 all += wrong_values;
683
684 /* We require that we use just subtractions in at least 50% of all
685 evaluations. */
686 count = 0;
687 for (i = 0; i < 2; i++)
688 {
689 count += counts[i];
690 if (count * 2 >= all)
691 break;
692 }
693
694 if (i == 2)
695 return false;
696
697 if (dump_file)
698 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
699 INSN_UID (insn));
700
701 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
702 delete_insn (insn);
703
704 insert_insn_on_edge (
705 gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);
706
707 return true;
708 }