]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/value-prof.c
gcc_release (announce_snapshot): Use changedir instead of plain cd.
[thirdparty/gcc.git] / gcc / value-prof.c
CommitLineData
1c72f4ef 1/* Transformations based on profile information for values.
88462c42 2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
1c72f4ef
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING. If not, write to the Free
18Software Foundation, 59 Temple Place - Suite 330, Boston, MA
1902111-1307, USA. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "expr.h"
27#include "hard-reg-set.h"
28#include "basic-block.h"
29#include "value-prof.h"
30#include "output.h"
31#include "flags.h"
32#include "insn-config.h"
33#include "recog.h"
34#include "optabs.h"
fca9dc00 35#include "regs.h"
1c72f4ef
ZD
36
37/* In this file value profile based optimizations will be placed (none are
38 here just now, but they are hopefully coming soon).
39
40 Every such optimization should add its requirements for profiled values to
41 insn_values_to_profile function. This function is called from branch_prob
42 in profile.c and the requested values are instrumented by it in the first
43 compilation with -fprofile-arcs. The optimization may then read the
a98ebe2e 44 gathered data in the second compilation with -fbranch-probabilities.
6e885ee3
ZD
45 The measured data is appended as REG_VALUE_PROFILE note to the instrumented
46 insn. The argument to the note consists of an EXPR_LIST where its
47 members have the following meaning (from the first to the last):
48
49 -- type of information gathered (HIST_TYPE*)
50 -- the expression that is profiled
51 -- list of counters starting from the first one. */
1c72f4ef 52
fca9dc00
ZD
53static void insn_divmod_values_to_profile (rtx, unsigned *,
54 struct histogram_value **);
1c72f4ef 55static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
fca9dc00
ZD
56static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
57 rtx, gcov_type);
58static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
59static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
60 int);
61static bool divmod_fixed_value_transform (rtx insn);
62static bool mod_pow2_value_transform (rtx);
63static bool mod_subtract_transform (rtx);
1c72f4ef
ZD
64\f
65/* Release the list of VALUES of length N_VALUES for that we want to measure
66 histograms. */
67void
68free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
69 struct histogram_value *values)
70{
71 free (values);
72}
73
fca9dc00
ZD
74/* Find values inside INSN for that we want to measure histograms for
75 division/modulo optimization. */
76static void
77insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
78 struct histogram_value **values)
79{
80 rtx set, set_src, op1, op2;
81 enum machine_mode mode;
82
83 if (!INSN_P (insn))
84 return;
85
86 set = single_set (insn);
87 if (!set)
88 return;
89
90 mode = GET_MODE (SET_DEST (set));
91 if (!INTEGRAL_MODE_P (mode))
92 return;
93
94 set_src = SET_SRC (set);
95 switch (GET_CODE (set_src))
96 {
97 case DIV:
98 case MOD:
99 case UDIV:
100 case UMOD:
101 op1 = XEXP (set_src, 0);
102 op2 = XEXP (set_src, 1);
103 if (side_effects_p (op2))
104 return;
105
106 /* Check for a special case where the divisor is power of 2. */
107 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
108 {
109 *values = xrealloc (*values,
110 (*n_values + 1)
111 * sizeof (struct histogram_value));
112 (*values)[*n_values].value = op2;
113 (*values)[*n_values].seq = NULL_RTX;
114 (*values)[*n_values].mode = mode;
115 (*values)[*n_values].insn = insn;
116 (*values)[*n_values].type = HIST_TYPE_POW2;
117 (*values)[*n_values].hdata.pow2.may_be_other = 1;
118 (*n_values)++;
119 }
120
121 /* Check whether the divisor is not in fact a constant. */
122 if (!CONSTANT_P (op2))
123 {
124 *values = xrealloc (*values,
125 (*n_values + 1)
126 * sizeof (struct histogram_value));
127 (*values)[*n_values].value = op2;
128 (*values)[*n_values].mode = mode;
129 (*values)[*n_values].seq = NULL_RTX;
130 (*values)[*n_values].insn = insn;
131 (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
132 (*n_values)++;
133 }
134
d91edf86 135 /* For mod, check whether it is not often a noop (or replaceable by
fca9dc00
ZD
136 a few subtractions). */
137 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
138 {
139 rtx tmp;
140
141 *values = xrealloc (*values,
142 (*n_values + 1)
143 * sizeof (struct histogram_value));
144 start_sequence ();
145 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
146 (*values)[*n_values].value = force_operand (tmp, NULL_RTX);
147 (*values)[*n_values].seq = get_insns ();
148 end_sequence ();
149 (*values)[*n_values].mode = mode;
150 (*values)[*n_values].insn = insn;
151 (*values)[*n_values].type = HIST_TYPE_INTERVAL;
152 (*values)[*n_values].hdata.intvl.int_start = 0;
153 (*values)[*n_values].hdata.intvl.steps = 2;
154 (*values)[*n_values].hdata.intvl.may_be_less = 1;
155 (*values)[*n_values].hdata.intvl.may_be_more = 1;
156 (*n_values)++;
157 }
158 return;
159
160 default:
161 return;
162 }
163}
164
1c72f4ef
ZD
165/* Find values inside INSN for that we want to measure histograms and adds
166 them to list VALUES (increasing the record of its length in N_VALUES). */
167static void
fca9dc00
ZD
168insn_values_to_profile (rtx insn,
169 unsigned *n_values,
170 struct histogram_value **values)
1c72f4ef 171{
fca9dc00
ZD
172 if (flag_value_profile_transformations)
173 insn_divmod_values_to_profile (insn, n_values, values);
1c72f4ef
ZD
174}
175
176/* Find list of values for that we want to measure histograms. */
177void
178find_values_to_profile (unsigned *n_values, struct histogram_value **values)
179{
180 rtx insn;
181 unsigned i;
182
183 *n_values = 0;
184 *values = NULL;
185 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
186 insn_values_to_profile (insn, n_values, values);
187
188 for (i = 0; i < *n_values; i++)
189 {
190 switch ((*values)[i].type)
191 {
192 case HIST_TYPE_INTERVAL:
c263766c
RH
193 if (dump_file)
194 fprintf (dump_file,
fca9dc00
ZD
195 "Interval counter for insn %d, range %d -- %d.\n",
196 INSN_UID ((*values)[i].insn),
197 (*values)[i].hdata.intvl.int_start,
198 ((*values)[i].hdata.intvl.int_start
199 + (*values)[i].hdata.intvl.steps - 1));
1c72f4ef
ZD
200 (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
201 ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
202 ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
203 break;
204
205 case HIST_TYPE_POW2:
c263766c
RH
206 if (dump_file)
207 fprintf (dump_file,
fca9dc00
ZD
208 "Pow2 counter for insn %d.\n",
209 INSN_UID ((*values)[i].insn));
1c72f4ef
ZD
210 (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) +
211 ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
212 break;
213
214 case HIST_TYPE_SINGLE_VALUE:
c263766c
RH
215 if (dump_file)
216 fprintf (dump_file,
fca9dc00
ZD
217 "Single value counter for insn %d.\n",
218 INSN_UID ((*values)[i].insn));
1c72f4ef
ZD
219 (*values)[i].n_counters = 3;
220 break;
221
222 case HIST_TYPE_CONST_DELTA:
c263766c
RH
223 if (dump_file)
224 fprintf (dump_file,
fca9dc00
ZD
225 "Constant delta counter for insn %d.\n",
226 INSN_UID ((*values)[i].insn));
1c72f4ef
ZD
227 (*values)[i].n_counters = 4;
228 break;
229
230 default:
231 abort ();
232 }
233 }
234}
fca9dc00
ZD
235
236/* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
237 them to identify and exploit properties of values that are hard to analyze
238 statically.
239
240 We do following transformations:
241
242 1)
243
244 x = a / b;
245
246 where b is almost always a constant N is transformed to
247
248 if (b == N)
249 x = a / N;
250 else
251 x = a / b;
252
253 Analogically with %
254
255 2)
256
257 x = a % b
258
259 where b is almost always a power of 2 and the division is unsigned
260 TODO -- handle signed case as well
261
262 if ((b & (b - 1)) == 0)
263 x = a & (b - 1);
264 else
265 x = x % b;
266
267 Note that when b = 0, no error will occur and x = a; this is correct,
268 as result of such operation is undefined.
269
270 3)
271
272 x = a % b
273
274 where a is almost always less then b and the division is unsigned
275 TODO -- handle signed case as well
276
277 x = a;
278 if (x >= b)
279 x %= b;
280
281 4)
282
283 x = a % b
284
285 where a is almost always less then 2 * b and the division is unsigned
286 TODO -- handle signed case as well
287
288 x = a;
289 if (x >= b)
290 x -= b;
291 if (x >= b)
292 x %= b;
293
294 It would be possible to continue analogically for K * b for other small
295 K's, but it is probably not useful.
296
297 TODO:
298
299 There are other useful cases that could be handled by a similar mechanism,
300 for example:
301
302 for (i = 0; i < n; i++)
303 ...
304
305 transform to (for constant N):
306
307 if (n == N)
308 for (i = 0; i < N; i++)
309 ...
310 else
311 for (i = 0; i < n; i++)
312 ...
313 making unroller happy. Since this may grow the code significantly,
314 we would have to be very careful here. */
315
316bool
9373164a 317value_profile_transformations (void)
fca9dc00
ZD
318{
319 rtx insn, next;
320 int changed = false;
321
322 for (insn = get_insns (); insn; insn = next)
323 {
324 next = NEXT_INSN (insn);
325
326 if (!INSN_P (insn))
327 continue;
328
329 /* Scan for insn carrying a histogram. */
330 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
331 continue;
332
333 /* Ignore cold areas -- we are growing a code. */
334 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
335 continue;
336
c263766c 337 if (dump_file)
fca9dc00 338 {
c263766c 339 fprintf (dump_file, "Trying transformations on insn %d\n",
fca9dc00 340 INSN_UID (insn));
c263766c 341 print_rtl_single (dump_file, insn);
fca9dc00
ZD
342 }
343
344 /* Transformations: */
345 if (flag_value_profile_transformations
346 && (mod_subtract_transform (insn)
347 || divmod_fixed_value_transform (insn)
348 || mod_pow2_value_transform (insn)))
349 changed = true;
350 }
351
352 if (changed)
353 {
354 commit_edge_insertions ();
355 allocate_reg_info (max_reg_num (), FALSE, FALSE);
356 }
357
358 return changed;
359}
360
361/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
362 and OP2 whose value is expected to be VALUE and result TARGET). */
363static rtx
364gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
365 rtx target, rtx op1, rtx op2, gcov_type value)
366{
367 rtx tmp, tmp1;
368 rtx neq_label = gen_label_rtx ();
369 rtx end_label = gen_label_rtx ();
370 rtx sequence;
371
372 start_sequence ();
373
374 if (!REG_P (op2))
375 {
376 tmp = gen_reg_rtx (mode);
377 emit_move_insn (tmp, copy_rtx (op2));
378 }
379 else
380 tmp = op2;
381
382 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
383 NULL_RTX, neq_label);
384 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
385 tmp1 = force_operand (tmp1, target);
386 if (tmp1 != target)
387 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
388
389 emit_jump_insn (gen_jump (end_label));
390 emit_barrier ();
391
392 emit_label (neq_label);
393 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
394 tmp1 = force_operand (tmp1, target);
395 if (tmp1 != target)
396 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
397
398 emit_label (end_label);
399
400 sequence = get_insns ();
401 end_sequence ();
402 rebuild_jump_labels (sequence);
403 return sequence;
404}
405
406/* Do transform 1) on INSN if applicable. */
407static bool
408divmod_fixed_value_transform (rtx insn)
409{
410 rtx set, set_src, set_dest, op1, op2, value, histogram;
411 enum rtx_code code;
412 enum machine_mode mode;
413 gcov_type val, count, all;
414 edge e;
415
416 set = single_set (insn);
417 if (!set)
418 return false;
419
420 set_src = SET_SRC (set);
421 set_dest = SET_DEST (set);
422 code = GET_CODE (set_src);
423 mode = GET_MODE (set_dest);
424
425 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
426 return false;
427 op1 = XEXP (set_src, false);
428 op2 = XEXP (set_src, 1);
429
430 for (histogram = REG_NOTES (insn);
431 histogram;
432 histogram = XEXP (histogram, 1))
433 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
434 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
435 break;
436
437 if (!histogram)
438 return false;
439
440 histogram = XEXP (XEXP (histogram, 0), 1);
441 value = XEXP (histogram, 0);
442 histogram = XEXP (histogram, 1);
443 val = INTVAL (XEXP (histogram, 0));
444 histogram = XEXP (histogram, 1);
445 count = INTVAL (XEXP (histogram, 0));
446 histogram = XEXP (histogram, 1);
447 all = INTVAL (XEXP (histogram, 0));
448
d91edf86 449 /* We require that count is at least half of all; this means
fca9dc00 450 that for the transformation to fire the value must be constant
d91edf86 451 at least 50% of time (and 75% gives the guarantee of usage). */
fca9dc00
ZD
452 if (!rtx_equal_p (op2, value) || 2 * count < all)
453 return false;
454
c263766c
RH
455 if (dump_file)
456 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
fca9dc00
ZD
457 INSN_UID (insn));
458
459 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
460 delete_insn (insn);
461
462 insert_insn_on_edge (
463 gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
464
465 return true;
466}
467
468/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
469 and OP2 and result TARGET). */
470static rtx
471gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
472 rtx op1, rtx op2)
473{
474 rtx tmp, tmp1, tmp2, tmp3;
475 rtx neq_label = gen_label_rtx ();
476 rtx end_label = gen_label_rtx ();
477 rtx sequence;
478
479 start_sequence ();
480
481 if (!REG_P (op2))
482 {
483 tmp = gen_reg_rtx (mode);
484 emit_move_insn (tmp, copy_rtx (op2));
485 }
486 else
487 tmp = op2;
488
489 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
490 0, OPTAB_WIDEN);
491 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
492 0, OPTAB_WIDEN);
493 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
494 NULL_RTX, neq_label);
495 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
496 0, OPTAB_WIDEN);
497 if (tmp3 != target)
498 emit_move_insn (copy_rtx (target), tmp3);
499 emit_jump_insn (gen_jump (end_label));
500 emit_barrier ();
501
502 emit_label (neq_label);
503 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
504 tmp1 = force_operand (tmp1, target);
505 if (tmp1 != target)
506 emit_move_insn (target, tmp1);
507
508 emit_label (end_label);
509
510 sequence = get_insns ();
511 end_sequence ();
512 rebuild_jump_labels (sequence);
513 return sequence;
514}
515
516/* Do transform 2) on INSN if applicable. */
517static bool
518mod_pow2_value_transform (rtx insn)
519{
520 rtx set, set_src, set_dest, op1, op2, value, histogram;
521 enum rtx_code code;
522 enum machine_mode mode;
523 gcov_type wrong_values, count;
524 edge e;
525 int i;
526
527 set = single_set (insn);
528 if (!set)
529 return false;
530
531 set_src = SET_SRC (set);
532 set_dest = SET_DEST (set);
533 code = GET_CODE (set_src);
534 mode = GET_MODE (set_dest);
535
536 if (code != UMOD)
537 return false;
538 op1 = XEXP (set_src, 0);
539 op2 = XEXP (set_src, 1);
540
541 for (histogram = REG_NOTES (insn);
542 histogram;
543 histogram = XEXP (histogram, 1))
544 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
545 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
546 break;
547
548 if (!histogram)
549 return false;
550
551 histogram = XEXP (XEXP (histogram, 0), 1);
552 value = XEXP (histogram, 0);
553 histogram = XEXP (histogram, 1);
554 wrong_values =INTVAL (XEXP (histogram, 0));
555 histogram = XEXP (histogram, 1);
556
557 count = 0;
558 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
559 {
560 count += INTVAL (XEXP (histogram, 0));
561 histogram = XEXP (histogram, 1);
562 }
563
564 if (!rtx_equal_p (op2, value))
565 return false;
566
567 /* We require that we hit a power of two at least half of all evaluations. */
568 if (count < wrong_values)
569 return false;
570
c263766c
RH
571 if (dump_file)
572 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
fca9dc00
ZD
573 INSN_UID (insn));
574
575 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
576 delete_insn (insn);
577
578 insert_insn_on_edge (
579 gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
580
581 return true;
582}
583
584/* Generate code for transformations 3 and 4 (with MODE and OPERATION,
585 operands OP1 and OP2, result TARGET and at most SUB subtractions). */
586static rtx
587gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
588 rtx target, rtx op1, rtx op2, int sub)
589{
590 rtx tmp, tmp1;
591 rtx end_label = gen_label_rtx ();
592 rtx sequence;
593 int i;
594
595 start_sequence ();
596
597 if (!REG_P (op2))
598 {
599 tmp = gen_reg_rtx (mode);
600 emit_move_insn (tmp, copy_rtx (op2));
601 }
602 else
603 tmp = op2;
604
605 emit_move_insn (target, copy_rtx (op1));
606 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
607 NULL_RTX, end_label);
608
609
610 for (i = 0; i < sub; i++)
611 {
612 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
613 0, OPTAB_WIDEN);
614 if (tmp1 != target)
615 emit_move_insn (target, tmp1);
616 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
617 NULL_RTX, end_label);
618 }
619
620 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
621 tmp1 = force_operand (tmp1, target);
622 if (tmp1 != target)
623 emit_move_insn (target, tmp1);
624
625 emit_label (end_label);
626
627 sequence = get_insns ();
628 end_sequence ();
629 rebuild_jump_labels (sequence);
630 return sequence;
631}
632
633/* Do transforms 3) and 4) on INSN if applicable. */
634static bool
635mod_subtract_transform (rtx insn)
636{
637 rtx set, set_src, set_dest, op1, op2, value, histogram;
638 enum rtx_code code;
639 enum machine_mode mode;
640 gcov_type wrong_values, counts[2], count, all;
641 edge e;
642 int i;
643
644 set = single_set (insn);
645 if (!set)
646 return false;
647
648 set_src = SET_SRC (set);
649 set_dest = SET_DEST (set);
650 code = GET_CODE (set_src);
651 mode = GET_MODE (set_dest);
652
653 if (code != UMOD)
654 return false;
655 op1 = XEXP (set_src, 0);
656 op2 = XEXP (set_src, 1);
657
658 for (histogram = REG_NOTES (insn);
659 histogram;
660 histogram = XEXP (histogram, 1))
661 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
662 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
663 break;
664
665 if (!histogram)
666 return false;
667
668 histogram = XEXP (XEXP (histogram, 0), 1);
669 value = XEXP (histogram, 0);
670 histogram = XEXP (histogram, 1);
671
672 all = 0;
673 for (i = 0; i < 2; i++)
674 {
675 counts[i] = INTVAL (XEXP (histogram, 0));
676 all += counts[i];
677 histogram = XEXP (histogram, 1);
678 }
679 wrong_values = INTVAL (XEXP (histogram, 0));
680 histogram = XEXP (histogram, 1);
681 wrong_values += INTVAL (XEXP (histogram, 0));
682 all += wrong_values;
683
684 /* We require that we use just subtractions in at least 50% of all
685 evaluations. */
686 count = 0;
687 for (i = 0; i < 2; i++)
688 {
689 count += counts[i];
690 if (count * 2 >= all)
691 break;
692 }
693
694 if (i == 2)
695 return false;
696
c263766c
RH
697 if (dump_file)
698 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
fca9dc00
ZD
699 INSN_UID (insn));
700
701 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
702 delete_insn (insn);
703
704 insert_insn_on_edge (
705 gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);
706
707 return true;
708}