1 /* Transformations based on profile information for values.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "expr.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
29 #include "value-prof.h"
30 #include "output.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "recog.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "ggc.h"
37 #include "tree-flow.h"
38 #include "tree-flow-inline.h"
39 #include "diagnostic.h"
40 #include "coverage.h"
41 #include "tree.h"
42 #include "gcov-io.h"
43
44 static struct value_prof_hooks *value_prof_hooks;
45
46 /* This is the vector of histograms. Created in find_values_to_profile.
47 During profile generation, freed by instrument_values.
48 During profile use, freed by value_profile_transformations. */
49
50 static histogram_values static_values = NULL;
51
52 /* This file contains the value-profile-based optimizations.  Currently the
53 following optimizations are implemented (for more detailed descriptions
54 see comments at value_profile_transformations):
55
56 1) Division/modulo specialization.  Provided that we can determine that the
57 operands of the division have some special properties, we may use this
58 knowledge to produce more efficient code.
59 2) Speculative prefetching.  If we are able to determine that the difference
60 between addresses accessed by a memory reference is usually constant, we
61 may add prefetch instructions.
62
63 Every such optimization should add its requirements for profiled values to
64 insn_values_to_profile function. This function is called from branch_prob
65 in profile.c and the requested values are instrumented by it in the first
66 compilation with -fprofile-arcs. The optimization may then read the
67 gathered data in the second compilation with -fbranch-probabilities.
68
69 There are currently two versions, RTL-based and tree-based. Over time
70 the RTL-based version may go away.
71
72 In the RTL-based version, the measured data is appended as a REG_VALUE_PROFILE
73 note to the instrumented insn. The argument to the note consists of an
74 EXPR_LIST whose members have the following meaning (from the first to
75 the last):
76
77 -- type of information gathered (HIST_TYPE*)
78 -- the expression that is profiled
79 -- list of counters starting from the first one.
80
81 In the tree-based version, the measured data is pointed to from the histograms
82 field of the statement annotation of the instrumented statements.  It is
83 kept as a linked list of struct histogram_value_t's, which contain the
84 same information as above. */
85
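/* As a rough source-level illustration of optimization 1) above, a sketch of
   what the division/modulo specialization conceptually produces.  It is not
   part of the compiler and is therefore kept out of the build; the function
   name and the constant 8 are invented for the example only.  */
#if 0
static unsigned
example_div_specialized (unsigned a, unsigned b)
{
  /* Suppose the value histogram shows that b is 8 in the vast majority of
     executions.  The transformed code guards a fast path on that value.  */
  if (b == 8)
    return a / 8;   /* Division by a known constant; later passes can turn
                       this into a shift.  */
  else
    return a / b;   /* Fallback: the original, general division.  */
}
#endif
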
86 /* For speculative prefetching, the range within which we do not prefetch (because
87 we assume that it will be in cache anyway).  The asymmetry between the min and
88 max ranges tries to reflect the fact that sequential prefetching
89 of the data is commonly done directly by the hardware.  Nevertheless, these
90 values are just a guess and should of course be target-specific.
91
92 FIXME: There is no tree form of speculative prefetching as yet.
93
94 FIXME: A better approach to instrumentation in the profile-generation
95 pass is to generate calls to magic library functions (to be added to
96 libgcc) rather than inline code. This approach will probably be
97 necessary to get tree-based speculative prefetching working in a useful
98 fashion, as inline code bloats things so much the rest of the compiler has
99 serious problems dealing with it (judging from the rtl behavior). */
100
101 #ifndef NOPREFETCH_RANGE_MIN
102 #define NOPREFETCH_RANGE_MIN (-16)
103 #endif
104 #ifndef NOPREFETCH_RANGE_MAX
105 #define NOPREFETCH_RANGE_MAX 32
106 #endif
107
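/* A minimal sketch (illustration only, excluded from the build) of how the
   window above is meant to be used: a measured stride DELTA falling inside
   [NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX] is assumed to hit the cache
   anyway, so no prefetch is emitted for it.  The helper name is made up for
   this example; the real check lives in speculative_prefetching_transform.  */
#if 0
static int
example_worth_prefetching (long delta)
{
  return !(delta >= NOPREFETCH_RANGE_MIN && delta <= NOPREFETCH_RANGE_MAX);
}
#endif
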
108 static void rtl_divmod_values_to_profile (rtx, histogram_values *);
109 #ifdef HAVE_prefetch
110 static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
111 static int find_mem_reference_1 (rtx *, void *);
112 static void find_mem_reference_2 (rtx, rtx, void *);
113 static bool find_mem_reference (rtx, rtx *, int *);
114 #endif
115
116 static void rtl_values_to_profile (rtx, histogram_values *);
117 static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
118 rtx, gcov_type, int);
119 static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
120 static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
121 int, int, int);
122 #ifdef HAVE_prefetch
123 static rtx gen_speculative_prefetch (rtx, gcov_type, int);
124 #endif
125 static bool rtl_divmod_fixed_value_transform (rtx);
126 static bool rtl_mod_pow2_value_transform (rtx);
127 static bool rtl_mod_subtract_transform (rtx);
128 #ifdef HAVE_prefetch
129 static bool speculative_prefetching_transform (rtx);
130 #endif
131 static void tree_divmod_values_to_profile (tree, histogram_values *);
132 static void tree_values_to_profile (tree, histogram_values *);
133 static tree tree_divmod_fixed_value (tree, tree, tree, tree,
134 tree, int, gcov_type, gcov_type);
135 static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type);
136 static tree tree_mod_subtract (tree, tree, tree, tree, int, int, int,
137 gcov_type, gcov_type, gcov_type);
138 static bool tree_divmod_fixed_value_transform (tree);
139 static bool tree_mod_pow2_value_transform (tree);
140 static bool tree_mod_subtract_transform (tree);
141
142 \f
143 /* Find values inside INSN for which we want to measure histograms for
144 division/modulo optimization, and store them in VALUES. */
145 static void
146 rtl_divmod_values_to_profile (rtx insn, histogram_values *values)
147 {
148 rtx set, set_src, op1, op2;
149 enum machine_mode mode;
150 histogram_value hist;
151
152 if (!INSN_P (insn))
153 return;
154
155 set = single_set (insn);
156 if (!set)
157 return;
158
159 mode = GET_MODE (SET_DEST (set));
160 if (!INTEGRAL_MODE_P (mode))
161 return;
162
163 set_src = SET_SRC (set);
164 switch (GET_CODE (set_src))
165 {
166 case DIV:
167 case MOD:
168 case UDIV:
169 case UMOD:
170 op1 = XEXP (set_src, 0);
171 op2 = XEXP (set_src, 1);
172 if (side_effects_p (op2))
173 return;
174
175 /* Check for a special case where the divisor is a power of 2. */
176 if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
177 {
178 hist = ggc_alloc (sizeof (*hist));
179 hist->hvalue.rtl.value = op2;
180 hist->hvalue.rtl.seq = NULL_RTX;
181 hist->hvalue.rtl.mode = mode;
182 hist->hvalue.rtl.insn = insn;
183 hist->type = HIST_TYPE_POW2;
184 hist->hdata.pow2.may_be_other = 1;
185 VEC_safe_push (histogram_value, heap, *values, hist);
186 }
187
188 /* Check whether the divisor is not in fact a constant. */
189 if (!CONSTANT_P (op2))
190 {
191 hist = ggc_alloc (sizeof (*hist));
192 hist->hvalue.rtl.value = op2;
193 hist->hvalue.rtl.mode = mode;
194 hist->hvalue.rtl.seq = NULL_RTX;
195 hist->hvalue.rtl.insn = insn;
196 hist->type = HIST_TYPE_SINGLE_VALUE;
197 VEC_safe_push (histogram_value, heap, *values, hist);
198 }
199
200 /* For mod, check whether it is not often a noop (or replaceable by
201 a few subtractions). */
202 if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
203 {
204 rtx tmp;
205
206 hist = ggc_alloc (sizeof (*hist));
207 start_sequence ();
208 tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
209 hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX);
210 hist->hvalue.rtl.seq = get_insns ();
211 end_sequence ();
212 hist->hvalue.rtl.mode = mode;
213 hist->hvalue.rtl.insn = insn;
214 hist->type = HIST_TYPE_INTERVAL;
215 hist->hdata.intvl.int_start = 0;
216 hist->hdata.intvl.steps = 2;
217 VEC_safe_push (histogram_value, heap, *values, hist);
218 }
219 return;
220
221 default:
222 return;
223 }
224 }
225
226 #ifdef HAVE_prefetch
227
228 /* Called from find_mem_reference through for_each_rtx, finds a memory
229 reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored
230 in *RET and the traversal of the expression is interrupted by returning 1.
231 Otherwise 0 is returned. */
232
233 static int
234 find_mem_reference_1 (rtx *expr, void *ret)
235 {
236 rtx *mem = ret;
237
238 if (GET_CODE (*expr) == MEM)
239 {
240 *mem = *expr;
241 return 1;
242 }
243 return 0;
244 }
245
246 /* Called from find_mem_reference through note_stores to find out whether
247 the memory reference MEM is a store. I.e. if EXPR == MEM, the variable
248 FMR2_WRITE is set to true. */
249
250 static int fmr2_write;
251 static void
252 find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
253 {
254 if (expr == mem)
255 fmr2_write = true;
256 }
257
258 /* Find a memory reference inside INSN, return it in MEM. Set WRITE to true
259 if it is a write of the mem. Return false if no memory reference is found,
260 true otherwise. */
261
262 static bool
263 find_mem_reference (rtx insn, rtx *mem, int *write)
264 {
265 *mem = NULL_RTX;
266 for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);
267
268 if (!*mem)
269 return false;
270
271 fmr2_write = false;
272 note_stores (PATTERN (insn), find_mem_reference_2, *mem);
273 *write = fmr2_write;
274 return true;
275 }
276
277 /* Find values inside INSN for which we want to measure histograms for
278 speculative prefetching.  Add them to the list VALUES.
279 Returns true if we found any such value, false otherwise. */
280
281 static bool
282 insn_prefetch_values_to_profile (rtx insn, histogram_values* values)
283 {
284 rtx mem, address;
285 int write;
286 histogram_value hist;
287
288 /* It only makes sense to look for memory references in ordinary insns. */
289 if (GET_CODE (insn) != INSN)
290 return false;
291
292 if (!find_mem_reference (insn, &mem, &write))
293 return false;
294
295 address = XEXP (mem, 0);
296 if (side_effects_p (address))
297 return false;
298
299 if (CONSTANT_P (address))
300 return false;
301
302 hist = ggc_alloc (sizeof (*hist));
303 hist->hvalue.rtl.value = address;
304 hist->hvalue.rtl.mode = GET_MODE (address);
305 hist->hvalue.rtl.seq = NULL_RTX;
306 hist->hvalue.rtl.insn = insn;
307 hist->type = HIST_TYPE_CONST_DELTA;
308 VEC_safe_push (histogram_value, heap, *values, hist);
309
310 return true;
311 }
312 #endif
313 /* Find values inside INSN for which we want to measure histograms and add
314 them to the list VALUES. */
315 static void
316 rtl_values_to_profile (rtx insn, histogram_values *values)
317 {
318 if (flag_value_profile_transformations)
319 rtl_divmod_values_to_profile (insn, values);
320
321 #ifdef HAVE_prefetch
322 if (flag_speculative_prefetching)
323 insn_prefetch_values_to_profile (insn, values);
324 #endif
325 }
326
327 /* Find the list of values for which we want to measure histograms. */
328 static void
329 rtl_find_values_to_profile (histogram_values *values)
330 {
331 rtx insn;
332 unsigned i, libcall_level;
333 histogram_value hist;
334
335 life_analysis (NULL, PROP_DEATH_NOTES);
336
337 *values = NULL;
338 libcall_level = 0;
339 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
340 rtl_values_to_profile (insn, values);
341 static_values = *values;
342
343 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
344 {
345 switch (hist->type)
346 {
347 case HIST_TYPE_INTERVAL:
348 if (dump_file)
349 fprintf (dump_file,
350 "Interval counter for insn %d, range %d -- %d.\n",
351 INSN_UID ((rtx)hist->hvalue.rtl.insn),
352 hist->hdata.intvl.int_start,
353 (hist->hdata.intvl.int_start
354 + hist->hdata.intvl.steps - 1));
355 hist->n_counters = hist->hdata.intvl.steps + 2;
356 break;
357
358 case HIST_TYPE_POW2:
359 if (dump_file)
360 fprintf (dump_file,
361 "Pow2 counter for insn %d.\n",
362 INSN_UID ((rtx)hist->hvalue.rtl.insn));
363 hist->n_counters
364 = GET_MODE_BITSIZE (hist->hvalue.rtl.mode)
365 + (hist->hdata.pow2.may_be_other ? 1 : 0);
366 break;
367
368 case HIST_TYPE_SINGLE_VALUE:
369 if (dump_file)
370 fprintf (dump_file,
371 "Single value counter for insn %d.\n",
372 INSN_UID ((rtx)hist->hvalue.rtl.insn));
373 hist->n_counters = 3;
374 break;
375
376 case HIST_TYPE_CONST_DELTA:
377 if (dump_file)
378 fprintf (dump_file,
379 "Constant delta counter for insn %d.\n",
380 INSN_UID ((rtx)hist->hvalue.rtl.insn));
381 hist->n_counters = 4;
382 break;
383
384 default:
385 gcc_unreachable ();
386 }
387 }
388 allocate_reg_info (max_reg_num (), FALSE, FALSE);
389 }
390
391 /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
392 them to identify and exploit properties of values that are hard to analyze
393 statically.
394
395 We do the following transformations:
396
397 1)
398
399 x = a / b;
400
401 where b is almost always a constant N, is transformed to
402
403 if (b == N)
404 x = a / N;
405 else
406 x = a / b;
407
408 Analogously for %
409
410 2)
411
412 x = a % b
413
414 where b is almost always a power of 2 and the division is unsigned
415 TODO -- handle signed case as well
416
417 if ((b & (b - 1)) == 0)
418 x = a & (b - 1);
419 else
420 x = a % b;
421
422 Note that when b = 0, no error will occur and x = a; this is correct,
423 as the result of such an operation is undefined.
424
425 3)
426
427 x = a % b
428
429 where a is almost always less than b and the division is unsigned
430 TODO -- handle signed case as well
431
432 x = a;
433 if (x >= b)
434 x %= b;
435
436 4)
437
438 x = a % b
439
440 where a is almost always less than 2 * b and the division is unsigned
441 TODO -- handle signed case as well
442
443 x = a;
444 if (x >= b)
445 x -= b;
446 if (x >= b)
447 x %= b;
448
449 It would be possible to continue analogically for K * b for other small
450 K's, but it is probably not useful.
451
452 5)
453
454 Read or write of mem[address], where the value of address usually changes
455 by a constant C != 0 between consecutive executions of the access; with
456 -fspeculative-prefetching we then add a prefetch of address + C before
457 the insn.  This handles prefetching of several interesting cases in addition
458 to simple prefetching for addresses that are induction variables, e.g.
459 linked lists allocated sequentially (even if they are processed
460 recursively).
461
462 TODO -- we should also check whether the difference from adjacent memory
463 references is not (usually) small, so that we do
464 not issue overlapping prefetches. Also we should employ some
465 heuristics to eliminate cases where prefetching evidently spoils
466 the code.
467 -- it should somehow cooperate with the loop optimizer's prefetching
468
469 TODO:
470
471 There are other useful cases that could be handled by a similar mechanism,
472 for example:
473
474 for (i = 0; i < n; i++)
475 ...
476
477 transform to (for constant N):
478
479 if (n == N)
480 for (i = 0; i < N; i++)
481 ...
482 else
483 for (i = 0; i < n; i++)
484 ...
485 making the unroller happy.  Since this may grow the code significantly,
486 we would have to be very careful here. */
487
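/* For reference, a source-level sketch of transformations 2) and 3)/4) above
   (illustration only, kept out of the build; the function names are invented
   for the example).  It mirrors the pseudo code in the comment and the code
   generated by rtl_mod_pow2 and rtl_mod_subtract below.  */
#if 0
/* Transformation 2: b is almost always a power of 2, division unsigned.  */
static unsigned
example_mod_pow2 (unsigned a, unsigned b)
{
  if ((b & (b - 1)) == 0)
    return a & (b - 1);   /* Power-of-2 fast path: mask instead of %.  */
  else
    return a % b;         /* Fallback: the original modulo.  */
}

/* Transformations 3)/4): a is almost always smaller than b (or 2 * b).  */
static unsigned
example_mod_subtract (unsigned a, unsigned b)
{
  unsigned x = a;
  if (x >= b)
    x -= b;               /* One subtraction usually suffices.  */
  if (x >= b)
    x %= b;               /* Rare general case.  */
  return x;
}
#endif
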
488 static bool
489 rtl_value_profile_transformations (void)
490 {
491 rtx insn, next;
492 int changed = false;
493
494 for (insn = get_insns (); insn; insn = next)
495 {
496 next = NEXT_INSN (insn);
497
498 if (!INSN_P (insn))
499 continue;
500
501 /* Scan for insn carrying a histogram. */
502 if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
503 continue;
504
505 /* Ignore cold areas -- we are growing the code. */
506 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
507 continue;
508
509 if (dump_file)
510 {
511 fprintf (dump_file, "Trying transformations on insn %d\n",
512 INSN_UID (insn));
513 print_rtl_single (dump_file, insn);
514 }
515
516 /* Transformations: */
517 if (flag_value_profile_transformations
518 && (rtl_mod_subtract_transform (insn)
519 || rtl_divmod_fixed_value_transform (insn)
520 || rtl_mod_pow2_value_transform (insn)))
521 changed = true;
522 #ifdef HAVE_prefetch
523 if (flag_speculative_prefetching
524 && speculative_prefetching_transform (insn))
525 changed = true;
526 #endif
527 }
528
529 if (changed)
530 {
531 commit_edge_insertions ();
532 allocate_reg_info (max_reg_num (), FALSE, FALSE);
533 }
534
535 return changed;
536 }
537
538 /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
539 and OP2, whose value is expected to be VALUE, result TARGET and
540 probability of taking the optimal path PROB). */
541 static rtx
542 rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
543 rtx target, rtx op1, rtx op2, gcov_type value,
544 int prob)
545 {
546 rtx tmp, tmp1, jump;
547 rtx neq_label = gen_label_rtx ();
548 rtx end_label = gen_label_rtx ();
549 rtx sequence;
550
551 start_sequence ();
552
553 if (!REG_P (op2))
554 {
555 tmp = gen_reg_rtx (mode);
556 emit_move_insn (tmp, copy_rtx (op2));
557 }
558 else
559 tmp = op2;
560
561 do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
562 NULL_RTX, neq_label);
563
564 /* Add branch probability to jump we just created. */
565 jump = get_last_insn ();
566 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
567 GEN_INT (REG_BR_PROB_BASE - prob),
568 REG_NOTES (jump));
569
570 tmp1 = simplify_gen_binary (operation, mode,
571 copy_rtx (op1), GEN_INT (value));
572 tmp1 = force_operand (tmp1, target);
573 if (tmp1 != target)
574 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
575
576 emit_jump_insn (gen_jump (end_label));
577 emit_barrier ();
578
579 emit_label (neq_label);
580 tmp1 = simplify_gen_binary (operation, mode,
581 copy_rtx (op1), copy_rtx (tmp));
582 tmp1 = force_operand (tmp1, target);
583 if (tmp1 != target)
584 emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
585
586 emit_label (end_label);
587
588 sequence = get_insns ();
589 end_sequence ();
590 rebuild_jump_labels (sequence);
591 return sequence;
592 }
593
594 /* Do transform 1) on INSN if applicable. */
595 static bool
596 rtl_divmod_fixed_value_transform (rtx insn)
597 {
598 rtx set, set_src, set_dest, op1, op2, value, histogram;
599 enum rtx_code code;
600 enum machine_mode mode;
601 gcov_type val, count, all;
602 edge e;
603 int prob;
604
605 set = single_set (insn);
606 if (!set)
607 return false;
608
609 set_src = SET_SRC (set);
610 set_dest = SET_DEST (set);
611 code = GET_CODE (set_src);
612 mode = GET_MODE (set_dest);
613
614 if (code != DIV && code != MOD && code != UDIV && code != UMOD)
615 return false;
616 op1 = XEXP (set_src, 0);
617 op2 = XEXP (set_src, 1);
618
619 for (histogram = REG_NOTES (insn);
620 histogram;
621 histogram = XEXP (histogram, 1))
622 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
623 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
624 break;
625
626 if (!histogram)
627 return false;
628
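  /* The REG_VALUE_PROFILE note carries an EXPR_LIST whose elements are, in
     order: the histogram type, the profiled expression, and its counters
     (here the most common value, the number of times it occurred, and the
     total number of executions).  Skip the type and read them in turn.  */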
629 histogram = XEXP (XEXP (histogram, 0), 1);
630 value = XEXP (histogram, 0);
631 histogram = XEXP (histogram, 1);
632 val = INTVAL (XEXP (histogram, 0));
633 histogram = XEXP (histogram, 1);
634 count = INTVAL (XEXP (histogram, 0));
635 histogram = XEXP (histogram, 1);
636 all = INTVAL (XEXP (histogram, 0));
637
638 /* We require that count be at least half of all; this means
639 that for the transformation to fire the value must be constant
640 at least 50% of the time (and 75% gives the guarantee of usage). */
641 if (!rtx_equal_p (op2, value) || 2 * count < all)
642 return false;
643
644 if (dump_file)
645 fprintf (dump_file, "Div/mod by constant transformation on insn %d\n",
646 INSN_UID (insn));
647
648 /* Compute probability of taking the optimal path. */
649 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
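  /* Worked example with made-up counts: if count == 2 and all == 3, then
     (assuming REG_BR_PROB_BASE is 10000) this gives
     prob = (2 * 10000 + 1) / 3 = 6667, i.e. roughly two thirds of
     REG_BR_PROB_BASE, rounded to the nearest integer.  */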
650
651 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
652 delete_insn (insn);
653
654 insert_insn_on_edge (
655 rtl_divmod_fixed_value (mode, code, set_dest,
656 op1, op2, val, prob), e);
657
658 return true;
659 }
660
661 /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
662 and OP2, result TARGET and probability of taking the optimal path PROB). */
663 static rtx
664 rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
665 rtx op1, rtx op2, int prob)
666 {
667 rtx tmp, tmp1, tmp2, tmp3, jump;
668 rtx neq_label = gen_label_rtx ();
669 rtx end_label = gen_label_rtx ();
670 rtx sequence;
671
672 start_sequence ();
673
674 if (!REG_P (op2))
675 {
676 tmp = gen_reg_rtx (mode);
677 emit_move_insn (tmp, copy_rtx (op2));
678 }
679 else
680 tmp = op2;
681
682 tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
683 0, OPTAB_WIDEN);
684 tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
685 0, OPTAB_WIDEN);
686 do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
687 NULL_RTX, neq_label);
688
689 /* Add branch probability to jump we just created. */
690 jump = get_last_insn ();
691 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
692 GEN_INT (REG_BR_PROB_BASE - prob),
693 REG_NOTES (jump));
694
695 tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
696 0, OPTAB_WIDEN);
697 if (tmp3 != target)
698 emit_move_insn (copy_rtx (target), tmp3);
699 emit_jump_insn (gen_jump (end_label));
700 emit_barrier ();
701
702 emit_label (neq_label);
703 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
704 tmp1 = force_operand (tmp1, target);
705 if (tmp1 != target)
706 emit_move_insn (target, tmp1);
707
708 emit_label (end_label);
709
710 sequence = get_insns ();
711 end_sequence ();
712 rebuild_jump_labels (sequence);
713 return sequence;
714 }
715
716 /* Do transform 2) on INSN if applicable. */
717 static bool
718 rtl_mod_pow2_value_transform (rtx insn)
719 {
720 rtx set, set_src, set_dest, op1, op2, value, histogram;
721 enum rtx_code code;
722 enum machine_mode mode;
723 gcov_type wrong_values, count;
724 edge e;
725 int i, all, prob;
726
727 set = single_set (insn);
728 if (!set)
729 return false;
730
731 set_src = SET_SRC (set);
732 set_dest = SET_DEST (set);
733 code = GET_CODE (set_src);
734 mode = GET_MODE (set_dest);
735
736 if (code != UMOD)
737 return false;
738 op1 = XEXP (set_src, 0);
739 op2 = XEXP (set_src, 1);
740
741 for (histogram = REG_NOTES (insn);
742 histogram;
743 histogram = XEXP (histogram, 1))
744 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
745 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
746 break;
747
748 if (!histogram)
749 return false;
750
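  /* As read below, the first counter of a HIST_TYPE_POW2 histogram is the
     number of evaluations in which the divisor was not a power of 2; the
     remaining GET_MODE_BITSIZE (mode) counters hold one entry per bit
     position and are summed to obtain the number of power-of-2 hits.  */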
751 histogram = XEXP (XEXP (histogram, 0), 1);
752 value = XEXP (histogram, 0);
753 histogram = XEXP (histogram, 1);
754 wrong_values = INTVAL (XEXP (histogram, 0));
755 histogram = XEXP (histogram, 1);
756
757 count = 0;
758 for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
759 {
760 count += INTVAL (XEXP (histogram, 0));
761 histogram = XEXP (histogram, 1);
762 }
763
764 if (!rtx_equal_p (op2, value))
765 return false;
766
767 /* We require that we hit a power of two in at least half of all evaluations. */
768 if (count < wrong_values)
769 return false;
770
771 if (dump_file)
772 fprintf (dump_file, "Mod power of 2 transformation on insn %d\n",
773 INSN_UID (insn));
774
775 /* Compute probability of taking the optimal path. */
776 all = count + wrong_values;
777 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
778
779 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
780 delete_insn (insn);
781
782 insert_insn_on_edge (
783 rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e);
784
785 return true;
786 }
787
788 /* Generate code for transformations 3 and 4 (with MODE and OPERATION,
789 operands OP1 and OP2, result TARGET, at most SUB subtractions, and
790 probability of taking the optimal path(s) PROB1 and PROB2). */
791 static rtx
792 rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation,
793 rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2)
794 {
795 rtx tmp, tmp1, jump;
796 rtx end_label = gen_label_rtx ();
797 rtx sequence;
798 int i;
799
800 start_sequence ();
801
802 if (!REG_P (op2))
803 {
804 tmp = gen_reg_rtx (mode);
805 emit_move_insn (tmp, copy_rtx (op2));
806 }
807 else
808 tmp = op2;
809
810 emit_move_insn (target, copy_rtx (op1));
811 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
812 NULL_RTX, end_label);
813
814 /* Add branch probability to jump we just created. */
815 jump = get_last_insn ();
816 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
817 GEN_INT (prob1), REG_NOTES (jump));
818
819 for (i = 0; i < sub; i++)
820 {
821 tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
822 0, OPTAB_WIDEN);
823 if (tmp1 != target)
824 emit_move_insn (target, tmp1);
825 do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
826 NULL_RTX, end_label);
827
828 /* Add branch probability to jump we just created. */
829 jump = get_last_insn ();
830 REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB,
831 GEN_INT (prob2), REG_NOTES (jump));
832 }
833
834 tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
835 tmp1 = force_operand (tmp1, target);
836 if (tmp1 != target)
837 emit_move_insn (target, tmp1);
838
839 emit_label (end_label);
840
841 sequence = get_insns ();
842 end_sequence ();
843 rebuild_jump_labels (sequence);
844 return sequence;
845 }
846
847 /* Do transforms 3) and 4) on INSN if applicable. */
848 static bool
849 rtl_mod_subtract_transform (rtx insn)
850 {
851 rtx set, set_src, set_dest, op1, op2, histogram;
852 enum rtx_code code;
853 enum machine_mode mode;
854 gcov_type wrong_values, counts[2], count, all;
855 edge e;
856 int i, prob1, prob2;
857
858 set = single_set (insn);
859 if (!set)
860 return false;
861
862 set_src = SET_SRC (set);
863 set_dest = SET_DEST (set);
864 code = GET_CODE (set_src);
865 mode = GET_MODE (set_dest);
866
867 if (code != UMOD)
868 return false;
869 op1 = XEXP (set_src, 0);
870 op2 = XEXP (set_src, 1);
871
872 for (histogram = REG_NOTES (insn);
873 histogram;
874 histogram = XEXP (histogram, 1))
875 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
876 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
877 break;
878
879 if (!histogram)
880 return false;
881
882 histogram = XEXP (XEXP (histogram, 0), 1);
883 histogram = XEXP (histogram, 1);
884
885 all = 0;
886 for (i = 0; i < 2; i++)
887 {
888 counts[i] = INTVAL (XEXP (histogram, 0));
889 all += counts[i];
890 histogram = XEXP (histogram, 1);
891 }
892 wrong_values = INTVAL (XEXP (histogram, 0));
893 histogram = XEXP (histogram, 1);
894 wrong_values += INTVAL (XEXP (histogram, 0));
895 all += wrong_values;
896
897 /* We require that we use just subtractions in at least 50% of all
898 evaluations. */
899 count = 0;
900 for (i = 0; i < 2; i++)
901 {
902 count += counts[i];
903 if (count * 2 >= all)
904 break;
905 }
906
907 if (i == 2)
908 return false;
909
910 if (dump_file)
911 fprintf (dump_file, "Mod subtract transformation on insn %d\n",
912 INSN_UID (insn));
913
914 /* Compute probability of taking the optimal path(s). */
915 prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all;
916 prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all;
917
918 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
919 delete_insn (insn);
920
921 insert_insn_on_edge (
922 rtl_mod_subtract (mode, code, set_dest,
923 op1, op2, i, prob1, prob2), e);
924
925 return true;
926 }
927
928 #ifdef HAVE_prefetch
929 /* Generate code for transformation 5 for mem with ADDRESS and a constant
930 step DELTA. WRITE is true if the reference is a store to mem. */
931
932 static rtx
933 gen_speculative_prefetch (rtx address, gcov_type delta, int write)
934 {
935 rtx tmp;
936 rtx sequence;
937
938 /* TODO: we do the prefetching for just one iteration ahead, which
939 often is not enough. */
940 start_sequence ();
941 if (offsettable_address_p (0, VOIDmode, address))
942 tmp = plus_constant (copy_rtx (address), delta);
943 else
944 {
945 tmp = simplify_gen_binary (PLUS, Pmode,
946 copy_rtx (address), GEN_INT (delta));
947 tmp = force_operand (tmp, NULL);
948 }
949 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
950 (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
951 tmp = force_reg (Pmode, tmp);
952 emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3)));
953 sequence = get_insns ();
954 end_sequence ();
955
956 return sequence;
957 }
958
959 /* Do transform 5) on INSN if applicable. */
960
961 static bool
962 speculative_prefetching_transform (rtx insn)
963 {
964 rtx histogram, value;
965 gcov_type val, count, all;
966 edge e;
967 rtx mem, address;
968 int write;
969
970 if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
971 return false;
972
973 if (!find_mem_reference (insn, &mem, &write))
974 return false;
975
976 address = XEXP (mem, 0);
977 if (side_effects_p (address))
978 return false;
979
980 if (CONSTANT_P (address))
981 return false;
982
983 for (histogram = REG_NOTES (insn);
984 histogram;
985 histogram = XEXP (histogram, 1))
986 if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
987 && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA))
988 break;
989
990 if (!histogram)
991 return false;
992
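  /* The HIST_TYPE_CONST_DELTA payload holds the profiled address followed by
     its counters: the last value seen (skipped below), the most common delta
     between consecutive values, the number of times that delta occurred, and
     the total number of measurements.  */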
993 histogram = XEXP (XEXP (histogram, 0), 1);
994 value = XEXP (histogram, 0);
995 histogram = XEXP (histogram, 1);
996 /* Skip last value referenced. */
997 histogram = XEXP (histogram, 1);
998 val = INTVAL (XEXP (histogram, 0));
999 histogram = XEXP (histogram, 1);
1000 count = INTVAL (XEXP (histogram, 0));
1001 histogram = XEXP (histogram, 1);
1002 all = INTVAL (XEXP (histogram, 0));
1003
1004 /* With so few executions we do not really have a reason to optimize the
1005 statement; more importantly, the data about address differences are
1006 spoiled by the first item that had no previous value to compare
1007 with. */
1008 if (all < 4)
1009 return false;
1010
1011 /* We require that count be at least half of all; this means
1012 that for the transformation to fire the value must be constant
1013 at least 50% of the time (and 75% gives the guarantee of usage). */
1014 if (!rtx_equal_p (address, value) || 2 * count < all)
1015 return false;
1016
1017 /* If the difference is too small, it does not make much sense to
1018 prefetch, as the memory is probably already in cache. */
1019 if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
1020 return false;
1021
1022 if (dump_file)
1023 fprintf (dump_file, "Speculative prefetching for insn %d\n",
1024 INSN_UID (insn));
1025
1026 e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
1027
1028 insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);
1029
1030 return true;
1031 }
1032 #endif /* HAVE_prefetch */
1033
1034 /* Tree based transformations. */
1035 static bool
1036 tree_value_profile_transformations (void)
1037 {
1038 basic_block bb;
1039 block_stmt_iterator bsi;
1040 bool changed = false;
1041
1042 FOR_EACH_BB (bb)
1043 {
1044 /* Ignore cold areas -- we are enlarging the code. */
1045 if (!maybe_hot_bb_p (bb))
1046 continue;
1047
1048 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1049 {
1050 tree stmt = bsi_stmt (bsi);
1051 stmt_ann_t ann = get_stmt_ann (stmt);
1052 histogram_value th = ann->histograms;
1053 if (!th)
1054 continue;
1055
1056 if (dump_file)
1057 {
1058 fprintf (dump_file, "Trying transformations on insn ");
1059 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1060 }
1061
1062 /* Transformations: */
1063 /* The order of things in this conditional controls which
1064 transformation is used when more than one is applicable. */
1065 /* It is expected that any code added by the transformations
1066 will be added before the current statement, and that the
1067 current statement remain valid (although possibly
1068 modified) upon return. */
1069 if (flag_value_profile_transformations
1070 && (tree_mod_subtract_transform (stmt)
1071 || tree_divmod_fixed_value_transform (stmt)
1072 || tree_mod_pow2_value_transform (stmt)))
1073 {
1074 changed = true;
1075 /* Original statement may no longer be in the same block. */
1076 bb = bb_for_stmt (stmt);
1077 }
1078
1079 /* Free extra storage from compute_value_histograms. */
1080 while (th)
1081 {
1082 free (th->hvalue.tree.counters);
1083 th = th->hvalue.tree.next;
1084 }
1085 ann->histograms = 0;
1086 }
1087 }
1088
1089 if (changed)
1090 {
1091 counts_to_freqs ();
1092 }
1093
1094 return changed;
1095 }
1096
1097 /* Generate code for transformation 1 (with OPERATION, operands OP1
1098 and OP2, whose value is expected to be VALUE, parent modify-expr STMT and
1099 probability of taking the optimal path PROB, which is equivalent to COUNT/ALL
1100 within roundoff error). This generates the result into a temp and returns
1101 the temp; it does not replace or alter the original STMT. */
1102 static tree
1103 tree_divmod_fixed_value (tree stmt, tree operation,
1104 tree op1, tree op2, tree value, int prob, gcov_type count,
1105 gcov_type all)
1106 {
1107 tree stmt1, stmt2, stmt3;
1108 tree tmp1, tmp2, tmpv;
1109 tree label_decl1 = create_artificial_label ();
1110 tree label_decl2 = create_artificial_label ();
1111 tree label_decl3 = create_artificial_label ();
1112 tree label1, label2, label3;
1113 tree bb1end, bb2end, bb3end;
1114 basic_block bb, bb2, bb3, bb4;
1115 tree optype = TREE_TYPE (operation);
1116 edge e12, e13, e23, e24, e34;
1117 block_stmt_iterator bsi;
1118
1119 bb = bb_for_stmt (stmt);
1120 bsi = bsi_for_stmt (stmt);
1121
1122 tmpv = create_tmp_var (optype, "PROF");
1123 tmp1 = create_tmp_var (optype, "PROF");
1124 stmt1 = build2 (MODIFY_EXPR, optype, tmpv, fold_convert (optype, value));
1125 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1126 stmt3 = build3 (COND_EXPR, void_type_node,
1127 build2 (NE_EXPR, boolean_type_node, tmp1, tmpv),
1128 build1 (GOTO_EXPR, void_type_node, label_decl2),
1129 build1 (GOTO_EXPR, void_type_node, label_decl1));
1130 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1131 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1132 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1133 bb1end = stmt3;
1134
1135 tmp2 = create_tmp_var (optype, "PROF");
1136 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1137 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1138 build2 (TREE_CODE (operation), optype, op1, tmpv));
1139 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1140 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1141 bb2end = stmt1;
1142
1143 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1144 stmt1 = build2 (MODIFY_EXPR, optype, tmp2,
1145 build2 (TREE_CODE (operation), optype, op1, op2));
1146 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1147 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1148 bb3end = stmt1;
1149
1150 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1151 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1152
1153 /* Fix CFG. */
1154 /* Edge e23 connects bb2 to bb3, etc. */
1155 e12 = split_block (bb, bb1end);
1156 bb2 = e12->dest;
1157 bb2->count = count;
1158 e23 = split_block (bb2, bb2end);
1159 bb3 = e23->dest;
1160 bb3->count = all - count;
1161 e34 = split_block (bb3, bb3end);
1162 bb4 = e34->dest;
1163 bb4->count = all;
1164
1165 e12->flags &= ~EDGE_FALLTHRU;
1166 e12->flags |= EDGE_FALSE_VALUE;
1167 e12->probability = prob;
1168 e12->count = count;
1169
1170 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1171 e13->probability = REG_BR_PROB_BASE - prob;
1172 e13->count = all - count;
1173
1174 remove_edge (e23);
1175
1176 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1177 e24->probability = REG_BR_PROB_BASE;
1178 e24->count = count;
1179
1180 e34->probability = REG_BR_PROB_BASE;
1181 e34->count = all - count;
1182
1183 return tmp2;
1184 }
1185
1186 /* Do transform 1) on STMT if applicable. */
1187 static bool
1188 tree_divmod_fixed_value_transform (tree stmt)
1189 {
1190 stmt_ann_t ann = get_stmt_ann (stmt);
1191 histogram_value histogram;
1192 enum tree_code code;
1193 gcov_type val, count, all;
1194 tree modify, op, op1, op2, result, value, tree_val;
1195 int prob;
1196
1197 modify = stmt;
1198 if (TREE_CODE (stmt) == RETURN_EXPR
1199 && TREE_OPERAND (stmt, 0)
1200 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1201 modify = TREE_OPERAND (stmt, 0);
1202 if (TREE_CODE (modify) != MODIFY_EXPR)
1203 return false;
1204 op = TREE_OPERAND (modify, 1);
1205 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1206 return false;
1207 code = TREE_CODE (op);
1208
1209 if (code != TRUNC_DIV_EXPR && code != TRUNC_MOD_EXPR)
1210 return false;
1211
1212 op1 = TREE_OPERAND (op, 0);
1213 op2 = TREE_OPERAND (op, 1);
1214 if (!ann->histograms)
1215 return false;
1216
1217 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1218 if (histogram->type == HIST_TYPE_SINGLE_VALUE)
1219 break;
1220
1221 if (!histogram)
1222 return false;
1223
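  /* The single-value histogram counters are: the most common divisor value,
     the number of times it was seen, and the total number of executions.  */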
1224 value = histogram->hvalue.tree.value;
1225 val = histogram->hvalue.tree.counters[0];
1226 count = histogram->hvalue.tree.counters[1];
1227 all = histogram->hvalue.tree.counters[2];
1228
1229 /* We require that count is at least half of all; this means
1230 that for the transformation to fire the value must be constant
1231 at least 50% of the time (and 75% gives the guarantee of usage). */
1232 if (simple_cst_equal (op2, value) != 1 || 2 * count < all)
1233 return false;
1234
1235 if (dump_file)
1236 {
1237 fprintf (dump_file, "Div/mod by constant transformation on insn ");
1238 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1239 }
1240
1241 /* Compute probability of taking the optimal path. */
1242 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1243
1244 tree_val = build_int_cst_wide (get_gcov_type (),
1245 (unsigned HOST_WIDE_INT) val,
1246 val >> (HOST_BITS_PER_WIDE_INT - 1) >> 1);
1247 result = tree_divmod_fixed_value (stmt, op, op1, op2, tree_val, prob, count, all);
1248
1249 TREE_OPERAND (modify, 1) = result;
1250
1251 return true;
1252 }
1253
1254 /* Generate code for transformation 2 (with OPERATION, operands OP1
1255 and OP2, parent modify-expr STMT and probability of taking the optimal
1256 path PROB, which is equivalent to COUNT/ALL within roundoff error).
1257 This generates the result into a temp and returns
1258 the temp; it does not replace or alter the original STMT. */
1259 static tree
1260 tree_mod_pow2 (tree stmt, tree operation, tree op1, tree op2, int prob,
1261 gcov_type count, gcov_type all)
1262 {
1263 tree stmt1, stmt2, stmt3, stmt4;
1264 tree tmp1, tmp2, tmp3;
1265 tree label_decl1 = create_artificial_label ();
1266 tree label_decl2 = create_artificial_label ();
1267 tree label_decl3 = create_artificial_label ();
1268 tree label1, label2, label3;
1269 tree bb1end, bb2end, bb3end;
1270 basic_block bb, bb2, bb3, bb4;
1271 tree optype = TREE_TYPE (operation);
1272 edge e12, e13, e23, e24, e34;
1273 block_stmt_iterator bsi;
1274 tree result = create_tmp_var (optype, "PROF");
1275
1276 bb = bb_for_stmt (stmt);
1277 bsi = bsi_for_stmt (stmt);
1278
1279 tmp1 = create_tmp_var (optype, "PROF");
1280 tmp2 = create_tmp_var (optype, "PROF");
1281 tmp3 = create_tmp_var (optype, "PROF");
1282 stmt1 = build2 (MODIFY_EXPR, optype, tmp1, fold_convert (optype, op2));
1283 stmt2 = build2 (MODIFY_EXPR, optype, tmp2,
1284 build2 (PLUS_EXPR, optype, op2, integer_minus_one_node));
1285 stmt3 = build2 (MODIFY_EXPR, optype, tmp3,
1286 build2 (BIT_AND_EXPR, optype, tmp2, tmp1));
1287 stmt4 = build3 (COND_EXPR, void_type_node,
1288 build2 (NE_EXPR, boolean_type_node, tmp3, integer_zero_node),
1289 build1 (GOTO_EXPR, void_type_node, label_decl2),
1290 build1 (GOTO_EXPR, void_type_node, label_decl1));
1291 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1292 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1293 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1294 bsi_insert_before (&bsi, stmt4, BSI_SAME_STMT);
1295 bb1end = stmt4;
1296
1297 /* tmp2 == op2-1 inherited from previous block */
1298 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1299 stmt1 = build2 (MODIFY_EXPR, optype, result,
1300 build2 (BIT_AND_EXPR, optype, op1, tmp2));
1301 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1302 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1303 bb2end = stmt1;
1304
1305 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1306 stmt1 = build2 (MODIFY_EXPR, optype, result,
1307 build2 (TREE_CODE (operation), optype, op1, op2));
1308 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1309 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1310 bb3end = stmt1;
1311
1312 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1313 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1314
1315 /* Fix CFG. */
1316 /* Edge e23 connects bb2 to bb3, etc. */
1317 e12 = split_block (bb, bb1end);
1318 bb2 = e12->dest;
1319 bb2->count = count;
1320 e23 = split_block (bb2, bb2end);
1321 bb3 = e23->dest;
1322 bb3->count = all - count;
1323 e34 = split_block (bb3, bb3end);
1324 bb4 = e34->dest;
1325 bb4->count = all;
1326
1327 e12->flags &= ~EDGE_FALLTHRU;
1328 e12->flags |= EDGE_FALSE_VALUE;
1329 e12->probability = prob;
1330 e12->count = count;
1331
1332 e13 = make_edge (bb, bb3, EDGE_TRUE_VALUE);
1333 e13->probability = REG_BR_PROB_BASE - prob;
1334 e13->count = all - count;
1335
1336 remove_edge (e23);
1337
1338 e24 = make_edge (bb2, bb4, EDGE_FALLTHRU);
1339 e24->probability = REG_BR_PROB_BASE;
1340 e24->count = count;
1341
1342 e34->probability = REG_BR_PROB_BASE;
1343 e34->count = all - count;
1344
1345 return result;
1346 }
1347
1348 /* Do transform 2) on STMT if applicable. */
1349 static bool
1350 tree_mod_pow2_value_transform (tree stmt)
1351 {
1352 stmt_ann_t ann = get_stmt_ann (stmt);
1353 histogram_value histogram;
1354 enum tree_code code;
1355 gcov_type count, wrong_values, all;
1356 tree modify, op, op1, op2, result, value;
1357 int prob;
1358 unsigned int i;
1359
1360 modify = stmt;
1361 if (TREE_CODE (stmt) == RETURN_EXPR
1362 && TREE_OPERAND (stmt, 0)
1363 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1364 modify = TREE_OPERAND (stmt, 0);
1365 if (TREE_CODE (modify) != MODIFY_EXPR)
1366 return false;
1367 op = TREE_OPERAND (modify, 1);
1368 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1369 return false;
1370 code = TREE_CODE (op);
1371
1372 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1373 return false;
1374
1375 op1 = TREE_OPERAND (op, 0);
1376 op2 = TREE_OPERAND (op, 1);
1377 if (!ann->histograms)
1378 return false;
1379
1380 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1381 if (histogram->type == HIST_TYPE_POW2)
1382 break;
1383
1384 if (!histogram)
1385 return false;
1386
1387 value = histogram->hvalue.tree.value;
1388 wrong_values = histogram->hvalue.tree.counters[0];
1389 count = 0;
1390 for (i = 1; i <= TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (stmt))); i++)
1391 count += histogram->hvalue.tree.counters[i];
1392
1393 /* We require that we hit a power of 2 in at least half of all evaluations. */
1394 if (simple_cst_equal (op2, value) != 1 || count < wrong_values)
1395 return false;
1396
1397 if (dump_file)
1398 {
1399 fprintf (dump_file, "Mod power of 2 transformation on insn ");
1400 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1401 }
1402
1403 /* Compute probability of taking the optimal path. */
1404 all = count + wrong_values;
1405 prob = (count * REG_BR_PROB_BASE + all / 2) / all;
1406
1407 result = tree_mod_pow2 (stmt, op, op1, op2, prob, count, all);
1408
1409 TREE_OPERAND (modify, 1) = result;
1410
1411 return true;
1412 }
1413
1414 /* Generate code for transformations 3 and 4 (with OPERATION, operands OP1
1415 and OP2, parent modify-expr STMT, and NCOUNTS the number of cases to
1416 support. Currently only NCOUNTS==0 or 1 is supported and this is
1417 built into this interface. The probabilities of taking the optimal
1418 paths are PROB1 and PROB2, which are equivalent to COUNT1/ALL and
1419 COUNT2/ALL respectively within roundoff error). This generates the
1420 result into a temp and returns the temp; it does not replace or alter
1421 the original STMT. */
1422 /* FIXME: Generalize the interface to handle NCOUNTS > 1. */
1423
1424 static tree
1425 tree_mod_subtract (tree stmt, tree operation, tree op1, tree op2,
1426 int prob1, int prob2, int ncounts,
1427 gcov_type count1, gcov_type count2, gcov_type all)
1428 {
1429 tree stmt1, stmt2, stmt3;
1430 tree tmp1;
1431 tree label_decl1 = create_artificial_label ();
1432 tree label_decl2 = create_artificial_label ();
1433 tree label_decl3 = create_artificial_label ();
1434 tree label1, label2, label3;
1435 tree bb1end, bb2end = NULL_TREE, bb3end;
1436 basic_block bb, bb2, bb3, bb4;
1437 tree optype = TREE_TYPE (operation);
1438 edge e12, e23 = 0, e24, e34, e14;
1439 block_stmt_iterator bsi;
1440 tree result = create_tmp_var (optype, "PROF");
1441
1442 bb = bb_for_stmt (stmt);
1443 bsi = bsi_for_stmt (stmt);
1444
1445 tmp1 = create_tmp_var (optype, "PROF");
1446 stmt1 = build2 (MODIFY_EXPR, optype, result, op1);
1447 stmt2 = build2 (MODIFY_EXPR, optype, tmp1, op2);
1448 stmt3 = build3 (COND_EXPR, void_type_node,
1449 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1450 build1 (GOTO_EXPR, void_type_node, label_decl3),
1451 build1 (GOTO_EXPR, void_type_node,
1452 ncounts ? label_decl1 : label_decl2));
1453 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1454 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1455 bsi_insert_before (&bsi, stmt3, BSI_SAME_STMT);
1456 bb1end = stmt3;
1457
1458 if (ncounts) /* Assumed to be 0 or 1 */
1459 {
1460 label1 = build1 (LABEL_EXPR, void_type_node, label_decl1);
1461 stmt1 = build2 (MODIFY_EXPR, optype, result,
1462 build2 (MINUS_EXPR, optype, result, tmp1));
1463 stmt2 = build3 (COND_EXPR, void_type_node,
1464 build2 (LT_EXPR, boolean_type_node, result, tmp1),
1465 build1 (GOTO_EXPR, void_type_node, label_decl3),
1466 build1 (GOTO_EXPR, void_type_node, label_decl2));
1467 bsi_insert_before (&bsi, label1, BSI_SAME_STMT);
1468 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1469 bsi_insert_before (&bsi, stmt2, BSI_SAME_STMT);
1470 bb2end = stmt2;
1471 }
1472
1473 /* Fallback case. */
1474 label2 = build1 (LABEL_EXPR, void_type_node, label_decl2);
1475 stmt1 = build2 (MODIFY_EXPR, optype, result,
1476 build2 (TREE_CODE (operation), optype, result, tmp1));
1477 bsi_insert_before (&bsi, label2, BSI_SAME_STMT);
1478 bsi_insert_before (&bsi, stmt1, BSI_SAME_STMT);
1479 bb3end = stmt1;
1480
1481 label3 = build1 (LABEL_EXPR, void_type_node, label_decl3);
1482 bsi_insert_before (&bsi, label3, BSI_SAME_STMT);
1483
1484 /* Fix CFG. */
1485 /* Edge e23 connects bb2 to bb3, etc. */
1486 /* However block 3 is optional; if it is not there, references
1487 to 3 really refer to block 2. */
1488 e12 = split_block (bb, bb1end);
1489 bb2 = e12->dest;
1490 bb2->count = all - count1;
1491
1492 if (ncounts) /* Assumed to be 0 or 1. */
1493 {
1494 e23 = split_block (bb2, bb2end);
1495 bb3 = e23->dest;
1496 bb3->count = all - count1 - count2;
1497 }
1498
1499 e34 = split_block (ncounts ? bb3 : bb2, bb3end);
1500 bb4 = e34->dest;
1501 bb4->count = all;
1502
1503 e12->flags &= ~EDGE_FALLTHRU;
1504 e12->flags |= EDGE_FALSE_VALUE;
1505 e12->probability = REG_BR_PROB_BASE - prob1;
1506 e12->count = count1;
1507
1508 e14 = make_edge (bb, bb4, EDGE_TRUE_VALUE);
1509 e14->probability = prob1;
1510 e14->count = all - count1;
1511
1512 if (ncounts) /* Assumed to be 0 or 1. */
1513 {
1514 e23->flags &= ~EDGE_FALLTHRU;
1515 e23->flags |= EDGE_FALSE_VALUE;
1516 e23->count = all - count1 - count2;
1517 e23->probability = REG_BR_PROB_BASE - prob2;
1518
1519 e24 = make_edge (bb2, bb4, EDGE_TRUE_VALUE);
1520 e24->probability = prob2;
1521 e24->count = count2;
1522 }
1523
1524 e34->probability = REG_BR_PROB_BASE;
1525 e34->count = all - count1 - count2;
1526
1527 return result;
1528 }
1529
1530 /* Do transforms 3) and 4) on STMT if applicable. */
1531 static bool
1532 tree_mod_subtract_transform (tree stmt)
1533 {
1534 stmt_ann_t ann = get_stmt_ann (stmt);
1535 histogram_value histogram;
1536 enum tree_code code;
1537 gcov_type count, wrong_values, all;
1538 tree modify, op, op1, op2, result, value;
1539 int prob1, prob2;
1540 unsigned int i;
1541
1542 modify = stmt;
1543 if (TREE_CODE (stmt) == RETURN_EXPR
1544 && TREE_OPERAND (stmt, 0)
1545 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1546 modify = TREE_OPERAND (stmt, 0);
1547 if (TREE_CODE (modify) != MODIFY_EXPR)
1548 return false;
1549 op = TREE_OPERAND (modify, 1);
1550 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1551 return false;
1552 code = TREE_CODE (op);
1553
1554 if (code != TRUNC_MOD_EXPR || !TYPE_UNSIGNED (TREE_TYPE (op)))
1555 return false;
1556
1557 op1 = TREE_OPERAND (op, 0);
1558 op2 = TREE_OPERAND (op, 1);
1559 if (!ann->histograms)
1560 return false;
1561
1562 for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next)
1563 if (histogram->type == HIST_TYPE_INTERVAL)
1564 break;
1565
1566 if (!histogram)
1567 return false;
1568
1569 value = histogram->hvalue.tree.value;
1570 all = 0;
1571 wrong_values = 0;
1572 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1573 all += histogram->hvalue.tree.counters[i];
1574
1575 wrong_values += histogram->hvalue.tree.counters[i];
1576 wrong_values += histogram->hvalue.tree.counters[i+1];
1577 all += wrong_values;
1578
1579 /* Sanity check. */
1580 if (simple_cst_equal (op2, value) != 1)
1581 return false;
1582
1583 /* We require that we use just subtractions in at least 50% of all
1584 evaluations. */
1585 count = 0;
1586 for (i = 0; i < histogram->hdata.intvl.steps; i++)
1587 {
1588 count += histogram->hvalue.tree.counters[i];
1589 if (count * 2 >= all)
1590 break;
1591 }
1592 if (i == histogram->hdata.intvl.steps)
1593 return false;
1594
1595 if (dump_file)
1596 {
1597 fprintf (dump_file, "Mod subtract transformation on insn ");
1598 print_generic_stmt (dump_file, stmt, TDF_SLIM);
1599 }
1600
1601 /* Compute probability of taking the optimal path(s). */
1602 prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all;
1603 prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all;
1604
1605 /* In practice, "steps" is always 2. This interface reflects this,
1606 and will need to be changed if "steps" can change. */
1607 result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i,
1608 histogram->hvalue.tree.counters[0],
1609 histogram->hvalue.tree.counters[1], all);
1610
1611 TREE_OPERAND (modify, 1) = result;
1612
1613 return true;
1614 }
1615 \f
1616 /* Connection to the outside world. */
1617 /* Struct for IR-dependent hooks. */
1618 struct value_prof_hooks {
1619 /* Find list of values for which we want to measure histograms. */
1620 void (*find_values_to_profile) (histogram_values *);
1621
1622 /* Identify and exploit properties of values that are hard to analyze
1623 statically. See value-prof.c for more detail. */
1624 bool (*value_profile_transformations) (void);
1625 };
1626
1627 /* Hooks for RTL-based versions (the only ones that currently work). */
1628 static struct value_prof_hooks rtl_value_prof_hooks =
1629 {
1630 rtl_find_values_to_profile,
1631 rtl_value_profile_transformations
1632 };
1633
1634 void
1635 rtl_register_value_prof_hooks (void)
1636 {
1637 value_prof_hooks = &rtl_value_prof_hooks;
1638 gcc_assert (!ir_type ());
1639 }
1640 \f
1641 /* Find values inside STMT for which we want to measure histograms for
1642 division/modulo optimization. */
1643 static void
1644 tree_divmod_values_to_profile (tree stmt, histogram_values *values)
1645 {
1646 tree op, op1, op2;
1647 histogram_value hist;
1648
1649 op = stmt;
1650 if (TREE_CODE (stmt) == RETURN_EXPR
1651 && TREE_OPERAND (stmt, 0)
1652 && TREE_CODE (TREE_OPERAND (stmt, 0)) == MODIFY_EXPR)
1653 op = TREE_OPERAND (stmt, 0);
1654
1655 if (TREE_CODE (op) != MODIFY_EXPR)
1656 return;
1657 if (!INTEGRAL_TYPE_P (TREE_TYPE (op)))
1658 return;
1659 op = TREE_OPERAND (op, 1);
1660 switch (TREE_CODE (op))
1661 {
1662 case TRUNC_DIV_EXPR:
1663 case TRUNC_MOD_EXPR:
1664 op1 = TREE_OPERAND (op, 0);
1665 op2 = TREE_OPERAND (op, 1);
1666
1667 VEC_reserve (histogram_value, heap, *values, 3);
1668
1669 /* Check for a special case where the divisor is power(s) of 2.
1670 This is more aggressive than the RTL version, under the
1671 assumption that later phases will reduce / or % by power of 2
1672 to something clever most of the time. Signed or unsigned. */
1673 if (TREE_CODE (op2) != INTEGER_CST)
1674 {
1675 hist = ggc_alloc (sizeof (*hist));
1676 hist->hvalue.tree.value = op2;
1677 hist->hvalue.tree.stmt = stmt;
1678 hist->type = HIST_TYPE_POW2;
1679 hist->hdata.pow2.may_be_other = 1;
1680 VEC_quick_push (histogram_value, *values, hist);
1681 }
1682
1683 /* Check for the case where the divisor is the same value most
1684 of the time. */
1685 if (TREE_CODE (op2) != INTEGER_CST)
1686 {
1687 hist = ggc_alloc (sizeof (*hist));
1688 hist->hvalue.tree.value = op2;
1689 hist->hvalue.tree.stmt = stmt;
1690 hist->type = HIST_TYPE_SINGLE_VALUE;
1691 VEC_quick_push (histogram_value, *values, hist);
1692 }
1693
1694 /* For mod, check whether it is not often a noop (or replaceable by
1695 a few subtractions). */
1696 if (TREE_CODE (op) == TRUNC_MOD_EXPR && TYPE_UNSIGNED (TREE_TYPE (op)))
1697 {
1698 hist = ggc_alloc (sizeof (*hist));
1699 hist->hvalue.tree.stmt = stmt;
1700 hist->hvalue.tree.value = op2;
1701 hist->type = HIST_TYPE_INTERVAL;
1702 hist->hdata.intvl.int_start = 0;
1703 hist->hdata.intvl.steps = 2;
1704 VEC_quick_push (histogram_value, *values, hist);
1705 }
1706 return;
1707
1708 default:
1709 return;
1710 }
1711 }
1712
1713 /* Find values inside STMT for which we want to measure histograms and add
1714 them to the list VALUES. */
1715 static void
1716 tree_values_to_profile (tree stmt, histogram_values *values)
1717 {
1718 if (flag_value_profile_transformations)
1719 tree_divmod_values_to_profile (stmt, values);
1720 }
1721
1722 static void
1723 tree_find_values_to_profile (histogram_values *values)
1724 {
1725 basic_block bb;
1726 block_stmt_iterator bsi;
1727 tree stmt;
1728 unsigned int i;
1729 histogram_value hist;
1730
1731 *values = NULL;
1732 FOR_EACH_BB (bb)
1733 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1734 {
1735 tree stmt = bsi_stmt (bsi);
1736 tree_values_to_profile (stmt, values);
1737 }
1738 static_values = *values;
1739
1740 for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++)
1741 {
1742 switch (hist->type)
1743 {
1744 case HIST_TYPE_INTERVAL:
1745 if (dump_file)
1746 {
1747 fprintf (dump_file, "Interval counter for tree ");
1748 print_generic_expr (dump_file, hist->hvalue.tree.stmt,
1749 TDF_SLIM);
1750 fprintf (dump_file, ", range %d -- %d.\n",
1751 hist->hdata.intvl.int_start,
1752 (hist->hdata.intvl.int_start
1753 + hist->hdata.intvl.steps - 1));
1754 }
1755 hist->n_counters = hist->hdata.intvl.steps + 2;
1756 break;
1757
1758 case HIST_TYPE_POW2:
1759 if (dump_file)
1760 {
1761 fprintf (dump_file, "Pow2 counter for insn ");
1762 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1763 fprintf (dump_file, ".\n");
1764 }
1765 stmt = hist->hvalue.tree.stmt;
1766 hist->n_counters
1767 = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (stmt)))
1768 + (hist->hdata.pow2.may_be_other ? 1 : 0);
1769 break;
1770
1771 case HIST_TYPE_SINGLE_VALUE:
1772 if (dump_file)
1773 {
1774 fprintf (dump_file, "Single value counter for insn ");
1775 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1776 fprintf (dump_file, ".\n");
1777 }
1778 hist->n_counters = 3;
1779 break;
1780
1781 case HIST_TYPE_CONST_DELTA:
1782 if (dump_file)
1783 {
1784 fprintf (dump_file, "Constant delta counter for insn ");
1785 print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM);
1786 fprintf (dump_file, ".\n");
1787 }
1788 hist->n_counters = 4;
1789 break;
1790
1791 default:
1792 abort ();
1793 }
1794 }
1795 }
1796
1797 static struct value_prof_hooks tree_value_prof_hooks = {
1798 tree_find_values_to_profile,
1799 tree_value_profile_transformations
1800 };
1801
1802 void
1803 tree_register_value_prof_hooks (void)
1804 {
1805 value_prof_hooks = &tree_value_prof_hooks;
1806 gcc_assert (ir_type ());
1807 }
1808 \f
1809 /* IR-independent entry points. */
1810 void
1811 find_values_to_profile (histogram_values *values)
1812 {
1813 (value_prof_hooks->find_values_to_profile) (values);
1814 }
1815
1816 bool
1817 value_profile_transformations (void)
1818 {
1819 bool retval = (value_prof_hooks->value_profile_transformations) ();
1820 VEC_free (histogram_value, heap, static_values);
1821 return retval;
1822 }