]>
Commit | Line | Data |
---|---|---|
1c72f4ef | 1 | /* Transformations based on profile information for values. |
88462c42 | 2 | Copyright (C) 2003, 2004 Free Software Foundation, Inc. |
1c72f4ef ZD |
3 | |
4 | This file is part of GCC. | |
5 | ||
6 | GCC is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 2, or (at your option) any later | |
9 | version. | |
10 | ||
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 | for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GCC; see the file COPYING. If not, write to the Free | |
18 | Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
19 | 02111-1307, USA. */ | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "tm.h" | |
25 | #include "rtl.h" | |
26 | #include "expr.h" | |
27 | #include "hard-reg-set.h" | |
28 | #include "basic-block.h" | |
29 | #include "value-prof.h" | |
30 | #include "output.h" | |
31 | #include "flags.h" | |
32 | #include "insn-config.h" | |
33 | #include "recog.h" | |
34 | #include "optabs.h" | |
fca9dc00 | 35 | #include "regs.h" |
1c72f4ef ZD |
36 | |
37 | /* In this file value profile based optimizations will be placed (none are | |
38 | here just now, but they are hopefully coming soon). | |
39 | ||
40 | Every such optimization should add its requirements for profiled values to | |
41 | insn_values_to_profile function. This function is called from branch_prob | |
42 | in profile.c and the requested values are instrumented by it in the first | |
43 | compilation with -fprofile-arcs. The optimization may then read the | |
a98ebe2e | 44 | gathered data in the second compilation with -fbranch-probabilities. |
6e885ee3 ZD |
45 | The measured data is appended as REG_VALUE_PROFILE note to the instrumented |
46 | insn. The argument to the note consists of an EXPR_LIST where its | |
47 | members have the following meaning (from the first to the last): | |
48 | ||
49 | -- type of information gathered (HIST_TYPE*) | |
50 | -- the expression that is profiled | |
51 | -- list of counters starting from the first one. */ | |
1c72f4ef | 52 | |
fca9dc00 ZD |
53 | static void insn_divmod_values_to_profile (rtx, unsigned *, |
54 | struct histogram_value **); | |
1c72f4ef | 55 | static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); |
fca9dc00 ZD |
56 | static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, |
57 | rtx, gcov_type); | |
58 | static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx); | |
59 | static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, | |
60 | int); | |
61 | static bool divmod_fixed_value_transform (rtx insn); | |
62 | static bool mod_pow2_value_transform (rtx); | |
63 | static bool mod_subtract_transform (rtx); | |
1c72f4ef ZD |
64 | \f |
65 | /* Release the list of VALUES of length N_VALUES for that we want to measure | |
66 | histograms. */ | |
67 | void | |
68 | free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED, | |
69 | struct histogram_value *values) | |
70 | { | |
71 | free (values); | |
72 | } | |
73 | ||
fca9dc00 ZD |
74 | /* Find values inside INSN for that we want to measure histograms for |
75 | division/modulo optimization. */ | |
76 | static void | |
77 | insn_divmod_values_to_profile (rtx insn, unsigned *n_values, | |
78 | struct histogram_value **values) | |
79 | { | |
80 | rtx set, set_src, op1, op2; | |
81 | enum machine_mode mode; | |
82 | ||
83 | if (!INSN_P (insn)) | |
84 | return; | |
85 | ||
86 | set = single_set (insn); | |
87 | if (!set) | |
88 | return; | |
89 | ||
90 | mode = GET_MODE (SET_DEST (set)); | |
91 | if (!INTEGRAL_MODE_P (mode)) | |
92 | return; | |
93 | ||
94 | set_src = SET_SRC (set); | |
95 | switch (GET_CODE (set_src)) | |
96 | { | |
97 | case DIV: | |
98 | case MOD: | |
99 | case UDIV: | |
100 | case UMOD: | |
101 | op1 = XEXP (set_src, 0); | |
102 | op2 = XEXP (set_src, 1); | |
103 | if (side_effects_p (op2)) | |
104 | return; | |
105 | ||
106 | /* Check for a special case where the divisor is power of 2. */ | |
107 | if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) | |
108 | { | |
109 | *values = xrealloc (*values, | |
110 | (*n_values + 1) | |
111 | * sizeof (struct histogram_value)); | |
112 | (*values)[*n_values].value = op2; | |
113 | (*values)[*n_values].seq = NULL_RTX; | |
114 | (*values)[*n_values].mode = mode; | |
115 | (*values)[*n_values].insn = insn; | |
116 | (*values)[*n_values].type = HIST_TYPE_POW2; | |
117 | (*values)[*n_values].hdata.pow2.may_be_other = 1; | |
118 | (*n_values)++; | |
119 | } | |
120 | ||
121 | /* Check whether the divisor is not in fact a constant. */ | |
122 | if (!CONSTANT_P (op2)) | |
123 | { | |
124 | *values = xrealloc (*values, | |
125 | (*n_values + 1) | |
126 | * sizeof (struct histogram_value)); | |
127 | (*values)[*n_values].value = op2; | |
128 | (*values)[*n_values].mode = mode; | |
129 | (*values)[*n_values].seq = NULL_RTX; | |
130 | (*values)[*n_values].insn = insn; | |
131 | (*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE; | |
132 | (*n_values)++; | |
133 | } | |
134 | ||
d91edf86 | 135 | /* For mod, check whether it is not often a noop (or replaceable by |
fca9dc00 ZD |
136 | a few subtractions). */ |
137 | if (GET_CODE (set_src) == UMOD && !side_effects_p (op1)) | |
138 | { | |
139 | rtx tmp; | |
140 | ||
141 | *values = xrealloc (*values, | |
142 | (*n_values + 1) | |
143 | * sizeof (struct histogram_value)); | |
144 | start_sequence (); | |
145 | tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); | |
146 | (*values)[*n_values].value = force_operand (tmp, NULL_RTX); | |
147 | (*values)[*n_values].seq = get_insns (); | |
148 | end_sequence (); | |
149 | (*values)[*n_values].mode = mode; | |
150 | (*values)[*n_values].insn = insn; | |
151 | (*values)[*n_values].type = HIST_TYPE_INTERVAL; | |
152 | (*values)[*n_values].hdata.intvl.int_start = 0; | |
153 | (*values)[*n_values].hdata.intvl.steps = 2; | |
154 | (*values)[*n_values].hdata.intvl.may_be_less = 1; | |
155 | (*values)[*n_values].hdata.intvl.may_be_more = 1; | |
156 | (*n_values)++; | |
157 | } | |
158 | return; | |
159 | ||
160 | default: | |
161 | return; | |
162 | } | |
163 | } | |
164 | ||
1c72f4ef ZD |
165 | /* Find values inside INSN for that we want to measure histograms and adds |
166 | them to list VALUES (increasing the record of its length in N_VALUES). */ | |
167 | static void | |
fca9dc00 ZD |
168 | insn_values_to_profile (rtx insn, |
169 | unsigned *n_values, | |
170 | struct histogram_value **values) | |
1c72f4ef | 171 | { |
fca9dc00 ZD |
172 | if (flag_value_profile_transformations) |
173 | insn_divmod_values_to_profile (insn, n_values, values); | |
1c72f4ef ZD |
174 | } |
175 | ||
176 | /* Find list of values for that we want to measure histograms. */ | |
177 | void | |
178 | find_values_to_profile (unsigned *n_values, struct histogram_value **values) | |
179 | { | |
180 | rtx insn; | |
181 | unsigned i; | |
182 | ||
183 | *n_values = 0; | |
184 | *values = NULL; | |
185 | for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
186 | insn_values_to_profile (insn, n_values, values); | |
187 | ||
188 | for (i = 0; i < *n_values; i++) | |
189 | { | |
190 | switch ((*values)[i].type) | |
191 | { | |
192 | case HIST_TYPE_INTERVAL: | |
c263766c RH |
193 | if (dump_file) |
194 | fprintf (dump_file, | |
fca9dc00 ZD |
195 | "Interval counter for insn %d, range %d -- %d.\n", |
196 | INSN_UID ((*values)[i].insn), | |
197 | (*values)[i].hdata.intvl.int_start, | |
198 | ((*values)[i].hdata.intvl.int_start | |
199 | + (*values)[i].hdata.intvl.steps - 1)); | |
1c72f4ef ZD |
200 | (*values)[i].n_counters = (*values)[i].hdata.intvl.steps + |
201 | ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + | |
202 | ((*values)[i].hdata.intvl.may_be_more ? 1 : 0); | |
203 | break; | |
204 | ||
205 | case HIST_TYPE_POW2: | |
c263766c RH |
206 | if (dump_file) |
207 | fprintf (dump_file, | |
fca9dc00 ZD |
208 | "Pow2 counter for insn %d.\n", |
209 | INSN_UID ((*values)[i].insn)); | |
1c72f4ef ZD |
210 | (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) + |
211 | ((*values)[i].hdata.pow2.may_be_other ? 1 : 0); | |
212 | break; | |
213 | ||
214 | case HIST_TYPE_SINGLE_VALUE: | |
c263766c RH |
215 | if (dump_file) |
216 | fprintf (dump_file, | |
fca9dc00 ZD |
217 | "Single value counter for insn %d.\n", |
218 | INSN_UID ((*values)[i].insn)); | |
1c72f4ef ZD |
219 | (*values)[i].n_counters = 3; |
220 | break; | |
221 | ||
222 | case HIST_TYPE_CONST_DELTA: | |
c263766c RH |
223 | if (dump_file) |
224 | fprintf (dump_file, | |
fca9dc00 ZD |
225 | "Constant delta counter for insn %d.\n", |
226 | INSN_UID ((*values)[i].insn)); | |
1c72f4ef ZD |
227 | (*values)[i].n_counters = 4; |
228 | break; | |
229 | ||
230 | default: | |
231 | abort (); | |
232 | } | |
233 | } | |
234 | } | |
fca9dc00 ZD |
235 | |
236 | /* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses | |
237 | them to identify and exploit properties of values that are hard to analyze | |
238 | statically. | |
239 | ||
240 | We do following transformations: | |
241 | ||
242 | 1) | |
243 | ||
244 | x = a / b; | |
245 | ||
246 | where b is almost always a constant N is transformed to | |
247 | ||
248 | if (b == N) | |
249 | x = a / N; | |
250 | else | |
251 | x = a / b; | |
252 | ||
253 | Analogically with % | |
254 | ||
255 | 2) | |
256 | ||
257 | x = a % b | |
258 | ||
259 | where b is almost always a power of 2 and the division is unsigned | |
260 | TODO -- handle signed case as well | |
261 | ||
262 | if ((b & (b - 1)) == 0) | |
263 | x = a & (b - 1); | |
264 | else | |
265 | x = x % b; | |
266 | ||
267 | Note that when b = 0, no error will occur and x = a; this is correct, | |
268 | as result of such operation is undefined. | |
269 | ||
270 | 3) | |
271 | ||
272 | x = a % b | |
273 | ||
274 | where a is almost always less then b and the division is unsigned | |
275 | TODO -- handle signed case as well | |
276 | ||
277 | x = a; | |
278 | if (x >= b) | |
279 | x %= b; | |
280 | ||
281 | 4) | |
282 | ||
283 | x = a % b | |
284 | ||
285 | where a is almost always less then 2 * b and the division is unsigned | |
286 | TODO -- handle signed case as well | |
287 | ||
288 | x = a; | |
289 | if (x >= b) | |
290 | x -= b; | |
291 | if (x >= b) | |
292 | x %= b; | |
293 | ||
294 | It would be possible to continue analogically for K * b for other small | |
295 | K's, but it is probably not useful. | |
296 | ||
297 | TODO: | |
298 | ||
299 | There are other useful cases that could be handled by a similar mechanism, | |
300 | for example: | |
301 | ||
302 | for (i = 0; i < n; i++) | |
303 | ... | |
304 | ||
305 | transform to (for constant N): | |
306 | ||
307 | if (n == N) | |
308 | for (i = 0; i < N; i++) | |
309 | ... | |
310 | else | |
311 | for (i = 0; i < n; i++) | |
312 | ... | |
313 | making unroller happy. Since this may grow the code significantly, | |
314 | we would have to be very careful here. */ | |
315 | ||
316 | bool | |
9373164a | 317 | value_profile_transformations (void) |
fca9dc00 ZD |
318 | { |
319 | rtx insn, next; | |
320 | int changed = false; | |
321 | ||
322 | for (insn = get_insns (); insn; insn = next) | |
323 | { | |
324 | next = NEXT_INSN (insn); | |
325 | ||
326 | if (!INSN_P (insn)) | |
327 | continue; | |
328 | ||
329 | /* Scan for insn carrying a histogram. */ | |
330 | if (!find_reg_note (insn, REG_VALUE_PROFILE, 0)) | |
331 | continue; | |
332 | ||
333 | /* Ignore cold areas -- we are growing a code. */ | |
334 | if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) | |
335 | continue; | |
336 | ||
c263766c | 337 | if (dump_file) |
fca9dc00 | 338 | { |
c263766c | 339 | fprintf (dump_file, "Trying transformations on insn %d\n", |
fca9dc00 | 340 | INSN_UID (insn)); |
c263766c | 341 | print_rtl_single (dump_file, insn); |
fca9dc00 ZD |
342 | } |
343 | ||
344 | /* Transformations: */ | |
345 | if (flag_value_profile_transformations | |
346 | && (mod_subtract_transform (insn) | |
347 | || divmod_fixed_value_transform (insn) | |
348 | || mod_pow2_value_transform (insn))) | |
349 | changed = true; | |
350 | } | |
351 | ||
352 | if (changed) | |
353 | { | |
354 | commit_edge_insertions (); | |
355 | allocate_reg_info (max_reg_num (), FALSE, FALSE); | |
356 | } | |
357 | ||
358 | return changed; | |
359 | } | |
360 | ||
361 | /* Generate code for transformation 1 (with MODE and OPERATION, operands OP1 | |
362 | and OP2 whose value is expected to be VALUE and result TARGET). */ | |
363 | static rtx | |
364 | gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation, | |
365 | rtx target, rtx op1, rtx op2, gcov_type value) | |
366 | { | |
367 | rtx tmp, tmp1; | |
368 | rtx neq_label = gen_label_rtx (); | |
369 | rtx end_label = gen_label_rtx (); | |
370 | rtx sequence; | |
371 | ||
372 | start_sequence (); | |
373 | ||
374 | if (!REG_P (op2)) | |
375 | { | |
376 | tmp = gen_reg_rtx (mode); | |
377 | emit_move_insn (tmp, copy_rtx (op2)); | |
378 | } | |
379 | else | |
380 | tmp = op2; | |
381 | ||
382 | do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX, | |
383 | NULL_RTX, neq_label); | |
384 | tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value)); | |
385 | tmp1 = force_operand (tmp1, target); | |
386 | if (tmp1 != target) | |
387 | emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); | |
388 | ||
389 | emit_jump_insn (gen_jump (end_label)); | |
390 | emit_barrier (); | |
391 | ||
392 | emit_label (neq_label); | |
393 | tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); | |
394 | tmp1 = force_operand (tmp1, target); | |
395 | if (tmp1 != target) | |
396 | emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); | |
397 | ||
398 | emit_label (end_label); | |
399 | ||
400 | sequence = get_insns (); | |
401 | end_sequence (); | |
402 | rebuild_jump_labels (sequence); | |
403 | return sequence; | |
404 | } | |
405 | ||
406 | /* Do transform 1) on INSN if applicable. */ | |
407 | static bool | |
408 | divmod_fixed_value_transform (rtx insn) | |
409 | { | |
410 | rtx set, set_src, set_dest, op1, op2, value, histogram; | |
411 | enum rtx_code code; | |
412 | enum machine_mode mode; | |
413 | gcov_type val, count, all; | |
414 | edge e; | |
415 | ||
416 | set = single_set (insn); | |
417 | if (!set) | |
418 | return false; | |
419 | ||
420 | set_src = SET_SRC (set); | |
421 | set_dest = SET_DEST (set); | |
422 | code = GET_CODE (set_src); | |
423 | mode = GET_MODE (set_dest); | |
424 | ||
425 | if (code != DIV && code != MOD && code != UDIV && code != UMOD) | |
426 | return false; | |
427 | op1 = XEXP (set_src, false); | |
428 | op2 = XEXP (set_src, 1); | |
429 | ||
430 | for (histogram = REG_NOTES (insn); | |
431 | histogram; | |
432 | histogram = XEXP (histogram, 1)) | |
433 | if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE | |
434 | && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE)) | |
435 | break; | |
436 | ||
437 | if (!histogram) | |
438 | return false; | |
439 | ||
440 | histogram = XEXP (XEXP (histogram, 0), 1); | |
441 | value = XEXP (histogram, 0); | |
442 | histogram = XEXP (histogram, 1); | |
443 | val = INTVAL (XEXP (histogram, 0)); | |
444 | histogram = XEXP (histogram, 1); | |
445 | count = INTVAL (XEXP (histogram, 0)); | |
446 | histogram = XEXP (histogram, 1); | |
447 | all = INTVAL (XEXP (histogram, 0)); | |
448 | ||
d91edf86 | 449 | /* We require that count is at least half of all; this means |
fca9dc00 | 450 | that for the transformation to fire the value must be constant |
d91edf86 | 451 | at least 50% of time (and 75% gives the guarantee of usage). */ |
fca9dc00 ZD |
452 | if (!rtx_equal_p (op2, value) || 2 * count < all) |
453 | return false; | |
454 | ||
c263766c RH |
455 | if (dump_file) |
456 | fprintf (dump_file, "Div/mod by constant transformation on insn %d\n", | |
fca9dc00 ZD |
457 | INSN_UID (insn)); |
458 | ||
459 | e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); | |
460 | delete_insn (insn); | |
461 | ||
462 | insert_insn_on_edge ( | |
463 | gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e); | |
464 | ||
465 | return true; | |
466 | } | |
467 | ||
468 | /* Generate code for transformation 2 (with MODE and OPERATION, operands OP1 | |
469 | and OP2 and result TARGET). */ | |
470 | static rtx | |
471 | gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target, | |
472 | rtx op1, rtx op2) | |
473 | { | |
474 | rtx tmp, tmp1, tmp2, tmp3; | |
475 | rtx neq_label = gen_label_rtx (); | |
476 | rtx end_label = gen_label_rtx (); | |
477 | rtx sequence; | |
478 | ||
479 | start_sequence (); | |
480 | ||
481 | if (!REG_P (op2)) | |
482 | { | |
483 | tmp = gen_reg_rtx (mode); | |
484 | emit_move_insn (tmp, copy_rtx (op2)); | |
485 | } | |
486 | else | |
487 | tmp = op2; | |
488 | ||
489 | tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX, | |
490 | 0, OPTAB_WIDEN); | |
491 | tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX, | |
492 | 0, OPTAB_WIDEN); | |
493 | do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX, | |
494 | NULL_RTX, neq_label); | |
495 | tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target, | |
496 | 0, OPTAB_WIDEN); | |
497 | if (tmp3 != target) | |
498 | emit_move_insn (copy_rtx (target), tmp3); | |
499 | emit_jump_insn (gen_jump (end_label)); | |
500 | emit_barrier (); | |
501 | ||
502 | emit_label (neq_label); | |
503 | tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); | |
504 | tmp1 = force_operand (tmp1, target); | |
505 | if (tmp1 != target) | |
506 | emit_move_insn (target, tmp1); | |
507 | ||
508 | emit_label (end_label); | |
509 | ||
510 | sequence = get_insns (); | |
511 | end_sequence (); | |
512 | rebuild_jump_labels (sequence); | |
513 | return sequence; | |
514 | } | |
515 | ||
516 | /* Do transform 2) on INSN if applicable. */ | |
517 | static bool | |
518 | mod_pow2_value_transform (rtx insn) | |
519 | { | |
520 | rtx set, set_src, set_dest, op1, op2, value, histogram; | |
521 | enum rtx_code code; | |
522 | enum machine_mode mode; | |
523 | gcov_type wrong_values, count; | |
524 | edge e; | |
525 | int i; | |
526 | ||
527 | set = single_set (insn); | |
528 | if (!set) | |
529 | return false; | |
530 | ||
531 | set_src = SET_SRC (set); | |
532 | set_dest = SET_DEST (set); | |
533 | code = GET_CODE (set_src); | |
534 | mode = GET_MODE (set_dest); | |
535 | ||
536 | if (code != UMOD) | |
537 | return false; | |
538 | op1 = XEXP (set_src, 0); | |
539 | op2 = XEXP (set_src, 1); | |
540 | ||
541 | for (histogram = REG_NOTES (insn); | |
542 | histogram; | |
543 | histogram = XEXP (histogram, 1)) | |
544 | if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE | |
545 | && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2)) | |
546 | break; | |
547 | ||
548 | if (!histogram) | |
549 | return false; | |
550 | ||
551 | histogram = XEXP (XEXP (histogram, 0), 1); | |
552 | value = XEXP (histogram, 0); | |
553 | histogram = XEXP (histogram, 1); | |
554 | wrong_values =INTVAL (XEXP (histogram, 0)); | |
555 | histogram = XEXP (histogram, 1); | |
556 | ||
557 | count = 0; | |
558 | for (i = 0; i < GET_MODE_BITSIZE (mode); i++) | |
559 | { | |
560 | count += INTVAL (XEXP (histogram, 0)); | |
561 | histogram = XEXP (histogram, 1); | |
562 | } | |
563 | ||
564 | if (!rtx_equal_p (op2, value)) | |
565 | return false; | |
566 | ||
567 | /* We require that we hit a power of two at least half of all evaluations. */ | |
568 | if (count < wrong_values) | |
569 | return false; | |
570 | ||
c263766c RH |
571 | if (dump_file) |
572 | fprintf (dump_file, "Mod power of 2 transformation on insn %d\n", | |
fca9dc00 ZD |
573 | INSN_UID (insn)); |
574 | ||
575 | e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); | |
576 | delete_insn (insn); | |
577 | ||
578 | insert_insn_on_edge ( | |
579 | gen_mod_pow2 (mode, code, set_dest, op1, op2), e); | |
580 | ||
581 | return true; | |
582 | } | |
583 | ||
584 | /* Generate code for transformations 3 and 4 (with MODE and OPERATION, | |
585 | operands OP1 and OP2, result TARGET and at most SUB subtractions). */ | |
586 | static rtx | |
587 | gen_mod_subtract (enum machine_mode mode, enum rtx_code operation, | |
588 | rtx target, rtx op1, rtx op2, int sub) | |
589 | { | |
590 | rtx tmp, tmp1; | |
591 | rtx end_label = gen_label_rtx (); | |
592 | rtx sequence; | |
593 | int i; | |
594 | ||
595 | start_sequence (); | |
596 | ||
597 | if (!REG_P (op2)) | |
598 | { | |
599 | tmp = gen_reg_rtx (mode); | |
600 | emit_move_insn (tmp, copy_rtx (op2)); | |
601 | } | |
602 | else | |
603 | tmp = op2; | |
604 | ||
605 | emit_move_insn (target, copy_rtx (op1)); | |
606 | do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, | |
607 | NULL_RTX, end_label); | |
608 | ||
609 | ||
610 | for (i = 0; i < sub; i++) | |
611 | { | |
612 | tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target, | |
613 | 0, OPTAB_WIDEN); | |
614 | if (tmp1 != target) | |
615 | emit_move_insn (target, tmp1); | |
616 | do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, | |
617 | NULL_RTX, end_label); | |
618 | } | |
619 | ||
620 | tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp)); | |
621 | tmp1 = force_operand (tmp1, target); | |
622 | if (tmp1 != target) | |
623 | emit_move_insn (target, tmp1); | |
624 | ||
625 | emit_label (end_label); | |
626 | ||
627 | sequence = get_insns (); | |
628 | end_sequence (); | |
629 | rebuild_jump_labels (sequence); | |
630 | return sequence; | |
631 | } | |
632 | ||
633 | /* Do transforms 3) and 4) on INSN if applicable. */ | |
634 | static bool | |
635 | mod_subtract_transform (rtx insn) | |
636 | { | |
637 | rtx set, set_src, set_dest, op1, op2, value, histogram; | |
638 | enum rtx_code code; | |
639 | enum machine_mode mode; | |
640 | gcov_type wrong_values, counts[2], count, all; | |
641 | edge e; | |
642 | int i; | |
643 | ||
644 | set = single_set (insn); | |
645 | if (!set) | |
646 | return false; | |
647 | ||
648 | set_src = SET_SRC (set); | |
649 | set_dest = SET_DEST (set); | |
650 | code = GET_CODE (set_src); | |
651 | mode = GET_MODE (set_dest); | |
652 | ||
653 | if (code != UMOD) | |
654 | return false; | |
655 | op1 = XEXP (set_src, 0); | |
656 | op2 = XEXP (set_src, 1); | |
657 | ||
658 | for (histogram = REG_NOTES (insn); | |
659 | histogram; | |
660 | histogram = XEXP (histogram, 1)) | |
661 | if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE | |
662 | && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL)) | |
663 | break; | |
664 | ||
665 | if (!histogram) | |
666 | return false; | |
667 | ||
668 | histogram = XEXP (XEXP (histogram, 0), 1); | |
669 | value = XEXP (histogram, 0); | |
670 | histogram = XEXP (histogram, 1); | |
671 | ||
672 | all = 0; | |
673 | for (i = 0; i < 2; i++) | |
674 | { | |
675 | counts[i] = INTVAL (XEXP (histogram, 0)); | |
676 | all += counts[i]; | |
677 | histogram = XEXP (histogram, 1); | |
678 | } | |
679 | wrong_values = INTVAL (XEXP (histogram, 0)); | |
680 | histogram = XEXP (histogram, 1); | |
681 | wrong_values += INTVAL (XEXP (histogram, 0)); | |
682 | all += wrong_values; | |
683 | ||
684 | /* We require that we use just subtractions in at least 50% of all | |
685 | evaluations. */ | |
686 | count = 0; | |
687 | for (i = 0; i < 2; i++) | |
688 | { | |
689 | count += counts[i]; | |
690 | if (count * 2 >= all) | |
691 | break; | |
692 | } | |
693 | ||
694 | if (i == 2) | |
695 | return false; | |
696 | ||
c263766c RH |
697 | if (dump_file) |
698 | fprintf (dump_file, "Mod subtract transformation on insn %d\n", | |
fca9dc00 ZD |
699 | INSN_UID (insn)); |
700 | ||
701 | e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); | |
702 | delete_insn (insn); | |
703 | ||
704 | insert_insn_on_edge ( | |
705 | gen_mod_subtract (mode, code, set_dest, op1, op2, i), e); | |
706 | ||
707 | return true; | |
708 | } |