]>
Commit | Line | Data |
---|---|---|
26cd9add | 1 | /* Redundant Extension Elimination pass for the GNU compiler. |
282bc7b4 EB |
2 | Copyright (C) 2010, 2011 Free Software Foundation, Inc. |
3 | Contributed by Ilya Enkovich (ilya.enkovich@intel.com) | |
26cd9add | 4 | |
282bc7b4 EB |
5 | Based on the Redundant Zero-extension elimination pass contributed by |
6 | Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com). | |
87a0ebfd ST |
7 | |
8 | This file is part of GCC. | |
9 | ||
10 | GCC is free software; you can redistribute it and/or modify it under | |
11 | the terms of the GNU General Public License as published by the Free | |
12 | Software Foundation; either version 3, or (at your option) any later | |
13 | version. | |
14 | ||
15 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GCC; see the file COPYING3. If not see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | ||
25 | /* Problem Description : | |
26 | -------------------- | |
26cd9add EI |
27 | This pass is intended to remove redundant extension instructions. |
28 | Such instructions appear for different reasons. We expect some of | |
29 | them due to implicit zero-extension in 64-bit registers after writing | |
30 | to their lower 32-bit half (e.g. for the x86-64 architecture). | |
31 | Another possible reason is a type cast which follows a load (for | |
32 | instance a register restore) and which can be combined into a single | |
33 | instruction, and for which earlier local passes, e.g. the combiner, | |
34 | weren't able to optimize. | |
87a0ebfd ST |
35 | |
36 | How does this pass work ? | |
37 | -------------------------- | |
38 | ||
39 | This pass is run after register allocation. Hence, all registers that | |
26cd9add EI |
40 | this pass deals with are hard registers. This pass first looks for an |
41 | extension instruction that could possibly be redundant. Such extension | |
42 | instructions show up in RTL with the pattern : | |
43 | (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))), | |
44 | where x can be any hard register. | |
87a0ebfd | 45 | Now, this pass tries to eliminate this instruction by merging the |
26cd9add | 46 | extension with the definitions of register x. For instance, if |
87a0ebfd ST |
47 | one of the definitions of register x was : |
48 | (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), | |
26cd9add EI |
49 | followed by extension : |
50 | (set (reg:DI x) (zero_extend:DI (reg:SI x))) | |
87a0ebfd ST |
51 | then the combination converts this into : |
52 | (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). | |
53 | If all the merged definitions are recognizable assembly instructions, | |
26cd9add EI |
54 | the extension is effectively eliminated. |
55 | ||
56 | For example, for the x86-64 architecture, implicit zero-extensions | |
57 | are captured with appropriate patterns in the i386.md file. Hence, | |
58 | these merged definition can be matched to a single assembly instruction. | |
59 | The original extension instruction is then deleted if all the | |
60 | definitions can be merged. | |
87a0ebfd ST |
61 | |
62 | However, there are cases where the definition instruction cannot be | |
26cd9add EI |
63 | merged with an extension. Examples are CALL instructions. In such |
64 | cases, the original extension is not redundant and this pass does | |
87a0ebfd ST |
65 | not delete it. |
66 | ||
67 | Handling conditional moves : | |
68 | ---------------------------- | |
69 | ||
26cd9add EI |
70 | Architectures like x86-64 support conditional moves whose semantics for |
71 | extension differ from the other instructions. For instance, the | |
87a0ebfd ST |
72 | instruction *cmov ebx, eax* |
73 | zero-extends eax onto rax only when the move from ebx to eax happens. | |
282bc7b4 | 74 | Otherwise, eax may not be zero-extended. Consider conditional moves as |
87a0ebfd ST |
75 | RTL instructions of the form |
76 | (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). | |
26cd9add | 77 | This pass tries to merge an extension with a conditional move by |
282bc7b4 | 78 | actually merging the definitions of y and z with an extension and then |
87a0ebfd ST |
79 | converting the conditional move into : |
80 | (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). | |
26cd9add EI |
81 | Since registers y and z are extended, register x will also be extended |
82 | after the conditional move. Note that this step has to be done | |
83 | transitively since the definition of a conditional copy can be | |
87a0ebfd ST |
84 | another conditional copy. |
85 | ||
86 | Motivating Example I : | |
87 | --------------------- | |
88 | For this program : | |
89 | ********************************************** | |
90 | bad_code.c | |
91 | ||
92 | int mask[1000]; | |
93 | ||
94 | int foo(unsigned x) | |
95 | { | |
96 | if (x < 10) | |
97 | x = x * 45; | |
98 | else | |
99 | x = x * 78; | |
100 | return mask[x]; | |
101 | } | |
102 | ********************************************** | |
103 | ||
26cd9add | 104 | $ gcc -O2 bad_code.c |
87a0ebfd ST |
105 | ........ |
106 | 400315: b8 4e 00 00 00 mov $0x4e,%eax | |
107 | 40031a: 0f af f8 imul %eax,%edi | |
282bc7b4 | 108 | 40031d: 89 ff mov %edi,%edi - useless extension |
87a0ebfd ST |
109 | 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
110 | 400326: c3 retq | |
111 | ...... | |
112 | 400330: ba 2d 00 00 00 mov $0x2d,%edx | |
113 | 400335: 0f af fa imul %edx,%edi | |
282bc7b4 | 114 | 400338: 89 ff mov %edi,%edi - useless extension |
87a0ebfd ST |
115 | 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
116 | 400341: c3 retq | |
117 | ||
26cd9add | 118 | $ gcc -O2 -free bad_code.c |
87a0ebfd ST |
119 | ...... |
120 | 400315: 6b ff 4e imul $0x4e,%edi,%edi | |
121 | 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
122 | 40031f: c3 retq | |
123 | 400320: 6b ff 2d imul $0x2d,%edi,%edi | |
124 | 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
125 | 40032a: c3 retq | |
126 | ||
127 | Motivating Example II : | |
128 | --------------------- | |
129 | ||
130 | Here is an example with a conditional move. | |
131 | ||
132 | For this program : | |
133 | ********************************************** | |
134 | ||
135 | unsigned long long foo(unsigned x , unsigned y) | |
136 | { | |
137 | unsigned z; | |
138 | if (x > 100) | |
139 | z = x + y; | |
140 | else | |
141 | z = x - y; | |
142 | return (unsigned long long)(z); | |
143 | } | |
144 | ||
26cd9add | 145 | $ gcc -O2 bad_code.c |
87a0ebfd ST |
146 | ............ |
147 | 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx | |
148 | 400363: 89 f8 mov %edi,%eax | |
149 | 400365: 29 f0 sub %esi,%eax | |
150 | 400367: 83 ff 65 cmp $0x65,%edi | |
151 | 40036a: 0f 43 c2 cmovae %edx,%eax | |
282bc7b4 | 152 | 40036d: 89 c0 mov %eax,%eax - useless extension |
87a0ebfd ST |
153 | 40036f: c3 retq |
154 | ||
26cd9add | 155 | $ gcc -O2 -free bad_code.c |
87a0ebfd ST |
156 | ............. |
157 | 400360: 89 fa mov %edi,%edx | |
158 | 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax | |
159 | 400365: 29 f2 sub %esi,%edx | |
160 | 400367: 83 ff 65 cmp $0x65,%edi | |
161 | 40036a: 89 d6 mov %edx,%esi | |
162 | 40036c: 48 0f 42 c6 cmovb %rsi,%rax | |
163 | 400370: c3 retq | |
164 | ||
26cd9add EI |
165 | Motivating Example III : |
166 | --------------------- | |
167 | ||
168 | Here is an example with a type cast. | |
169 | ||
170 | For this program : | |
171 | ********************************************** | |
172 | ||
173 | void test(int size, unsigned char *in, unsigned char *out) | |
174 | { | |
175 | int i; | |
176 | unsigned char xr, xg, xy=0; | |
177 | ||
178 | for (i = 0; i < size; i++) { | |
179 | xr = *in++; | |
180 | xg = *in++; | |
181 | xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); | |
182 | *out++ = xy; | |
183 | } | |
184 | } | |
185 | ||
186 | $ gcc -O2 bad_code.c | |
187 | ............ | |
188 | 10: 0f b6 0e movzbl (%rsi),%ecx | |
189 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
190 | 17: 48 83 c6 02 add $0x2,%rsi | |
282bc7b4 EB |
191 | 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension |
192 | 1e: 0f b6 c0 movzbl %al,%eax - useless extension | |
26cd9add EI |
193 | 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx |
194 | 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
195 | ||
196 | $ gcc -O2 -free bad_code.c | |
197 | ............. | |
198 | 10: 0f b6 0e movzbl (%rsi),%ecx | |
199 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
200 | 17: 48 83 c6 02 add $0x2,%rsi | |
201 | 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx | |
202 | 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
87a0ebfd ST |
203 | |
204 | Usefulness : | |
205 | ---------- | |
206 | ||
26cd9add EI |
207 | The original redundant zero-extension elimination pass reported reduction |
208 | of the dynamic instruction count of a compression benchmark by 2.8% and | |
209 | improvement of its run time by about 1%. | |
87a0ebfd | 210 | |
26cd9add EI |
211 | The additional performance gain with the enhanced pass is mostly expected |
212 | on in-order architectures where redundancy cannot be compensated by out of | |
213 | order execution. Measurements showed up to 10% performance gain (reduced | |
214 | run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance | |
215 | gain 1%. */ | |
87a0ebfd ST |
216 | |
217 | ||
218 | #include "config.h" | |
219 | #include "system.h" | |
220 | #include "coretypes.h" | |
221 | #include "tm.h" | |
222 | #include "rtl.h" | |
223 | #include "tree.h" | |
224 | #include "tm_p.h" | |
225 | #include "flags.h" | |
226 | #include "regs.h" | |
227 | #include "hard-reg-set.h" | |
228 | #include "basic-block.h" | |
229 | #include "insn-config.h" | |
230 | #include "function.h" | |
231 | #include "expr.h" | |
232 | #include "insn-attr.h" | |
233 | #include "recog.h" | |
718f9c0f | 234 | #include "diagnostic-core.h" |
87a0ebfd ST |
235 | #include "target.h" |
236 | #include "timevar.h" | |
237 | #include "optabs.h" | |
238 | #include "insn-codes.h" | |
239 | #include "rtlhooks-def.h" | |
240 | /* Include output.h for dump_file. */ | |
241 | #include "output.h" | |
242 | #include "params.h" | |
243 | #include "timevar.h" | |
244 | #include "tree-pass.h" | |
245 | #include "df.h" | |
246 | #include "cgraph.h" | |
247 | ||
282bc7b4 | 248 | /* This structure represents a candidate for elimination. */ |
87a0ebfd | 249 | |
282bc7b4 | 250 | typedef struct GTY(()) ext_cand |
87a0ebfd | 251 | { |
282bc7b4 EB |
252 | /* The expression. */ |
253 | const_rtx expr; | |
87a0ebfd | 254 | |
282bc7b4 EB |
255 | /* The kind of extension. */ |
256 | enum rtx_code code; | |
26cd9add | 257 | |
282bc7b4 EB |
258 | /* The destination mode. */ |
259 | enum machine_mode mode; | |
260 | ||
261 | /* The instruction where it lives. */ | |
26cd9add | 262 | rtx insn; |
282bc7b4 | 263 | } ext_cand; |
26cd9add EI |
264 | |
265 | DEF_VEC_O(ext_cand); | |
266 | DEF_VEC_ALLOC_O(ext_cand, heap); | |
267 | ||
87a0ebfd ST |
268 | static int max_insn_uid; |
269 | ||
26cd9add EI |
270 | /* Given a insn (CURR_INSN), an extension candidate for removal (CAND) |
271 | and a pointer to the SET rtx (ORIG_SET) that needs to be modified, | |
272 | this code modifies the SET rtx to a new SET rtx that extends the | |
273 | right hand expression into a register on the left hand side. Note | |
274 | that multiple assumptions are made about the nature of the set that | |
275 | needs to be true for this to work and is called from merge_def_and_ext. | |
87a0ebfd ST |
276 | |
277 | Original : | |
26cd9add | 278 | (set (reg a) (expression)) |
87a0ebfd ST |
279 | |
280 | Transform : | |
282bc7b4 | 281 | (set (reg a) (any_extend (expression))) |
87a0ebfd ST |
282 | |
283 | Special Cases : | |
282bc7b4 | 284 | If the expression is a constant or another extension, then directly |
26cd9add | 285 | assign it to the register. */ |
87a0ebfd ST |
286 | |
287 | static bool | |
282bc7b4 | 288 | combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set) |
87a0ebfd | 289 | { |
282bc7b4 EB |
290 | rtx orig_src = SET_SRC (*orig_set); |
291 | rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set))); | |
292 | rtx new_set; | |
87a0ebfd | 293 | |
282bc7b4 EB |
294 | /* Merge constants by directly moving the constant into the register under |
295 | some conditions. Recall that RTL constants are sign-extended. */ | |
26cd9add | 296 | if (GET_CODE (orig_src) == CONST_INT |
282bc7b4 | 297 | && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode)) |
26cd9add | 298 | { |
282bc7b4 EB |
299 | if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND) |
300 | new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src); | |
87a0ebfd | 301 | else |
26cd9add EI |
302 | { |
303 | /* Zero-extend the negative constant by masking out the bits outside | |
304 | the source mode. */ | |
305 | enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set)); | |
282bc7b4 | 306 | rtx new_const_int |
26cd9add | 307 | = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode)); |
282bc7b4 | 308 | new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int); |
26cd9add EI |
309 | } |
310 | } | |
311 | else if (GET_MODE (orig_src) == VOIDmode) | |
312 | { | |
282bc7b4 | 313 | /* This is mostly due to a call insn that should not be optimized. */ |
26cd9add | 314 | return false; |
87a0ebfd | 315 | } |
282bc7b4 | 316 | else if (GET_CODE (orig_src) == cand->code) |
87a0ebfd | 317 | { |
282bc7b4 EB |
318 | /* Here is a sequence of two extensions. Try to merge them. */ |
319 | rtx temp_extension | |
320 | = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0)); | |
321 | rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
87a0ebfd ST |
322 | if (simplified_temp_extension) |
323 | temp_extension = simplified_temp_extension; | |
282bc7b4 | 324 | new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension); |
87a0ebfd ST |
325 | } |
326 | else if (GET_CODE (orig_src) == IF_THEN_ELSE) | |
327 | { | |
26cd9add | 328 | /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, |
87a0ebfd | 329 | in general, IF_THEN_ELSE should not be combined. */ |
87a0ebfd ST |
330 | return false; |
331 | } | |
332 | else | |
333 | { | |
282bc7b4 EB |
334 | /* This is the normal case. */ |
335 | rtx temp_extension | |
336 | = gen_rtx_fmt_e (cand->code, cand->mode, orig_src); | |
337 | rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
87a0ebfd ST |
338 | if (simplified_temp_extension) |
339 | temp_extension = simplified_temp_extension; | |
282bc7b4 | 340 | new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension); |
87a0ebfd ST |
341 | } |
342 | ||
26cd9add | 343 | /* This change is a part of a group of changes. Hence, |
87a0ebfd | 344 | validate_change will not try to commit the change. */ |
87a0ebfd ST |
345 | if (validate_change (curr_insn, orig_set, new_set, true)) |
346 | { | |
347 | if (dump_file) | |
348 | { | |
ca10595c EB |
349 | fprintf (dump_file, |
350 | "Tentatively merged extension with definition:\n"); | |
87a0ebfd ST |
351 | print_rtl_single (dump_file, curr_insn); |
352 | } | |
353 | return true; | |
354 | } | |
282bc7b4 | 355 | |
87a0ebfd ST |
356 | return false; |
357 | } | |
358 | ||
87a0ebfd | 359 | /* Treat if_then_else insns, where the operands of both branches |
26cd9add | 360 | are registers, as copies. For instance, |
87a0ebfd ST |
361 | Original : |
362 | (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) | |
363 | Transformed : | |
364 | (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c))) | |
365 | DEF_INSN is the if_then_else insn. */ | |
366 | ||
367 | static bool | |
282bc7b4 | 368 | transform_ifelse (ext_cand *cand, rtx def_insn) |
87a0ebfd ST |
369 | { |
370 | rtx set_insn = PATTERN (def_insn); | |
371 | rtx srcreg, dstreg, srcreg2; | |
372 | rtx map_srcreg, map_dstreg, map_srcreg2; | |
373 | rtx ifexpr; | |
374 | rtx cond; | |
375 | rtx new_set; | |
376 | ||
377 | gcc_assert (GET_CODE (set_insn) == SET); | |
282bc7b4 | 378 | |
87a0ebfd ST |
379 | cond = XEXP (SET_SRC (set_insn), 0); |
380 | dstreg = SET_DEST (set_insn); | |
381 | srcreg = XEXP (SET_SRC (set_insn), 1); | |
382 | srcreg2 = XEXP (SET_SRC (set_insn), 2); | |
282bc7b4 EB |
383 | map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg)); |
384 | map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2)); | |
385 | map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg)); | |
386 | ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2); | |
87a0ebfd ST |
387 | new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr); |
388 | ||
389 | if (validate_change (def_insn, &PATTERN (def_insn), new_set, true)) | |
390 | { | |
391 | if (dump_file) | |
392 | { | |
282bc7b4 EB |
393 | fprintf (dump_file, |
394 | "Mode of conditional move instruction extended:\n"); | |
87a0ebfd ST |
395 | print_rtl_single (dump_file, def_insn); |
396 | } | |
397 | return true; | |
398 | } | |
282bc7b4 EB |
399 | |
400 | return false; | |
87a0ebfd ST |
401 | } |
402 | ||
282bc7b4 EB |
403 | /* Get all the reaching definitions of an instruction. The definitions are |
404 | desired for REG used in INSN. Return the definition list or NULL if a | |
405 | definition is missing. If DEST is non-NULL, additionally push the INSN | |
406 | of the definitions onto DEST. */ | |
87a0ebfd | 407 | |
282bc7b4 EB |
408 | static struct df_link * |
409 | get_defs (rtx insn, rtx reg, VEC (rtx,heap) **dest) | |
87a0ebfd | 410 | { |
ca10595c | 411 | df_ref reg_info, *uses; |
282bc7b4 | 412 | struct df_link *ref_chain, *ref_link; |
87a0ebfd | 413 | |
87a0ebfd ST |
414 | reg_info = NULL; |
415 | ||
ca10595c | 416 | for (uses = DF_INSN_USES (insn); *uses; uses++) |
87a0ebfd | 417 | { |
ca10595c | 418 | reg_info = *uses; |
87a0ebfd | 419 | if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG) |
282bc7b4 EB |
420 | return NULL; |
421 | if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg)) | |
87a0ebfd | 422 | break; |
87a0ebfd ST |
423 | } |
424 | ||
ca10595c | 425 | gcc_assert (reg_info != NULL && uses != NULL); |
87a0ebfd | 426 | |
282bc7b4 EB |
427 | ref_chain = DF_REF_CHAIN (reg_info); |
428 | ||
429 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
87a0ebfd ST |
430 | { |
431 | /* Problem getting some definition for this instruction. */ | |
282bc7b4 EB |
432 | if (ref_link->ref == NULL) |
433 | return NULL; | |
434 | if (DF_REF_INSN_INFO (ref_link->ref) == NULL) | |
435 | return NULL; | |
87a0ebfd ST |
436 | } |
437 | ||
282bc7b4 EB |
438 | if (dest) |
439 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
440 | VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (ref_link->ref)); | |
87a0ebfd | 441 | |
282bc7b4 | 442 | return ref_chain; |
87a0ebfd ST |
443 | } |
444 | ||
282bc7b4 EB |
445 | /* Return true if INSN is |
446 | (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2))) | |
447 | and store x1 and x2 in REG_1 and REG_2. */ | |
87a0ebfd | 448 | |
282bc7b4 EB |
449 | static bool |
450 | is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2) | |
87a0ebfd | 451 | { |
282bc7b4 | 452 | rtx expr = single_set (insn); |
87a0ebfd | 453 | |
282bc7b4 EB |
454 | if (expr != NULL_RTX |
455 | && GET_CODE (expr) == SET | |
87a0ebfd | 456 | && GET_CODE (SET_DEST (expr)) == REG |
87a0ebfd ST |
457 | && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE |
458 | && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG | |
26cd9add | 459 | && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG) |
87a0ebfd | 460 | { |
282bc7b4 EB |
461 | *reg1 = XEXP (SET_SRC (expr), 1); |
462 | *reg2 = XEXP (SET_SRC (expr), 2); | |
463 | return true; | |
87a0ebfd ST |
464 | } |
465 | ||
282bc7b4 | 466 | return false; |
87a0ebfd ST |
467 | } |
468 | ||
26cd9add EI |
469 | /* Reaching Definitions of the extended register could be conditional copies |
470 | or regular definitions. This function separates the two types into two | |
471 | lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a reaching | |
282bc7b4 EB |
472 | definition is a conditional copy, merging the extension with this definition |
473 | is wrong. Conditional copies are merged by transitively merging their | |
26cd9add EI |
474 | definitions. The defs_list is populated with all the reaching definitions |
475 | of the extension instruction (EXTEND_INSN) which must be merged with an | |
476 | extension. The copies_list contains all the conditional moves that will | |
282bc7b4 EB |
477 | later be extended into a wider mode conditional move if all the merges are |
478 | successful. The function returns 0 upon failure, 1 upon success and 2 when | |
479 | all definitions of the EXTEND_INSN have been previously merged. */ | |
87a0ebfd ST |
480 | |
481 | static int | |
26cd9add | 482 | make_defs_and_copies_lists (rtx extend_insn, rtx set_pat, |
87a0ebfd ST |
483 | VEC (rtx,heap) **defs_list, |
484 | VEC (rtx,heap) **copies_list) | |
485 | { | |
282bc7b4 EB |
486 | VEC (rtx,heap) *work_list = VEC_alloc (rtx, heap, 8); |
487 | rtx src_reg = XEXP (SET_SRC (set_pat), 0); | |
87a0ebfd | 488 | bool *is_insn_visited; |
282bc7b4 | 489 | int ret = 1; |
87a0ebfd | 490 | |
282bc7b4 EB |
491 | /* Initialize the work list. */ |
492 | if (!get_defs (extend_insn, src_reg, &work_list)) | |
87a0ebfd ST |
493 | { |
494 | VEC_free (rtx, heap, work_list); | |
282bc7b4 | 495 | /* The number of defs being equal to zero can only mean that all the |
87a0ebfd ST |
496 | definitions have been previously merged. */ |
497 | return 2; | |
498 | } | |
499 | ||
282bc7b4 | 500 | is_insn_visited = XCNEWVEC (bool, max_insn_uid); |
87a0ebfd ST |
501 | |
502 | /* Perform transitive closure for conditional copies. */ | |
282bc7b4 | 503 | while (!VEC_empty (rtx, work_list)) |
87a0ebfd | 504 | { |
282bc7b4 EB |
505 | rtx def_insn = VEC_pop (rtx, work_list); |
506 | rtx reg1, reg2; | |
507 | ||
87a0ebfd ST |
508 | gcc_assert (INSN_UID (def_insn) < max_insn_uid); |
509 | ||
510 | if (is_insn_visited[INSN_UID (def_insn)]) | |
282bc7b4 | 511 | continue; |
87a0ebfd | 512 | is_insn_visited[INSN_UID (def_insn)] = true; |
87a0ebfd | 513 | |
282bc7b4 EB |
514 | if (is_cond_copy_insn (def_insn, ®1, ®2)) |
515 | { | |
516 | /* Push it onto the copy list first. */ | |
517 | VEC_safe_push (rtx, heap, *copies_list, def_insn); | |
518 | ||
519 | /* Now perform the transitive closure. */ | |
520 | if (!get_defs (def_insn, reg1, &work_list) | |
521 | || !get_defs (def_insn, reg2, &work_list)) | |
522 | { | |
523 | ret = 0; | |
524 | break; | |
525 | } | |
87a0ebfd ST |
526 | } |
527 | else | |
282bc7b4 | 528 | VEC_safe_push (rtx, heap, *defs_list, def_insn); |
87a0ebfd ST |
529 | } |
530 | ||
87a0ebfd | 531 | XDELETEVEC (is_insn_visited); |
282bc7b4 EB |
532 | VEC_free (rtx, heap, work_list); |
533 | ||
534 | return ret; | |
87a0ebfd ST |
535 | } |
536 | ||
282bc7b4 | 537 | /* Merge the DEF_INSN with an extension. Calls combine_set_extension |
87a0ebfd ST |
538 | on the SET pattern. */ |
539 | ||
540 | static bool | |
282bc7b4 | 541 | merge_def_and_ext (ext_cand *cand, rtx def_insn) |
87a0ebfd | 542 | { |
26cd9add | 543 | enum machine_mode ext_src_mode; |
87a0ebfd | 544 | enum rtx_code code; |
87a0ebfd ST |
545 | rtx *sub_rtx; |
546 | rtx s_expr; | |
547 | int i; | |
548 | ||
26cd9add | 549 | ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0)); |
87a0ebfd ST |
550 | code = GET_CODE (PATTERN (def_insn)); |
551 | sub_rtx = NULL; | |
552 | ||
553 | if (code == PARALLEL) | |
554 | { | |
555 | for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++) | |
556 | { | |
557 | s_expr = XVECEXP (PATTERN (def_insn), 0, i); | |
558 | if (GET_CODE (s_expr) != SET) | |
559 | continue; | |
560 | ||
561 | if (sub_rtx == NULL) | |
562 | sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i); | |
563 | else | |
564 | { | |
565 | /* PARALLEL with multiple SETs. */ | |
566 | return false; | |
567 | } | |
568 | } | |
569 | } | |
570 | else if (code == SET) | |
571 | sub_rtx = &PATTERN (def_insn); | |
572 | else | |
573 | { | |
574 | /* It is not a PARALLEL or a SET, what could it be ? */ | |
575 | return false; | |
576 | } | |
577 | ||
578 | gcc_assert (sub_rtx != NULL); | |
579 | ||
580 | if (GET_CODE (SET_DEST (*sub_rtx)) == REG | |
26cd9add | 581 | && GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode) |
87a0ebfd | 582 | { |
282bc7b4 | 583 | return combine_set_extension (cand, def_insn, sub_rtx); |
87a0ebfd | 584 | } |
26cd9add EI |
585 | |
586 | return false; | |
87a0ebfd ST |
587 | } |
588 | ||
589 | /* This function goes through all reaching defs of the source | |
26cd9add EI |
590 | of the candidate for elimination (CAND) and tries to combine |
591 | the extension with the definition instruction. The changes | |
592 | are made as a group so that even if one definition cannot be | |
593 | merged, all reaching definitions end up not being merged. | |
594 | When a conditional copy is encountered, merging is attempted | |
595 | transitively on its definitions. It returns true upon success | |
596 | and false upon failure. */ | |
87a0ebfd ST |
597 | |
598 | static bool | |
282bc7b4 | 599 | combine_reaching_defs (ext_cand *cand, rtx set_pat) |
87a0ebfd ST |
600 | { |
601 | rtx def_insn; | |
602 | bool merge_successful = true; | |
603 | int i; | |
604 | int defs_ix; | |
605 | int outcome; | |
87a0ebfd | 606 | VEC (rtx, heap) *defs_list, *copies_list, *vec; |
87a0ebfd ST |
607 | |
608 | defs_list = VEC_alloc (rtx, heap, 8); | |
609 | copies_list = VEC_alloc (rtx, heap, 8); | |
610 | ||
26cd9add | 611 | outcome = make_defs_and_copies_lists (cand->insn, |
87a0ebfd ST |
612 | set_pat, &defs_list, &copies_list); |
613 | ||
282bc7b4 EB |
614 | /* outcome == 2 means that all the definitions for this extension have been |
615 | previously merged when handling other extensions. */ | |
87a0ebfd ST |
616 | if (outcome == 2) |
617 | { | |
618 | VEC_free (rtx, heap, defs_list); | |
619 | VEC_free (rtx, heap, copies_list); | |
620 | if (dump_file) | |
282bc7b4 | 621 | fprintf (dump_file, "All definitions have been previously merged.\n"); |
87a0ebfd ST |
622 | return true; |
623 | } | |
624 | ||
625 | if (outcome == 0) | |
626 | { | |
627 | VEC_free (rtx, heap, defs_list); | |
628 | VEC_free (rtx, heap, copies_list); | |
629 | return false; | |
630 | } | |
631 | ||
632 | merge_successful = true; | |
633 | ||
634 | /* Go through the defs vector and try to merge all the definitions | |
635 | in this vector. */ | |
87a0ebfd | 636 | vec = VEC_alloc (rtx, heap, 8); |
ac47786e | 637 | FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn) |
87a0ebfd | 638 | { |
26cd9add | 639 | if (merge_def_and_ext (cand, def_insn)) |
87a0ebfd ST |
640 | VEC_safe_push (rtx, heap, vec, def_insn); |
641 | else | |
642 | { | |
643 | merge_successful = false; | |
644 | break; | |
645 | } | |
646 | } | |
647 | ||
648 | /* Now go through the conditional copies vector and try to merge all | |
649 | the copies in this vector. */ | |
87a0ebfd ST |
650 | if (merge_successful) |
651 | { | |
ac47786e | 652 | FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn) |
87a0ebfd | 653 | { |
26cd9add | 654 | if (transform_ifelse (cand, def_insn)) |
87a0ebfd ST |
655 | { |
656 | VEC_safe_push (rtx, heap, vec, def_insn); | |
657 | } | |
658 | else | |
659 | { | |
660 | merge_successful = false; | |
661 | break; | |
662 | } | |
663 | } | |
664 | } | |
665 | ||
666 | if (merge_successful) | |
667 | { | |
282bc7b4 EB |
668 | /* Commit the changes here if possible |
669 | FIXME: It's an all-or-nothing scenario. Even if only one definition | |
670 | cannot be merged, we entirely give up. In the future, we should allow | |
671 | extensions to be partially eliminated along those paths where the | |
672 | definitions could be merged. */ | |
87a0ebfd ST |
673 | if (apply_change_group ()) |
674 | { | |
675 | if (dump_file) | |
282bc7b4 | 676 | fprintf (dump_file, "All merges were successful.\n"); |
87a0ebfd ST |
677 | |
678 | VEC_free (rtx, heap, vec); | |
679 | VEC_free (rtx, heap, defs_list); | |
680 | VEC_free (rtx, heap, copies_list); | |
681 | return true; | |
682 | } | |
683 | else | |
684 | { | |
0acba2b4 EB |
685 | /* Changes need not be cancelled explicitly as apply_change_group |
686 | does it. Print list of definitions in the dump_file for debug | |
26cd9add | 687 | purposes. This extension cannot be deleted. */ |
87a0ebfd ST |
688 | if (dump_file) |
689 | { | |
ca10595c EB |
690 | fprintf (dump_file, |
691 | "Merge cancelled, non-mergeable definitions:\n"); | |
692 | FOR_EACH_VEC_ELT (rtx, vec, i, def_insn) | |
693 | print_rtl_single (dump_file, def_insn); | |
87a0ebfd ST |
694 | } |
695 | } | |
696 | } | |
697 | else | |
698 | { | |
699 | /* Cancel any changes that have been made so far. */ | |
700 | cancel_changes (0); | |
701 | } | |
702 | ||
703 | VEC_free (rtx, heap, vec); | |
704 | VEC_free (rtx, heap, defs_list); | |
705 | VEC_free (rtx, heap, copies_list); | |
282bc7b4 | 706 | |
87a0ebfd ST |
707 | return false; |
708 | } | |
709 | ||
282bc7b4 | 710 | /* This structure holds information while walking the RTL stream. */ |
0acba2b4 | 711 | |
282bc7b4 | 712 | struct re_info |
0acba2b4 | 713 | { |
282bc7b4 | 714 | /* The current insn. */ |
0acba2b4 EB |
715 | rtx insn; |
716 | ||
717 | /* The list of candidates. */ | |
26cd9add | 718 | VEC (ext_cand, heap) *insn_list; |
0acba2b4 | 719 | |
282bc7b4 EB |
720 | /* The map of definition instructions to candidates. */ |
721 | ext_cand **def_map; | |
722 | }; | |
26cd9add EI |
723 | |
724 | /* Add an extension pattern that could be eliminated. This is called via | |
725 | note_stores from find_removable_extensions. */ | |
0acba2b4 EB |
726 | |
727 | static void | |
26cd9add | 728 | add_removable_extension (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data) |
0acba2b4 | 729 | { |
282bc7b4 EB |
730 | struct re_info *rei = (struct re_info *)data; |
731 | enum rtx_code code; | |
732 | enum machine_mode mode; | |
0acba2b4 EB |
733 | rtx src, dest; |
734 | ||
282bc7b4 | 735 | /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */ |
0acba2b4 EB |
736 | if (GET_CODE (expr) != SET) |
737 | return; | |
738 | ||
739 | src = SET_SRC (expr); | |
282bc7b4 | 740 | code = GET_CODE (src); |
0acba2b4 | 741 | dest = SET_DEST (expr); |
282bc7b4 | 742 | mode = GET_MODE (dest); |
0acba2b4 EB |
743 | |
744 | if (REG_P (dest) | |
282bc7b4 | 745 | && (code == SIGN_EXTEND || code == ZERO_EXTEND) |
0acba2b4 | 746 | && REG_P (XEXP (src, 0)) |
0acba2b4 EB |
747 | && REGNO (dest) == REGNO (XEXP (src, 0))) |
748 | { | |
282bc7b4 EB |
749 | struct df_link *defs, *def; |
750 | ext_cand *cand; | |
751 | ||
752 | /* First, make sure we can get all the reaching definitions. */ | |
753 | defs = get_defs (rei->insn, XEXP (src, 0), NULL); | |
754 | if (!defs) | |
0acba2b4 | 755 | { |
282bc7b4 EB |
756 | if (dump_file) |
757 | { | |
758 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
759 | print_rtl_single (dump_file, rei->insn); | |
760 | fprintf (dump_file, " because of missing definition(s)\n"); | |
761 | } | |
762 | return; | |
0acba2b4 | 763 | } |
282bc7b4 EB |
764 | |
765 | /* Second, make sure the reaching definitions don't feed another and | |
766 | different extension. FIXME: this obviously can be improved. */ | |
767 | for (def = defs; def; def = def->next) | |
768 | if ((cand = rei->def_map[INSN_UID(DF_REF_INSN (def->ref))]) | |
769 | && (cand->code != code || cand->mode != mode)) | |
770 | { | |
771 | if (dump_file) | |
772 | { | |
773 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
774 | print_rtl_single (dump_file, rei->insn); | |
775 | fprintf (dump_file, " because of other extension\n"); | |
776 | } | |
777 | return; | |
778 | } | |
779 | ||
780 | /* Then add the candidate to the list and insert the reaching definitions | |
781 | into the definition map. */ | |
782 | cand = VEC_safe_push (ext_cand, heap, rei->insn_list, NULL); | |
783 | cand->expr = expr; | |
784 | cand->code = code; | |
785 | cand->mode = mode; | |
786 | cand->insn = rei->insn; | |
787 | ||
788 | for (def = defs; def; def = def->next) | |
789 | rei->def_map[INSN_UID(DF_REF_INSN (def->ref))] = cand; | |
0acba2b4 EB |
790 | } |
791 | } | |
792 | ||
26cd9add | 793 | /* Traverse the instruction stream looking for extensions and return the |
0acba2b4 | 794 | list of candidates. */ |
87a0ebfd | 795 | |
26cd9add EI |
796 | static VEC (ext_cand, heap)* |
797 | find_removable_extensions (void) | |
87a0ebfd | 798 | { |
282bc7b4 | 799 | struct re_info rei; |
0acba2b4 EB |
800 | basic_block bb; |
801 | rtx insn; | |
87a0ebfd | 802 | |
26cd9add | 803 | rei.insn_list = VEC_alloc (ext_cand, heap, 8); |
282bc7b4 | 804 | rei.def_map = XCNEWVEC (ext_cand *, max_insn_uid); |
87a0ebfd | 805 | |
0acba2b4 EB |
806 | FOR_EACH_BB (bb) |
807 | FOR_BB_INSNS (bb, insn) | |
808 | { | |
809 | if (!NONDEBUG_INSN_P (insn)) | |
810 | continue; | |
87a0ebfd | 811 | |
26cd9add EI |
812 | rei.insn = insn; |
813 | note_stores (PATTERN (insn), add_removable_extension, &rei); | |
0acba2b4 EB |
814 | } |
815 | ||
282bc7b4 EB |
816 | XDELETEVEC (rei.def_map); |
817 | ||
26cd9add | 818 | return rei.insn_list; |
87a0ebfd ST |
819 | } |
820 | ||
821 | /* This is the main function that checks the insn stream for redundant | |
26cd9add | 822 | extensions and tries to remove them if possible. */ |
87a0ebfd | 823 | |
282bc7b4 | 824 | static void |
26cd9add | 825 | find_and_remove_re (void) |
87a0ebfd | 826 | { |
282bc7b4 | 827 | ext_cand *curr_cand; |
87a0ebfd | 828 | rtx curr_insn = NULL_RTX; |
282bc7b4 | 829 | int num_re_opportunities = 0, num_realized = 0, i; |
26cd9add EI |
830 | VEC (ext_cand, heap) *reinsn_list; |
831 | VEC (rtx, heap) *reinsn_del_list; | |
87a0ebfd ST |
832 | |
833 | /* Construct DU chain to get all reaching definitions of each | |
26cd9add | 834 | extension instruction. */ |
87a0ebfd ST |
835 | df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); |
836 | df_analyze (); | |
837 | ||
838 | max_insn_uid = get_max_uid (); | |
26cd9add | 839 | reinsn_del_list = VEC_alloc (rtx, heap, 4); |
26cd9add | 840 | reinsn_list = find_removable_extensions (); |
87a0ebfd | 841 | |
282bc7b4 | 842 | FOR_EACH_VEC_ELT (ext_cand, reinsn_list, i, curr_cand) |
87a0ebfd | 843 | { |
26cd9add | 844 | num_re_opportunities++; |
87a0ebfd | 845 | |
ca10595c EB |
846 | /* If the candidate insn is itself a definition insn for another |
847 | candidate, it may have been modified and the UD chain broken. | |
848 | FIXME: the handling of successive extensions can be improved. */ | |
849 | if (!reg_mentioned_p (curr_cand->expr, PATTERN (curr_cand->insn))) | |
850 | continue; | |
851 | ||
282bc7b4 | 852 | /* Try to combine the extension with the definition. */ |
87a0ebfd ST |
853 | if (dump_file) |
854 | { | |
282bc7b4 EB |
855 | fprintf (dump_file, "Trying to eliminate extension:\n"); |
856 | print_rtl_single (dump_file, curr_cand->insn); | |
87a0ebfd ST |
857 | } |
858 | ||
26cd9add | 859 | if (combine_reaching_defs (curr_cand, PATTERN (curr_cand->insn))) |
87a0ebfd ST |
860 | { |
861 | if (dump_file) | |
282bc7b4 | 862 | fprintf (dump_file, "Eliminated the extension.\n"); |
87a0ebfd | 863 | num_realized++; |
26cd9add | 864 | VEC_safe_push (rtx, heap, reinsn_del_list, curr_cand->insn); |
87a0ebfd ST |
865 | } |
866 | } | |
867 | ||
26cd9add | 868 | /* Delete all useless extensions here in one sweep. */ |
282bc7b4 | 869 | FOR_EACH_VEC_ELT (rtx, reinsn_del_list, i, curr_insn) |
0acba2b4 | 870 | delete_insn (curr_insn); |
87a0ebfd | 871 | |
26cd9add EI |
872 | VEC_free (ext_cand, heap, reinsn_list); |
873 | VEC_free (rtx, heap, reinsn_del_list); | |
87a0ebfd | 874 | |
26cd9add | 875 | if (dump_file && num_re_opportunities > 0) |
282bc7b4 EB |
876 | fprintf (dump_file, "Elimination opportunities = %d realized = %d\n", |
877 | num_re_opportunities, num_realized); | |
87a0ebfd ST |
878 | |
879 | df_finish_pass (false); | |
87a0ebfd ST |
880 | } |
881 | ||
26cd9add | 882 | /* Find and remove redundant extensions. */ |
87a0ebfd ST |
883 | |
884 | static unsigned int | |
26cd9add | 885 | rest_of_handle_ree (void) |
87a0ebfd | 886 | { |
26cd9add EI |
887 | timevar_push (TV_REE); |
888 | find_and_remove_re (); | |
889 | timevar_pop (TV_REE); | |
87a0ebfd ST |
890 | return 0; |
891 | } | |
892 | ||
26cd9add | 893 | /* Run REE pass when flag_ree is set at optimization level > 0. */ |
87a0ebfd ST |
894 | |
895 | static bool | |
26cd9add | 896 | gate_handle_ree (void) |
87a0ebfd | 897 | { |
26cd9add | 898 | return (optimize > 0 && flag_ree); |
87a0ebfd ST |
899 | } |
900 | ||
26cd9add | 901 | struct rtl_opt_pass pass_ree = |
87a0ebfd ST |
902 | { |
903 | { | |
904 | RTL_PASS, | |
26cd9add EI |
905 | "ree", /* name */ |
906 | gate_handle_ree, /* gate */ | |
907 | rest_of_handle_ree, /* execute */ | |
87a0ebfd ST |
908 | NULL, /* sub */ |
909 | NULL, /* next */ | |
910 | 0, /* static_pass_number */ | |
26cd9add | 911 | TV_REE, /* tv_id */ |
87a0ebfd ST |
912 | 0, /* properties_required */ |
913 | 0, /* properties_provided */ | |
914 | 0, /* properties_destroyed */ | |
915 | 0, /* todo_flags_start */ | |
916 | TODO_ggc_collect | | |
87a0ebfd ST |
917 | TODO_verify_rtl_sharing, /* todo_flags_finish */ |
918 | } | |
919 | }; |