]>
Commit | Line | Data |
---|---|---|
26cd9add | 1 | /* Redundant Extension Elimination pass for the GNU compiler. |
5624e564 | 2 | Copyright (C) 2010-2015 Free Software Foundation, Inc. |
282bc7b4 | 3 | Contributed by Ilya Enkovich (ilya.enkovich@intel.com) |
26cd9add | 4 | |
282bc7b4 EB |
5 | Based on the Redundant Zero-extension elimination pass contributed by |
6 | Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com). | |
87a0ebfd ST |
7 | |
8 | This file is part of GCC. | |
9 | ||
10 | GCC is free software; you can redistribute it and/or modify it under | |
11 | the terms of the GNU General Public License as published by the Free | |
12 | Software Foundation; either version 3, or (at your option) any later | |
13 | version. | |
14 | ||
15 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 | for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GCC; see the file COPYING3. If not see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | ||
25 | /* Problem Description : | |
26 | -------------------- | |
26cd9add EI |
27 | This pass is intended to remove redundant extension instructions. |
28 | Such instructions appear for different reasons. We expect some of | |
29 | them due to implicit zero-extension in 64-bit registers after writing | |
30 | to their lower 32-bit half (e.g. for the x86-64 architecture). | |
31 | Another possible reason is a type cast which follows a load (for | |
32 | instance a register restore) and which can be combined into a single | |
33 | instruction, and for which earlier local passes, e.g. the combiner, | |
34 | weren't able to optimize. | |
87a0ebfd ST |
35 | |
36 | How does this pass work ? | |
37 | -------------------------- | |
38 | ||
39 | This pass is run after register allocation. Hence, all registers that | |
26cd9add EI |
40 | this pass deals with are hard registers. This pass first looks for an |
41 | extension instruction that could possibly be redundant. Such extension | |
42 | instructions show up in RTL with the pattern : | |
43 | (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))), | |
44 | where x can be any hard register. | |
87a0ebfd | 45 | Now, this pass tries to eliminate this instruction by merging the |
26cd9add | 46 | extension with the definitions of register x. For instance, if |
87a0ebfd ST |
47 | one of the definitions of register x was : |
48 | (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))), | |
26cd9add EI |
49 | followed by extension : |
50 | (set (reg:DI x) (zero_extend:DI (reg:SI x))) | |
87a0ebfd ST |
51 | then the combination converts this into : |
52 | (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))). | |
53 | If all the merged definitions are recognizable assembly instructions, | |
26cd9add EI |
54 | the extension is effectively eliminated. |
55 | ||
56 | For example, for the x86-64 architecture, implicit zero-extensions | |
57 | are captured with appropriate patterns in the i386.md file. Hence, | |
58 | these merged definitions can be matched to a single assembly instruction. | |
59 | The original extension instruction is then deleted if all the | |
60 | definitions can be merged. | |
87a0ebfd ST |
61 | |
62 | However, there are cases where the definition instruction cannot be | |
26cd9add EI |
63 | merged with an extension. Examples are CALL instructions. In such |
64 | cases, the original extension is not redundant and this pass does | |
87a0ebfd ST |
65 | not delete it. |
66 | ||
67 | Handling conditional moves : | |
68 | ---------------------------- | |
69 | ||
26cd9add EI |
70 | Architectures like x86-64 support conditional moves whose semantics for |
71 | extension differ from the other instructions. For instance, the | |
87a0ebfd ST |
72 | instruction *cmov ebx, eax* |
73 | zero-extends eax onto rax only when the move from ebx to eax happens. | |
282bc7b4 | 74 | Otherwise, eax may not be zero-extended. Consider conditional moves as |
87a0ebfd ST |
75 | RTL instructions of the form |
76 | (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))). | |
26cd9add | 77 | This pass tries to merge an extension with a conditional move by |
282bc7b4 | 78 | actually merging the definitions of y and z with an extension and then |
87a0ebfd ST |
79 | converting the conditional move into : |
80 | (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))). | |
26cd9add EI |
81 | Since registers y and z are extended, register x will also be extended |
82 | after the conditional move. Note that this step has to be done | |
83 | transitively since the definition of a conditional copy can be | |
87a0ebfd ST |
84 | another conditional copy. |
85 | ||
86 | Motivating Example I : | |
87 | --------------------- | |
88 | For this program : | |
89 | ********************************************** | |
90 | bad_code.c | |
91 | ||
92 | int mask[1000]; | |
93 | ||
94 | int foo(unsigned x) | |
95 | { | |
96 | if (x < 10) | |
97 | x = x * 45; | |
98 | else | |
99 | x = x * 78; | |
100 | return mask[x]; | |
101 | } | |
102 | ********************************************** | |
103 | ||
26cd9add | 104 | $ gcc -O2 bad_code.c |
87a0ebfd ST |
105 | ........ |
106 | 400315: b8 4e 00 00 00 mov $0x4e,%eax | |
107 | 40031a: 0f af f8 imul %eax,%edi | |
282bc7b4 | 108 | 40031d: 89 ff mov %edi,%edi - useless extension |
87a0ebfd ST |
109 | 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
110 | 400326: c3 retq | |
111 | ...... | |
112 | 400330: ba 2d 00 00 00 mov $0x2d,%edx | |
113 | 400335: 0f af fa imul %edx,%edi | |
282bc7b4 | 114 | 400338: 89 ff mov %edi,%edi - useless extension |
87a0ebfd ST |
115 | 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax |
116 | 400341: c3 retq | |
117 | ||
26cd9add | 118 | $ gcc -O2 -free bad_code.c |
87a0ebfd ST |
119 | ...... |
120 | 400315: 6b ff 4e imul $0x4e,%edi,%edi | |
121 | 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
122 | 40031f: c3 retq | |
123 | 400320: 6b ff 2d imul $0x2d,%edi,%edi | |
124 | 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax | |
125 | 40032a: c3 retq | |
126 | ||
127 | Motivating Example II : | |
128 | --------------------- | |
129 | ||
130 | Here is an example with a conditional move. | |
131 | ||
132 | For this program : | |
133 | ********************************************** | |
134 | ||
135 | unsigned long long foo(unsigned x , unsigned y) | |
136 | { | |
137 | unsigned z; | |
138 | if (x > 100) | |
139 | z = x + y; | |
140 | else | |
141 | z = x - y; | |
142 | return (unsigned long long)(z); | |
143 | } | |
144 | ||
26cd9add | 145 | $ gcc -O2 bad_code.c |
87a0ebfd ST |
146 | ............ |
147 | 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx | |
148 | 400363: 89 f8 mov %edi,%eax | |
149 | 400365: 29 f0 sub %esi,%eax | |
150 | 400367: 83 ff 65 cmp $0x65,%edi | |
151 | 40036a: 0f 43 c2 cmovae %edx,%eax | |
282bc7b4 | 152 | 40036d: 89 c0 mov %eax,%eax - useless extension |
87a0ebfd ST |
153 | 40036f: c3 retq |
154 | ||
26cd9add | 155 | $ gcc -O2 -free bad_code.c |
87a0ebfd ST |
156 | ............. |
157 | 400360: 89 fa mov %edi,%edx | |
158 | 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax | |
159 | 400365: 29 f2 sub %esi,%edx | |
160 | 400367: 83 ff 65 cmp $0x65,%edi | |
161 | 40036a: 89 d6 mov %edx,%esi | |
162 | 40036c: 48 0f 42 c6 cmovb %rsi,%rax | |
163 | 400370: c3 retq | |
164 | ||
26cd9add EI |
165 | Motivating Example III : |
166 | --------------------- | |
167 | ||
168 | Here is an example with a type cast. | |
169 | ||
170 | For this program : | |
171 | ********************************************** | |
172 | ||
173 | void test(int size, unsigned char *in, unsigned char *out) | |
174 | { | |
175 | int i; | |
176 | unsigned char xr, xg, xy=0; | |
177 | ||
178 | for (i = 0; i < size; i++) { | |
179 | xr = *in++; | |
180 | xg = *in++; | |
181 | xy = (unsigned char) ((19595*xr + 38470*xg) >> 16); | |
182 | *out++ = xy; | |
183 | } | |
184 | } | |
185 | ||
186 | $ gcc -O2 bad_code.c | |
187 | ............ | |
188 | 10: 0f b6 0e movzbl (%rsi),%ecx | |
189 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
190 | 17: 48 83 c6 02 add $0x2,%rsi | |
282bc7b4 EB |
191 | 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension |
192 | 1e: 0f b6 c0 movzbl %al,%eax - useless extension | |
26cd9add EI |
193 | 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx |
194 | 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
195 | ||
196 | $ gcc -O2 -free bad_code.c | |
197 | ............. | |
198 | 10: 0f b6 0e movzbl (%rsi),%ecx | |
199 | 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax | |
200 | 17: 48 83 c6 02 add $0x2,%rsi | |
201 | 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx | |
202 | 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax | |
87a0ebfd ST |
203 | |
204 | Usefulness : | |
205 | ---------- | |
206 | ||
26cd9add EI |
207 | The original redundant zero-extension elimination pass reported reduction |
208 | of the dynamic instruction count of a compression benchmark by 2.8% and | |
209 | improvement of its run time by about 1%. | |
87a0ebfd | 210 | |
26cd9add EI |
211 | The additional performance gain with the enhanced pass is mostly expected |
212 | on in-order architectures where redundancy cannot be compensated by out of | |
213 | order execution. Measurements showed up to 10% performance gain (reduced | |
214 | run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance | |
215 | gain 1%. */ | |
87a0ebfd ST |
216 | |
217 | ||
218 | #include "config.h" | |
219 | #include "system.h" | |
220 | #include "coretypes.h" | |
c7131fb2 AM |
221 | #include "backend.h" |
222 | #include "tree.h" | |
87a0ebfd | 223 | #include "rtl.h" |
c7131fb2 | 224 | #include "df.h" |
40e23961 | 225 | #include "alias.h" |
87a0ebfd ST |
226 | #include "tm_p.h" |
227 | #include "flags.h" | |
228 | #include "regs.h" | |
60393bbc | 229 | #include "cfgrtl.h" |
60393bbc | 230 | #include "insn-config.h" |
36566b39 PK |
231 | #include "expmed.h" |
232 | #include "dojump.h" | |
233 | #include "explow.h" | |
234 | #include "calls.h" | |
235 | #include "emit-rtl.h" | |
236 | #include "varasm.h" | |
237 | #include "stmt.h" | |
87a0ebfd ST |
238 | #include "expr.h" |
239 | #include "insn-attr.h" | |
240 | #include "recog.h" | |
718f9c0f | 241 | #include "diagnostic-core.h" |
87a0ebfd | 242 | #include "target.h" |
87a0ebfd | 243 | #include "insn-codes.h" |
b0710fe1 | 244 | #include "optabs.h" |
87a0ebfd | 245 | #include "rtlhooks-def.h" |
87a0ebfd | 246 | #include "params.h" |
87a0ebfd | 247 | #include "tree-pass.h" |
87a0ebfd | 248 | #include "cgraph.h" |
524d9b4b | 249 | #include "bitmap.h" |
87a0ebfd | 250 | |
282bc7b4 | 251 | /* This structure represents a candidate for elimination: one extension insn that the pass tries to merge into the definitions reaching it. */ |
87a0ebfd | 252 | |
50686850 | 253 | struct ext_cand |
87a0ebfd | 254 | { |
282bc7b4 EB |
255 | /* The expression. Users take SET_SRC of it, so it is the SET pattern of the extension. */ |
256 | const_rtx expr; | |
87a0ebfd | 257 | |
282bc7b4 EB |
258 | /* The kind of extension; compared against ZERO_EXTEND and SIGN_EXTEND. */ |
259 | enum rtx_code code; | |
26cd9add | 260 | |
282bc7b4 | 261 | /* The destination mode, i.e. the mode the value is extended to. */ |
ef4bddc2 | 262 | machine_mode mode; |
282bc7b4 EB |
263 | |
264 | /* The instruction where it lives (the extension insn itself). */ | |
59a0c032 | 265 | rtx_insn *insn; |
50686850 | 266 | }; |
26cd9add | 267 | |
26cd9add | 268 | |
87a0ebfd ST |
269 | static int max_insn_uid; /* Exclusive upper bound on the insn UIDs this pass handles; also the length of the visited array in make_defs_and_copies_lists. */
270 | ||
73c49bf5 JJ |
271 | /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN. */ |
272 | ||
273 | static bool | |
274 | update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode new_mode, | |
275 | machine_mode old_mode, enum rtx_code code) | |
276 | { | |
277 | rtx *loc = ®_NOTES (insn); | |
278 | while (*loc) | |
279 | { | |
280 | enum reg_note kind = REG_NOTE_KIND (*loc); | |
281 | if (kind == REG_EQUAL || kind == REG_EQUIV) | |
282 | { | |
283 | rtx orig_src = XEXP (*loc, 0); | |
284 | /* Update equivalency constants. Recall that RTL constants are | |
285 | sign-extended. */ | |
286 | if (GET_CODE (orig_src) == CONST_INT | |
287 | && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (new_mode)) | |
288 | { | |
289 | if (INTVAL (orig_src) >= 0 || code == SIGN_EXTEND) | |
290 | /* Nothing needed. */; | |
291 | else | |
292 | { | |
293 | /* Zero-extend the negative constant by masking out the | |
294 | bits outside the source mode. */ | |
295 | rtx new_const_int | |
296 | = gen_int_mode (INTVAL (orig_src) | |
297 | & GET_MODE_MASK (old_mode), | |
298 | new_mode); | |
299 | if (!validate_change (insn, &XEXP (*loc, 0), | |
300 | new_const_int, true)) | |
301 | return false; | |
302 | } | |
303 | loc = &XEXP (*loc, 1); | |
304 | } | |
305 | /* Drop all other notes, they assume a wrong mode. */ | |
306 | else if (!validate_change (insn, loc, XEXP (*loc, 1), true)) | |
307 | return false; | |
308 | } | |
309 | else | |
310 | loc = &XEXP (*loc, 1); | |
311 | } | |
312 | return true; | |
313 | } | |
314 | ||
26cd9add EI |
315 | /* Given a insn (CURR_INSN), an extension candidate for removal (CAND) |
316 | and a pointer to the SET rtx (ORIG_SET) that needs to be modified, | |
317 | this code modifies the SET rtx to a new SET rtx that extends the | |
318 | right hand expression into a register on the left hand side. Note | |
319 | that multiple assumptions are made about the nature of the set that | |
320 | needs to be true for this to work and is called from merge_def_and_ext. | |
87a0ebfd ST |
321 | |
322 | Original : | |
26cd9add | 323 | (set (reg a) (expression)) |
87a0ebfd ST |
324 | |
325 | Transform : | |
282bc7b4 | 326 | (set (reg a) (any_extend (expression))) |
87a0ebfd ST |
327 | |
328 | Special Cases : | |
282bc7b4 | 329 | If the expression is a constant or another extension, then directly |
26cd9add | 330 | assign it to the register. */ |
87a0ebfd ST |
331 | |
332 | static bool | |
59a0c032 | 333 | combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, rtx *orig_set) |
87a0ebfd | 334 | { |
282bc7b4 | 335 | rtx orig_src = SET_SRC (*orig_set); |
73c49bf5 | 336 | machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set)); |
282bc7b4 | 337 | rtx new_set; |
3c92da90 JL |
338 | rtx cand_pat = PATTERN (cand->insn); |
339 | ||
340 | /* If the extension's source/destination registers are not the same | |
341 | then we need to change the original load to reference the destination | |
342 | of the extension. Then we need to emit a copy from that destination | |
343 | to the original destination of the load. */ | |
344 | rtx new_reg; | |
345 | bool copy_needed | |
346 | = (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0))); | |
347 | if (copy_needed) | |
348 | new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (cand_pat))); | |
349 | else | |
350 | new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set))); | |
87a0ebfd | 351 | |
2043135a JL |
352 | #if 0 |
353 | /* Rethinking test. Temporarily disabled. */ | |
a6a2d67b JL |
354 | /* We're going to be widening the result of DEF_INSN, ensure that doing so |
355 | doesn't change the number of hard registers needed for the result. */ | |
356 | if (HARD_REGNO_NREGS (REGNO (new_reg), cand->mode) | |
2043135a JL |
357 | != HARD_REGNO_NREGS (REGNO (SET_DEST (*orig_set)), |
358 | GET_MODE (SET_DEST (*orig_set)))) | |
a6a2d67b | 359 | return false; |
2043135a | 360 | #endif |
a6a2d67b | 361 | |
282bc7b4 EB |
362 | /* Merge constants by directly moving the constant into the register under |
363 | some conditions. Recall that RTL constants are sign-extended. */ | |
26cd9add | 364 | if (GET_CODE (orig_src) == CONST_INT |
282bc7b4 | 365 | && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode)) |
26cd9add | 366 | { |
282bc7b4 | 367 | if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND) |
f7df4a84 | 368 | new_set = gen_rtx_SET (new_reg, orig_src); |
87a0ebfd | 369 | else |
26cd9add EI |
370 | { |
371 | /* Zero-extend the negative constant by masking out the bits outside | |
372 | the source mode. */ | |
282bc7b4 | 373 | rtx new_const_int |
73c49bf5 | 374 | = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (orig_mode), |
69db2d57 | 375 | GET_MODE (new_reg)); |
f7df4a84 | 376 | new_set = gen_rtx_SET (new_reg, new_const_int); |
26cd9add EI |
377 | } |
378 | } | |
379 | else if (GET_MODE (orig_src) == VOIDmode) | |
380 | { | |
282bc7b4 | 381 | /* This is mostly due to a call insn that should not be optimized. */ |
26cd9add | 382 | return false; |
87a0ebfd | 383 | } |
282bc7b4 | 384 | else if (GET_CODE (orig_src) == cand->code) |
87a0ebfd | 385 | { |
282bc7b4 EB |
386 | /* Here is a sequence of two extensions. Try to merge them. */ |
387 | rtx temp_extension | |
388 | = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0)); | |
389 | rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
87a0ebfd ST |
390 | if (simplified_temp_extension) |
391 | temp_extension = simplified_temp_extension; | |
f7df4a84 | 392 | new_set = gen_rtx_SET (new_reg, temp_extension); |
87a0ebfd ST |
393 | } |
394 | else if (GET_CODE (orig_src) == IF_THEN_ELSE) | |
395 | { | |
26cd9add | 396 | /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise, |
87a0ebfd | 397 | in general, IF_THEN_ELSE should not be combined. */ |
87a0ebfd ST |
398 | return false; |
399 | } | |
400 | else | |
401 | { | |
282bc7b4 EB |
402 | /* This is the normal case. */ |
403 | rtx temp_extension | |
404 | = gen_rtx_fmt_e (cand->code, cand->mode, orig_src); | |
405 | rtx simplified_temp_extension = simplify_rtx (temp_extension); | |
87a0ebfd ST |
406 | if (simplified_temp_extension) |
407 | temp_extension = simplified_temp_extension; | |
f7df4a84 | 408 | new_set = gen_rtx_SET (new_reg, temp_extension); |
87a0ebfd ST |
409 | } |
410 | ||
26cd9add | 411 | /* This change is a part of a group of changes. Hence, |
87a0ebfd | 412 | validate_change will not try to commit the change. */ |
73c49bf5 JJ |
413 | if (validate_change (curr_insn, orig_set, new_set, true) |
414 | && update_reg_equal_equiv_notes (curr_insn, cand->mode, orig_mode, | |
415 | cand->code)) | |
87a0ebfd ST |
416 | { |
417 | if (dump_file) | |
418 | { | |
ca10595c | 419 | fprintf (dump_file, |
3c92da90 JL |
420 | "Tentatively merged extension with definition %s:\n", |
421 | (copy_needed) ? "(copy needed)" : ""); | |
87a0ebfd ST |
422 | print_rtl_single (dump_file, curr_insn); |
423 | } | |
424 | return true; | |
425 | } | |
282bc7b4 | 426 | |
87a0ebfd ST |
427 | return false; |
428 | } | |
429 | ||
87a0ebfd | 430 | /* Treat if_then_else insns, where the operands of both branches |
26cd9add | 431 | are registers, as copies. For instance, |
87a0ebfd ST |
432 | Original : |
433 | (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c))) | |
434 | Transformed : | |
435 | (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c))) | |
436 | DEF_INSN is the if_then_else insn. */ | |
437 | ||
438 | static bool | |
59a0c032 | 439 | transform_ifelse (ext_cand *cand, rtx_insn *def_insn) |
87a0ebfd ST |
440 | { |
441 | rtx set_insn = PATTERN (def_insn); | |
442 | rtx srcreg, dstreg, srcreg2; | |
443 | rtx map_srcreg, map_dstreg, map_srcreg2; | |
444 | rtx ifexpr; | |
445 | rtx cond; | |
446 | rtx new_set; | |
447 | ||
448 | gcc_assert (GET_CODE (set_insn) == SET); | |
282bc7b4 | 449 | |
87a0ebfd ST |
450 | cond = XEXP (SET_SRC (set_insn), 0); |
451 | dstreg = SET_DEST (set_insn); | |
452 | srcreg = XEXP (SET_SRC (set_insn), 1); | |
453 | srcreg2 = XEXP (SET_SRC (set_insn), 2); | |
b57cca0b JJ |
454 | /* If the conditional move already has the right or wider mode, |
455 | there is nothing to do. */ | |
456 | if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode)) | |
457 | return true; | |
458 | ||
282bc7b4 EB |
459 | map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg)); |
460 | map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2)); | |
461 | map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg)); | |
462 | ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2); | |
f7df4a84 | 463 | new_set = gen_rtx_SET (map_dstreg, ifexpr); |
87a0ebfd | 464 | |
73c49bf5 JJ |
465 | if (validate_change (def_insn, &PATTERN (def_insn), new_set, true) |
466 | && update_reg_equal_equiv_notes (def_insn, cand->mode, GET_MODE (dstreg), | |
467 | cand->code)) | |
87a0ebfd ST |
468 | { |
469 | if (dump_file) | |
470 | { | |
282bc7b4 EB |
471 | fprintf (dump_file, |
472 | "Mode of conditional move instruction extended:\n"); | |
87a0ebfd ST |
473 | print_rtl_single (dump_file, def_insn); |
474 | } | |
475 | return true; | |
476 | } | |
282bc7b4 EB |
477 | |
478 | return false; | |
87a0ebfd ST |
479 | } |
480 | ||
282bc7b4 EB |
481 | /* Get all the reaching definitions of an instruction. The definitions are |
482 | desired for REG used in INSN. Return the definition list or NULL if a | |
483 | definition is missing. If DEST is non-NULL, additionally push the INSN | |
484 | of the definitions onto DEST. */ | |
87a0ebfd | 485 | |
282bc7b4 | 486 | static struct df_link * |
59a0c032 | 487 | get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest) |
87a0ebfd | 488 | { |
bfac633a | 489 | df_ref use; |
282bc7b4 | 490 | struct df_link *ref_chain, *ref_link; |
87a0ebfd | 491 | |
bfac633a | 492 | FOR_EACH_INSN_USE (use, insn) |
87a0ebfd | 493 | { |
bfac633a | 494 | if (GET_CODE (DF_REF_REG (use)) == SUBREG) |
282bc7b4 | 495 | return NULL; |
bfac633a RS |
496 | if (REGNO (DF_REF_REG (use)) == REGNO (reg)) |
497 | break; | |
87a0ebfd ST |
498 | } |
499 | ||
bfac633a | 500 | gcc_assert (use != NULL); |
87a0ebfd | 501 | |
bfac633a | 502 | ref_chain = DF_REF_CHAIN (use); |
282bc7b4 EB |
503 | |
504 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
87a0ebfd ST |
505 | { |
506 | /* Problem getting some definition for this instruction. */ | |
282bc7b4 EB |
507 | if (ref_link->ref == NULL) |
508 | return NULL; | |
509 | if (DF_REF_INSN_INFO (ref_link->ref) == NULL) | |
510 | return NULL; | |
87a0ebfd ST |
511 | } |
512 | ||
282bc7b4 EB |
513 | if (dest) |
514 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
9771b263 | 515 | dest->safe_push (DF_REF_INSN (ref_link->ref)); |
87a0ebfd | 516 | |
282bc7b4 | 517 | return ref_chain; |
87a0ebfd ST |
518 | } |
519 | ||
282bc7b4 EB |
520 | /* Return true if INSN is |
521 | (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2))) | |
522 | and store x1 and x2 in REG_1 and REG_2. */ | |
87a0ebfd | 523 | |
282bc7b4 | 524 | static bool |
59a0c032 | 525 | is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2) |
87a0ebfd | 526 | { |
282bc7b4 | 527 | rtx expr = single_set (insn); |
87a0ebfd | 528 | |
282bc7b4 EB |
529 | if (expr != NULL_RTX |
530 | && GET_CODE (expr) == SET | |
87a0ebfd | 531 | && GET_CODE (SET_DEST (expr)) == REG |
87a0ebfd ST |
532 | && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE |
533 | && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG | |
26cd9add | 534 | && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG) |
87a0ebfd | 535 | { |
282bc7b4 EB |
536 | *reg1 = XEXP (SET_SRC (expr), 1); |
537 | *reg2 = XEXP (SET_SRC (expr), 2); | |
538 | return true; | |
87a0ebfd ST |
539 | } |
540 | ||
282bc7b4 | 541 | return false; |
87a0ebfd ST |
542 | } |
543 | ||
b57cca0b JJ |
544 | enum ext_modified_kind /* How, if at all, the ree pass has already rewritten an insn; stored in ext_modified::kind. */
545 | { | |
546 | /* The insn hasn't been modified by ree pass yet. */ | |
547 | EXT_MODIFIED_NONE, | |
548 | /* Changed into zero extension (matched against ZERO_EXTEND candidates in merge_def_and_ext). */ | |
549 | EXT_MODIFIED_ZEXT, | |
550 | /* Changed into sign extension (matched against all other candidates in merge_def_and_ext). */ | |
551 | EXT_MODIFIED_SEXT | |
552 | }; | |
553 | ||
925e30ff | 554 | struct ATTRIBUTE_PACKED ext_modified /* Per-insn record of what ree has done to an insn; ext_state::modified holds these, indexed by INSN_UID. */ |
b57cca0b JJ |
555 | { |
556 | /* Mode from which ree has zero or sign extended the destination. */ | |
557 | ENUM_BITFIELD(machine_mode) mode : 8; | |
558 | ||
559 | /* Kind of modification of the insn (an ext_modified_kind value). */ | |
560 | ENUM_BITFIELD(ext_modified_kind) kind : 2; | |
561 | ||
0d732cca JL |
562 | unsigned int do_not_reextend : 1; /* NOTE(review): undocumented and not used in the visible part of the file; presumably marks an insn that must not be extended again - confirm. */ |
563 | ||
b57cca0b JJ |
564 | /* True if the insn is scheduled to be deleted; merge_def_and_ext refuses to modify such an insn. */ |
565 | unsigned int deleted : 1; | |
566 | }; | |
567 | ||
568 | /* Vectors and per-insn bookkeeping used by combine_reaching_defs and its helpers. */ | |
50686850 | 569 | struct ext_state |
b57cca0b | 570 | { |
9771b263 | 571 | /* In order to avoid constant alloc/free, we keep these |
b57cca0b | 572 | 4 vectors live through the entire find_and_remove_re and just |
9771b263 | 573 | truncate them each time. */ |
59a0c032 DM |
574 | vec<rtx_insn *> defs_list; /* Reaching definitions that must be merged with the extension. */ |
575 | vec<rtx_insn *> copies_list; /* Conditional copies to be widened if all merges succeed. */ | |
576 | vec<rtx_insn *> modified_list; /* NOTE(review): not used in the visible part of the file; presumably the insns changed so far - confirm. */ | |
577 | vec<rtx_insn *> work_list; /* Worklist for the transitive closure over conditional copies. */ | |
b57cca0b JJ |
578 | |
579 | /* For instructions that have been successfully modified, this is | |
580 | the original mode from which the insn is extending and | |
581 | kind of extension. The array is indexed by INSN_UID. */ | |
582 | struct ext_modified *modified; | |
50686850 | 583 | }; |
b57cca0b | 584 | |
26cd9add EI |
585 | /* Reaching Definitions of the extended register could be conditional copies |
586 | or regular definitions. This function separates the two types into two | |
b57cca0b JJ |
587 | lists, STATE->DEFS_LIST and STATE->COPIES_LIST. This is necessary because, |
588 | if a reaching definition is a conditional copy, merging the extension with | |
589 | this definition is wrong. Conditional copies are merged by transitively | |
590 | merging their definitions. The defs_list is populated with all the reaching | |
591 | definitions of the extension instruction (EXTEND_INSN) which must be merged | |
592 | with an extension. The copies_list contains all the conditional moves that | |
593 | will later be extended into a wider mode conditional move if all the merges | |
594 | are successful. The function returns false upon failure, true upon | |
595 | success. */ | |
596 | ||
597 | static bool | |
59a0c032 | 598 | make_defs_and_copies_lists (rtx_insn *extend_insn, const_rtx set_pat, |
b57cca0b | 599 | ext_state *state) |
87a0ebfd | 600 | { |
282bc7b4 | 601 | rtx src_reg = XEXP (SET_SRC (set_pat), 0); |
87a0ebfd | 602 | bool *is_insn_visited; |
b57cca0b JJ |
603 | bool ret = true; |
604 | ||
9771b263 | 605 | state->work_list.truncate (0); |
87a0ebfd | 606 | |
282bc7b4 | 607 | /* Initialize the work list. */ |
b57cca0b JJ |
608 | if (!get_defs (extend_insn, src_reg, &state->work_list)) |
609 | gcc_unreachable (); | |
87a0ebfd | 610 | |
282bc7b4 | 611 | is_insn_visited = XCNEWVEC (bool, max_insn_uid); |
87a0ebfd ST |
612 | |
613 | /* Perform transitive closure for conditional copies. */ | |
9771b263 | 614 | while (!state->work_list.is_empty ()) |
87a0ebfd | 615 | { |
59a0c032 | 616 | rtx_insn *def_insn = state->work_list.pop (); |
282bc7b4 EB |
617 | rtx reg1, reg2; |
618 | ||
87a0ebfd ST |
619 | gcc_assert (INSN_UID (def_insn) < max_insn_uid); |
620 | ||
621 | if (is_insn_visited[INSN_UID (def_insn)]) | |
282bc7b4 | 622 | continue; |
87a0ebfd | 623 | is_insn_visited[INSN_UID (def_insn)] = true; |
87a0ebfd | 624 | |
282bc7b4 EB |
625 | if (is_cond_copy_insn (def_insn, ®1, ®2)) |
626 | { | |
627 | /* Push it onto the copy list first. */ | |
9771b263 | 628 | state->copies_list.safe_push (def_insn); |
282bc7b4 EB |
629 | |
630 | /* Now perform the transitive closure. */ | |
b57cca0b JJ |
631 | if (!get_defs (def_insn, reg1, &state->work_list) |
632 | || !get_defs (def_insn, reg2, &state->work_list)) | |
282bc7b4 | 633 | { |
b57cca0b | 634 | ret = false; |
282bc7b4 EB |
635 | break; |
636 | } | |
87a0ebfd ST |
637 | } |
638 | else | |
9771b263 | 639 | state->defs_list.safe_push (def_insn); |
87a0ebfd ST |
640 | } |
641 | ||
87a0ebfd | 642 | XDELETEVEC (is_insn_visited); |
282bc7b4 EB |
643 | |
644 | return ret; | |
87a0ebfd ST |
645 | } |
646 | ||
650c4c85 JL |
647 | /* If DEF_INSN has single SET expression, possibly buried inside |
648 | a PARALLEL, return the address of the SET expression, else | |
649 | return NULL. This is similar to single_set, except that | |
650 | single_set allows multiple SETs when all but one is dead. */ | |
651 | static rtx * | |
59a0c032 | 652 | get_sub_rtx (rtx_insn *def_insn) |
87a0ebfd | 653 | { |
650c4c85 JL |
654 | enum rtx_code code = GET_CODE (PATTERN (def_insn)); |
655 | rtx *sub_rtx = NULL; | |
87a0ebfd ST |
656 | |
657 | if (code == PARALLEL) | |
658 | { | |
650c4c85 | 659 | for (int i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++) |
87a0ebfd | 660 | { |
650c4c85 | 661 | rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i); |
87a0ebfd ST |
662 | if (GET_CODE (s_expr) != SET) |
663 | continue; | |
664 | ||
665 | if (sub_rtx == NULL) | |
666 | sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i); | |
667 | else | |
668 | { | |
669 | /* PARALLEL with multiple SETs. */ | |
650c4c85 | 670 | return NULL; |
87a0ebfd ST |
671 | } |
672 | } | |
673 | } | |
674 | else if (code == SET) | |
675 | sub_rtx = &PATTERN (def_insn); | |
676 | else | |
677 | { | |
678 | /* It is not a PARALLEL or a SET, what could it be ? */ | |
650c4c85 | 679 | return NULL; |
87a0ebfd ST |
680 | } |
681 | ||
682 | gcc_assert (sub_rtx != NULL); | |
650c4c85 JL |
683 | return sub_rtx; |
684 | } | |
685 | ||
686 | /* Merge the DEF_INSN with an extension. Calls combine_set_extension | |
687 | on the SET pattern. */ | |
688 | ||
689 | static bool | |
59a0c032 | 690 | merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, ext_state *state) |
650c4c85 | 691 | { |
ef4bddc2 | 692 | machine_mode ext_src_mode; |
650c4c85 JL |
693 | rtx *sub_rtx; |
694 | ||
695 | ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0)); | |
696 | sub_rtx = get_sub_rtx (def_insn); | |
697 | ||
698 | if (sub_rtx == NULL) | |
699 | return false; | |
87a0ebfd | 700 | |
b57cca0b JJ |
701 | if (REG_P (SET_DEST (*sub_rtx)) |
702 | && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode | |
703 | || ((state->modified[INSN_UID (def_insn)].kind | |
704 | == (cand->code == ZERO_EXTEND | |
705 | ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)) | |
706 | && state->modified[INSN_UID (def_insn)].mode | |
707 | == ext_src_mode))) | |
87a0ebfd | 708 | { |
b57cca0b JJ |
709 | if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx))) |
710 | >= GET_MODE_SIZE (cand->mode)) | |
711 | return true; | |
712 | /* If def_insn is already scheduled to be deleted, don't attempt | |
713 | to modify it. */ | |
714 | if (state->modified[INSN_UID (def_insn)].deleted) | |
715 | return false; | |
716 | if (combine_set_extension (cand, def_insn, sub_rtx)) | |
717 | { | |
718 | if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE) | |
719 | state->modified[INSN_UID (def_insn)].mode = ext_src_mode; | |
720 | return true; | |
721 | } | |
87a0ebfd | 722 | } |
26cd9add EI |
723 | |
724 | return false; | |
87a0ebfd ST |
725 | } |
726 | ||
059742a4 JL |
727 | /* Given SRC, which should be one or more extensions of a REG, strip |
728 | away the extensions and return the REG. */ | |
729 | ||
730 | static inline rtx | |
731 | get_extended_src_reg (rtx src) | |
732 | { | |
733 | while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND) | |
734 | src = XEXP (src, 0); | |
735 | gcc_assert (REG_P (src)); | |
736 | return src; | |
737 | } | |
738 | ||
87a0ebfd | 739 | /* This function goes through all reaching defs of the source |
26cd9add EI |
740 | of the candidate for elimination (CAND) and tries to combine |
741 | the extension with the definition instruction. The changes | |
742 | are made as a group so that even if one definition cannot be | |
743 | merged, all reaching definitions end up not being merged. | |
744 | When a conditional copy is encountered, merging is attempted | |
745 | transitively on its definitions. It returns true upon success | |
746 | and false upon failure. */ | |
87a0ebfd ST |
747 | |
748 | static bool | |
089dacc5 | 749 | combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state) |
87a0ebfd | 750 | { |
59a0c032 | 751 | rtx_insn *def_insn; |
87a0ebfd ST |
752 | bool merge_successful = true; |
753 | int i; | |
754 | int defs_ix; | |
b57cca0b | 755 | bool outcome; |
87a0ebfd | 756 | |
9771b263 DN |
757 | state->defs_list.truncate (0); |
758 | state->copies_list.truncate (0); | |
87a0ebfd | 759 | |
b57cca0b | 760 | outcome = make_defs_and_copies_lists (cand->insn, set_pat, state); |
87a0ebfd | 761 | |
b57cca0b JJ |
762 | if (!outcome) |
763 | return false; | |
87a0ebfd | 764 | |
3c92da90 JL |
765 | /* If the destination operand of the extension is a different |
766 | register than the source operand, then additional restrictions | |
059742a4 JL |
767 | are needed. Note we have to handle cases where we have nested |
768 | extensions in the source operand. */ | |
0d732cca JL |
769 | bool copy_needed |
770 | = (REGNO (SET_DEST (PATTERN (cand->insn))) | |
771 | != REGNO (get_extended_src_reg (SET_SRC (PATTERN (cand->insn))))); | |
772 | if (copy_needed) | |
3c92da90 | 773 | { |
860dadcb JJ |
774 | /* Considering transformation of |
775 | (set (reg1) (expression)) | |
776 | ... | |
777 | (set (reg2) (any_extend (reg1))) | |
778 | ||
779 | into | |
780 | ||
781 | (set (reg2) (any_extend (expression))) | |
782 | (set (reg1) (reg2)) | |
783 | ... */ | |
784 | ||
3c92da90 JL |
785 | /* In theory we could handle more than one reaching def, it |
786 | just makes the code to update the insn stream more complex. */ | |
787 | if (state->defs_list.length () != 1) | |
788 | return false; | |
789 | ||
059742a4 JL |
790 | /* We require the candidate not already be modified. It may, |
791 | for example have been changed from a (sign_extend (reg)) | |
0d732cca | 792 | into (zero_extend (sign_extend (reg))). |
059742a4 JL |
793 | |
794 | Handling that case shouldn't be terribly difficult, but the code | |
795 | here and the code to emit copies would need auditing. Until | |
796 | we see a need, this is the safe thing to do. */ | |
3c92da90 JL |
797 | if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) |
798 | return false; | |
799 | ||
ef4bddc2 | 800 | machine_mode dst_mode = GET_MODE (SET_DEST (PATTERN (cand->insn))); |
e533e26c WD |
801 | rtx src_reg = get_extended_src_reg (SET_SRC (PATTERN (cand->insn))); |
802 | ||
803 | /* Ensure the number of hard registers of the copy match. */ | |
804 | if (HARD_REGNO_NREGS (REGNO (src_reg), dst_mode) | |
805 | != HARD_REGNO_NREGS (REGNO (src_reg), GET_MODE (src_reg))) | |
806 | return false; | |
807 | ||
3c92da90 | 808 | /* There's only one reaching def. */ |
59a0c032 | 809 | rtx_insn *def_insn = state->defs_list[0]; |
3c92da90 JL |
810 | |
811 | /* The defining statement must not have been modified either. */ | |
812 | if (state->modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE) | |
813 | return false; | |
814 | ||
815 | /* The defining statement and candidate insn must be in the same block. | |
816 | This is merely to keep the test for safety and updating the insn | |
7e41c852 JL |
817 | stream simple. Also ensure that within the block the candidate |
818 | follows the defining insn. */ | |
daca1a96 RS |
819 | basic_block bb = BLOCK_FOR_INSN (cand->insn); |
820 | if (bb != BLOCK_FOR_INSN (def_insn) | |
7e41c852 | 821 | || DF_INSN_LUID (def_insn) > DF_INSN_LUID (cand->insn)) |
3c92da90 JL |
822 | return false; |
823 | ||
824 | /* If there is an overlap between the destination of DEF_INSN and | |
825 | CAND->insn, then this transformation is not safe. Note we have | |
826 | to test in the widened mode. */ | |
650c4c85 JL |
827 | rtx *dest_sub_rtx = get_sub_rtx (def_insn); |
828 | if (dest_sub_rtx == NULL | |
829 | || !REG_P (SET_DEST (*dest_sub_rtx))) | |
830 | return false; | |
831 | ||
3c92da90 | 832 | rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand->insn))), |
650c4c85 | 833 | REGNO (SET_DEST (*dest_sub_rtx))); |
3c92da90 JL |
834 | if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand->insn)))) |
835 | return false; | |
836 | ||
837 | /* The destination register of the extension insn must not be | |
838 | used or set between the def_insn and cand->insn exclusive. */ | |
839 | if (reg_used_between_p (SET_DEST (PATTERN (cand->insn)), | |
840 | def_insn, cand->insn) | |
841 | || reg_set_between_p (SET_DEST (PATTERN (cand->insn)), | |
842 | def_insn, cand->insn)) | |
843 | return false; | |
0d732cca JL |
844 | |
845 | /* We must be able to copy between the two registers. Generate, | |
846 | recognize and verify constraints of the copy. Also fail if this | |
847 | generated more than one insn. | |
848 | ||
849 | This generates garbage since we throw away the insn when we're | |
c7ece684 JL |
850 | done, only to recreate it later if this test was successful. |
851 | ||
852 | Make sure to get the mode from the extension (cand->insn). This | |
853 | is different than in the code to emit the copy as we have not | |
854 | modified the defining insn yet. */ | |
0d732cca | 855 | start_sequence (); |
0d732cca | 856 | rtx pat = PATTERN (cand->insn); |
c7ece684 | 857 | rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (pat)), |
e533e26c | 858 | REGNO (get_extended_src_reg (SET_SRC (pat)))); |
c7ece684 | 859 | rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (pat)), |
0d732cca JL |
860 | REGNO (SET_DEST (pat))); |
861 | emit_move_insn (new_dst, new_src); | |
862 | ||
b32d5189 | 863 | rtx_insn *insn = get_insns(); |
0d732cca JL |
864 | end_sequence (); |
865 | if (NEXT_INSN (insn)) | |
866 | return false; | |
867 | if (recog_memoized (insn) == -1) | |
868 | return false; | |
869 | extract_insn (insn); | |
daca1a96 | 870 | if (!constrain_operands (1, get_preferred_alternatives (insn, bb))) |
0d732cca | 871 | return false; |
3c92da90 JL |
872 | } |
873 | ||
874 | ||
6aae324c JJ |
875 | /* If cand->insn has been already modified, update cand->mode to a wider |
876 | mode if possible, or punt. */ | |
877 | if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE) | |
878 | { | |
ef4bddc2 | 879 | machine_mode mode; |
6aae324c JJ |
880 | rtx set; |
881 | ||
882 | if (state->modified[INSN_UID (cand->insn)].kind | |
883 | != (cand->code == ZERO_EXTEND | |
884 | ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT) | |
885 | || state->modified[INSN_UID (cand->insn)].mode != cand->mode | |
886 | || (set = single_set (cand->insn)) == NULL_RTX) | |
887 | return false; | |
888 | mode = GET_MODE (SET_DEST (set)); | |
889 | gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode)); | |
890 | cand->mode = mode; | |
891 | } | |
892 | ||
87a0ebfd ST |
893 | merge_successful = true; |
894 | ||
895 | /* Go through the defs vector and try to merge all the definitions | |
896 | in this vector. */ | |
9771b263 DN |
897 | state->modified_list.truncate (0); |
898 | FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn) | |
87a0ebfd | 899 | { |
b57cca0b | 900 | if (merge_def_and_ext (cand, def_insn, state)) |
9771b263 | 901 | state->modified_list.safe_push (def_insn); |
87a0ebfd ST |
902 | else |
903 | { | |
904 | merge_successful = false; | |
905 | break; | |
906 | } | |
907 | } | |
908 | ||
909 | /* Now go through the conditional copies vector and try to merge all | |
910 | the copies in this vector. */ | |
87a0ebfd ST |
911 | if (merge_successful) |
912 | { | |
9771b263 | 913 | FOR_EACH_VEC_ELT (state->copies_list, i, def_insn) |
87a0ebfd | 914 | { |
26cd9add | 915 | if (transform_ifelse (cand, def_insn)) |
9771b263 | 916 | state->modified_list.safe_push (def_insn); |
87a0ebfd ST |
917 | else |
918 | { | |
919 | merge_successful = false; | |
920 | break; | |
921 | } | |
922 | } | |
923 | } | |
924 | ||
925 | if (merge_successful) | |
926 | { | |
282bc7b4 EB |
927 | /* Commit the changes here if possible |
928 | FIXME: It's an all-or-nothing scenario. Even if only one definition | |
929 | cannot be merged, we entirely give up. In the future, we should allow | |
930 | extensions to be partially eliminated along those paths where the | |
931 | definitions could be merged. */ | |
87a0ebfd ST |
932 | if (apply_change_group ()) |
933 | { | |
934 | if (dump_file) | |
282bc7b4 | 935 | fprintf (dump_file, "All merges were successful.\n"); |
87a0ebfd | 936 | |
9771b263 | 937 | FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) |
0d732cca JL |
938 | { |
939 | ext_modified *modified = &state->modified[INSN_UID (def_insn)]; | |
940 | if (modified->kind == EXT_MODIFIED_NONE) | |
941 | modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT | |
942 | : EXT_MODIFIED_SEXT); | |
b57cca0b | 943 | |
0d732cca JL |
944 | if (copy_needed) |
945 | modified->do_not_reextend = 1; | |
946 | } | |
87a0ebfd ST |
947 | return true; |
948 | } | |
949 | else | |
950 | { | |
0acba2b4 EB |
951 | /* Changes need not be cancelled explicitly as apply_change_group |
952 | does it. Print list of definitions in the dump_file for debug | |
26cd9add | 953 | purposes. This extension cannot be deleted. */ |
87a0ebfd ST |
954 | if (dump_file) |
955 | { | |
ca10595c EB |
956 | fprintf (dump_file, |
957 | "Merge cancelled, non-mergeable definitions:\n"); | |
9771b263 | 958 | FOR_EACH_VEC_ELT (state->modified_list, i, def_insn) |
ca10595c | 959 | print_rtl_single (dump_file, def_insn); |
87a0ebfd ST |
960 | } |
961 | } | |
962 | } | |
963 | else | |
964 | { | |
965 | /* Cancel any changes that have been made so far. */ | |
966 | cancel_changes (0); | |
967 | } | |
968 | ||
87a0ebfd ST |
969 | return false; |
970 | } | |
971 | ||
089dacc5 | 972 | /* Add an extension pattern that could be eliminated. */ |
0acba2b4 EB |
973 | |
974 | static void | |
59a0c032 | 975 | add_removable_extension (const_rtx expr, rtx_insn *insn, |
9771b263 | 976 | vec<ext_cand> *insn_list, |
524d9b4b PMR |
977 | unsigned *def_map, |
978 | bitmap init_regs) | |
0acba2b4 | 979 | { |
282bc7b4 | 980 | enum rtx_code code; |
ef4bddc2 | 981 | machine_mode mode; |
68c8a824 | 982 | unsigned int idx; |
0acba2b4 EB |
983 | rtx src, dest; |
984 | ||
282bc7b4 | 985 | /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */ |
0acba2b4 EB |
986 | if (GET_CODE (expr) != SET) |
987 | return; | |
988 | ||
989 | src = SET_SRC (expr); | |
282bc7b4 | 990 | code = GET_CODE (src); |
0acba2b4 | 991 | dest = SET_DEST (expr); |
282bc7b4 | 992 | mode = GET_MODE (dest); |
0acba2b4 EB |
993 | |
994 | if (REG_P (dest) | |
282bc7b4 | 995 | && (code == SIGN_EXTEND || code == ZERO_EXTEND) |
3c92da90 | 996 | && REG_P (XEXP (src, 0))) |
0acba2b4 | 997 | { |
524d9b4b | 998 | rtx reg = XEXP (src, 0); |
282bc7b4 EB |
999 | struct df_link *defs, *def; |
1000 | ext_cand *cand; | |
1001 | ||
524d9b4b PMR |
1002 | /* Zero-extension of an undefined value is partly defined (it's |
1003 | completely undefined for sign-extension, though). So if there exists | |
1004 | a path from the entry to this zero-extension that leaves this register | |
1005 | uninitialized, removing the extension could change the behavior of | |
1006 | correct programs. So first, check it is not the case. */ | |
1007 | if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg))) | |
1008 | { | |
1009 | if (dump_file) | |
1010 | { | |
1011 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1012 | print_rtl_single (dump_file, insn); | |
1013 | fprintf (dump_file, " because it can operate on uninitialized" | |
1014 | " data\n"); | |
1015 | } | |
1016 | return; | |
1017 | } | |
1018 | ||
1019 | /* Second, make sure we can get all the reaching definitions. */ | |
1020 | defs = get_defs (insn, reg, NULL); | |
282bc7b4 | 1021 | if (!defs) |
0acba2b4 | 1022 | { |
282bc7b4 EB |
1023 | if (dump_file) |
1024 | { | |
1025 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
089dacc5 | 1026 | print_rtl_single (dump_file, insn); |
282bc7b4 EB |
1027 | fprintf (dump_file, " because of missing definition(s)\n"); |
1028 | } | |
1029 | return; | |
0acba2b4 | 1030 | } |
282bc7b4 | 1031 | |
524d9b4b | 1032 | /* Third, make sure the reaching definitions don't feed another and |
282bc7b4 EB |
1033 | different extension. FIXME: this obviously can be improved. */ |
1034 | for (def = defs; def; def = def->next) | |
c3284718 | 1035 | if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))]) |
860dadcb | 1036 | && idx != -1U |
9771b263 | 1037 | && (cand = &(*insn_list)[idx - 1]) |
ca3f371f | 1038 | && cand->code != code) |
282bc7b4 EB |
1039 | { |
1040 | if (dump_file) | |
1041 | { | |
1042 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
089dacc5 | 1043 | print_rtl_single (dump_file, insn); |
282bc7b4 EB |
1044 | fprintf (dump_file, " because of other extension\n"); |
1045 | } | |
1046 | return; | |
1047 | } | |
860dadcb JJ |
1048 | /* For vector mode extensions, ensure that all uses of the |
1049 | XEXP (src, 0) register are the same extension (both code | |
1050 | and to which mode), as unlike integral extensions lowpart | |
1051 | subreg of the sign/zero extended register are not equal | |
1052 | to the original register, so we have to change all uses or | |
1053 | none. */ | |
1054 | else if (VECTOR_MODE_P (GET_MODE (XEXP (src, 0)))) | |
1055 | { | |
1056 | if (idx == 0) | |
1057 | { | |
1058 | struct df_link *ref_chain, *ref_link; | |
1059 | ||
1060 | ref_chain = DF_REF_CHAIN (def->ref); | |
1061 | for (ref_link = ref_chain; ref_link; ref_link = ref_link->next) | |
1062 | { | |
1063 | if (ref_link->ref == NULL | |
1064 | || DF_REF_INSN_INFO (ref_link->ref) == NULL) | |
1065 | { | |
1066 | idx = -1U; | |
1067 | break; | |
1068 | } | |
1069 | rtx_insn *use_insn = DF_REF_INSN (ref_link->ref); | |
1070 | const_rtx use_set; | |
1071 | if (use_insn == insn || DEBUG_INSN_P (use_insn)) | |
1072 | continue; | |
1073 | if (!(use_set = single_set (use_insn)) | |
1074 | || !REG_P (SET_DEST (use_set)) | |
1075 | || GET_MODE (SET_DEST (use_set)) != GET_MODE (dest) | |
1076 | || GET_CODE (SET_SRC (use_set)) != code | |
1077 | || !rtx_equal_p (XEXP (SET_SRC (use_set), 0), | |
1078 | XEXP (src, 0))) | |
1079 | { | |
1080 | idx = -1U; | |
1081 | break; | |
1082 | } | |
1083 | } | |
1084 | if (idx == -1U) | |
1085 | def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; | |
1086 | } | |
1087 | if (idx == -1U) | |
1088 | { | |
1089 | if (dump_file) | |
1090 | { | |
1091 | fprintf (dump_file, "Cannot eliminate extension:\n"); | |
1092 | print_rtl_single (dump_file, insn); | |
1093 | fprintf (dump_file, | |
1094 | " because some vector uses aren't extension\n"); | |
1095 | } | |
1096 | return; | |
1097 | } | |
1098 | } | |
282bc7b4 EB |
1099 | |
1100 | /* Then add the candidate to the list and insert the reaching definitions | |
1101 | into the definition map. */ | |
f32682ca | 1102 | ext_cand e = {expr, code, mode, insn}; |
9771b263 DN |
1103 | insn_list->safe_push (e); |
1104 | idx = insn_list->length (); | |
282bc7b4 EB |
1105 | |
1106 | for (def = defs; def; def = def->next) | |
c3284718 | 1107 | def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx; |
0acba2b4 EB |
1108 | } |
1109 | } | |
1110 | ||
26cd9add | 1111 | /* Traverse the instruction stream looking for extensions and return the |
0acba2b4 | 1112 | list of candidates. */ |
87a0ebfd | 1113 | |
9771b263 | 1114 | static vec<ext_cand> |
26cd9add | 1115 | find_removable_extensions (void) |
87a0ebfd | 1116 | { |
6e1aa848 | 1117 | vec<ext_cand> insn_list = vNULL; |
0acba2b4 | 1118 | basic_block bb; |
59a0c032 DM |
1119 | rtx_insn *insn; |
1120 | rtx set; | |
68c8a824 | 1121 | unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid); |
524d9b4b PMR |
1122 | bitmap_head init, kill, gen, tmp; |
1123 | ||
1124 | bitmap_initialize (&init, NULL); | |
1125 | bitmap_initialize (&kill, NULL); | |
1126 | bitmap_initialize (&gen, NULL); | |
1127 | bitmap_initialize (&tmp, NULL); | |
87a0ebfd | 1128 | |
11cd3bed | 1129 | FOR_EACH_BB_FN (bb, cfun) |
524d9b4b PMR |
1130 | { |
1131 | bitmap_copy (&init, DF_MIR_IN (bb)); | |
1132 | bitmap_clear (&kill); | |
1133 | bitmap_clear (&gen); | |
87a0ebfd | 1134 | |
524d9b4b PMR |
1135 | FOR_BB_INSNS (bb, insn) |
1136 | { | |
1137 | if (NONDEBUG_INSN_P (insn)) | |
1138 | { | |
1139 | set = single_set (insn); | |
1140 | if (set != NULL_RTX) | |
1141 | add_removable_extension (set, insn, &insn_list, def_map, | |
1142 | &init); | |
1143 | df_mir_simulate_one_insn (bb, insn, &kill, &gen); | |
1144 | bitmap_ior_and_compl (&tmp, &gen, &init, &kill); | |
1145 | bitmap_copy (&init, &tmp); | |
1146 | } | |
1147 | } | |
1148 | } | |
0acba2b4 | 1149 | |
089dacc5 | 1150 | XDELETEVEC (def_map); |
282bc7b4 | 1151 | |
089dacc5 | 1152 | return insn_list; |
87a0ebfd ST |
1153 | } |
1154 | ||
1155 | /* This is the main function that checks the insn stream for redundant | |
26cd9add | 1156 | extensions and tries to remove them if possible. */ |
87a0ebfd | 1157 | |
282bc7b4 | 1158 | static void |
26cd9add | 1159 | find_and_remove_re (void) |
87a0ebfd | 1160 | { |
282bc7b4 | 1161 | ext_cand *curr_cand; |
59a0c032 | 1162 | rtx_insn *curr_insn = NULL; |
282bc7b4 | 1163 | int num_re_opportunities = 0, num_realized = 0, i; |
9771b263 | 1164 | vec<ext_cand> reinsn_list; |
59a0c032 DM |
1165 | auto_vec<rtx_insn *> reinsn_del_list; |
1166 | auto_vec<rtx_insn *> reinsn_copy_list; | |
b57cca0b | 1167 | ext_state state; |
87a0ebfd ST |
1168 | |
1169 | /* Construct DU chain to get all reaching definitions of each | |
26cd9add | 1170 | extension instruction. */ |
7b19209f | 1171 | df_set_flags (DF_RD_PRUNE_DEAD_DEFS); |
87a0ebfd | 1172 | df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN); |
524d9b4b | 1173 | df_mir_add_problem (); |
87a0ebfd | 1174 | df_analyze (); |
b57cca0b | 1175 | df_set_flags (DF_DEFER_INSN_RESCAN); |
87a0ebfd ST |
1176 | |
1177 | max_insn_uid = get_max_uid (); | |
26cd9add | 1178 | reinsn_list = find_removable_extensions (); |
9771b263 DN |
1179 | state.defs_list.create (0); |
1180 | state.copies_list.create (0); | |
1181 | state.modified_list.create (0); | |
1182 | state.work_list.create (0); | |
1183 | if (reinsn_list.is_empty ()) | |
b57cca0b JJ |
1184 | state.modified = NULL; |
1185 | else | |
1186 | state.modified = XCNEWVEC (struct ext_modified, max_insn_uid); | |
87a0ebfd | 1187 | |
9771b263 | 1188 | FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand) |
87a0ebfd | 1189 | { |
26cd9add | 1190 | num_re_opportunities++; |
87a0ebfd | 1191 | |
282bc7b4 | 1192 | /* Try to combine the extension with the definition. */ |
87a0ebfd ST |
1193 | if (dump_file) |
1194 | { | |
282bc7b4 EB |
1195 | fprintf (dump_file, "Trying to eliminate extension:\n"); |
1196 | print_rtl_single (dump_file, curr_cand->insn); | |
87a0ebfd ST |
1197 | } |
1198 | ||
089dacc5 | 1199 | if (combine_reaching_defs (curr_cand, curr_cand->expr, &state)) |
87a0ebfd ST |
1200 | { |
1201 | if (dump_file) | |
282bc7b4 | 1202 | fprintf (dump_file, "Eliminated the extension.\n"); |
87a0ebfd | 1203 | num_realized++; |
059742a4 JL |
1204 | /* If the RHS of the current candidate is not (extend (reg)), then |
1205 | we do not allow the optimization of extensions where | |
1206 | the source and destination registers do not match. Thus | |
1207 | checking REG_P here is correct. */ | |
1208 | if (REG_P (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0)) | |
1209 | && (REGNO (SET_DEST (PATTERN (curr_cand->insn))) | |
1210 | != REGNO (XEXP (SET_SRC (PATTERN (curr_cand->insn)), 0)))) | |
3c92da90 JL |
1211 | { |
1212 | reinsn_copy_list.safe_push (curr_cand->insn); | |
1213 | reinsn_copy_list.safe_push (state.defs_list[0]); | |
1214 | } | |
1215 | reinsn_del_list.safe_push (curr_cand->insn); | |
b57cca0b | 1216 | state.modified[INSN_UID (curr_cand->insn)].deleted = 1; |
87a0ebfd ST |
1217 | } |
1218 | } | |
1219 | ||
3c92da90 JL |
1220 | /* The copy list contains pairs of insns which describe copies we |
1221 | need to insert into the INSN stream. | |
1222 | ||
1223 | The first insn in each pair is the extension insn, from which | |
1224 | we derive the source and destination of the copy. | |
1225 | ||
1226 | The second insn in each pair is the memory reference where the | |
1227 | extension will ultimately happen. We emit the new copy | |
1228 | immediately after this insn. | |
1229 | ||
1230 | It may first appear that the arguments for the copy are reversed. | |
1231 | Remember that the memory reference will be changed to refer to the | |
1232 | destination of the extention. So we're actually emitting a copy | |
1233 | from the new destination to the old destination. */ | |
1234 | for (unsigned int i = 0; i < reinsn_copy_list.length (); i += 2) | |
1235 | { | |
59a0c032 DM |
1236 | rtx_insn *curr_insn = reinsn_copy_list[i]; |
1237 | rtx_insn *def_insn = reinsn_copy_list[i + 1]; | |
a6a2d67b JL |
1238 | |
1239 | /* Use the mode of the destination of the defining insn | |
1240 | for the mode of the copy. This is necessary if the | |
1241 | defining insn was used to eliminate a second extension | |
1242 | that was wider than the first. */ | |
1243 | rtx sub_rtx = *get_sub_rtx (def_insn); | |
3c92da90 | 1244 | rtx pat = PATTERN (curr_insn); |
a6a2d67b | 1245 | rtx new_dst = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), |
3c92da90 | 1246 | REGNO (XEXP (SET_SRC (pat), 0))); |
a6a2d67b JL |
1247 | rtx new_src = gen_rtx_REG (GET_MODE (SET_DEST (sub_rtx)), |
1248 | REGNO (SET_DEST (pat))); | |
f7df4a84 | 1249 | rtx set = gen_rtx_SET (new_dst, new_src); |
a6a2d67b | 1250 | emit_insn_after (set, def_insn); |
3c92da90 JL |
1251 | } |
1252 | ||
26cd9add | 1253 | /* Delete all useless extensions here in one sweep. */ |
9771b263 | 1254 | FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn) |
0acba2b4 | 1255 | delete_insn (curr_insn); |
87a0ebfd | 1256 | |
9771b263 | 1257 | reinsn_list.release (); |
9771b263 DN |
1258 | state.defs_list.release (); |
1259 | state.copies_list.release (); | |
1260 | state.modified_list.release (); | |
1261 | state.work_list.release (); | |
b57cca0b | 1262 | XDELETEVEC (state.modified); |
87a0ebfd | 1263 | |
26cd9add | 1264 | if (dump_file && num_re_opportunities > 0) |
282bc7b4 EB |
1265 | fprintf (dump_file, "Elimination opportunities = %d realized = %d\n", |
1266 | num_re_opportunities, num_realized); | |
87a0ebfd ST |
1267 | } |
1268 | ||
26cd9add | 1269 | /* Find and remove redundant extensions. */ |
87a0ebfd ST |
1270 | |
1271 | static unsigned int | |
26cd9add | 1272 | rest_of_handle_ree (void) |
87a0ebfd | 1273 | { |
26cd9add EI |
1274 | timevar_push (TV_REE); |
1275 | find_and_remove_re (); | |
1276 | timevar_pop (TV_REE); | |
87a0ebfd ST |
1277 | return 0; |
1278 | } | |
1279 | ||
27a4cd48 DM |
1280 | namespace { |
1281 | ||
1282 | const pass_data pass_data_ree = | |
87a0ebfd | 1283 | { |
27a4cd48 DM |
1284 | RTL_PASS, /* type */ |
1285 | "ree", /* name */ | |
1286 | OPTGROUP_NONE, /* optinfo_flags */ | |
27a4cd48 DM |
1287 | TV_REE, /* tv_id */ |
1288 | 0, /* properties_required */ | |
1289 | 0, /* properties_provided */ | |
1290 | 0, /* properties_destroyed */ | |
1291 | 0, /* todo_flags_start */ | |
3bea341f | 1292 | TODO_df_finish, /* todo_flags_finish */ |
87a0ebfd | 1293 | }; |
27a4cd48 DM |
1294 | |
1295 | class pass_ree : public rtl_opt_pass | |
1296 | { | |
1297 | public: | |
c3284718 RS |
1298 | pass_ree (gcc::context *ctxt) |
1299 | : rtl_opt_pass (pass_data_ree, ctxt) | |
27a4cd48 DM |
1300 | {} |
1301 | ||
1302 | /* opt_pass methods: */ | |
1a3d085c | 1303 | virtual bool gate (function *) { return (optimize > 0 && flag_ree); } |
be55bfe6 | 1304 | virtual unsigned int execute (function *) { return rest_of_handle_ree (); } |
27a4cd48 DM |
1305 | |
1306 | }; // class pass_ree | |
1307 | ||
1308 | } // anon namespace | |
1309 | ||
1310 | rtl_opt_pass * | |
1311 | make_pass_ree (gcc::context *ctxt) | |
1312 | { | |
1313 | return new pass_ree (ctxt); | |
1314 | } |