]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/ree.c
[multiple changes]
[thirdparty/gcc.git] / gcc / ree.c
CommitLineData
26cd9add 1/* Redundant Extension Elimination pass for the GNU compiler.
282bc7b4
EB
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
3 Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
26cd9add 4
282bc7b4
EB
5 Based on the Redundant Zero-extension elimination pass contributed by
6 Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
87a0ebfd
ST
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify it under
11the terms of the GNU General Public License as published by the Free
12Software Foundation; either version 3, or (at your option) any later
13version.
14
15GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16WARRANTY; without even the implied warranty of MERCHANTABILITY or
17FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3. If not see
22<http://www.gnu.org/licenses/>. */
23
24
25/* Problem Description :
26 --------------------
26cd9add
EI
27 This pass is intended to remove redundant extension instructions.
28 Such instructions appear for different reasons. We expect some of
29 them due to implicit zero-extension in 64-bit registers after writing
30 to their lower 32-bit half (e.g. for the x86-64 architecture).
31 Another possible reason is a type cast which follows a load (for
32 instance a register restore) and which can be combined into a single
33 instruction, and for which earlier local passes, e.g. the combiner,
34 weren't able to optimize.
87a0ebfd
ST
35
36 How does this pass work ?
37 --------------------------
38
39 This pass is run after register allocation. Hence, all registers that
26cd9add
EI
40 this pass deals with are hard registers. This pass first looks for an
41 extension instruction that could possibly be redundant. Such extension
42 instructions show up in RTL with the pattern :
43 (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
44 where x can be any hard register.
87a0ebfd 45 Now, this pass tries to eliminate this instruction by merging the
26cd9add 46 extension with the definitions of register x. For instance, if
87a0ebfd
ST
47 one of the definitions of register x was :
48 (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
26cd9add
EI
49 followed by extension :
50 (set (reg:DI x) (zero_extend:DI (reg:SI x)))
87a0ebfd
ST
51 then the combination converts this into :
52 (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
53 If all the merged definitions are recognizable assembly instructions,
26cd9add
EI
54 the extension is effectively eliminated.
55
56 For example, for the x86-64 architecture, implicit zero-extensions
57 are captured with appropriate patterns in the i386.md file. Hence,
58 these merged definition can be matched to a single assembly instruction.
59 The original extension instruction is then deleted if all the
60 definitions can be merged.
87a0ebfd
ST
61
62 However, there are cases where the definition instruction cannot be
26cd9add
EI
63 merged with an extension. Examples are CALL instructions. In such
64 cases, the original extension is not redundant and this pass does
87a0ebfd
ST
65 not delete it.
66
67 Handling conditional moves :
68 ----------------------------
69
26cd9add
EI
70 Architectures like x86-64 support conditional moves whose semantics for
71 extension differ from the other instructions. For instance, the
87a0ebfd
ST
72 instruction *cmov ebx, eax*
73 zero-extends eax onto rax only when the move from ebx to eax happens.
282bc7b4 74 Otherwise, eax may not be zero-extended. Consider conditional moves as
87a0ebfd
ST
75 RTL instructions of the form
76 (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
26cd9add 77 This pass tries to merge an extension with a conditional move by
282bc7b4 78 actually merging the definitions of y and z with an extension and then
87a0ebfd
ST
79 converting the conditional move into :
80 (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
26cd9add
EI
81 Since registers y and z are extended, register x will also be extended
82 after the conditional move. Note that this step has to be done
83 transitively since the definition of a conditional copy can be
87a0ebfd
ST
84 another conditional copy.
85
86 Motivating Example I :
87 ---------------------
88 For this program :
89 **********************************************
90 bad_code.c
91
92 int mask[1000];
93
94 int foo(unsigned x)
95 {
96 if (x < 10)
97 x = x * 45;
98 else
99 x = x * 78;
100 return mask[x];
101 }
102 **********************************************
103
26cd9add 104 $ gcc -O2 bad_code.c
87a0ebfd
ST
105 ........
106 400315: b8 4e 00 00 00 mov $0x4e,%eax
107 40031a: 0f af f8 imul %eax,%edi
282bc7b4 108 40031d: 89 ff mov %edi,%edi - useless extension
87a0ebfd
ST
109 40031f: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
110 400326: c3 retq
111 ......
112 400330: ba 2d 00 00 00 mov $0x2d,%edx
113 400335: 0f af fa imul %edx,%edi
282bc7b4 114 400338: 89 ff mov %edi,%edi - useless extension
87a0ebfd
ST
115 40033a: 8b 04 bd 60 19 40 00 mov 0x401960(,%rdi,4),%eax
116 400341: c3 retq
117
26cd9add 118 $ gcc -O2 -free bad_code.c
87a0ebfd
ST
119 ......
120 400315: 6b ff 4e imul $0x4e,%edi,%edi
121 400318: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
122 40031f: c3 retq
123 400320: 6b ff 2d imul $0x2d,%edi,%edi
124 400323: 8b 04 bd 40 19 40 00 mov 0x401940(,%rdi,4),%eax
125 40032a: c3 retq
126
127 Motivating Example II :
128 ---------------------
129
130 Here is an example with a conditional move.
131
132 For this program :
133 **********************************************
134
135 unsigned long long foo(unsigned x , unsigned y)
136 {
137 unsigned z;
138 if (x > 100)
139 z = x + y;
140 else
141 z = x - y;
142 return (unsigned long long)(z);
143 }
144
26cd9add 145 $ gcc -O2 bad_code.c
87a0ebfd
ST
146 ............
147 400360: 8d 14 3e lea (%rsi,%rdi,1),%edx
148 400363: 89 f8 mov %edi,%eax
149 400365: 29 f0 sub %esi,%eax
150 400367: 83 ff 65 cmp $0x65,%edi
151 40036a: 0f 43 c2 cmovae %edx,%eax
282bc7b4 152 40036d: 89 c0 mov %eax,%eax - useless extension
87a0ebfd
ST
153 40036f: c3 retq
154
26cd9add 155 $ gcc -O2 -free bad_code.c
87a0ebfd
ST
156 .............
157 400360: 89 fa mov %edi,%edx
158 400362: 8d 04 3e lea (%rsi,%rdi,1),%eax
159 400365: 29 f2 sub %esi,%edx
160 400367: 83 ff 65 cmp $0x65,%edi
161 40036a: 89 d6 mov %edx,%esi
162 40036c: 48 0f 42 c6 cmovb %rsi,%rax
163 400370: c3 retq
164
26cd9add
EI
165 Motivating Example III :
166 ---------------------
167
168 Here is an example with a type cast.
169
170 For this program :
171 **********************************************
172
173 void test(int size, unsigned char *in, unsigned char *out)
174 {
175 int i;
176 unsigned char xr, xg, xy=0;
177
178 for (i = 0; i < size; i++) {
179 xr = *in++;
180 xg = *in++;
181 xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
182 *out++ = xy;
183 }
184 }
185
186 $ gcc -O2 bad_code.c
187 ............
188 10: 0f b6 0e movzbl (%rsi),%ecx
189 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
190 17: 48 83 c6 02 add $0x2,%rsi
282bc7b4
EB
191 1b: 0f b6 c9 movzbl %cl,%ecx - useless extension
192 1e: 0f b6 c0 movzbl %al,%eax - useless extension
26cd9add
EI
193 21: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
194 27: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
195
196 $ gcc -O2 -free bad_code.c
197 .............
198 10: 0f b6 0e movzbl (%rsi),%ecx
199 13: 0f b6 46 01 movzbl 0x1(%rsi),%eax
200 17: 48 83 c6 02 add $0x2,%rsi
201 1b: 69 c9 8b 4c 00 00 imul $0x4c8b,%ecx,%ecx
202 21: 69 c0 46 96 00 00 imul $0x9646,%eax,%eax
87a0ebfd
ST
203
204 Usefulness :
205 ----------
206
26cd9add
EI
207 The original redundant zero-extension elimination pass reported reduction
208 of the dynamic instruction count of a compression benchmark by 2.8% and
209 improvement of its run time by about 1%.
87a0ebfd 210
26cd9add
EI
211 The additional performance gain with the enhanced pass is mostly expected
212 on in-order architectures where redundancy cannot be compensated by out of
213 order execution. Measurements showed up to 10% performance gain (reduced
214 run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
215 gain 1%. */
87a0ebfd
ST
216
217
218#include "config.h"
219#include "system.h"
220#include "coretypes.h"
221#include "tm.h"
222#include "rtl.h"
223#include "tree.h"
224#include "tm_p.h"
225#include "flags.h"
226#include "regs.h"
227#include "hard-reg-set.h"
228#include "basic-block.h"
229#include "insn-config.h"
230#include "function.h"
231#include "expr.h"
232#include "insn-attr.h"
233#include "recog.h"
718f9c0f 234#include "diagnostic-core.h"
87a0ebfd
ST
235#include "target.h"
236#include "timevar.h"
237#include "optabs.h"
238#include "insn-codes.h"
239#include "rtlhooks-def.h"
240/* Include output.h for dump_file. */
241#include "output.h"
242#include "params.h"
243#include "timevar.h"
244#include "tree-pass.h"
245#include "df.h"
246#include "cgraph.h"
247
282bc7b4 248/* This structure represents a candidate for elimination. */
87a0ebfd 249
282bc7b4 250typedef struct GTY(()) ext_cand
87a0ebfd 251{
282bc7b4
EB
252 /* The expression. */
253 const_rtx expr;
87a0ebfd 254
282bc7b4
EB
255 /* The kind of extension. */
256 enum rtx_code code;
26cd9add 257
282bc7b4
EB
258 /* The destination mode. */
259 enum machine_mode mode;
260
261 /* The instruction where it lives. */
26cd9add 262 rtx insn;
282bc7b4 263} ext_cand;
26cd9add
EI
264
265DEF_VEC_O(ext_cand);
266DEF_VEC_ALLOC_O(ext_cand, heap);
267
87a0ebfd
ST
268static int max_insn_uid;
269
26cd9add
EI
270/* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
271 and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
272 this code modifies the SET rtx to a new SET rtx that extends the
273 right hand expression into a register on the left hand side. Note
274 that multiple assumptions are made about the nature of the set that
275 needs to be true for this to work and is called from merge_def_and_ext.
87a0ebfd
ST
276
277 Original :
26cd9add 278 (set (reg a) (expression))
87a0ebfd
ST
279
280 Transform :
282bc7b4 281 (set (reg a) (any_extend (expression)))
87a0ebfd
ST
282
283 Special Cases :
282bc7b4 284 If the expression is a constant or another extension, then directly
26cd9add 285 assign it to the register. */
87a0ebfd
ST
286
287static bool
282bc7b4 288combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
87a0ebfd 289{
282bc7b4
EB
290 rtx orig_src = SET_SRC (*orig_set);
291 rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
292 rtx new_set;
87a0ebfd 293
282bc7b4
EB
294 /* Merge constants by directly moving the constant into the register under
295 some conditions. Recall that RTL constants are sign-extended. */
26cd9add 296 if (GET_CODE (orig_src) == CONST_INT
282bc7b4 297 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
26cd9add 298 {
282bc7b4
EB
299 if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
300 new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
87a0ebfd 301 else
26cd9add
EI
302 {
303 /* Zero-extend the negative constant by masking out the bits outside
304 the source mode. */
305 enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
282bc7b4 306 rtx new_const_int
26cd9add 307 = GEN_INT (INTVAL (orig_src) & GET_MODE_MASK (src_mode));
282bc7b4 308 new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
26cd9add
EI
309 }
310 }
311 else if (GET_MODE (orig_src) == VOIDmode)
312 {
282bc7b4 313 /* This is mostly due to a call insn that should not be optimized. */
26cd9add 314 return false;
87a0ebfd 315 }
282bc7b4 316 else if (GET_CODE (orig_src) == cand->code)
87a0ebfd 317 {
282bc7b4
EB
318 /* Here is a sequence of two extensions. Try to merge them. */
319 rtx temp_extension
320 = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
321 rtx simplified_temp_extension = simplify_rtx (temp_extension);
87a0ebfd
ST
322 if (simplified_temp_extension)
323 temp_extension = simplified_temp_extension;
282bc7b4 324 new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
87a0ebfd
ST
325 }
326 else if (GET_CODE (orig_src) == IF_THEN_ELSE)
327 {
26cd9add 328 /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
87a0ebfd 329 in general, IF_THEN_ELSE should not be combined. */
87a0ebfd
ST
330 return false;
331 }
332 else
333 {
282bc7b4
EB
334 /* This is the normal case. */
335 rtx temp_extension
336 = gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
337 rtx simplified_temp_extension = simplify_rtx (temp_extension);
87a0ebfd
ST
338 if (simplified_temp_extension)
339 temp_extension = simplified_temp_extension;
282bc7b4 340 new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
87a0ebfd
ST
341 }
342
26cd9add 343 /* This change is a part of a group of changes. Hence,
87a0ebfd 344 validate_change will not try to commit the change. */
87a0ebfd
ST
345 if (validate_change (curr_insn, orig_set, new_set, true))
346 {
347 if (dump_file)
348 {
ca10595c
EB
349 fprintf (dump_file,
350 "Tentatively merged extension with definition:\n");
87a0ebfd
ST
351 print_rtl_single (dump_file, curr_insn);
352 }
353 return true;
354 }
282bc7b4 355
87a0ebfd
ST
356 return false;
357}
358
87a0ebfd 359/* Treat if_then_else insns, where the operands of both branches
26cd9add 360 are registers, as copies. For instance,
87a0ebfd
ST
361 Original :
362 (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
363 Transformed :
364 (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
365 DEF_INSN is the if_then_else insn. */
366
367static bool
282bc7b4 368transform_ifelse (ext_cand *cand, rtx def_insn)
87a0ebfd
ST
369{
370 rtx set_insn = PATTERN (def_insn);
371 rtx srcreg, dstreg, srcreg2;
372 rtx map_srcreg, map_dstreg, map_srcreg2;
373 rtx ifexpr;
374 rtx cond;
375 rtx new_set;
376
377 gcc_assert (GET_CODE (set_insn) == SET);
282bc7b4 378
87a0ebfd
ST
379 cond = XEXP (SET_SRC (set_insn), 0);
380 dstreg = SET_DEST (set_insn);
381 srcreg = XEXP (SET_SRC (set_insn), 1);
382 srcreg2 = XEXP (SET_SRC (set_insn), 2);
282bc7b4
EB
383 map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
384 map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
385 map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
386 ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
87a0ebfd
ST
387 new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);
388
389 if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
390 {
391 if (dump_file)
392 {
282bc7b4
EB
393 fprintf (dump_file,
394 "Mode of conditional move instruction extended:\n");
87a0ebfd
ST
395 print_rtl_single (dump_file, def_insn);
396 }
397 return true;
398 }
282bc7b4
EB
399
400 return false;
87a0ebfd
ST
401}
402
282bc7b4
EB
403/* Get all the reaching definitions of an instruction. The definitions are
404 desired for REG used in INSN. Return the definition list or NULL if a
405 definition is missing. If DEST is non-NULL, additionally push the INSN
406 of the definitions onto DEST. */
87a0ebfd 407
282bc7b4
EB
408static struct df_link *
409get_defs (rtx insn, rtx reg, VEC (rtx,heap) **dest)
87a0ebfd 410{
ca10595c 411 df_ref reg_info, *uses;
282bc7b4 412 struct df_link *ref_chain, *ref_link;
87a0ebfd 413
87a0ebfd
ST
414 reg_info = NULL;
415
ca10595c 416 for (uses = DF_INSN_USES (insn); *uses; uses++)
87a0ebfd 417 {
ca10595c 418 reg_info = *uses;
87a0ebfd 419 if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
282bc7b4
EB
420 return NULL;
421 if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
87a0ebfd 422 break;
87a0ebfd
ST
423 }
424
ca10595c 425 gcc_assert (reg_info != NULL && uses != NULL);
87a0ebfd 426
282bc7b4
EB
427 ref_chain = DF_REF_CHAIN (reg_info);
428
429 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
87a0ebfd
ST
430 {
431 /* Problem getting some definition for this instruction. */
282bc7b4
EB
432 if (ref_link->ref == NULL)
433 return NULL;
434 if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
435 return NULL;
87a0ebfd
ST
436 }
437
282bc7b4
EB
438 if (dest)
439 for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
440 VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (ref_link->ref));
87a0ebfd 441
282bc7b4 442 return ref_chain;
87a0ebfd
ST
443}
444
282bc7b4
EB
445/* Return true if INSN is
446 (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
447 and store x1 and x2 in REG_1 and REG_2. */
87a0ebfd 448
282bc7b4
EB
449static bool
450is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
87a0ebfd 451{
282bc7b4 452 rtx expr = single_set (insn);
87a0ebfd 453
282bc7b4
EB
454 if (expr != NULL_RTX
455 && GET_CODE (expr) == SET
87a0ebfd 456 && GET_CODE (SET_DEST (expr)) == REG
87a0ebfd
ST
457 && GET_CODE (SET_SRC (expr)) == IF_THEN_ELSE
458 && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
26cd9add 459 && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
87a0ebfd 460 {
282bc7b4
EB
461 *reg1 = XEXP (SET_SRC (expr), 1);
462 *reg2 = XEXP (SET_SRC (expr), 2);
463 return true;
87a0ebfd
ST
464 }
465
282bc7b4 466 return false;
87a0ebfd
ST
467}
468
26cd9add
EI
469/* Reaching Definitions of the extended register could be conditional copies
470 or regular definitions. This function separates the two types into two
471 lists, DEFS_LIST and COPIES_LIST. This is necessary because, if a reaching
282bc7b4
EB
472 definition is a conditional copy, merging the extension with this definition
473 is wrong. Conditional copies are merged by transitively merging their
26cd9add
EI
474 definitions. The defs_list is populated with all the reaching definitions
475 of the extension instruction (EXTEND_INSN) which must be merged with an
476 extension. The copies_list contains all the conditional moves that will
282bc7b4
EB
477 later be extended into a wider mode conditional move if all the merges are
478 successful. The function returns 0 upon failure, 1 upon success and 2 when
479 all definitions of the EXTEND_INSN have been previously merged. */
87a0ebfd
ST
480
481static int
26cd9add 482make_defs_and_copies_lists (rtx extend_insn, rtx set_pat,
87a0ebfd
ST
483 VEC (rtx,heap) **defs_list,
484 VEC (rtx,heap) **copies_list)
485{
282bc7b4
EB
486 VEC (rtx,heap) *work_list = VEC_alloc (rtx, heap, 8);
487 rtx src_reg = XEXP (SET_SRC (set_pat), 0);
87a0ebfd 488 bool *is_insn_visited;
282bc7b4 489 int ret = 1;
87a0ebfd 490
282bc7b4
EB
491 /* Initialize the work list. */
492 if (!get_defs (extend_insn, src_reg, &work_list))
87a0ebfd
ST
493 {
494 VEC_free (rtx, heap, work_list);
282bc7b4 495 /* The number of defs being equal to zero can only mean that all the
87a0ebfd
ST
496 definitions have been previously merged. */
497 return 2;
498 }
499
282bc7b4 500 is_insn_visited = XCNEWVEC (bool, max_insn_uid);
87a0ebfd
ST
501
502 /* Perform transitive closure for conditional copies. */
282bc7b4 503 while (!VEC_empty (rtx, work_list))
87a0ebfd 504 {
282bc7b4
EB
505 rtx def_insn = VEC_pop (rtx, work_list);
506 rtx reg1, reg2;
507
87a0ebfd
ST
508 gcc_assert (INSN_UID (def_insn) < max_insn_uid);
509
510 if (is_insn_visited[INSN_UID (def_insn)])
282bc7b4 511 continue;
87a0ebfd 512 is_insn_visited[INSN_UID (def_insn)] = true;
87a0ebfd 513
282bc7b4
EB
514 if (is_cond_copy_insn (def_insn, &reg1, &reg2))
515 {
516 /* Push it onto the copy list first. */
517 VEC_safe_push (rtx, heap, *copies_list, def_insn);
518
519 /* Now perform the transitive closure. */
520 if (!get_defs (def_insn, reg1, &work_list)
521 || !get_defs (def_insn, reg2, &work_list))
522 {
523 ret = 0;
524 break;
525 }
87a0ebfd
ST
526 }
527 else
282bc7b4 528 VEC_safe_push (rtx, heap, *defs_list, def_insn);
87a0ebfd
ST
529 }
530
87a0ebfd 531 XDELETEVEC (is_insn_visited);
282bc7b4
EB
532 VEC_free (rtx, heap, work_list);
533
534 return ret;
87a0ebfd
ST
535}
536
282bc7b4 537/* Merge the DEF_INSN with an extension. Calls combine_set_extension
87a0ebfd
ST
538 on the SET pattern. */
539
540static bool
282bc7b4 541merge_def_and_ext (ext_cand *cand, rtx def_insn)
87a0ebfd 542{
26cd9add 543 enum machine_mode ext_src_mode;
87a0ebfd 544 enum rtx_code code;
87a0ebfd
ST
545 rtx *sub_rtx;
546 rtx s_expr;
547 int i;
548
26cd9add 549 ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
87a0ebfd
ST
550 code = GET_CODE (PATTERN (def_insn));
551 sub_rtx = NULL;
552
553 if (code == PARALLEL)
554 {
555 for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
556 {
557 s_expr = XVECEXP (PATTERN (def_insn), 0, i);
558 if (GET_CODE (s_expr) != SET)
559 continue;
560
561 if (sub_rtx == NULL)
562 sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
563 else
564 {
565 /* PARALLEL with multiple SETs. */
566 return false;
567 }
568 }
569 }
570 else if (code == SET)
571 sub_rtx = &PATTERN (def_insn);
572 else
573 {
574 /* It is not a PARALLEL or a SET, what could it be ? */
575 return false;
576 }
577
578 gcc_assert (sub_rtx != NULL);
579
580 if (GET_CODE (SET_DEST (*sub_rtx)) == REG
26cd9add 581 && GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode)
87a0ebfd 582 {
282bc7b4 583 return combine_set_extension (cand, def_insn, sub_rtx);
87a0ebfd 584 }
26cd9add
EI
585
586 return false;
87a0ebfd
ST
587}
588
589/* This function goes through all reaching defs of the source
26cd9add
EI
590 of the candidate for elimination (CAND) and tries to combine
591 the extension with the definition instruction. The changes
592 are made as a group so that even if one definition cannot be
593 merged, all reaching definitions end up not being merged.
594 When a conditional copy is encountered, merging is attempted
595 transitively on its definitions. It returns true upon success
596 and false upon failure. */
87a0ebfd
ST
597
598static bool
282bc7b4 599combine_reaching_defs (ext_cand *cand, rtx set_pat)
87a0ebfd
ST
600{
601 rtx def_insn;
602 bool merge_successful = true;
603 int i;
604 int defs_ix;
605 int outcome;
87a0ebfd 606 VEC (rtx, heap) *defs_list, *copies_list, *vec;
87a0ebfd
ST
607
608 defs_list = VEC_alloc (rtx, heap, 8);
609 copies_list = VEC_alloc (rtx, heap, 8);
610
26cd9add 611 outcome = make_defs_and_copies_lists (cand->insn,
87a0ebfd
ST
612 set_pat, &defs_list, &copies_list);
613
282bc7b4
EB
614 /* outcome == 2 means that all the definitions for this extension have been
615 previously merged when handling other extensions. */
87a0ebfd
ST
616 if (outcome == 2)
617 {
618 VEC_free (rtx, heap, defs_list);
619 VEC_free (rtx, heap, copies_list);
620 if (dump_file)
282bc7b4 621 fprintf (dump_file, "All definitions have been previously merged.\n");
87a0ebfd
ST
622 return true;
623 }
624
625 if (outcome == 0)
626 {
627 VEC_free (rtx, heap, defs_list);
628 VEC_free (rtx, heap, copies_list);
629 return false;
630 }
631
632 merge_successful = true;
633
634 /* Go through the defs vector and try to merge all the definitions
635 in this vector. */
87a0ebfd 636 vec = VEC_alloc (rtx, heap, 8);
ac47786e 637 FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn)
87a0ebfd 638 {
26cd9add 639 if (merge_def_and_ext (cand, def_insn))
87a0ebfd
ST
640 VEC_safe_push (rtx, heap, vec, def_insn);
641 else
642 {
643 merge_successful = false;
644 break;
645 }
646 }
647
648 /* Now go through the conditional copies vector and try to merge all
649 the copies in this vector. */
87a0ebfd
ST
650 if (merge_successful)
651 {
ac47786e 652 FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn)
87a0ebfd 653 {
26cd9add 654 if (transform_ifelse (cand, def_insn))
87a0ebfd
ST
655 {
656 VEC_safe_push (rtx, heap, vec, def_insn);
657 }
658 else
659 {
660 merge_successful = false;
661 break;
662 }
663 }
664 }
665
666 if (merge_successful)
667 {
282bc7b4
EB
668 /* Commit the changes here if possible
669 FIXME: It's an all-or-nothing scenario. Even if only one definition
670 cannot be merged, we entirely give up. In the future, we should allow
671 extensions to be partially eliminated along those paths where the
672 definitions could be merged. */
87a0ebfd
ST
673 if (apply_change_group ())
674 {
675 if (dump_file)
282bc7b4 676 fprintf (dump_file, "All merges were successful.\n");
87a0ebfd
ST
677
678 VEC_free (rtx, heap, vec);
679 VEC_free (rtx, heap, defs_list);
680 VEC_free (rtx, heap, copies_list);
681 return true;
682 }
683 else
684 {
0acba2b4
EB
685 /* Changes need not be cancelled explicitly as apply_change_group
686 does it. Print list of definitions in the dump_file for debug
26cd9add 687 purposes. This extension cannot be deleted. */
87a0ebfd
ST
688 if (dump_file)
689 {
ca10595c
EB
690 fprintf (dump_file,
691 "Merge cancelled, non-mergeable definitions:\n");
692 FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
693 print_rtl_single (dump_file, def_insn);
87a0ebfd
ST
694 }
695 }
696 }
697 else
698 {
699 /* Cancel any changes that have been made so far. */
700 cancel_changes (0);
701 }
702
703 VEC_free (rtx, heap, vec);
704 VEC_free (rtx, heap, defs_list);
705 VEC_free (rtx, heap, copies_list);
282bc7b4 706
87a0ebfd
ST
707 return false;
708}
709
282bc7b4 710/* This structure holds information while walking the RTL stream. */
0acba2b4 711
282bc7b4 712struct re_info
0acba2b4 713{
282bc7b4 714 /* The current insn. */
0acba2b4
EB
715 rtx insn;
716
717 /* The list of candidates. */
26cd9add 718 VEC (ext_cand, heap) *insn_list;
0acba2b4 719
282bc7b4
EB
720 /* The map of definition instructions to candidates. */
721 ext_cand **def_map;
722};
26cd9add
EI
723
724/* Add an extension pattern that could be eliminated. This is called via
725 note_stores from find_removable_extensions. */
0acba2b4
EB
726
727static void
26cd9add 728add_removable_extension (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)
0acba2b4 729{
282bc7b4
EB
730 struct re_info *rei = (struct re_info *)data;
731 enum rtx_code code;
732 enum machine_mode mode;
0acba2b4
EB
733 rtx src, dest;
734
282bc7b4 735 /* We are looking for SET (REG N) (ANY_EXTEND (REG N)). */
0acba2b4
EB
736 if (GET_CODE (expr) != SET)
737 return;
738
739 src = SET_SRC (expr);
282bc7b4 740 code = GET_CODE (src);
0acba2b4 741 dest = SET_DEST (expr);
282bc7b4 742 mode = GET_MODE (dest);
0acba2b4
EB
743
744 if (REG_P (dest)
282bc7b4 745 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
0acba2b4 746 && REG_P (XEXP (src, 0))
0acba2b4
EB
747 && REGNO (dest) == REGNO (XEXP (src, 0)))
748 {
282bc7b4
EB
749 struct df_link *defs, *def;
750 ext_cand *cand;
751
752 /* First, make sure we can get all the reaching definitions. */
753 defs = get_defs (rei->insn, XEXP (src, 0), NULL);
754 if (!defs)
0acba2b4 755 {
282bc7b4
EB
756 if (dump_file)
757 {
758 fprintf (dump_file, "Cannot eliminate extension:\n");
759 print_rtl_single (dump_file, rei->insn);
760 fprintf (dump_file, " because of missing definition(s)\n");
761 }
762 return;
0acba2b4 763 }
282bc7b4
EB
764
765 /* Second, make sure the reaching definitions don't feed another and
766 different extension. FIXME: this obviously can be improved. */
767 for (def = defs; def; def = def->next)
768 if ((cand = rei->def_map[INSN_UID(DF_REF_INSN (def->ref))])
769 && (cand->code != code || cand->mode != mode))
770 {
771 if (dump_file)
772 {
773 fprintf (dump_file, "Cannot eliminate extension:\n");
774 print_rtl_single (dump_file, rei->insn);
775 fprintf (dump_file, " because of other extension\n");
776 }
777 return;
778 }
779
780 /* Then add the candidate to the list and insert the reaching definitions
781 into the definition map. */
782 cand = VEC_safe_push (ext_cand, heap, rei->insn_list, NULL);
783 cand->expr = expr;
784 cand->code = code;
785 cand->mode = mode;
786 cand->insn = rei->insn;
787
788 for (def = defs; def; def = def->next)
789 rei->def_map[INSN_UID(DF_REF_INSN (def->ref))] = cand;
0acba2b4
EB
790 }
791}
792
26cd9add 793/* Traverse the instruction stream looking for extensions and return the
0acba2b4 794 list of candidates. */
87a0ebfd 795
26cd9add
EI
796static VEC (ext_cand, heap)*
797find_removable_extensions (void)
87a0ebfd 798{
282bc7b4 799 struct re_info rei;
0acba2b4
EB
800 basic_block bb;
801 rtx insn;
87a0ebfd 802
26cd9add 803 rei.insn_list = VEC_alloc (ext_cand, heap, 8);
282bc7b4 804 rei.def_map = XCNEWVEC (ext_cand *, max_insn_uid);
87a0ebfd 805
0acba2b4
EB
806 FOR_EACH_BB (bb)
807 FOR_BB_INSNS (bb, insn)
808 {
809 if (!NONDEBUG_INSN_P (insn))
810 continue;
87a0ebfd 811
26cd9add
EI
812 rei.insn = insn;
813 note_stores (PATTERN (insn), add_removable_extension, &rei);
0acba2b4
EB
814 }
815
282bc7b4
EB
816 XDELETEVEC (rei.def_map);
817
26cd9add 818 return rei.insn_list;
87a0ebfd
ST
819}
820
821/* This is the main function that checks the insn stream for redundant
26cd9add 822 extensions and tries to remove them if possible. */
87a0ebfd 823
282bc7b4 824static void
26cd9add 825find_and_remove_re (void)
87a0ebfd 826{
282bc7b4 827 ext_cand *curr_cand;
87a0ebfd 828 rtx curr_insn = NULL_RTX;
282bc7b4 829 int num_re_opportunities = 0, num_realized = 0, i;
26cd9add
EI
830 VEC (ext_cand, heap) *reinsn_list;
831 VEC (rtx, heap) *reinsn_del_list;
87a0ebfd
ST
832
833 /* Construct DU chain to get all reaching definitions of each
26cd9add 834 extension instruction. */
87a0ebfd
ST
835 df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
836 df_analyze ();
837
838 max_insn_uid = get_max_uid ();
26cd9add 839 reinsn_del_list = VEC_alloc (rtx, heap, 4);
26cd9add 840 reinsn_list = find_removable_extensions ();
87a0ebfd 841
282bc7b4 842 FOR_EACH_VEC_ELT (ext_cand, reinsn_list, i, curr_cand)
87a0ebfd 843 {
26cd9add 844 num_re_opportunities++;
87a0ebfd 845
ca10595c
EB
846 /* If the candidate insn is itself a definition insn for another
847 candidate, it may have been modified and the UD chain broken.
848 FIXME: the handling of successive extensions can be improved. */
849 if (!reg_mentioned_p (curr_cand->expr, PATTERN (curr_cand->insn)))
850 continue;
851
282bc7b4 852 /* Try to combine the extension with the definition. */
87a0ebfd
ST
853 if (dump_file)
854 {
282bc7b4
EB
855 fprintf (dump_file, "Trying to eliminate extension:\n");
856 print_rtl_single (dump_file, curr_cand->insn);
87a0ebfd
ST
857 }
858
26cd9add 859 if (combine_reaching_defs (curr_cand, PATTERN (curr_cand->insn)))
87a0ebfd
ST
860 {
861 if (dump_file)
282bc7b4 862 fprintf (dump_file, "Eliminated the extension.\n");
87a0ebfd 863 num_realized++;
26cd9add 864 VEC_safe_push (rtx, heap, reinsn_del_list, curr_cand->insn);
87a0ebfd
ST
865 }
866 }
867
26cd9add 868 /* Delete all useless extensions here in one sweep. */
282bc7b4 869 FOR_EACH_VEC_ELT (rtx, reinsn_del_list, i, curr_insn)
0acba2b4 870 delete_insn (curr_insn);
87a0ebfd 871
26cd9add
EI
872 VEC_free (ext_cand, heap, reinsn_list);
873 VEC_free (rtx, heap, reinsn_del_list);
87a0ebfd 874
26cd9add 875 if (dump_file && num_re_opportunities > 0)
282bc7b4
EB
876 fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
877 num_re_opportunities, num_realized);
87a0ebfd
ST
878
879 df_finish_pass (false);
87a0ebfd
ST
880}
881
26cd9add 882/* Find and remove redundant extensions. */
87a0ebfd
ST
883
884static unsigned int
26cd9add 885rest_of_handle_ree (void)
87a0ebfd 886{
26cd9add
EI
887 timevar_push (TV_REE);
888 find_and_remove_re ();
889 timevar_pop (TV_REE);
87a0ebfd
ST
890 return 0;
891}
892
26cd9add 893/* Run REE pass when flag_ree is set at optimization level > 0. */
87a0ebfd
ST
894
895static bool
26cd9add 896gate_handle_ree (void)
87a0ebfd 897{
26cd9add 898 return (optimize > 0 && flag_ree);
87a0ebfd
ST
899}
900
26cd9add 901struct rtl_opt_pass pass_ree =
87a0ebfd
ST
902{
903 {
904 RTL_PASS,
26cd9add
EI
905 "ree", /* name */
906 gate_handle_ree, /* gate */
907 rest_of_handle_ree, /* execute */
87a0ebfd
ST
908 NULL, /* sub */
909 NULL, /* next */
910 0, /* static_pass_number */
26cd9add 911 TV_REE, /* tv_id */
87a0ebfd
ST
912 0, /* properties_required */
913 0, /* properties_provided */
914 0, /* properties_destroyed */
915 0, /* todo_flags_start */
916 TODO_ggc_collect |
87a0ebfd
ST
917 TODO_verify_rtl_sharing, /* todo_flags_finish */
918 }
919};