]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
gcc/
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Richard Henderson <rth@redhat.com>
5 Ian Lance Taylor <iant@google.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "machmode.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "timevar.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "obstack.h"
34 #include "basic-block.h"
35 #include "recog.h"
36 #include "bitmap.h"
37 #include "dce.h"
38 #include "expr.h"
39 #include "except.h"
40 #include "regs.h"
41 #include "tree-pass.h"
42 #include "df.h"
43 #include "lower-subreg.h"
44
45 #ifdef STACK_GROWS_DOWNWARD
46 # undef STACK_GROWS_DOWNWARD
47 # define STACK_GROWS_DOWNWARD 1
48 #else
49 # define STACK_GROWS_DOWNWARD 0
50 #endif
51
52 DEF_VEC_P (bitmap);
53 DEF_VEC_ALLOC_P (bitmap,heap);
54
55 /* Decompose multi-word pseudo-registers into individual
56 pseudo-registers when possible and profitable. This is possible
57 when all the uses of a multi-word register are via SUBREG, or are
58 copies of the register to another location. Breaking apart the
59 register permits more CSE and permits better register allocation.
60 This is profitable if the machine does not have move instructions
61 to do this.
62
63 This pass only splits moves with modes that are wider than
64 word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
65 modes that are twice the width of word_mode. The latter could be
66 generalized if there was a need to do this, but the trend in
67 architectures is to not need this.
68
69 There are two useful preprocessor defines for use by maintainers:
70
71 #define LOG_COSTS 1
72
73 if you wish to see the actual cost estimates that are being used
74 for each mode wider than word mode and the cost estimates for zero
75 extension and the shifts. This can be useful when port maintainers
76 are tuning insn rtx costs.
77
78 #define FORCE_LOWERING 1
79
80 if you wish to test the pass with all the transformation forced on.
81 This can be useful for finding bugs in the transformations. */
82
/* Maintainer knobs (see the comment above): set LOG_COSTS to 1 to dump
   the cost comparisons to stderr, FORCE_LOWERING to 1 to force every
   transformation on regardless of cost.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Used by propagate_pseudo_copies to flood
   decomposability through register copies.  */
static VEC(bitmap,heap) *reg_copy_graph;

/* Per-target state (cost decisions); switchable targets each get
   their own copy.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Convenience accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
114
/* Scratch RTXes used while computing costs.  These are built once by
   init_lower_subreg and then mutated in place (codes and modes are
   overwritten) by the cost routines, to avoid reallocating RTL for
   every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
130
131 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
132 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
133
134 static int
135 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
136 enum machine_mode mode, int op1)
137 {
138 PUT_CODE (rtxes->shift, code);
139 PUT_MODE (rtxes->shift, mode);
140 PUT_MODE (rtxes->source, mode);
141 XEXP (rtxes->shift, 1) = GEN_INT (op1);
142 return set_src_cost (rtxes->shift, speed_p);
143 }
144
145 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
146 to true if it is profitable to split a double-word CODE shift
147 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
148 for speed or size profitability.
149
150 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
151 the cost of moving zero into a word-mode register. WORD_MOVE_COST
152 is the cost of moving between word registers. */
153
154 static void
155 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
156 bool *splitting, enum rtx_code code,
157 int word_move_zero_cost, int word_move_cost)
158 {
159 int wide_cost, narrow_cost, i;
160
161 for (i = 0; i < BITS_PER_WORD; i++)
162 {
163 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
164 i + BITS_PER_WORD);
165 if (i == 0)
166 narrow_cost = word_move_cost;
167 else
168 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
169
170 if (LOG_COSTS)
171 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
172 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
173 i + BITS_PER_WORD, wide_cost, narrow_cost,
174 word_move_zero_cost);
175
176 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
177 splitting[i] = true;
178 }
179 }
180
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  Results are stored
   in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: a word-mode move from zero and a word-mode
     register-to-register move.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a move in that mode is
     worth splitting into FACTOR word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count profitability tables.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
257
258 /* Do one-per-target initialisation. This involves determining
259 which operations on the machine are profitable. If none are found,
260 then the pass just returns when called. */
261
262 void
263 init_lower_subreg (void)
264 {
265 struct cost_rtxes rtxes;
266
267 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
268
269 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
270
271 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
272 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
273 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
274 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
275 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
276
277 if (LOG_COSTS)
278 fprintf (stderr, "\nSize costs\n==========\n\n");
279 compute_costs (false, &rtxes);
280
281 if (LOG_COSTS)
282 fprintf (stderr, "\nSpeed costs\n===========\n\n");
283 compute_costs (true, &rtxes);
284 }
285
286 static bool
287 simple_move_operand (rtx x)
288 {
289 if (GET_CODE (x) == SUBREG)
290 x = SUBREG_REG (x);
291
292 if (!OBJECT_P (x))
293 return false;
294
295 if (GET_CODE (x) == LABEL_REF
296 || GET_CODE (x) == SYMBOL_REF
297 || GET_CODE (x) == HIGH
298 || GET_CODE (x) == CONST)
299 return false;
300
301 if (MEM_P (x)
302 && (MEM_VOLATILE_P (x)
303 || mode_dependent_address_p (XEXP (x, 0))))
304 return false;
305
306 return true;
307 }
308
309 /* If INSN is a single set between two objects that we want to split,
310 return the single set. SPEED_P says whether we are optimizing
311 INSN for speed or size.
312
313 INSN should have been passed to recog and extract_insn before this
314 is called. */
315
316 static rtx
317 simple_move (rtx insn, bool speed_p)
318 {
319 rtx x;
320 rtx set;
321 enum machine_mode mode;
322
323 if (recog_data.n_operands != 2)
324 return NULL_RTX;
325
326 set = single_set (insn);
327 if (!set)
328 return NULL_RTX;
329
330 x = SET_DEST (set);
331 if (x != recog_data.operand[0] && x != recog_data.operand[1])
332 return NULL_RTX;
333 if (!simple_move_operand (x))
334 return NULL_RTX;
335
336 x = SET_SRC (set);
337 if (x != recog_data.operand[0] && x != recog_data.operand[1])
338 return NULL_RTX;
339 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
340 things like x86 rdtsc which returns a DImode value. */
341 if (GET_CODE (x) != ASM_OPERANDS
342 && !simple_move_operand (x))
343 return NULL_RTX;
344
345 /* We try to decompose in integer modes, to avoid generating
346 inefficient code copying between integer and floating point
347 registers. That means that we can't decompose if this is a
348 non-integer mode for which there is no integer mode of the same
349 size. */
350 mode = GET_MODE (SET_SRC (set));
351 if (!SCALAR_INT_MODE_P (mode)
352 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
353 == BLKmode))
354 return NULL_RTX;
355
356 /* Reject PARTIAL_INT modes. They are used for processor specific
357 purposes and it's probably best not to tamper with them. */
358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
359 return NULL_RTX;
360
361 if (!choices[speed_p].move_modes_to_split[(int) mode])
362 return NULL_RTX;
363
364 return set;
365 }
366
367 /* If SET is a copy from one multi-word pseudo-register to another,
368 record that in reg_copy_graph. Return whether it is such a
369 copy. */
370
371 static bool
372 find_pseudo_copy (rtx set)
373 {
374 rtx dest = SET_DEST (set);
375 rtx src = SET_SRC (set);
376 unsigned int rd, rs;
377 bitmap b;
378
379 if (!REG_P (dest) || !REG_P (src))
380 return false;
381
382 rd = REGNO (dest);
383 rs = REGNO (src);
384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
385 return false;
386
387 b = VEC_index (bitmap, reg_copy_graph, rs);
388 if (b == NULL)
389 {
390 b = BITMAP_ALLOC (NULL);
391 VEC_replace (bitmap, reg_copy_graph, rs, b);
392 }
393
394 bitmap_set_bit (b, rd);
395
396 return true;
397 }
398
399 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
400 where they are copied to another register, add the register to
401 which they are copied to DECOMPOSABLE_CONTEXT. Use
402 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
404
405 static void
406 propagate_pseudo_copies (void)
407 {
408 bitmap queue, propagate;
409
410 queue = BITMAP_ALLOC (NULL);
411 propagate = BITMAP_ALLOC (NULL);
412
413 bitmap_copy (queue, decomposable_context);
414 do
415 {
416 bitmap_iterator iter;
417 unsigned int i;
418
419 bitmap_clear (propagate);
420
421 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
422 {
423 bitmap b = VEC_index (bitmap, reg_copy_graph, i);
424 if (b)
425 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
426 }
427
428 bitmap_and_compl (queue, propagate, decomposable_context);
429 bitmap_ior_into (decomposable_context, propagate);
430 }
431 while (!bitmap_empty_p (queue));
432
433 BITMAP_FREE (queue);
434 BITMAP_FREE (propagate);
435 }
436
/* Classification of the insn currently being scanned; a pointer to
   one of these values is passed to find_decomposable_subregs via
   for_each_rtx.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move from one pseudo-register to another.  */
  SIMPLE_PSEUDO_REG_MOVE,
  /* A simple move involving a non-pseudo-register.  */
  SIMPLE_MOVE
};
449
/* This is called via for_each_rtx.  If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.

   Return values follow the for_each_rtx protocol: 0 to continue
   into subexpressions, -1 to skip the subexpressions of this RTX.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      /* Hard registers are never decomposed; skip the inner REG.  */
      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy from one pseudo-register to another, and the mode is right
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case SIMPLE_PSEUDO_REG_MOVE:
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}
565
566 /* Decompose REGNO into word-sized components. We smash the REG node
567 in place. This ensures that (1) something goes wrong quickly if we
568 fail to make some replacement, and (2) the debug information inside
569 the symbol table is automatically kept up to date. */
570
571 static void
572 decompose_register (unsigned int regno)
573 {
574 rtx reg;
575 unsigned int words, i;
576 rtvec v;
577
578 reg = regno_reg_rtx[regno];
579
580 regno_reg_rtx[regno] = NULL_RTX;
581
582 words = GET_MODE_SIZE (GET_MODE (reg));
583 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
584
585 v = rtvec_alloc (words);
586 for (i = 0; i < words; ++i)
587 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
588
589 PUT_CODE (reg, CONCATN);
590 XVEC (reg, 0) = v;
591
592 if (dump_file)
593 {
594 fprintf (dump_file, "; Splitting reg %u ->", regno);
595 for (i = 0; i < words; ++i)
596 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
597 fputc ('\n', dump_file);
598 }
599 }
600
/* Get a SUBREG of a CONCATN: return the OUTERMODE piece of OP that
   starts at offset BYTE.  Returns NULL_RTX if the requested piece
   straddles two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Size of each element of the CONCATN, and the element containing
     the requested byte offset.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the piece would cross an element boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
640
/* Wrapper around simplify_gen_subreg which handles CONCATN: return an
   OUTERMODE rtx for the piece of OP (whose mode is INNERMODE) at
   offset BYTE, looking through CONCATNs and SUBREGs of CONCATNs.  */

static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
			     enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Simple same-size mode change at offset 0: recurse directly on
	 the underlying CONCATN.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The outer subreg straddled CONCATN elements; combine the
	     two offsets and extract directly from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
701
702 /* Return whether we should resolve X into the registers into which it
703 was decomposed. */
704
705 static bool
706 resolve_reg_p (rtx x)
707 {
708 return GET_CODE (x) == CONCATN;
709 }
710
711 /* Return whether X is a SUBREG of a register which we need to
712 resolve. */
713
714 static bool
715 resolve_subreg_p (rtx x)
716 {
717 if (GET_CODE (x) != SUBREG)
718 return false;
719 return resolve_reg_p (SUBREG_REG (x));
720 }
721
722 /* This is called via for_each_rtx. Look for SUBREGs which need to be
723 decomposed. */
724
725 static int
726 resolve_subreg_use (rtx *px, void *data)
727 {
728 rtx insn = (rtx) data;
729 rtx x = *px;
730
731 if (x == NULL_RTX)
732 return 0;
733
734 if (resolve_subreg_p (x))
735 {
736 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
737 SUBREG_BYTE (x));
738
739 /* It is possible for a note to contain a reference which we can
740 decompose. In this case, return 1 to the caller to indicate
741 that the note must be removed. */
742 if (!x)
743 {
744 gcc_assert (!insn);
745 return 1;
746 }
747
748 validate_change (insn, px, x, 1);
749 return -1;
750 }
751
752 if (resolve_reg_p (x))
753 {
754 /* Return 1 to the caller to indicate that we found a direct
755 reference to a register which is being decomposed. This can
756 happen inside notes, multiword shift or zero-extend
757 instructions. */
758 return 1;
759 }
760
761 return 0;
762 }
763
764 /* This is called via for_each_rtx. Look for SUBREGs which can be
765 decomposed and decomposed REGs that need copying. */
766
767 static int
768 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
769 {
770 rtx x = *px;
771
772 if (x == NULL_RTX)
773 return 0;
774
775 if (resolve_subreg_p (x))
776 {
777 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
778 SUBREG_BYTE (x));
779
780 if (x)
781 *px = x;
782 else
783 x = copy_rtx (*px);
784 }
785
786 if (resolve_reg_p (x))
787 *px = copy_rtx (x);
788
789 return 0;
790 }
791
/* Resolve any decomposed registers which appear in register notes on
   INSN.  REG_EQUAL/REG_EQUIV notes that mention something we cannot
   rewrite are removed; REG_DEAD/REG_UNUSED notes for decomposed
   registers are unlinked.  */

static void
resolve_reg_notes (rtx insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      /* resolve_subreg_use returns nonzero if the note contains a
	 reference it could not rewrite.  */
      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Walk the note chain through a pointer-to-pointer so deletion is a
     single link update.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  /* Liveness notes for a register we split are meaningless.  */
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
835
836 /* Return whether X can be decomposed into subwords. */
837
838 static bool
839 can_decompose_p (rtx x)
840 {
841 if (REG_P (x))
842 {
843 unsigned int regno = REGNO (x);
844
845 if (HARD_REGISTER_NUM_P (regno))
846 {
847 unsigned int byte, num_bytes;
848
849 num_bytes = GET_MODE_SIZE (GET_MODE (x));
850 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
851 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
852 return false;
853 return true;
854 }
855 else
856 return !bitmap_bit_p (subreg_context, regno);
857 }
858
859 return true;
860 }
861
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves that replaces it (INSN is deleted in that
   case).  May recurse for the fix-up moves it emits.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      /* Copy SRC into a fresh register first, then recurse to resolve
	 that intermediate move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Switch to the same-sized integer mode; asserted above
	     (via simple_move) that one exists.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order so that successive pushes lay the value
	 out in memory correctly.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so the word-by-word
	 stores are not treated as partial updates of a live value.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we moved through a temporary, emit (and recursively resolve)
     the final copy to the real destination.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}
1070
1071 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1072 component registers. Return whether we changed something. */
1073
1074 static bool
1075 resolve_clobber (rtx pat, rtx insn)
1076 {
1077 rtx reg;
1078 enum machine_mode orig_mode;
1079 unsigned int words, i;
1080 int ret;
1081
1082 reg = XEXP (pat, 0);
1083 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1084 return false;
1085
1086 orig_mode = GET_MODE (reg);
1087 words = GET_MODE_SIZE (orig_mode);
1088 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1089
1090 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1091 simplify_gen_subreg_concatn (word_mode, reg,
1092 orig_mode, 0),
1093 0);
1094 df_insn_rescan (insn);
1095 gcc_assert (ret != 0);
1096
1097 for (i = words - 1; i > 0; --i)
1098 {
1099 rtx x;
1100
1101 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1102 i * UNITS_PER_WORD);
1103 x = gen_rtx_CLOBBER (VOIDmode, x);
1104 emit_insn_after (x, insn);
1105 }
1106
1107 resolve_reg_notes (insn);
1108
1109 return true;
1110 }
1111
1112 /* A USE of a decomposed register is no longer meaningful. Return
1113 whether we changed something. */
1114
1115 static bool
1116 resolve_use (rtx pat, rtx insn)
1117 {
1118 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1119 {
1120 delete_insn (insn);
1121 return true;
1122 }
1123
1124 resolve_reg_notes (insn);
1125
1126 return false;
1127 }
1128
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx insn)
{
  /* Walk the debug insn's pattern; adjust_decomposed_uses (defined
     elsewhere in this file) presumably rewrites references to
     decomposed registers in place -- confirm against its definition.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  /* The pattern may have changed, so let DF rescan the insn.  */
  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1140
1141 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1142 set the decomposable_context bitmap accordingly. SPEED_P is true
1143 if we are optimizing INSN for speed rather than size. Return true
1144 if INSN is decomposable. */
1145
1146 static bool
1147 find_decomposable_shift_zext (rtx insn, bool speed_p)
1148 {
1149 rtx set;
1150 rtx op;
1151 rtx op_operand;
1152
1153 set = single_set (insn);
1154 if (!set)
1155 return false;
1156
1157 op = SET_SRC (set);
1158 if (GET_CODE (op) != ASHIFT
1159 && GET_CODE (op) != LSHIFTRT
1160 && GET_CODE (op) != ZERO_EXTEND)
1161 return false;
1162
1163 op_operand = XEXP (op, 0);
1164 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1165 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1166 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1167 || GET_MODE (op) != twice_word_mode)
1168 return false;
1169
1170 if (GET_CODE (op) == ZERO_EXTEND)
1171 {
1172 if (GET_MODE (op_operand) != word_mode
1173 || !choices[speed_p].splitting_zext)
1174 return false;
1175 }
1176 else /* left or right shift */
1177 {
1178 bool *splitting = (GET_CODE (op) == ASHIFT
1179 ? choices[speed_p].splitting_ashift
1180 : choices[speed_p].splitting_lshiftrt);
1181 if (!CONST_INT_P (XEXP (op, 1))
1182 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1183 2 * BITS_PER_WORD - 1)
1184 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1185 return false;
1186
1187 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1188 }
1189
1190 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1191
1192 return true;
1193 }
1194
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_zero;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;

  /* On big-endian targets the word numbering is reversed, but only
     when the operand is itself wider than a word (a zero_extend
     operand is exactly word_mode and has a single word).  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word that receives the data;
     offset2 addresses the other destination word, which is zeroed.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                           GET_MODE (SET_DEST (set)),
                                           offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD reduces to a plain word move;
	 only a larger amount needs this residual word_mode shift.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
                                LSHIFT_EXPR : RSHIFT_EXPR,
                                word_mode, src_reg,
                                shift_count - BITS_PER_WORD,
                                dest_reg, 1);
    }

  /* expand_shift may have expanded directly into dest_reg, in which
     case the move would be redundant.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
        fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1288
1289 /* Print to dump_file a description of what we're doing with shift code CODE.
1290 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1291
1292 static void
1293 dump_shift_choices (enum rtx_code code, bool *splitting)
1294 {
1295 int i;
1296 const char *sep;
1297
1298 fprintf (dump_file,
1299 " Splitting mode %s for %s lowering with shift amounts = ",
1300 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1301 sep = "";
1302 for (i = 0; i < BITS_PER_WORD; i++)
1303 if (splitting[i])
1304 {
1305 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1306 sep = ",";
1307 }
1308 fprintf (dump_file, "\n");
1309 }
1310
1311 /* Print to dump_file a description of what we're doing when optimizing
1312 for speed or size; SPEED_P says which. DESCRIPTION is a description
1313 of the SPEED_P choice. */
1314
1315 static void
1316 dump_choices (bool speed_p, const char *description)
1317 {
1318 unsigned int i;
1319
1320 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1321
1322 for (i = 0; i < MAX_MACHINE_MODE; i++)
1323 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1324 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1325 choices[speed_p].move_modes_to_split[i]
1326 ? "Splitting"
1327 : "Skipping",
1328 GET_MODE_NAME ((enum machine_mode) i));
1329
1330 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1331 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1332 GET_MODE_NAME (twice_word_mode));
1333
1334 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1335 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1336 fprintf (dump_file, "\n");
1337 }
1338
/* Look for registers which are always accessed via word-sized SUBREGs
   or via copies.  Decompose these registers into several word-sized
   pseudo-registers.

   The pass works in two phases over all insns: a first scan that
   classifies each multi-word pseudo as decomposable or not (filling
   decomposable_context / non_decomposable_context), then -- if any
   candidates remain -- a second pass that rewrites every insn
   mentioning a decomposed register and finally repairs the CFG where
   rewritten loads introduced extra trapping insns.  */

static void
decompose_multiword_subregs (void)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* With DF available, run word-level dead code elimination first and
     defer insn rescans until the pass is done rewriting.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* reg_copy_graph maps each pseudo to the set of pseudos it is copied
     to/from; entries start out NULL (zeroed below).  */
  reg_copy_graph = VEC_alloc (bitmap, heap, max);
  VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
  memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn, classifying moves and recording which
     pseudos may (or must not) be decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  /* Bare CLOBBERs and USEs never force a register to stay
	     whole, so they are ignored during classification.  */
	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      if (find_pseudo_copy (set))
		cmi = SIMPLE_PSEUDO_REG_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in a non-decomposable context must stay whole.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* sub_blocks records blocks that end up with control flow insns
	 in their middle and must be split afterwards.  */
      sub_blocks = sbitmap_alloc (last_basic_block);
      sbitmap_zero (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  bool speed_p;

	  speed_p = optimize_bb_for_speed_p (bb);
	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    SET_BIT (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Rewrite SUBREG uses of decomposed registers in all
		     operands of the (possibly replaced) insn.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  /* If any operand changed, re-propagate each matched
		     duplicate from its operand and commit everything as
		     one change group.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the contexts.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  VEC_free (bitmap, heap, reg_copy_graph);

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1632 \f
1633 /* Gate function for lower subreg pass. */
1634
1635 static bool
1636 gate_handle_lower_subreg (void)
1637 {
1638 return flag_split_wide_types != 0;
1639 }
1640
/* Implement first lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  decompose_multiword_subregs ();
  /* No additional TODO flags beyond those in the pass descriptor.  */
  return 0;
}
1649
/* Implement second lower subreg pass.  Identical body to the first
   pass; only the pass descriptors below differ (subreg2 requests
   TODO_df_finish and TODO_verify_rtl_sharing).  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  decompose_multiword_subregs ();
  /* No additional TODO flags beyond those in the pass descriptor.  */
  return 0;
}
1658
/* Descriptor for the first lower-subreg pass ("subreg1").  Unlike
   "subreg2" below, it does not request TODO_df_finish or
   TODO_verify_rtl_sharing in its finish flags.  */

struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",	                        /* name */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg,          /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};
1678
/* Descriptor for the second lower-subreg pass ("subreg2").  Shares the
   gate with "subreg1" but additionally requests TODO_df_finish and
   TODO_verify_rtl_sharing when it completes.  */

struct rtl_opt_pass pass_lower_subreg2 =
{
 {
  RTL_PASS,
  "subreg2",	                        /* name */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg2,         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};