/* Decompose multiword subregs.
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "machmode.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42 #include "lower-subreg.h"
43
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so that it can be
   used directly in C expressions below (target headers merely define
   or leave it undefined).  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif

/* Declare a heap-allocated vector of bitmaps; used for
   reg_copy_graph below.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);
53
/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
   modes that are twice the width of word_mode.  The latter could be
   generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
84
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static VEC(bitmap,heap) *reg_copy_graph;

/* Per-target profitability data computed by init_lower_subreg;
   the structure itself is declared in lower-subreg.h.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for fields of the current target's structure.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
113
/* Scratch RTXes used while computing costs; the cost routines below
   repeatedly overwrite their codes, modes and operands instead of
   allocating fresh RTL for every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
129
/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    enum machine_mode mode, int op1)
{
  /* Rewrite the scratch shift RTX in place to describe the requested
     operation, then ask the target for its cost.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, speed_p);
}
143
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the full shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* After splitting, a shift by exactly BITS_PER_WORD becomes a
	 plain word move; larger amounts additionally need a word-mode
	 shift by the remainder I.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost,
		 word_move_zero_cost);

      /* The split form also has to move a zero into the other word,
	 hence the WORD_MOVE_ZERO_COST term.  */
      if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
	splitting[i] = true;
    }
}
179
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   stored in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For each mode wider than a word, decide whether a move in that
     mode is at least as expensive as moving the words separately.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Work out which double-word shift amounts are profitable to
	 split, separately for left and logical-right shifts.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
256
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch RTL reused by all the cost queries.  The
     register numbers start at FIRST_PSEUDO_REGISTER so that they are
     not hard registers.  */
  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Compute the size-oriented and the speed-oriented choices.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
284
285 static bool
286 simple_move_operand (rtx x)
287 {
288 if (GET_CODE (x) == SUBREG)
289 x = SUBREG_REG (x);
290
291 if (!OBJECT_P (x))
292 return false;
293
294 if (GET_CODE (x) == LABEL_REF
295 || GET_CODE (x) == SYMBOL_REF
296 || GET_CODE (x) == HIGH
297 || GET_CODE (x) == CONST)
298 return false;
299
300 if (MEM_P (x)
301 && (MEM_VOLATILE_P (x)
302 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
303 return false;
304
305 return true;
306 }
307
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx insn, bool speed_p)
{
  rtx x;
  rtx set;
  enum machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the two recognized operands and
     must be something we can handle.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_SRC (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only handle modes the cost analysis decided are profitable to
     split.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
365
366 /* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
369
370 static bool
371 find_pseudo_copy (rtx set)
372 {
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
377
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
380
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
385
386 b = VEC_index (bitmap, reg_copy_graph, rs);
387 if (b == NULL)
388 {
389 b = BITMAP_ALLOC (NULL);
390 VEC_replace (bitmap, reg_copy_graph, rs, b);
391 }
392
393 bitmap_set_bit (b, rd);
394
395 return true;
396 }
397
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Standard worklist fixed point: QUEUE holds registers whose copy
     targets still need examining; PROPAGATE collects the registers
     newly reached on each iteration.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = VEC_index (bitmap, reg_copy_graph, i);
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Requeue only registers not already known to be decomposable,
	 then merge the new ones into the result.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
435
/* A pointer to one of these values is passed to
   find_decomposable_subregs via for_each_rtx; it classifies the insn
   whose operands are being scanned.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
448
/* This is called via for_each_rtx.  If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.  Returns -1 to tell
   for_each_rtx not to recurse into the current sub-rtx, 0 to let it
   continue.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy we want to decompose, and the mode is right,
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case DECOMPOSABLE_SIMPLE_MOVE:
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}
564
565 /* Decompose REGNO into word-sized components. We smash the REG node
566 in place. This ensures that (1) something goes wrong quickly if we
567 fail to make some replacement, and (2) the debug information inside
568 the symbol table is automatically kept up to date. */
569
570 static void
571 decompose_register (unsigned int regno)
572 {
573 rtx reg;
574 unsigned int words, i;
575 rtvec v;
576
577 reg = regno_reg_rtx[regno];
578
579 regno_reg_rtx[regno] = NULL_RTX;
580
581 words = GET_MODE_SIZE (GET_MODE (reg));
582 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
583
584 v = rtvec_alloc (words);
585 for (i = 0; i < words; ++i)
586 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
587
588 PUT_CODE (reg, CONCATN);
589 XVEC (reg, 0) = v;
590
591 if (dump_file)
592 {
593 fprintf (dump_file, "; Splitting reg %u ->", regno);
594 for (i = 0; i < words; ++i)
595 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
596 fputc ('\n', dump_file);
597 }
598 }
599
/* Get a SUBREG of a CONCATN.  OP is the CONCATN, OUTERMODE the mode
   of the piece wanted, and BYTE its byte offset into OP.  Returns
   NULL_RTX if the requested piece straddles two parts of the
   CONCATN.  */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Locate the part of the CONCATN containing the requested byte.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the piece does not lie entirely within a single part.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
639
/* Wrapper around simplify_gen_subreg which handles CONCATN.  OP is
   the rtx to take a piece of, INNERMODE its mode, OUTERMODE the mode
   of the piece and BYTE its offset within OP.  */

static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
			     enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 mode change: look through it and take
	 the piece of the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* OP straddles a part boundary, but the combined offset may
	     still select a single part of the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
700
701 /* Return whether we should resolve X into the registers into which it
702 was decomposed. */
703
704 static bool
705 resolve_reg_p (rtx x)
706 {
707 return GET_CODE (x) == CONCATN;
708 }
709
710 /* Return whether X is a SUBREG of a register which we need to
711 resolve. */
712
713 static bool
714 resolve_subreg_p (rtx x)
715 {
716 if (GET_CODE (x) != SUBREG)
717 return false;
718 return resolve_reg_p (SUBREG_REG (x));
719 }
720
/* This is called via for_each_rtx.  Look for SUBREGs which need to be
   decomposed.  DATA is the insn being changed, or NULL_RTX when
   scanning a note or an address.  */

static int
resolve_subreg_use (rtx *px, void *data)
{
  rtx insn = (rtx) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      /* It is possible for a note to contain a reference which we can
	 decompose.  In this case, return 1 to the caller to indicate
	 that the note must be removed.  */
      if (!x)
	{
	  gcc_assert (!insn);
	  return 1;
	}

      /* Queue the replacement as part of a change group; the caller
	 is responsible for calling apply_change_group.  */
      validate_change (insn, px, x, 1);
      return -1;
    }

  if (resolve_reg_p (x))
    {
      /* Return 1 to the caller to indicate that we found a direct
	 reference to a register which is being decomposed.  This can
	 happen inside notes, multiword shift or zero-extend
	 instructions.  */
      return 1;
    }

  return 0;
}
762
/* This is called via for_each_rtx.  Look for SUBREGs which can be
   decomposed and decomposed REGs that need copying.  */

static int
adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				   SUBREG_BYTE (x));

      /* If the subreg could be simplified, install the result;
	 otherwise fall through with a copy of the original rtx, which
	 the resolve_reg_p check below then stores.  */
      if (x)
	*px = x;
      else
	x = copy_rtx (*px);
    }

  if (resolve_reg_p (x))
    /* A direct reference to a decomposed register: store a copy of
       the rtx rather than the shared CONCATN node itself.  */
    *px = copy_rtx (x);

  return 0;
}
790
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx insn)
{
  rtx *pnote, note;

  /* Handle any REG_EQUAL/REG_EQUIV note first: if it refers directly
     to a decomposed register the note must go; if subregs inside it
     were rewritten, the df note information needs rescanning.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD and REG_UNUSED notes that refer to a register
     which no longer exists as a single unit.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
834
835 /* Return whether X can be decomposed into subwords. */
836
837 static bool
838 can_decompose_p (rtx x)
839 {
840 if (REG_P (x))
841 {
842 unsigned int regno = REGNO (x);
843
844 if (HARD_REGISTER_NUM_P (regno))
845 {
846 unsigned int byte, num_bytes;
847
848 num_bytes = GET_MODE_SIZE (GET_MODE (x));
849 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
850 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
851 return false;
852 return true;
853 }
854 else
855 return !bitmap_bit_p (subreg_context, regno);
856 }
857
858 return true;
859 }
860
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      /* Move SRC into a fresh register first, then resolve that move
	 recursively.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
	for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Switch to the integer mode of the same size so the words
	     can be moved individually.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Which word is pushed first depends on endianness and on the
	 direction of stack growth.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the full destination first, before writing it word by
	 word.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If a temporary stood in for the real destination, copy it over
     and resolve that final move recursively.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}
1069
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite INSN in place to clobber just the first word...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ...and emit separate CLOBBERs for the remaining words after it.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1110
1111 /* A USE of a decomposed register is no longer meaningful. Return
1112 whether we changed something. */
1113
1114 static bool
1115 resolve_use (rtx pat, rtx insn)
1116 {
1117 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1118 {
1119 delete_insn (insn);
1120 return true;
1121 }
1122
1123 resolve_reg_notes (insn);
1124
1125 return false;
1126 }
1127
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx insn)
{
  /* Walk the whole pattern of the debug insn, replacing uses of
     decomposed registers in place.  */
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  /* The pattern was modified above, so refresh the dataflow info.  */
  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1139
/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only ASHIFT, LSHIFTRT and ZERO_EXTEND sources are candidates.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Both the destination and the shifted/extended operand must be
     pseudo registers, and the operation itself must be in the
     double-word mode.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Only word_mode -> twice_word_mode extensions are handled, and
	 only when the cost analysis said splitting zero_extend pays
	 off for this optimization goal.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the per-shift-amount table for this shift direction.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : choices[speed_p].splitting_lshiftrt);
      /* The shift amount must be a constant of at least one full word
	 (so one result word is simply zero) and enabled in the table,
	 which is indexed by the amount beyond BITS_PER_WORD.  */
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      /* For shifts the (multiword) source register is decomposed too;
	 for zero_extend the source is only word_mode, so there is
	 nothing to decompose there.  */
      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1193
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_zero;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;

  /* On big-endian targets the word numbering of a multiword source is
     reversed; a word_mode zero_extend source has only one word, so no
     adjustment is needed then.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* OFFSET1 addresses the destination word that receives the (possibly
     shifted) source word; OFFSET2 addresses the other destination word,
     which is set to zero.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                           GET_MODE (SET_DEST (set)),
                                           offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is a pure word move; larger
	 counts still need a word_mode shift by the residual amount.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, 1);
    }

  /* expand_shift may have produced its result directly in DEST_REG, in
     which case no extra move is needed.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1287
1288 /* Print to dump_file a description of what we're doing with shift code CODE.
1289 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1290
1291 static void
1292 dump_shift_choices (enum rtx_code code, bool *splitting)
1293 {
1294 int i;
1295 const char *sep;
1296
1297 fprintf (dump_file,
1298 " Splitting mode %s for %s lowering with shift amounts = ",
1299 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1300 sep = "";
1301 for (i = 0; i < BITS_PER_WORD; i++)
1302 if (splitting[i])
1303 {
1304 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1305 sep = ",";
1306 }
1307 fprintf (dump_file, "\n");
1308 }
1309
1310 /* Print to dump_file a description of what we're doing when optimizing
1311 for speed or size; SPEED_P says which. DESCRIPTION is a description
1312 of the SPEED_P choice. */
1313
1314 static void
1315 dump_choices (bool speed_p, const char *description)
1316 {
1317 unsigned int i;
1318
1319 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1320
1321 for (i = 0; i < MAX_MACHINE_MODE; i++)
1322 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1323 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1324 choices[speed_p].move_modes_to_split[i]
1325 ? "Splitting"
1326 : "Skipping",
1327 GET_MODE_NAME ((enum machine_mode) i));
1328
1329 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1330 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1331 GET_MODE_NAME (twice_word_mode));
1332
1333 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1334 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1335 fprintf (dump_file, "\n");
1336 }
1337
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* When dataflow info is available, defer per-insn rescans until the
     end and first run a word-level dead code elimination pass.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* reg_copy_graph maps each pseudo to the set of pseudos it is copied
     from; entries start out NULL and are filled lazily.  */
  reg_copy_graph = VEC_alloc (bitmap, heap, max);
  VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
  memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);

  /* Analysis walk: classify every insn and record which multiword
     pseudos may (decomposable_context) or may not
     (non_decomposable_context) be decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in a non-decomposable context wins over any
     decomposable uses.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* sub_blocks records basic blocks that end up with a control
	 flow insn in their middle and must be split afterwards.  */
      sub_blocks = sbitmap_alloc (last_basic_block);
      sbitmap_zero (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Transformation walk: rewrite every insn that references a
	 decomposed register.  */
      FOR_EACH_BB (bb)
	{
	  rtx insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    SET_BIT (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Fix up any remaining SUBREG uses of decomposed
		     registers among the insn's operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    for_each_rtx (recog_data.operand_loc[i],
				  resolve_subreg_use,
				  insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matching (duplicate) operands in sync with
			 the operands they duplicate before committing
			 the queued changes.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx insn, end;
	  edge fallthru;

	  bb = BASIC_BLOCK (i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release per-pseudo copy bitmaps and the pass-global bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  VEC_free (bitmap, heap, reg_copy_graph);

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1636 \f
1637 /* Gate function for lower subreg pass. */
1638
1639 static bool
1640 gate_handle_lower_subreg (void)
1641 {
1642 return flag_split_wide_types != 0;
1643 }
1644
/* Implement first lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  /* First pass: do not treat pseudo-to-pseudo copies as decomposable
     (decompose_copies == false).  */
  decompose_multiword_subregs (false);
  return 0;
}
1653
/* Implement second lower subreg pass.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  /* Second pass: also treat pseudo-to-pseudo copies as decomposable
     (decompose_copies == true).  */
  decompose_multiword_subregs (true);
  return 0;
}
1662
/* Descriptor for the first lower-subreg pass ("subreg1"), which runs
   decompose_multiword_subregs without decomposing copies.  */
struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",	                        /* name */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg,          /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};
1682
/* Descriptor for the second lower-subreg pass ("subreg2"), which also
   decomposes pseudo-to-pseudo copies and finalizes dataflow info.  */
struct rtl_opt_pass pass_lower_subreg2 =
{
 {
  RTL_PASS,
  "subreg2",	                        /* name */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg2,         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};