]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
ChangeLog:
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
/* Decompose multiword subregs.
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
1a6a0f2a 22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "machmode.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tm_p.h"
1a6a0f2a 30#include "flags.h"
31#include "insn-config.h"
32#include "obstack.h"
33#include "basic-block.h"
34#include "recog.h"
35#include "bitmap.h"
0e8e9be3 36#include "dce.h"
1a6a0f2a 37#include "expr.h"
db1c50be 38#include "except.h"
1a6a0f2a 39#include "regs.h"
40#include "tree-pass.h"
3072d30e 41#include "df.h"
c7944dce 42#include "lower-subreg.h"
1a6a0f2a 43
/* Normalise STACK_GROWS_DOWNWARD so that it is always defined and
   expands to 1 or 0; this lets it be compared against other
   boolean-valued macros in ordinary expressions.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif

/* Instantiate a heap-allocated vector of bitmaps.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);
53
/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
   modes that are twice the width of word_mode.  The latter could be
   generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */
81
82#define LOG_COSTS 0
83#define FORCE_LOWERING 0
1a6a0f2a 84
85/* Bit N in this bitmap is set if regno N is used in a context in
86 which we can decompose it. */
87static bitmap decomposable_context;
88
89/* Bit N in this bitmap is set if regno N is used in a context in
90 which it can not be decomposed. */
91static bitmap non_decomposable_context;
92
5277d36e 93/* Bit N in this bitmap is set if regno N is used in a subreg
94 which changes the mode but not the size. This typically happens
95 when the register accessed as a floating-point value; we want to
96 avoid generating accesses to its subwords in integer modes. */
97static bitmap subreg_context;
98
1a6a0f2a 99/* Bit N in the bitmap in element M of this array is set if there is a
100 copy from reg M to reg N. */
101static VEC(bitmap,heap) *reg_copy_graph;
102
c7944dce 103struct target_lower_subreg default_target_lower_subreg;
104#if SWITCHABLE_TARGET
105struct target_lower_subreg *this_target_lower_subreg
106 = &default_target_lower_subreg;
107#endif
108
109#define twice_word_mode \
110 this_target_lower_subreg->x_twice_word_mode
111#define choices \
112 this_target_lower_subreg->x_choices
113
114/* RTXes used while computing costs. */
115struct cost_rtxes {
116 /* Source and target registers. */
117 rtx source;
118 rtx target;
119
120 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
121 rtx zext;
122
123 /* A shift of SOURCE. */
124 rtx shift;
125
126 /* A SET of TARGET. */
127 rtx set;
128};
129
130/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
131 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
132
133static int
134shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
135 enum machine_mode mode, int op1)
136{
c7944dce 137 PUT_CODE (rtxes->shift, code);
138 PUT_MODE (rtxes->shift, mode);
139 PUT_MODE (rtxes->source, mode);
140 XEXP (rtxes->shift, 1) = GEN_INT (op1);
a6d935b7 141 return set_src_cost (rtxes->shift, speed_p);
c7944dce 142}
143
144/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
145 to true if it is profitable to split a double-word CODE shift
146 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
147 for speed or size profitability.
148
149 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
150 the cost of moving zero into a word-mode register. WORD_MOVE_COST
151 is the cost of moving between word registers. */
152
153static void
154compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
155 bool *splitting, enum rtx_code code,
156 int word_move_zero_cost, int word_move_cost)
157{
158 int wide_cost, narrow_cost, i;
159
160 for (i = 0; i < BITS_PER_WORD; i++)
161 {
162 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
163 i + BITS_PER_WORD);
164 if (i == 0)
165 narrow_cost = word_move_cost;
166 else
167 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
168
169 if (LOG_COSTS)
170 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
171 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
172 i + BITS_PER_WORD, wide_cost, narrow_cost,
173 word_move_zero_cost);
174
175 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
176 splitting[i] = true;
177 }
178}
179
180/* Compute what we should do when optimizing for speed or size; SPEED_P
181 selects which. Use RTXES for computing costs. */
182
183static void
184compute_costs (bool speed_p, struct cost_rtxes *rtxes)
185{
186 unsigned int i;
187 int word_move_zero_cost, word_move_cost;
188
a6d935b7 189 PUT_MODE (rtxes->target, word_mode);
c7944dce 190 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
a6d935b7 191 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 192
193 SET_SRC (rtxes->set) = rtxes->source;
a6d935b7 194 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 195
196 if (LOG_COSTS)
197 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
198 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
199
200 for (i = 0; i < MAX_MACHINE_MODE; i++)
201 {
202 enum machine_mode mode = (enum machine_mode) i;
203 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
204 if (factor > 1)
205 {
206 int mode_move_cost;
207
208 PUT_MODE (rtxes->target, mode);
209 PUT_MODE (rtxes->source, mode);
a6d935b7 210 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 211
212 if (LOG_COSTS)
213 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
214 GET_MODE_NAME (mode), mode_move_cost,
215 word_move_cost, factor);
216
217 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
218 {
219 choices[speed_p].move_modes_to_split[i] = true;
220 choices[speed_p].something_to_do = true;
221 }
222 }
223 }
224
225 /* For the moves and shifts, the only case that is checked is one
226 where the mode of the target is an integer mode twice the width
227 of the word_mode.
228
229 If it is not profitable to split a double word move then do not
230 even consider the shifts or the zero extension. */
231 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
232 {
233 int zext_cost;
234
235 /* The only case here to check to see if moving the upper part with a
236 zero is cheaper than doing the zext itself. */
c7944dce 237 PUT_MODE (rtxes->source, word_mode);
a6d935b7 238 zext_cost = set_src_cost (rtxes->zext, speed_p);
c7944dce 239
240 if (LOG_COSTS)
241 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
242 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
243 zext_cost, word_move_cost, word_move_zero_cost);
244
245 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
246 choices[speed_p].splitting_zext = true;
247
248 compute_splitting_shift (speed_p, rtxes,
249 choices[speed_p].splitting_ashift, ASHIFT,
250 word_move_zero_cost, word_move_cost);
251 compute_splitting_shift (speed_p, rtxes,
252 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
253 word_move_zero_cost, word_move_cost);
254 }
255}
256
257/* Do one-per-target initialisation. This involves determining
258 which operations on the machine are profitable. If none are found,
259 then the pass just returns when called. */
260
261void
262init_lower_subreg (void)
263{
264 struct cost_rtxes rtxes;
265
266 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
267
268 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
269
270 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
271 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
272 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
273 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
274 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
275
276 if (LOG_COSTS)
277 fprintf (stderr, "\nSize costs\n==========\n\n");
278 compute_costs (false, &rtxes);
279
280 if (LOG_COSTS)
281 fprintf (stderr, "\nSpeed costs\n===========\n\n");
282 compute_costs (true, &rtxes);
283}
67c3f580 284
285static bool
286simple_move_operand (rtx x)
287{
288 if (GET_CODE (x) == SUBREG)
289 x = SUBREG_REG (x);
290
291 if (!OBJECT_P (x))
292 return false;
293
294 if (GET_CODE (x) == LABEL_REF
295 || GET_CODE (x) == SYMBOL_REF
ab9eaa97 296 || GET_CODE (x) == HIGH
297 || GET_CODE (x) == CONST)
67c3f580 298 return false;
299
300 if (MEM_P (x)
301 && (MEM_VOLATILE_P (x)
4e27ffd0 302 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
67c3f580 303 return false;
304
305 return true;
306}
307
c7944dce 308/* If INSN is a single set between two objects that we want to split,
309 return the single set. SPEED_P says whether we are optimizing
310 INSN for speed or size.
311
312 INSN should have been passed to recog and extract_insn before this
313 is called. */
1a6a0f2a 314
315static rtx
c7944dce 316simple_move (rtx insn, bool speed_p)
1a6a0f2a 317{
318 rtx x;
319 rtx set;
320 enum machine_mode mode;
321
322 if (recog_data.n_operands != 2)
323 return NULL_RTX;
324
325 set = single_set (insn);
326 if (!set)
327 return NULL_RTX;
328
329 x = SET_DEST (set);
330 if (x != recog_data.operand[0] && x != recog_data.operand[1])
331 return NULL_RTX;
67c3f580 332 if (!simple_move_operand (x))
1a6a0f2a 333 return NULL_RTX;
334
335 x = SET_SRC (set);
336 if (x != recog_data.operand[0] && x != recog_data.operand[1])
337 return NULL_RTX;
67c3f580 338 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
339 things like x86 rdtsc which returns a DImode value. */
340 if (GET_CODE (x) != ASM_OPERANDS
341 && !simple_move_operand (x))
1a6a0f2a 342 return NULL_RTX;
343
344 /* We try to decompose in integer modes, to avoid generating
345 inefficient code copying between integer and floating point
346 registers. That means that we can't decompose if this is a
347 non-integer mode for which there is no integer mode of the same
348 size. */
349 mode = GET_MODE (SET_SRC (set));
350 if (!SCALAR_INT_MODE_P (mode)
351 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
352 == BLKmode))
353 return NULL_RTX;
354
5e016dfc 355 /* Reject PARTIAL_INT modes. They are used for processor specific
356 purposes and it's probably best not to tamper with them. */
357 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
358 return NULL_RTX;
359
c7944dce 360 if (!choices[speed_p].move_modes_to_split[(int) mode])
361 return NULL_RTX;
362
1a6a0f2a 363 return set;
364}
365
366/* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
369
370static bool
371find_pseudo_copy (rtx set)
372{
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
377
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
380
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
385
1a6a0f2a 386 b = VEC_index (bitmap, reg_copy_graph, rs);
387 if (b == NULL)
388 {
389 b = BITMAP_ALLOC (NULL);
390 VEC_replace (bitmap, reg_copy_graph, rs, b);
391 }
392
393 bitmap_set_bit (b, rd);
394
395 return true;
396}
397
398/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
399 where they are copied to another register, add the register to
400 which they are copied to DECOMPOSABLE_CONTEXT. Use
401 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
402 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
403
404static void
405propagate_pseudo_copies (void)
406{
407 bitmap queue, propagate;
408
409 queue = BITMAP_ALLOC (NULL);
410 propagate = BITMAP_ALLOC (NULL);
411
412 bitmap_copy (queue, decomposable_context);
413 do
414 {
415 bitmap_iterator iter;
416 unsigned int i;
417
418 bitmap_clear (propagate);
419
420 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
421 {
422 bitmap b = VEC_index (bitmap, reg_copy_graph, i);
423 if (b)
424 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
425 }
426
427 bitmap_and_compl (queue, propagate, decomposable_context);
428 bitmap_ior_into (decomposable_context, propagate);
429 }
430 while (!bitmap_empty_p (queue));
431
432 BITMAP_FREE (queue);
433 BITMAP_FREE (propagate);
434}
435
/* Classification of the insn being scanned.  A pointer to one of
   these values is passed to find_decomposable_subregs via
   for_each_rtx.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is some other simple move.  */
  SIMPLE_MOVE
};
448
449/* This is called via for_each_rtx. If we find a SUBREG which we
450 could use to decompose a pseudo-register, set a bit in
451 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
452 not a simple pseudo-register copy, DATA will point at the type of
453 move, and we set a bit in DECOMPOSABLE_CONTEXT or
454 NON_DECOMPOSABLE_CONTEXT as appropriate. */
455
456static int
457find_decomposable_subregs (rtx *px, void *data)
458{
459 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
460 rtx x = *px;
461
462 if (x == NULL_RTX)
463 return 0;
464
465 if (GET_CODE (x) == SUBREG)
466 {
467 rtx inner = SUBREG_REG (x);
468 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
469
470 if (!REG_P (inner))
471 return 0;
472
473 regno = REGNO (inner);
474 if (HARD_REGISTER_NUM_P (regno))
475 return -1;
476
477 outer_size = GET_MODE_SIZE (GET_MODE (x));
478 inner_size = GET_MODE_SIZE (GET_MODE (inner));
479 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
480 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
481
482 /* We only try to decompose single word subregs of multi-word
483 registers. When we find one, we return -1 to avoid iterating
484 over the inner register.
485
486 ??? This doesn't allow, e.g., DImode subregs of TImode values
487 on 32-bit targets. We would need to record the way the
488 pseudo-register was used, and only decompose if all the uses
489 were the same number and size of pieces. Hopefully this
490 doesn't happen much. */
491
492 if (outer_words == 1 && inner_words > 1)
493 {
494 bitmap_set_bit (decomposable_context, regno);
495 return -1;
496 }
4e7a1eb8 497
498 /* If this is a cast from one mode to another, where the modes
499 have the same size, and they are not tieable, then mark this
500 register as non-decomposable. If we decompose it we are
501 likely to mess up whatever the backend is trying to do. */
502 if (outer_words > 1
503 && outer_size == inner_size
504 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
505 {
506 bitmap_set_bit (non_decomposable_context, regno);
5277d36e 507 bitmap_set_bit (subreg_context, regno);
4e7a1eb8 508 return -1;
509 }
1a6a0f2a 510 }
67c3f580 511 else if (REG_P (x))
1a6a0f2a 512 {
513 unsigned int regno;
514
515 /* We will see an outer SUBREG before we see the inner REG, so
516 when we see a plain REG here it means a direct reference to
517 the register.
518
519 If this is not a simple copy from one location to another,
520 then we can not decompose this register. If this is a simple
b5ca6624 521 copy we want to decompose, and the mode is right,
1e5b92fa 522 then we mark the register as decomposable.
523 Otherwise we don't say anything about this register --
524 it could be decomposed, but whether that would be
1a6a0f2a 525 profitable depends upon how it is used elsewhere.
526
527 We only set bits in the bitmap for multi-word
528 pseudo-registers, since those are the only ones we care about
529 and it keeps the size of the bitmaps down. */
530
531 regno = REGNO (x);
532 if (!HARD_REGISTER_NUM_P (regno)
533 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
534 {
535 switch (*pcmi)
536 {
537 case NOT_SIMPLE_MOVE:
538 bitmap_set_bit (non_decomposable_context, regno);
539 break;
b5ca6624 540 case DECOMPOSABLE_SIMPLE_MOVE:
1a6a0f2a 541 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
542 bitmap_set_bit (decomposable_context, regno);
543 break;
544 case SIMPLE_MOVE:
545 break;
546 default:
547 gcc_unreachable ();
548 }
549 }
550 }
67c3f580 551 else if (MEM_P (x))
552 {
553 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
554
555 /* Any registers used in a MEM do not participate in a
b5ca6624 556 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
67c3f580 557 here, and return -1 to block the parent's recursion. */
558 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
559 return -1;
560 }
1a6a0f2a 561
562 return 0;
563}
564
565/* Decompose REGNO into word-sized components. We smash the REG node
566 in place. This ensures that (1) something goes wrong quickly if we
567 fail to make some replacement, and (2) the debug information inside
568 the symbol table is automatically kept up to date. */
569
570static void
571decompose_register (unsigned int regno)
572{
573 rtx reg;
574 unsigned int words, i;
575 rtvec v;
576
577 reg = regno_reg_rtx[regno];
578
579 regno_reg_rtx[regno] = NULL_RTX;
1a6a0f2a 580
581 words = GET_MODE_SIZE (GET_MODE (reg));
582 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
583
584 v = rtvec_alloc (words);
585 for (i = 0; i < words; ++i)
586 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
587
588 PUT_CODE (reg, CONCATN);
589 XVEC (reg, 0) = v;
590
591 if (dump_file)
592 {
593 fprintf (dump_file, "; Splitting reg %u ->", regno);
594 for (i = 0; i < words; ++i)
595 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
596 fputc ('\n', dump_file);
597 }
598}
599
600/* Get a SUBREG of a CONCATN. */
601
602static rtx
603simplify_subreg_concatn (enum machine_mode outermode, rtx op,
604 unsigned int byte)
605{
606 unsigned int inner_size;
50bdfec8 607 enum machine_mode innermode, partmode;
1a6a0f2a 608 rtx part;
609 unsigned int final_offset;
610
611 gcc_assert (GET_CODE (op) == CONCATN);
612 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
613
614 innermode = GET_MODE (op);
615 gcc_assert (byte < GET_MODE_SIZE (innermode));
616 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
617
618 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
619 part = XVECEXP (op, 0, byte / inner_size);
50bdfec8 620 partmode = GET_MODE (part);
621
598ffe59 622 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
623 regular CONST_VECTORs. They have vector or integer modes, depending
624 on the capabilities of the target. Cope with them. */
625 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
626 partmode = GET_MODE_INNER (innermode);
627 else if (partmode == VOIDmode)
50bdfec8 628 {
598ffe59 629 enum mode_class mclass = GET_MODE_CLASS (innermode);
630 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
50bdfec8 631 }
632
1a6a0f2a 633 final_offset = byte % inner_size;
634 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
635 return NULL_RTX;
636
50bdfec8 637 return simplify_gen_subreg (outermode, part, partmode, final_offset);
1a6a0f2a 638}
639
640/* Wrapper around simplify_gen_subreg which handles CONCATN. */
641
642static rtx
643simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
644 enum machine_mode innermode, unsigned int byte)
645{
3fa57b79 646 rtx ret;
647
1a6a0f2a 648 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
649 If OP is a SUBREG of a CONCATN, then it must be a simple mode
650 change with the same size and offset 0, or it must extract a
651 part. We shouldn't see anything else here. */
652 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
653 {
654 rtx op2;
655
656 if ((GET_MODE_SIZE (GET_MODE (op))
657 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
658 && SUBREG_BYTE (op) == 0)
659 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
660 GET_MODE (SUBREG_REG (op)), byte);
661
662 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
663 SUBREG_BYTE (op));
664 if (op2 == NULL_RTX)
665 {
666 /* We don't handle paradoxical subregs here. */
667 gcc_assert (GET_MODE_SIZE (outermode)
668 <= GET_MODE_SIZE (GET_MODE (op)));
669 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
670 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
671 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
672 byte + SUBREG_BYTE (op));
673 gcc_assert (op2 != NULL_RTX);
674 return op2;
675 }
676
677 op = op2;
678 gcc_assert (op != NULL_RTX);
679 gcc_assert (innermode == GET_MODE (op));
680 }
3fa57b79 681
1a6a0f2a 682 if (GET_CODE (op) == CONCATN)
683 return simplify_subreg_concatn (outermode, op, byte);
3fa57b79 684
685 ret = simplify_gen_subreg (outermode, op, innermode, byte);
686
687 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
688 resolve_simple_move will ask for the high part of the paradoxical
689 subreg, which does not have a value. Just return a zero. */
690 if (ret == NULL_RTX
691 && GET_CODE (op) == SUBREG
692 && SUBREG_BYTE (op) == 0
693 && (GET_MODE_SIZE (innermode)
694 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
695 return CONST0_RTX (outermode);
696
697 gcc_assert (ret != NULL_RTX);
698 return ret;
1a6a0f2a 699}
700
701/* Return whether we should resolve X into the registers into which it
702 was decomposed. */
703
704static bool
705resolve_reg_p (rtx x)
706{
707 return GET_CODE (x) == CONCATN;
708}
709
710/* Return whether X is a SUBREG of a register which we need to
711 resolve. */
712
713static bool
714resolve_subreg_p (rtx x)
715{
716 if (GET_CODE (x) != SUBREG)
717 return false;
718 return resolve_reg_p (SUBREG_REG (x));
719}
720
721/* This is called via for_each_rtx. Look for SUBREGs which need to be
722 decomposed. */
723
724static int
725resolve_subreg_use (rtx *px, void *data)
726{
727 rtx insn = (rtx) data;
728 rtx x = *px;
729
730 if (x == NULL_RTX)
731 return 0;
732
733 if (resolve_subreg_p (x))
734 {
735 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
736 SUBREG_BYTE (x));
737
738 /* It is possible for a note to contain a reference which we can
739 decompose. In this case, return 1 to the caller to indicate
740 that the note must be removed. */
741 if (!x)
742 {
ccd1ec59 743 gcc_assert (!insn);
1a6a0f2a 744 return 1;
745 }
746
747 validate_change (insn, px, x, 1);
748 return -1;
749 }
750
751 if (resolve_reg_p (x))
752 {
753 /* Return 1 to the caller to indicate that we found a direct
754 reference to a register which is being decomposed. This can
9cf5d19e 755 happen inside notes, multiword shift or zero-extend
756 instructions. */
1a6a0f2a 757 return 1;
758 }
759
760 return 0;
761}
762
9845d120 763/* This is called via for_each_rtx. Look for SUBREGs which can be
764 decomposed and decomposed REGs that need copying. */
765
766static int
767adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
768{
769 rtx x = *px;
770
771 if (x == NULL_RTX)
772 return 0;
773
774 if (resolve_subreg_p (x))
775 {
776 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
777 SUBREG_BYTE (x));
778
779 if (x)
780 *px = x;
781 else
782 x = copy_rtx (*px);
783 }
784
785 if (resolve_reg_p (x))
786 *px = copy_rtx (x);
787
788 return 0;
789}
790
1a6a0f2a 791/* Resolve any decomposed registers which appear in register notes on
792 INSN. */
793
794static void
795resolve_reg_notes (rtx insn)
796{
797 rtx *pnote, note;
798
799 note = find_reg_equal_equiv_note (insn);
800 if (note)
801 {
3072d30e 802 int old_count = num_validated_changes ();
1a6a0f2a 803 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
1e5b92fa 804 remove_note (insn, note);
3072d30e 805 else
806 if (old_count != num_validated_changes ())
807 df_notes_rescan (insn);
1a6a0f2a 808 }
809
810 pnote = &REG_NOTES (insn);
811 while (*pnote != NULL_RTX)
812 {
9ce37fa7 813 bool del = false;
1a6a0f2a 814
815 note = *pnote;
816 switch (REG_NOTE_KIND (note))
817 {
3072d30e 818 case REG_DEAD:
819 case REG_UNUSED:
1a6a0f2a 820 if (resolve_reg_p (XEXP (note, 0)))
9ce37fa7 821 del = true;
1a6a0f2a 822 break;
823
824 default:
825 break;
826 }
827
9ce37fa7 828 if (del)
1a6a0f2a 829 *pnote = XEXP (note, 1);
830 else
831 pnote = &XEXP (note, 1);
832 }
833}
834
67c3f580 835/* Return whether X can be decomposed into subwords. */
1a6a0f2a 836
837static bool
67c3f580 838can_decompose_p (rtx x)
1a6a0f2a 839{
840 if (REG_P (x))
841 {
842 unsigned int regno = REGNO (x);
843
844 if (HARD_REGISTER_NUM_P (regno))
5f961ca4 845 {
846 unsigned int byte, num_bytes;
847
848 num_bytes = GET_MODE_SIZE (GET_MODE (x));
849 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
850 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
851 return false;
852 return true;
853 }
1a6a0f2a 854 else
5277d36e 855 return !bitmap_bit_p (subreg_context, regno);
1a6a0f2a 856 }
857
67c3f580 858 return true;
1a6a0f2a 859}
860
861/* Decompose the registers used in a simple move SET within INSN. If
862 we don't change anything, return INSN, otherwise return the start
863 of the sequence of moves. */
864
865static rtx
866resolve_simple_move (rtx set, rtx insn)
867{
868 rtx src, dest, real_dest, insns;
869 enum machine_mode orig_mode, dest_mode;
870 unsigned int words;
871 bool pushing;
872
873 src = SET_SRC (set);
874 dest = SET_DEST (set);
875 orig_mode = GET_MODE (dest);
876
877 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
c7944dce 878 gcc_assert (words > 1);
1a6a0f2a 879
880 start_sequence ();
881
882 /* We have to handle copying from a SUBREG of a decomposed reg where
883 the SUBREG is larger than word size. Rather than assume that we
884 can take a word_mode SUBREG of the destination, we copy to a new
885 register and then copy that to the destination. */
886
887 real_dest = NULL_RTX;
888
889 if (GET_CODE (src) == SUBREG
890 && resolve_reg_p (SUBREG_REG (src))
891 && (SUBREG_BYTE (src) != 0
892 || (GET_MODE_SIZE (orig_mode)
893 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
894 {
895 real_dest = dest;
896 dest = gen_reg_rtx (orig_mode);
897 if (REG_P (real_dest))
898 REG_ATTRS (dest) = REG_ATTRS (real_dest);
899 }
900
901 /* Similarly if we are copying to a SUBREG of a decomposed reg where
902 the SUBREG is larger than word size. */
903
904 if (GET_CODE (dest) == SUBREG
905 && resolve_reg_p (SUBREG_REG (dest))
906 && (SUBREG_BYTE (dest) != 0
907 || (GET_MODE_SIZE (orig_mode)
908 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
909 {
910 rtx reg, minsn, smove;
911
912 reg = gen_reg_rtx (orig_mode);
913 minsn = emit_move_insn (reg, src);
914 smove = single_set (minsn);
915 gcc_assert (smove != NULL_RTX);
916 resolve_simple_move (smove, minsn);
917 src = reg;
918 }
919
920 /* If we didn't have any big SUBREGS of decomposed registers, and
921 neither side of the move is a register we are decomposing, then
922 we don't have to do anything here. */
923
924 if (src == SET_SRC (set)
925 && dest == SET_DEST (set)
926 && !resolve_reg_p (src)
927 && !resolve_subreg_p (src)
928 && !resolve_reg_p (dest)
929 && !resolve_subreg_p (dest))
930 {
931 end_sequence ();
932 return insn;
933 }
934
ccd1ec59 935 /* It's possible for the code to use a subreg of a decomposed
936 register while forming an address. We need to handle that before
937 passing the address to emit_move_insn. We pass NULL_RTX as the
938 insn parameter to resolve_subreg_use because we can not validate
939 the insn yet. */
940 if (MEM_P (src) || MEM_P (dest))
941 {
942 int acg;
943
944 if (MEM_P (src))
945 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
946 if (MEM_P (dest))
947 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
948 acg = apply_change_group ();
949 gcc_assert (acg);
950 }
951
1a6a0f2a 952 /* If SRC is a register which we can't decompose, or has side
953 effects, we need to move via a temporary register. */
954
67c3f580 955 if (!can_decompose_p (src)
1a6a0f2a 956 || side_effects_p (src)
957 || GET_CODE (src) == ASM_OPERANDS)
958 {
959 rtx reg;
960
961 reg = gen_reg_rtx (orig_mode);
962 emit_move_insn (reg, src);
963 src = reg;
964 }
965
966 /* If DEST is a register which we can't decompose, or has side
967 effects, we need to first move to a temporary register. We
968 handle the common case of pushing an operand directly. We also
969 go through a temporary register if it holds a floating point
970 value. This gives us better code on systems which can't move
971 data easily between integer and floating point registers. */
972
973 dest_mode = orig_mode;
974 pushing = push_operand (dest, dest_mode);
67c3f580 975 if (!can_decompose_p (dest)
1a6a0f2a 976 || (side_effects_p (dest) && !pushing)
977 || (!SCALAR_INT_MODE_P (dest_mode)
978 && !resolve_reg_p (dest)
979 && !resolve_subreg_p (dest)))
980 {
981 if (real_dest == NULL_RTX)
982 real_dest = dest;
983 if (!SCALAR_INT_MODE_P (dest_mode))
984 {
985 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
986 MODE_INT, 0);
987 gcc_assert (dest_mode != BLKmode);
988 }
989 dest = gen_reg_rtx (dest_mode);
990 if (REG_P (real_dest))
991 REG_ATTRS (dest) = REG_ATTRS (real_dest);
992 }
993
994 if (pushing)
995 {
996 unsigned int i, j, jinc;
997
998 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
999 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1000 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1001
1002 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1003 {
1004 j = 0;
1005 jinc = 1;
1006 }
1007 else
1008 {
1009 j = words - 1;
1010 jinc = -1;
1011 }
1012
1013 for (i = 0; i < words; ++i, j += jinc)
1014 {
1015 rtx temp;
1016
1017 temp = copy_rtx (XEXP (dest, 0));
1018 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1019 j * UNITS_PER_WORD);
1020 emit_move_insn (temp,
1021 simplify_gen_subreg_concatn (word_mode, src,
1022 orig_mode,
1023 j * UNITS_PER_WORD));
1024 }
1025 }
1026 else
1027 {
1028 unsigned int i;
1029
1030 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
18b42941 1031 emit_clobber (dest);
1a6a0f2a 1032
1033 for (i = 0; i < words; ++i)
1034 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1035 dest_mode,
1036 i * UNITS_PER_WORD),
1037 simplify_gen_subreg_concatn (word_mode, src,
1038 orig_mode,
1039 i * UNITS_PER_WORD));
1040 }
1041
1042 if (real_dest != NULL_RTX)
1043 {
1044 rtx mdest, minsn, smove;
1045
1046 if (dest_mode == orig_mode)
1047 mdest = dest;
1048 else
1049 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1050 minsn = emit_move_insn (real_dest, mdest);
1051
1052 smove = single_set (minsn);
1053 gcc_assert (smove != NULL_RTX);
1054
1055 resolve_simple_move (smove, minsn);
1056 }
1057
1058 insns = get_insns ();
1059 end_sequence ();
1060
e38def9c 1061 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
97bb2849 1062
1a6a0f2a 1063 emit_insn_before (insns, insn);
1064
1a6a0f2a 1065 delete_insn (insn);
1066
1067 return insns;
1068}
1069
1070/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1071 component registers. Return whether we changed something. */
1072
1073static bool
1074resolve_clobber (rtx pat, rtx insn)
1075{
e29831db 1076 rtx reg;
1a6a0f2a 1077 enum machine_mode orig_mode;
1078 unsigned int words, i;
ab9eaa97 1079 int ret;
1a6a0f2a 1080
1081 reg = XEXP (pat, 0);
2289a5f2 1082 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1a6a0f2a 1083 return false;
1084
1085 orig_mode = GET_MODE (reg);
1086 words = GET_MODE_SIZE (orig_mode);
1087 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1088
ab9eaa97 1089 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1090 simplify_gen_subreg_concatn (word_mode, reg,
1091 orig_mode, 0),
1092 0);
3072d30e 1093 df_insn_rescan (insn);
ab9eaa97 1094 gcc_assert (ret != 0);
1095
1a6a0f2a 1096 for (i = words - 1; i > 0; --i)
1097 {
1098 rtx x;
1099
2289a5f2 1100 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1101 i * UNITS_PER_WORD);
1a6a0f2a 1102 x = gen_rtx_CLOBBER (VOIDmode, x);
1103 emit_insn_after (x, insn);
1104 }
1105
db2200eb 1106 resolve_reg_notes (insn);
1107
1a6a0f2a 1108 return true;
1109}
1110
1111/* A USE of a decomposed register is no longer meaningful. Return
1112 whether we changed something. */
1113
1114static bool
1115resolve_use (rtx pat, rtx insn)
1116{
1117 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1118 {
1119 delete_insn (insn);
1120 return true;
1121 }
db2200eb 1122
1123 resolve_reg_notes (insn);
1124
1a6a0f2a 1125 return false;
1126}
1127
9845d120 1128/* A VAR_LOCATION can be simplified. */
1129
1130static void
1131resolve_debug (rtx insn)
1132{
1133 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1134
1135 df_insn_rescan (insn);
1136
1137 resolve_reg_notes (insn);
1138}
1139
c7944dce 1140/* Check if INSN is a decomposable multiword-shift or zero-extend and
1141 set the decomposable_context bitmap accordingly. SPEED_P is true
1142 if we are optimizing INSN for speed rather than size. Return true
1143 if INSN is decomposable. */
9cf5d19e 1144
c7944dce 1145static bool
1146find_decomposable_shift_zext (rtx insn, bool speed_p)
9cf5d19e 1147{
1148 rtx set;
1149 rtx op;
1150 rtx op_operand;
1151
1152 set = single_set (insn);
1153 if (!set)
c7944dce 1154 return false;
9cf5d19e 1155
1156 op = SET_SRC (set);
1157 if (GET_CODE (op) != ASHIFT
1158 && GET_CODE (op) != LSHIFTRT
1159 && GET_CODE (op) != ZERO_EXTEND)
c7944dce 1160 return false;
9cf5d19e 1161
1162 op_operand = XEXP (op, 0);
1163 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1164 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1165 || HARD_REGISTER_NUM_P (REGNO (op_operand))
c7944dce 1166 || GET_MODE (op) != twice_word_mode)
1167 return false;
9cf5d19e 1168
1169 if (GET_CODE (op) == ZERO_EXTEND)
1170 {
1171 if (GET_MODE (op_operand) != word_mode
c7944dce 1172 || !choices[speed_p].splitting_zext)
1173 return false;
9cf5d19e 1174 }
1175 else /* left or right shift */
1176 {
c7944dce 1177 bool *splitting = (GET_CODE (op) == ASHIFT
1178 ? choices[speed_p].splitting_ashift
1179 : choices[speed_p].splitting_lshiftrt);
971ba038 1180 if (!CONST_INT_P (XEXP (op, 1))
c7944dce 1181 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1182 2 * BITS_PER_WORD - 1)
1183 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1184 return false;
1185
1186 bitmap_set_bit (decomposable_context, REGNO (op_operand));
9cf5d19e 1187 }
1188
1189 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1190
c7944dce 1191 return true;
9cf5d19e 1192}
1193
1194/* Decompose a more than word wide shift (in INSN) of a multiword
1195 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1196 and 'set to zero' insn. Return a pointer to the new insn when a
1197 replacement was done. */
1198
1199static rtx
1200resolve_shift_zext (rtx insn)
1201{
1202 rtx set;
1203 rtx op;
1204 rtx op_operand;
1205 rtx insns;
1206 rtx src_reg, dest_reg, dest_zero;
1207 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1208
1209 set = single_set (insn);
1210 if (!set)
1211 return NULL_RTX;
1212
1213 op = SET_SRC (set);
1214 if (GET_CODE (op) != ASHIFT
1215 && GET_CODE (op) != LSHIFTRT
1216 && GET_CODE (op) != ZERO_EXTEND)
1217 return NULL_RTX;
1218
1219 op_operand = XEXP (op, 0);
1220
c7944dce 1221 /* We can tear this operation apart only if the regs were already
1222 torn apart. */
9cf5d19e 1223 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1224 return NULL_RTX;
1225
1226 /* src_reg_num is the number of the word mode register which we
1227 are operating on. For a left shift and a zero_extend on little
1228 endian machines this is register 0. */
1229 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
1230
4d8ad352 1231 if (WORDS_BIG_ENDIAN
1232 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
9cf5d19e 1233 src_reg_num = 1 - src_reg_num;
1234
1235 if (GET_CODE (op) == ZERO_EXTEND)
4d8ad352 1236 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
9cf5d19e 1237 else
1238 dest_reg_num = 1 - src_reg_num;
1239
1240 offset1 = UNITS_PER_WORD * dest_reg_num;
1241 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1242 src_offset = UNITS_PER_WORD * src_reg_num;
1243
9cf5d19e 1244 start_sequence ();
1245
1246 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1247 GET_MODE (SET_DEST (set)),
1248 offset1);
1249 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1250 GET_MODE (SET_DEST (set)),
1251 offset2);
1252 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1253 GET_MODE (op_operand),
1254 src_offset);
1255 if (GET_CODE (op) != ZERO_EXTEND)
1256 {
1257 int shift_count = INTVAL (XEXP (op, 1));
1258 if (shift_count > BITS_PER_WORD)
1259 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1260 LSHIFT_EXPR : RSHIFT_EXPR,
1261 word_mode, src_reg,
f5ff0b21 1262 shift_count - BITS_PER_WORD,
9cf5d19e 1263 dest_reg, 1);
1264 }
1265
1266 if (dest_reg != src_reg)
1267 emit_move_insn (dest_reg, src_reg);
1268 emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1269 insns = get_insns ();
1270
1271 end_sequence ();
1272
1273 emit_insn_before (insns, insn);
1274
1275 if (dump_file)
1276 {
1277 rtx in;
1278 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1279 for (in = insns; in != insn; in = NEXT_INSN (in))
1280 fprintf (dump_file, "%d ", INSN_UID (in));
1281 fprintf (dump_file, "\n");
1282 }
1283
1284 delete_insn (insn);
1285 return insns;
1286}
1287
c7944dce 1288/* Print to dump_file a description of what we're doing with shift code CODE.
1289 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1290
1291static void
1292dump_shift_choices (enum rtx_code code, bool *splitting)
1293{
1294 int i;
1295 const char *sep;
1296
1297 fprintf (dump_file,
1298 " Splitting mode %s for %s lowering with shift amounts = ",
1299 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1300 sep = "";
1301 for (i = 0; i < BITS_PER_WORD; i++)
1302 if (splitting[i])
1303 {
1304 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1305 sep = ",";
1306 }
1307 fprintf (dump_file, "\n");
1308}
1309
1310/* Print to dump_file a description of what we're doing when optimizing
1311 for speed or size; SPEED_P says which. DESCRIPTION is a description
1312 of the SPEED_P choice. */
1313
1314static void
1315dump_choices (bool speed_p, const char *description)
1316{
1317 unsigned int i;
1318
1319 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1320
1321 for (i = 0; i < MAX_MACHINE_MODE; i++)
1322 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1323 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1324 choices[speed_p].move_modes_to_split[i]
1325 ? "Splitting"
1326 : "Skipping",
1327 GET_MODE_NAME ((enum machine_mode) i));
1328
1329 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1330 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1331 GET_MODE_NAME (twice_word_mode));
1332
1333 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1334 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1335 fprintf (dump_file, "\n");
1336}
1337
1a6a0f2a 1338/* Look for registers which are always accessed via word-sized SUBREGs
b5ca6624 1339 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1340 registers into several word-sized pseudo-registers. */
1a6a0f2a 1341
1342static void
b5ca6624 1343decompose_multiword_subregs (bool decompose_copies)
1a6a0f2a 1344{
1345 unsigned int max;
1346 basic_block bb;
c7944dce 1347 bool speed_p;
1a6a0f2a 1348
/* When a dump file is open, describe the splitting choices for both
   the size and the speed settings.  */
c7944dce 1349 if (dump_file)
1350 {
1351 dump_choices (false, "size");
1352 dump_choices (true, "speed");
1353 }
1354
1355 /* Check if this target even has any modes to consider lowering. */
1356 if (!choices[false].something_to_do && !choices[true].something_to_do)
1357 {
1358 if (dump_file)
1359 fprintf (dump_file, "Nothing to do!\n");
1360 return;
1361 }
3072d30e 1362
1a6a0f2a 1363 max = max_reg_num ();
1364
1365 /* First see if there are any multi-word pseudo-registers. If there
1366 aren't, there is nothing we can do. This should speed up this
1367 pass in the normal case, since it should be faster than scanning
1368 all the insns. */
1369 {
1370 unsigned int i;
c7944dce 1371 bool useful_modes_seen = false;
1a6a0f2a 1372
1373 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
c7944dce 1374 if (regno_reg_rtx[i] != NULL)
1375 {
1376 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1377 if (choices[false].move_modes_to_split[(int) mode]
1378 || choices[true].move_modes_to_split[(int) mode])
1379 {
1380 useful_modes_seen = true;
1381 break;
1382 }
1383 }
1384
1385 if (!useful_modes_seen)
1a6a0f2a 1386 {
c7944dce 1387 if (dump_file)
1388 fprintf (dump_file, "Nothing to lower in this function.\n");
1389 return;
1a6a0f2a 1390 }
1a6a0f2a 1391 }
1392
/* When DF is set up, set DF_DEFER_INSN_RESCAN and call run_word_dce
   before the insns are scanned.  */
0e8e9be3 1393 if (df)
c7944dce 1394 {
1395 df_set_flags (DF_DEFER_INSN_RESCAN);
1396 run_word_dce ();
1397 }
0e8e9be3 1398
c7944dce 1399 /* FIXME: It may be possible to change this code to look for each
1400 multi-word pseudo-register and to find each insn which sets or
1401 uses that register. That should be faster than scanning all the
1402 insns. */
1a6a0f2a 1403
1404 decomposable_context = BITMAP_ALLOC (NULL);
1405 non_decomposable_context = BITMAP_ALLOC (NULL);
5277d36e 1406 subreg_context = BITMAP_ALLOC (NULL);
1a6a0f2a 1407
/* reg_copy_graph gets MAX entries, all cleared to zero to begin
   with.  */
1408 reg_copy_graph = VEC_alloc (bitmap, heap, max);
1409 VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
1410 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
1411
/* First scan over all insns.  CLOBBER and USE insns are skipped.
   Shifts and zero-extends accepted by find_decomposable_shift_zext
   need no further analysis; for every other insn each operand is
   walked by find_decomposable_subregs together with the move
   classification CMI computed below.  */
c7944dce 1412 speed_p = optimize_function_for_speed_p (cfun);
1a6a0f2a 1413 FOR_EACH_BB (bb)
1414 {
1415 rtx insn;
1416
1417 FOR_BB_INSNS (bb, insn)
1418 {
1419 rtx set;
1420 enum classify_move_insn cmi;
1421 int i, n;
1422
1423 if (!INSN_P (insn)
1424 || GET_CODE (PATTERN (insn)) == CLOBBER
1425 || GET_CODE (PATTERN (insn)) == USE)
1426 continue;
1427
08b31038 1428 recog_memoized (insn);
1429
c7944dce 1430 if (find_decomposable_shift_zext (insn, speed_p))
9cf5d19e 1431 continue;
1432
1a6a0f2a 1433 extract_insn (insn);
1434
c7944dce 1435 set = simple_move (insn, speed_p);
1a6a0f2a 1436
1437 if (!set)
1438 cmi = NOT_SIMPLE_MOVE;
1439 else
1440 {
b5ca6624 1441 /* We mark pseudo-to-pseudo copies as decomposable during the
1442 second pass only. The first pass is so early that there is
1443 good chance such moves will be optimized away completely by
1444 subsequent optimizations anyway.
1445
1446 However, we call find_pseudo_copy even during the first pass
1447 so as to properly set up the reg_copy_graph. */
1e5b92fa 1448 if (find_pseudo_copy (set))
b5ca6624 1449 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1a6a0f2a 1450 else
1451 cmi = SIMPLE_MOVE;
1452 }
1453
1454 n = recog_data.n_operands;
1455 for (i = 0; i < n; ++i)
1456 {
1457 for_each_rtx (&recog_data.operand[i],
1458 find_decomposable_subregs,
1459 &cmi);
1460
1461 /* We handle ASM_OPERANDS as a special case to support
1462 things like x86 rdtsc which returns a DImode value.
1463 We can decompose the output, which will certainly be
1464 operand 0, but not the inputs. */
1465
1466 if (cmi == SIMPLE_MOVE
1467 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1468 {
1469 gcc_assert (i == 0);
1470 cmi = NOT_SIMPLE_MOVE;
1471 }
1472 }
1473 }
1474 }
1475
/* A register recorded in non_decomposable_context is removed from
   decomposable_context; the rewrite below runs only if something is
   left.  */
1476 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1477 if (!bitmap_empty_p (decomposable_context))
1478 {
97bb2849 1479 sbitmap sub_blocks;
db1c50be 1480 unsigned int i;
1481 sbitmap_iterator sbi;
1a6a0f2a 1482 bitmap_iterator iter;
1483 unsigned int regno;
1484
1485 propagate_pseudo_copies ();
1486
97bb2849 1487 sub_blocks = sbitmap_alloc (last_basic_block);
1488 sbitmap_zero (sub_blocks);
1a6a0f2a 1489
/* Call decompose_register on every register number still set in
   decomposable_context.  */
1490 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1491 decompose_register (regno);
1492
/* Second scan over all insns: CLOBBER, USE and debug insns have
   dedicated resolvers; every other insn is resolved as a simple move
   or shift/zero-extend when applicable and then has its operands
   walked by resolve_subreg_use.  */
1493 FOR_EACH_BB (bb)
1494 {
201f6961 1495 rtx insn;
1a6a0f2a 1496
201f6961 1497 FOR_BB_INSNS (bb, insn)
1a6a0f2a 1498 {
da7a04f1 1499 rtx pat;
1a6a0f2a 1500
1501 if (!INSN_P (insn))
1502 continue;
1503
1a6a0f2a 1504 pat = PATTERN (insn);
1505 if (GET_CODE (pat) == CLOBBER)
db2200eb 1506 resolve_clobber (pat, insn);
1a6a0f2a 1507 else if (GET_CODE (pat) == USE)
db2200eb 1508 resolve_use (pat, insn);
9845d120 1509 else if (DEBUG_INSN_P (insn))
1510 resolve_debug (insn);
1a6a0f2a 1511 else
1512 {
1513 rtx set;
1514 int i;
1515
1516 recog_memoized (insn);
1517 extract_insn (insn);
1518
c7944dce 1519 set = simple_move (insn, speed_p);
1a6a0f2a 1520 if (set)
1521 {
1522 rtx orig_insn = insn;
97bb2849 1523 bool cfi = control_flow_insn_p (insn);
1a6a0f2a 1524
db1c50be 1525 /* We can end up splitting loads to multi-word pseudos
1526 into separate loads to machine word size pseudos.
1527 When this happens, we first had one load that can
1528 throw, and after resolve_simple_move we'll have a
1529 bunch of loads (at least two). All those loads may
1530 trap if we can have non-call exceptions, so they
1531 all will end the current basic block. We split the
1532 block after the outer loop over all insns, but we
1533 make sure here that we will be able to split the
1534 basic block and still produce the correct control
1535 flow graph for it. */
1536 gcc_assert (!cfi
cbeb677e 1537 || (cfun->can_throw_non_call_exceptions
db1c50be 1538 && can_throw_internal (insn)));
1539
1a6a0f2a 1540 insn = resolve_simple_move (set, insn);
1541 if (insn != orig_insn)
1542 {
1a6a0f2a 1543 recog_memoized (insn);
1544 extract_insn (insn);
97bb2849 1545
1546 if (cfi)
1547 SET_BIT (sub_blocks, bb->index);
1a6a0f2a 1548 }
1549 }
9cf5d19e 1550 else
1551 {
1552 rtx decomposed_shift;
1553
1554 decomposed_shift = resolve_shift_zext (insn);
1555 if (decomposed_shift != NULL_RTX)
1556 {
9cf5d19e 1557 insn = decomposed_shift;
1558 recog_memoized (insn);
1559 extract_insn (insn);
1560 }
1561 }
1a6a0f2a 1562
1563 for (i = recog_data.n_operands - 1; i >= 0; --i)
1564 for_each_rtx (recog_data.operand_loc[i],
1565 resolve_subreg_use,
1566 insn);
1567
1568 resolve_reg_notes (insn);
1569
/* If any change is pending, also queue a change for every duplicate
   location so that it holds the current contents of the operand it
   duplicates, then apply the whole group; the group must succeed.  */
1570 if (num_validated_changes () > 0)
1571 {
1572 for (i = recog_data.n_dups - 1; i >= 0; --i)
1573 {
1574 rtx *pl = recog_data.dup_loc[i];
1575 int dup_num = recog_data.dup_num[i];
1576 rtx *px = recog_data.operand_loc[dup_num];
1577
c47adb48 1578 validate_unshare_change (insn, pl, *px, 1);
1a6a0f2a 1579 }
1580
1581 i = apply_change_group ();
1582 gcc_assert (i);
1a6a0f2a 1583 }
1584 }
1a6a0f2a 1585 }
1586 }
1587
db1c50be 1588 /* If we had insns to split that caused control flow insns in the middle
1589 of a basic block, split those blocks now. Note that we only handle
1590 the case where splitting a load has caused multiple possibly trapping
1591 loads to appear. */
1592 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
1593 {
1594 rtx insn, end;
1595 edge fallthru;
1596
1597 bb = BASIC_BLOCK (i);
1598 insn = BB_HEAD (bb);
1599 end = BB_END (bb);
1600
1601 while (insn != end)
1602 {
1603 if (control_flow_insn_p (insn))
1604 {
1605 /* Split the block after insn. There will be a fallthru
1606 edge, which is OK so we keep it. We have to create the
1607 exception edges ourselves. */
1608 fallthru = split_block (bb, insn);
1609 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1610 bb = fallthru->dest;
1611 insn = BB_HEAD (bb);
1612 }
1613 else
1614 insn = NEXT_INSN (insn);
1615 }
1616 }
97bb2849 1617
97bb2849 1618 sbitmap_free (sub_blocks);
1a6a0f2a 1619 }
1620
/* Free every non-null bitmap stored in reg_copy_graph, then the
   vector itself and the three context bitmaps allocated above.  */
1621 {
1622 unsigned int i;
1623 bitmap b;
1624
48148244 1625 FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
1a6a0f2a 1626 if (b)
1627 BITMAP_FREE (b);
1628 }
1629
48e1416a 1630 VEC_free (bitmap, heap, reg_copy_graph);
1a6a0f2a 1631
1632 BITMAP_FREE (decomposable_context);
1633 BITMAP_FREE (non_decomposable_context);
5277d36e 1634 BITMAP_FREE (subreg_context);
1a6a0f2a 1635}
1636\f
1637/* Gate function for lower subreg pass. */
1638
1639static bool
1640gate_handle_lower_subreg (void)
1641{
1642 return flag_split_wide_types != 0;
1643}
1644
/* Execute function of the first lower subreg pass.  Runs the
   decomposition with DECOMPOSE_COPIES false and always returns 0.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  const bool decompose_copies = false;

  decompose_multiword_subregs (decompose_copies);
  return 0;
}
1653
/* Execute function of the second lower subreg pass.  Runs the
   decomposition with DECOMPOSE_COPIES true and always returns 0.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  const bool decompose_copies = true;

  decompose_multiword_subregs (decompose_copies);
  return 0;
}
1662
20099e35 1663struct rtl_opt_pass pass_lower_subreg =
1a6a0f2a 1664{
20099e35 1665 {
1666 RTL_PASS,
b85ccd2c 1667 "subreg1", /* name */
1a6a0f2a 1668 gate_handle_lower_subreg, /* gate */
1669 rest_of_handle_lower_subreg, /* execute */
1670 NULL, /* sub */
1671 NULL, /* next */
1672 0, /* static_pass_number */
1673 TV_LOWER_SUBREG, /* tv_id */
1674 0, /* properties_required */
1675 0, /* properties_provided */
1676 0, /* properties_destroyed */
1677 0, /* todo_flags_start */
97bb2849 1678 TODO_ggc_collect |
20099e35 1679 TODO_verify_flow /* todo_flags_finish */
1680 }
1a6a0f2a 1681};
1682
20099e35 1683struct rtl_opt_pass pass_lower_subreg2 =
1a6a0f2a 1684{
20099e35 1685 {
1686 RTL_PASS,
1a6a0f2a 1687 "subreg2", /* name */
1688 gate_handle_lower_subreg, /* gate */
1689 rest_of_handle_lower_subreg2, /* execute */
1690 NULL, /* sub */
1691 NULL, /* next */
1692 0, /* static_pass_number */
1693 TV_LOWER_SUBREG, /* tv_id */
1694 0, /* properties_required */
1695 0, /* properties_provided */
1696 0, /* properties_destroyed */
1697 0, /* todo_flags_start */
0806b508 1698 TODO_df_finish | TODO_verify_rtl_sharing |
97bb2849 1699 TODO_ggc_collect |
20099e35 1700 TODO_verify_flow /* todo_flags_finish */
1701 }
1a6a0f2a 1702};