/* Decompose multiword subregs.
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "obstack.h"
#include "basic-block.h"
#include "recog.h"
#include "bitmap.h"
#include "dce.h"
#include "expr.h"
#include "except.h"
#include "regs.h"
#include "tree-pass.h"
#include "df.h"
#include "lower-subreg.h"

#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif

DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);

/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable. This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location. Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
   modes that are twice the width of word_mode. The latter could be
   generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts. This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations. */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
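
/* As an illustration (register numbers are made up here): on a 32-bit
   target, where word_mode is SImode, a copy such as

       (set (reg:DI 100) (reg:DI 101))

   whose pseudos are otherwise only accessed via word-sized SUBREGs can
   be replaced by two SImode moves between the word-sized pseudos into
   which regs 100 and 101 are decomposed, e.g.

       (set (reg:SI 102) (reg:SI 104))
       (set (reg:SI 103) (reg:SI 105))

   assuming regs 102/103 and 104/105 are the new word-sized pieces. */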

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it. */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed. */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size. This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes. */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N. */
static VEC(bitmap,heap) *reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* RTXes used while computing costs. */
struct cost_rtxes {
  /* Source and target registers. */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE. */
  rtx zext;

  /* A shift of SOURCE. */
  rtx shift;

  /* A SET of TARGET. */
  rtx set;
};

/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES. SPEED_P selects between the speed and size cost. */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
            enum machine_mode mode, int op1)
{
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, speed_p);
}

/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register. WORD_MOVE_COST
   is the cost of moving between word registers. */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
                         bool *splitting, enum rtx_code code,
                         int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
                              i + BITS_PER_WORD);
      if (i == 0)
        narrow_cost = word_move_cost;
      else
        narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      if (LOG_COSTS)
        fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
                 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
                 i + BITS_PER_WORD, wide_cost, narrow_cost,
                 word_move_zero_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
        splitting[i] = true;
    }
}
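
/* For instance, with BITS_PER_WORD == 32, SPLITTING[8] records whether a
   DImode shift by 40 is better done as a word_mode shift by 8 into one
   half of the destination plus a move of zero into the other half, which
   is how resolve_shift_zext rewrites it below. */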

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which. Use RTXES for computing costs. */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
             GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
        {
          int mode_move_cost;

          PUT_MODE (rtxes->target, mode);
          PUT_MODE (rtxes->source, mode);
          mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

          if (LOG_COSTS)
            fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
                     GET_MODE_NAME (mode), mode_move_cost,
                     word_move_cost, factor);

          if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
            {
              choices[speed_p].move_modes_to_split[i] = true;
              choices[speed_p].something_to_do = true;
            }
        }
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension. */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case to check here is whether moving the upper part
         with a zero is cheaper than doing the zext itself. */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
        fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
                 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
                 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
        choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
                               choices[speed_p].splitting_ashift, ASHIFT,
                               word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
                               choices[speed_p].splitting_lshiftrt, LSHIFTRT,
                               word_move_zero_cost, word_move_cost);
    }
}

/* Do one-per-target initialisation. This involves determining
   which operations on the machine are profitable. If none are found,
   then the pass just returns when called. */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}

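/* Return whether X, which appears as the source or destination of a
   set, is an operand we can handle as a simple move: after looking
   through a SUBREG it must be an object (a REG, MEM or constant), but
   not a symbolic constant and not a volatile or mode-dependent memory
   reference. */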
static bool
simple_move_operand (rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  if (!OBJECT_P (x))
    return false;

  if (GET_CODE (x) == LABEL_REF
      || GET_CODE (x) == SYMBOL_REF
      || GET_CODE (x) == HIGH
      || GET_CODE (x) == CONST)
    return false;

  if (MEM_P (x)
      && (MEM_VOLATILE_P (x)
          || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
    return false;

  return true;
}

/* If INSN is a single set between two objects that we want to split,
   return the single set. SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called. */

static rtx
simple_move (rtx insn, bool speed_p)
{
  rtx x;
  rtx set;
  enum machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value. */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers. That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size. */
  mode = GET_MODE (SET_SRC (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
          == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes. They are used for processor specific
     purposes and it's probably best not to tamper with them. */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph. Return whether it is such a
   copy. */

static bool
find_pseudo_copy (rtx set)
{
  rtx dest = SET_DEST (set);
  rtx src = SET_SRC (set);
  unsigned int rd, rs;
  bitmap b;

  if (!REG_P (dest) || !REG_P (src))
    return false;

  rd = REGNO (dest);
  rs = REGNO (src);
  if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
    return false;

  b = VEC_index (bitmap, reg_copy_graph, rs);
  if (b == NULL)
    {
      b = BITMAP_ALLOC (NULL);
      VEC_replace (bitmap, reg_copy_graph, rs, b);
    }

  bitmap_set_bit (b, rd);

  return true;
}
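
/* For example, given (set (reg:DI 101) (reg:DI 100)) this sets bit 101
   in the bitmap recorded for reg 100, so that propagate_pseudo_copies
   below can mark reg 101 as decomposable whenever reg 100 is. */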

/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT. Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
        {
          bitmap b = VEC_index (bitmap, reg_copy_graph, i);
          if (b)
            bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
        }

      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}

/* A pointer to one of these values is passed to
   find_decomposable_subregs via for_each_rtx. */

enum classify_move_insn
{
  /* Not a simple move from one location to another. */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose. */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move. */
  SIMPLE_MOVE
};

/* This is called via for_each_rtx. If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate. */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
        return 0;

      regno = REGNO (inner);
      if (HARD_REGISTER_NUM_P (regno))
        return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
         registers. When we find one, we return -1 to avoid iterating
         over the inner register.

         ??? This doesn't allow, e.g., DImode subregs of TImode values
         on 32-bit targets. We would need to record the way the
         pseudo-register was used, and only decompose if all the uses
         were the same number and size of pieces. Hopefully this
         doesn't happen much. */

      if (outer_words == 1 && inner_words > 1)
        {
          bitmap_set_bit (decomposable_context, regno);
          return -1;
        }

      /* If this is a cast from one mode to another, where the modes
         have the same size, and they are not tieable, then mark this
         register as non-decomposable. If we decompose it we are
         likely to mess up whatever the backend is trying to do. */
      if (outer_words > 1
          && outer_size == inner_size
          && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
        {
          bitmap_set_bit (non_decomposable_context, regno);
          bitmap_set_bit (subreg_context, regno);
          return -1;
        }
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
         when we see a plain REG here it means a direct reference to
         the register.

         If this is not a simple copy from one location to another,
         then we can not decompose this register. If this is a simple
         copy we want to decompose, and the mode is right,
         then we mark the register as decomposable.
         Otherwise we don't say anything about this register --
         it could be decomposed, but whether that would be
         profitable depends upon how it is used elsewhere.

         We only set bits in the bitmap for multi-word
         pseudo-registers, since those are the only ones we care about
         and it keeps the size of the bitmaps down. */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
          && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
        {
          switch (*pcmi)
            {
            case NOT_SIMPLE_MOVE:
              bitmap_set_bit (non_decomposable_context, regno);
              break;
            case DECOMPOSABLE_SIMPLE_MOVE:
              if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
                bitmap_set_bit (decomposable_context, regno);
              break;
            case SIMPLE_MOVE:
              break;
            default:
              gcc_unreachable ();
            }
        }
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
         SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
         here, and return -1 to block the parent's recursion. */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}

/* Decompose REGNO into word-sized components. We smash the REG node
   in place. This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date. */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  regno_reg_rtx[regno] = NULL_RTX;

  words = GET_MODE_SIZE (GET_MODE (reg));
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
        fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}
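
/* For example, splitting a DImode pseudo on a 32-bit target turns
   (reg:DI 100) into (concatn:DI [(reg:SI 102) (reg:SI 103)]); later
   references to reg 100 are resolved to the new SImode pieces
   (register numbers here are purely illustrative). */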

/* Get a SUBREG of a CONCATN. */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
                         unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs. They have vector or integer modes, depending
     on the capabilities of the target. Cope with them. */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}

/* Wrapper around simplify_gen_subreg which handles CONCATN. */

static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
                             enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part. We shouldn't see anything else here. */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      if ((GET_MODE_SIZE (GET_MODE (op))
           == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
          && SUBREG_BYTE (op) == 0)
        return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
                                            GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
                                     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
        {
          /* We don't handle paradoxical subregs here. */
          gcc_assert (GET_MODE_SIZE (outermode)
                      <= GET_MODE_SIZE (GET_MODE (op)));
          gcc_assert (GET_MODE_SIZE (GET_MODE (op))
                      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
          op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
                                         byte + SUBREG_BYTE (op));
          gcc_assert (op2 != NULL_RTX);
          return op2;
        }

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value. Just return a zero. */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
          > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}

/* Return whether we should resolve X into the registers into which it
   was decomposed. */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}

/* Return whether X is a SUBREG of a register which we need to
   resolve. */

static bool
resolve_subreg_p (rtx x)
{
  if (GET_CODE (x) != SUBREG)
    return false;
  return resolve_reg_p (SUBREG_REG (x));
}

/* This is called via for_each_rtx. Look for SUBREGs which need to be
   decomposed. */

static int
resolve_subreg_use (rtx *px, void *data)
{
  rtx insn = (rtx) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
                                   SUBREG_BYTE (x));

      /* It is possible for a note to contain a reference which we can
         decompose. In this case, return 1 to the caller to indicate
         that the note must be removed. */
      if (!x)
        {
          gcc_assert (!insn);
          return 1;
        }

      validate_change (insn, px, x, 1);
      return -1;
    }

  if (resolve_reg_p (x))
    {
      /* Return 1 to the caller to indicate that we found a direct
         reference to a register which is being decomposed. This can
         happen inside notes, multiword shift or zero-extend
         instructions. */
      return 1;
    }

  return 0;
}

/* This is called via for_each_rtx. Look for SUBREGs which can be
   decomposed and decomposed REGs that need copying. */

static int
adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (resolve_subreg_p (x))
    {
      x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
                                   SUBREG_BYTE (x));

      if (x)
        *px = x;
      else
        x = copy_rtx (*px);
    }

  if (resolve_reg_p (x))
    *px = copy_rtx (x);

  return 0;
}

/* Resolve any decomposed registers which appear in register notes on
   INSN. */

static void
resolve_reg_notes (rtx insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
        remove_note (insn, note);
      else
        if (old_count != num_validated_changes ())
          df_notes_rescan (insn);
    }

  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
        {
        case REG_DEAD:
        case REG_UNUSED:
          if (resolve_reg_p (XEXP (note, 0)))
            del = true;
          break;

        default:
          break;
        }

      if (del)
        *pnote = XEXP (note, 1);
      else
        pnote = &XEXP (note, 1);
    }
}

/* Return whether X can be decomposed into subwords. */

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
        {
          unsigned int byte, num_bytes;

          num_bytes = GET_MODE_SIZE (GET_MODE (x));
          for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
            if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
              return false;
          return true;
        }
      else
        return !bitmap_bit_p (subreg_context, regno);
    }

  return true;
}

/* Decompose the registers used in a simple move SET within INSN. If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves. */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size. Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination. */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size. */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here. */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address. We need to handle that before
     passing the address to emit_move_insn. We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet. */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
        for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
        for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register. */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register. We
     handle the common case of pushing an operand directly. We also
     go through a temporary register if it holds a floating point
     value. This gives us better code on systems which can't move
     data easily between integer and floating point registers. */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
          && !resolve_reg_p (dest)
          && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
        real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
                                     MODE_INT, 0);
          gcc_assert (dest_mode != BLKmode);
        }
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
        {
          j = 0;
          jinc = 1;
        }
      else
        {
          j = words - 1;
          jinc = -1;
        }

      for (i = 0; i < words; ++i, j += jinc)
        {
          rtx temp;

          temp = copy_rtx (XEXP (dest, 0));
          temp = adjust_automodify_address_nv (dest, word_mode, temp,
                                               j * UNITS_PER_WORD);
          emit_move_insn (temp,
                          simplify_gen_subreg_concatn (word_mode, src,
                                                       orig_mode,
                                                       j * UNITS_PER_WORD));
        }
    }
  else
    {
      unsigned int i;

      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
        emit_clobber (dest);

      for (i = 0; i < words; ++i)
        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
                                                     dest_mode,
                                                     i * UNITS_PER_WORD),
                        simplify_gen_subreg_concatn (word_mode, src,
                                                     orig_mode,
                                                     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
        mdest = dest;
      else
        mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers. Return whether we changed something. */

static bool
resolve_clobber (rtx pat, rtx insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
                         simplify_gen_subreg_concatn (word_mode, reg,
                                                      orig_mode, 0),
                         0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
                                       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}

/* A USE of a decomposed register is no longer meaningful. Return
   whether we changed something. */

static bool
resolve_use (rtx pat, rtx insn)
{
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }

  resolve_reg_notes (insn);

  return false;
}

/* A VAR_LOCATION can be simplified. */

static void
resolve_debug (rtx insn)
{
  for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly. SPEED_P is true
   if we are optimizing INSN for speed rather than size. Return true
   if INSN is decomposable. */

static bool
find_decomposable_shift_zext (rtx insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      if (GET_MODE (op_operand) != word_mode
          || !choices[speed_p].splitting_zext)
        return false;
    }
  else /* left or right shift */
    {
      bool *splitting = (GET_CODE (op) == ASHIFT
                         ? choices[speed_p].splitting_ashift
                         : choices[speed_p].splitting_lshiftrt);
      if (!CONST_INT_P (XEXP (op, 1))
          || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
                        2 * BITS_PER_WORD - 1)
          || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
        return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn. Return a pointer to the new insn when a
   replacement was done. */
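
/* For instance, on a 32-bit target a lowered

       (set (reg:DI 100) (lshiftrt:DI (reg:DI 101) (const_int 40)))

   becomes a word_mode shift of the upper half of reg 101 by 8 into the
   lower half of reg 100, followed by setting the upper half of reg 100
   to zero (register numbers here are illustrative only). */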

static rtx
resolve_shift_zext (rtx insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx insns;
  rtx src_reg, dest_reg, dest_zero;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL_RTX;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart. */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL_RTX;

  /* src_reg_num is the number of the word mode register which we
     are operating on. For a left shift and a zero_extend on little
     endian machines this is register 0. */
  src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;

  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                           GET_MODE (SET_DEST (set)),
                                           offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
        src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
                                LSHIFT_EXPR : RSHIFT_EXPR,
                                word_mode, src_reg,
                                shift_count - BITS_PER_WORD,
                                dest_reg, 1);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  emit_move_insn (dest_zero, CONST0_RTX (word_mode));
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
        fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

/* Print to dump_file a description of what we're doing with shift code CODE.
   SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */

static void
dump_shift_choices (enum rtx_code code, bool *splitting)
{
  int i;
  const char *sep;

  fprintf (dump_file,
           " Splitting mode %s for %s lowering with shift amounts = ",
           GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
  sep = "";
  for (i = 0; i < BITS_PER_WORD; i++)
    if (splitting[i])
      {
        fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
        sep = ",";
      }
  fprintf (dump_file, "\n");
}

/* Print to dump_file a description of what we're doing when optimizing
   for speed or size; SPEED_P says which. DESCRIPTION is a description
   of the SPEED_P choice. */

static void
dump_choices (bool speed_p, const char *description)
{
  unsigned int i;

  fprintf (dump_file, "Choices when optimizing for %s:\n", description);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
      fprintf (dump_file, " %s mode %s for copy lowering.\n",
               choices[speed_p].move_modes_to_split[i]
               ? "Splitting"
               : "Skipping",
               GET_MODE_NAME ((enum machine_mode) i));

  fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
           choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
           GET_MODE_NAME (twice_word_mode));

  dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
  fprintf (dump_file, "\n");
}

/* Look for registers which are always accessed via word-sized SUBREGs
   or, if DECOMPOSE_COPIES is true, via copies. Decompose these
   registers into several word-sized pseudo-registers. */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering. */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
        fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers. If there
     aren't, there is nothing we can do. This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns. */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
        {
          enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
          if (choices[false].move_modes_to_split[(int) mode]
              || choices[true].move_modes_to_split[(int) mode])
            {
              useful_modes_seen = true;
              break;
            }
        }

    if (!useful_modes_seen)
      {
        if (dump_file)
          fprintf (dump_file, "Nothing to lower in this function.\n");
        return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register. That should be faster than scanning all the
     insns. */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph = VEC_alloc (bitmap, heap, max);
  VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
  memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB (bb)
    {
      rtx insn;

      FOR_BB_INSNS (bb, insn)
        {
          rtx set;
          enum classify_move_insn cmi;
          int i, n;

          if (!INSN_P (insn)
              || GET_CODE (PATTERN (insn)) == CLOBBER
              || GET_CODE (PATTERN (insn)) == USE)
            continue;

          recog_memoized (insn);

          if (find_decomposable_shift_zext (insn, speed_p))
            continue;

          extract_insn (insn);

          set = simple_move (insn, speed_p);

          if (!set)
            cmi = NOT_SIMPLE_MOVE;
          else
            {
              /* We mark pseudo-to-pseudo copies as decomposable during the
                 second pass only. The first pass is so early that there is
                 a good chance such moves will be optimized away completely
                 by subsequent optimizations anyway.

                 However, we call find_pseudo_copy even during the first pass
                 so as to properly set up the reg_copy_graph. */
              if (find_pseudo_copy (set))
                cmi = decompose_copies ? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
              else
                cmi = SIMPLE_MOVE;
            }

          n = recog_data.n_operands;
          for (i = 0; i < n; ++i)
            {
              for_each_rtx (&recog_data.operand[i],
                            find_decomposable_subregs,
                            &cmi);

              /* We handle ASM_OPERANDS as a special case to support
                 things like x86 rdtsc which returns a DImode value.
                 We can decompose the output, which will certainly be
                 operand 0, but not the inputs. */

              if (cmi == SIMPLE_MOVE
                  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
                {
                  gcc_assert (i == 0);
                  cmi = NOT_SIMPLE_MOVE;
                }
            }
        }
    }

  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block);
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
        decompose_register (regno);

      FOR_EACH_BB (bb)
        {
          rtx insn;

          FOR_BB_INSNS (bb, insn)
            {
              rtx pat;

              if (!INSN_P (insn))
                continue;

              pat = PATTERN (insn);
              if (GET_CODE (pat) == CLOBBER)
                resolve_clobber (pat, insn);
              else if (GET_CODE (pat) == USE)
                resolve_use (pat, insn);
              else if (DEBUG_INSN_P (insn))
                resolve_debug (insn);
              else
                {
                  rtx set;
                  int i;

                  recog_memoized (insn);
                  extract_insn (insn);

                  set = simple_move (insn, speed_p);
                  if (set)
                    {
                      rtx orig_insn = insn;
                      bool cfi = control_flow_insn_p (insn);

                      /* We can end up splitting loads to multi-word pseudos
                         into separate loads to machine word size pseudos.
                         When this happens, we first had one load that can
                         throw, and after resolve_simple_move we'll have a
                         bunch of loads (at least two). All those loads may
                         trap if we can have non-call exceptions, so they
                         all will end the current basic block. We split the
                         block after the outer loop over all insns, but we
                         make sure here that we will be able to split the
                         basic block and still produce the correct control
                         flow graph for it. */
                      gcc_assert (!cfi
                                  || (cfun->can_throw_non_call_exceptions
                                      && can_throw_internal (insn)));

                      insn = resolve_simple_move (set, insn);
                      if (insn != orig_insn)
                        {
                          recog_memoized (insn);
                          extract_insn (insn);

                          if (cfi)
                            bitmap_set_bit (sub_blocks, bb->index);
                        }
                    }
                  else
                    {
                      rtx decomposed_shift;

                      decomposed_shift = resolve_shift_zext (insn);
                      if (decomposed_shift != NULL_RTX)
                        {
                          insn = decomposed_shift;
                          recog_memoized (insn);
                          extract_insn (insn);
                        }
                    }

                  for (i = recog_data.n_operands - 1; i >= 0; --i)
                    for_each_rtx (recog_data.operand_loc[i],
                                  resolve_subreg_use,
                                  insn);

                  resolve_reg_notes (insn);

                  if (num_validated_changes () > 0)
                    {
                      for (i = recog_data.n_dups - 1; i >= 0; --i)
                        {
                          rtx *pl = recog_data.dup_loc[i];
                          int dup_num = recog_data.dup_num[i];
                          rtx *px = recog_data.operand_loc[dup_num];

                          validate_unshare_change (insn, pl, *px, 1);
                        }

                      i = apply_change_group ();
                      gcc_assert (i);
                    }
                }
            }
        }

      /* If we had insns to split that caused control flow insns in the middle
         of a basic block, split those blocks now. Note that we only handle
         the case where splitting a load has caused multiple possibly trapping
         loads to appear. */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
        {
          rtx insn, end;
          edge fallthru;

          bb = BASIC_BLOCK (i);
          insn = BB_HEAD (bb);
          end = BB_END (bb);

          while (insn != end)
            {
              if (control_flow_insn_p (insn))
                {
                  /* Split the block after insn. There will be a fallthru
                     edge, which is OK so we keep it. We have to create the
                     exception edges ourselves. */
                  fallthru = split_block (bb, insn);
                  rtl_make_eh_edge (NULL, bb, BB_END (bb));
                  bb = fallthru->dest;
                  insn = BB_HEAD (bb);
                }
              else
                insn = NEXT_INSN (insn);
            }
        }

      sbitmap_free (sub_blocks);
    }

  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
      if (b)
        BITMAP_FREE (b);
  }

  VEC_free (bitmap, heap, reg_copy_graph);

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Gate function for lower subreg pass. */

static bool
gate_handle_lower_subreg (void)
{
  return flag_split_wide_types != 0;
}

/* Implement first lower subreg pass. */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  decompose_multiword_subregs (false);
  return 0;
}

/* Implement second lower subreg pass. */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  decompose_multiword_subregs (true);
  return 0;
}

struct rtl_opt_pass pass_lower_subreg =
{
 {
  RTL_PASS,
  "subreg1",                            /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg,          /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_lower_subreg2 =
{
 {
  RTL_PASS,
  "subreg2",                            /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_handle_lower_subreg,             /* gate */
  rest_of_handle_lower_subreg2,         /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_LOWER_SUBREG,                      /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_ggc_collect |
  TODO_verify_flow                      /* todo_flags_finish */
 }
};