]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
Update copyright years in gcc/
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
e53a16e7 1/* Decompose multiword subregs.
23a5b65a 2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
e53a16e7
ILT
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
e53a16e7
ILT
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
e53a16e7
ILT
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "machmode.h"
26#include "tm.h"
4d648807 27#include "tree.h"
e53a16e7
ILT
28#include "rtl.h"
29#include "tm_p.h"
e53a16e7
ILT
30#include "flags.h"
31#include "insn-config.h"
32#include "obstack.h"
33#include "basic-block.h"
34#include "recog.h"
35#include "bitmap.h"
8d074192 36#include "dce.h"
e53a16e7 37#include "expr.h"
7984c787 38#include "except.h"
e53a16e7
ILT
39#include "regs.h"
40#include "tree-pass.h"
6fb5fa3c 41#include "df.h"
af4ba423 42#include "lower-subreg.h"
e53a16e7
ILT
43
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so that it can be
   tested in ordinary C expressions instead of only with #ifdef.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif
50
e53a16e7
ILT
51
52/* Decompose multi-word pseudo-registers into individual
af4ba423
KZ
53 pseudo-registers when possible and profitable. This is possible
54 when all the uses of a multi-word register are via SUBREG, or are
55 copies of the register to another location. Breaking apart the
56 register permits more CSE and permits better register allocation.
57 This is profitable if the machine does not have move instructions
58 to do this.
59
60 This pass only splits moves with modes that are wider than
d7fde18c
JJ
61 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
62 integer modes that are twice the width of word_mode. The latter
63 could be generalized if there was a need to do this, but the trend in
af4ba423
KZ
64 architectures is to not need this.
65
66 There are two useful preprocessor defines for use by maintainers:
67
68 #define LOG_COSTS 1
69
70 if you wish to see the actual cost estimates that are being used
71 for each mode wider than word mode and the cost estimates for zero
72 extension and the shifts. This can be useful when port maintainers
73 are tuning insn rtx costs.
74
75 #define FORCE_LOWERING 1
76
77 if you wish to test the pass with all the transformation forced on.
78 This can be useful for finding bugs in the transformations. */
79
80#define LOG_COSTS 0
81#define FORCE_LOWERING 0
e53a16e7
ILT
82
83/* Bit N in this bitmap is set if regno N is used in a context in
84 which we can decompose it. */
85static bitmap decomposable_context;
86
87/* Bit N in this bitmap is set if regno N is used in a context in
88 which it can not be decomposed. */
89static bitmap non_decomposable_context;
90
402464a0
BS
91/* Bit N in this bitmap is set if regno N is used in a subreg
92 which changes the mode but not the size. This typically happens
93 when the register is accessed as a floating-point value; we want to
94 avoid generating accesses to its subwords in integer modes. */
95static bitmap subreg_context;
96
e53a16e7
ILT
97/* Bit N in the bitmap in element M of this array is set if there is a
98 copy from reg M to reg N. */
9771b263 99static vec<bitmap> reg_copy_graph;
e53a16e7 100
af4ba423
KZ
101struct target_lower_subreg default_target_lower_subreg;
102#if SWITCHABLE_TARGET
103struct target_lower_subreg *this_target_lower_subreg
104 = &default_target_lower_subreg;
105#endif
106
107#define twice_word_mode \
108 this_target_lower_subreg->x_twice_word_mode
109#define choices \
110 this_target_lower_subreg->x_choices
111
112/* RTXes used while computing costs. */
113struct cost_rtxes {
114 /* Source and target registers. */
115 rtx source;
116 rtx target;
117
118 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
119 rtx zext;
120
121 /* A shift of SOURCE. */
122 rtx shift;
123
124 /* A SET of TARGET. */
125 rtx set;
126};
127
128/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
129 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
130
131static int
132shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
133 enum machine_mode mode, int op1)
134{
af4ba423
KZ
135 PUT_CODE (rtxes->shift, code);
136 PUT_MODE (rtxes->shift, mode);
137 PUT_MODE (rtxes->source, mode);
138 XEXP (rtxes->shift, 1) = GEN_INT (op1);
69523a7c 139 return set_src_cost (rtxes->shift, speed_p);
af4ba423
KZ
140}
141
142/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
143 to true if it is profitable to split a double-word CODE shift
144 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
145 for speed or size profitability.
146
147 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
148 the cost of moving zero into a word-mode register. WORD_MOVE_COST
149 is the cost of moving between word registers. */
150
151static void
152compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
153 bool *splitting, enum rtx_code code,
154 int word_move_zero_cost, int word_move_cost)
155{
d7fde18c 156 int wide_cost, narrow_cost, upper_cost, i;
af4ba423
KZ
157
158 for (i = 0; i < BITS_PER_WORD; i++)
159 {
160 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
161 i + BITS_PER_WORD);
162 if (i == 0)
163 narrow_cost = word_move_cost;
164 else
165 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
166
d7fde18c
JJ
167 if (code != ASHIFTRT)
168 upper_cost = word_move_zero_cost;
169 else if (i == BITS_PER_WORD - 1)
170 upper_cost = word_move_cost;
171 else
172 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
173 BITS_PER_WORD - 1);
174
af4ba423
KZ
175 if (LOG_COSTS)
176 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
177 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
d7fde18c 178 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
af4ba423 179
d7fde18c 180 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
af4ba423
KZ
181 splitting[i] = true;
182 }
183}
184
185/* Compute what we should do when optimizing for speed or size; SPEED_P
186 selects which. Use RTXES for computing costs. */
187
188static void
189compute_costs (bool speed_p, struct cost_rtxes *rtxes)
190{
191 unsigned int i;
192 int word_move_zero_cost, word_move_cost;
193
69523a7c 194 PUT_MODE (rtxes->target, word_mode);
af4ba423 195 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
69523a7c 196 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
197
198 SET_SRC (rtxes->set) = rtxes->source;
69523a7c 199 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
200
201 if (LOG_COSTS)
202 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
203 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
204
205 for (i = 0; i < MAX_MACHINE_MODE; i++)
206 {
207 enum machine_mode mode = (enum machine_mode) i;
208 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
209 if (factor > 1)
210 {
211 int mode_move_cost;
212
213 PUT_MODE (rtxes->target, mode);
214 PUT_MODE (rtxes->source, mode);
69523a7c 215 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
216
217 if (LOG_COSTS)
218 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
219 GET_MODE_NAME (mode), mode_move_cost,
220 word_move_cost, factor);
221
222 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
223 {
224 choices[speed_p].move_modes_to_split[i] = true;
225 choices[speed_p].something_to_do = true;
226 }
227 }
228 }
229
230 /* For the moves and shifts, the only case that is checked is one
231 where the mode of the target is an integer mode twice the width
232 of the word_mode.
233
234 If it is not profitable to split a double word move then do not
235 even consider the shifts or the zero extension. */
236 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
237 {
238 int zext_cost;
239
240 /* The only case here to check to see if moving the upper part with a
241 zero is cheaper than doing the zext itself. */
af4ba423 242 PUT_MODE (rtxes->source, word_mode);
69523a7c 243 zext_cost = set_src_cost (rtxes->zext, speed_p);
af4ba423
KZ
244
245 if (LOG_COSTS)
246 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
247 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
248 zext_cost, word_move_cost, word_move_zero_cost);
249
250 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
251 choices[speed_p].splitting_zext = true;
252
253 compute_splitting_shift (speed_p, rtxes,
254 choices[speed_p].splitting_ashift, ASHIFT,
255 word_move_zero_cost, word_move_cost);
256 compute_splitting_shift (speed_p, rtxes,
257 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
258 word_move_zero_cost, word_move_cost);
d7fde18c
JJ
259 compute_splitting_shift (speed_p, rtxes,
260 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
261 word_move_zero_cost, word_move_cost);
af4ba423
KZ
262 }
263}
264
265/* Do one-per-target initialisation. This involves determining
266 which operations on the machine are profitable. If none are found,
267 then the pass just returns when called. */
268
269void
270init_lower_subreg (void)
271{
272 struct cost_rtxes rtxes;
273
274 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
275
276 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
277
278 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
279 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
280 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
281 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
282 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
283
284 if (LOG_COSTS)
285 fprintf (stderr, "\nSize costs\n==========\n\n");
286 compute_costs (false, &rtxes);
287
288 if (LOG_COSTS)
289 fprintf (stderr, "\nSpeed costs\n===========\n\n");
290 compute_costs (true, &rtxes);
291}
2b54c30f
ILT
292
293static bool
294simple_move_operand (rtx x)
295{
296 if (GET_CODE (x) == SUBREG)
297 x = SUBREG_REG (x);
298
299 if (!OBJECT_P (x))
300 return false;
301
302 if (GET_CODE (x) == LABEL_REF
303 || GET_CODE (x) == SYMBOL_REF
7e0c3f57
ILT
304 || GET_CODE (x) == HIGH
305 || GET_CODE (x) == CONST)
2b54c30f
ILT
306 return false;
307
308 if (MEM_P (x)
309 && (MEM_VOLATILE_P (x)
5bfed9a9 310 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
2b54c30f
ILT
311 return false;
312
313 return true;
314}
315
af4ba423
KZ
316/* If INSN is a single set between two objects that we want to split,
317 return the single set. SPEED_P says whether we are optimizing
318 INSN for speed or size.
319
320 INSN should have been passed to recog and extract_insn before this
321 is called. */
e53a16e7
ILT
322
323static rtx
af4ba423 324simple_move (rtx insn, bool speed_p)
e53a16e7
ILT
325{
326 rtx x;
327 rtx set;
328 enum machine_mode mode;
329
330 if (recog_data.n_operands != 2)
331 return NULL_RTX;
332
333 set = single_set (insn);
334 if (!set)
335 return NULL_RTX;
336
337 x = SET_DEST (set);
338 if (x != recog_data.operand[0] && x != recog_data.operand[1])
339 return NULL_RTX;
2b54c30f 340 if (!simple_move_operand (x))
e53a16e7
ILT
341 return NULL_RTX;
342
343 x = SET_SRC (set);
344 if (x != recog_data.operand[0] && x != recog_data.operand[1])
345 return NULL_RTX;
2b54c30f
ILT
346 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
347 things like x86 rdtsc which returns a DImode value. */
348 if (GET_CODE (x) != ASM_OPERANDS
349 && !simple_move_operand (x))
e53a16e7
ILT
350 return NULL_RTX;
351
352 /* We try to decompose in integer modes, to avoid generating
353 inefficient code copying between integer and floating point
354 registers. That means that we can't decompose if this is a
355 non-integer mode for which there is no integer mode of the same
356 size. */
576fe41a 357 mode = GET_MODE (SET_DEST (set));
e53a16e7
ILT
358 if (!SCALAR_INT_MODE_P (mode)
359 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
360 == BLKmode))
361 return NULL_RTX;
362
1f64a081
ILT
363 /* Reject PARTIAL_INT modes. They are used for processor specific
364 purposes and it's probably best not to tamper with them. */
365 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
366 return NULL_RTX;
367
af4ba423
KZ
368 if (!choices[speed_p].move_modes_to_split[(int) mode])
369 return NULL_RTX;
370
e53a16e7
ILT
371 return set;
372}
373
374/* If SET is a copy from one multi-word pseudo-register to another,
375 record that in reg_copy_graph. Return whether it is such a
376 copy. */
377
378static bool
379find_pseudo_copy (rtx set)
380{
381 rtx dest = SET_DEST (set);
382 rtx src = SET_SRC (set);
383 unsigned int rd, rs;
384 bitmap b;
385
386 if (!REG_P (dest) || !REG_P (src))
387 return false;
388
389 rd = REGNO (dest);
390 rs = REGNO (src);
391 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
392 return false;
393
9771b263 394 b = reg_copy_graph[rs];
e53a16e7
ILT
395 if (b == NULL)
396 {
397 b = BITMAP_ALLOC (NULL);
9771b263 398 reg_copy_graph[rs] = b;
e53a16e7
ILT
399 }
400
401 bitmap_set_bit (b, rd);
402
403 return true;
404}
405
406/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
407 where they are copied to another register, add the register to
408 which they are copied to DECOMPOSABLE_CONTEXT. Use
409 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
410 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
411
412static void
413propagate_pseudo_copies (void)
414{
415 bitmap queue, propagate;
416
417 queue = BITMAP_ALLOC (NULL);
418 propagate = BITMAP_ALLOC (NULL);
419
420 bitmap_copy (queue, decomposable_context);
421 do
422 {
423 bitmap_iterator iter;
424 unsigned int i;
425
426 bitmap_clear (propagate);
427
428 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
429 {
9771b263 430 bitmap b = reg_copy_graph[i];
e53a16e7
ILT
431 if (b)
432 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
433 }
434
435 bitmap_and_compl (queue, propagate, decomposable_context);
436 bitmap_ior_into (decomposable_context, propagate);
437 }
438 while (!bitmap_empty_p (queue));
439
440 BITMAP_FREE (queue);
441 BITMAP_FREE (propagate);
442}
443
/* Classification of the insn being scanned.  find_decomposable_subregs
   receives a pointer to one of these values as its for_each_rtx data
   argument.  */

enum classify_move_insn
{
  /* The insn is not a simple move between two locations.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is a simple move of any other kind.  */
  SIMPLE_MOVE
};
456
457/* This is called via for_each_rtx. If we find a SUBREG which we
458 could use to decompose a pseudo-register, set a bit in
459 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
460 not a simple pseudo-register copy, DATA will point at the type of
461 move, and we set a bit in DECOMPOSABLE_CONTEXT or
462 NON_DECOMPOSABLE_CONTEXT as appropriate. */
463
464static int
465find_decomposable_subregs (rtx *px, void *data)
466{
467 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
468 rtx x = *px;
469
470 if (x == NULL_RTX)
471 return 0;
472
473 if (GET_CODE (x) == SUBREG)
474 {
475 rtx inner = SUBREG_REG (x);
476 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
477
478 if (!REG_P (inner))
479 return 0;
480
481 regno = REGNO (inner);
482 if (HARD_REGISTER_NUM_P (regno))
483 return -1;
484
485 outer_size = GET_MODE_SIZE (GET_MODE (x));
486 inner_size = GET_MODE_SIZE (GET_MODE (inner));
487 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
488 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
489
490 /* We only try to decompose single word subregs of multi-word
491 registers. When we find one, we return -1 to avoid iterating
492 over the inner register.
493
494 ??? This doesn't allow, e.g., DImode subregs of TImode values
495 on 32-bit targets. We would need to record the way the
496 pseudo-register was used, and only decompose if all the uses
497 were the same number and size of pieces. Hopefully this
498 doesn't happen much. */
499
500 if (outer_words == 1 && inner_words > 1)
501 {
502 bitmap_set_bit (decomposable_context, regno);
503 return -1;
504 }
03743286
ILT
505
506 /* If this is a cast from one mode to another, where the modes
507 have the same size, and they are not tieable, then mark this
508 register as non-decomposable. If we decompose it we are
509 likely to mess up whatever the backend is trying to do. */
510 if (outer_words > 1
511 && outer_size == inner_size
512 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
513 {
514 bitmap_set_bit (non_decomposable_context, regno);
402464a0 515 bitmap_set_bit (subreg_context, regno);
03743286
ILT
516 return -1;
517 }
e53a16e7 518 }
2b54c30f 519 else if (REG_P (x))
e53a16e7
ILT
520 {
521 unsigned int regno;
522
523 /* We will see an outer SUBREG before we see the inner REG, so
524 when we see a plain REG here it means a direct reference to
525 the register.
526
527 If this is not a simple copy from one location to another,
528 then we can not decompose this register. If this is a simple
c2c47e8f 529 copy we want to decompose, and the mode is right,
4a8cae83
SB
530 then we mark the register as decomposable.
531 Otherwise we don't say anything about this register --
532 it could be decomposed, but whether that would be
e53a16e7
ILT
533 profitable depends upon how it is used elsewhere.
534
535 We only set bits in the bitmap for multi-word
536 pseudo-registers, since those are the only ones we care about
537 and it keeps the size of the bitmaps down. */
538
539 regno = REGNO (x);
540 if (!HARD_REGISTER_NUM_P (regno)
541 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
542 {
543 switch (*pcmi)
544 {
545 case NOT_SIMPLE_MOVE:
546 bitmap_set_bit (non_decomposable_context, regno);
547 break;
c2c47e8f 548 case DECOMPOSABLE_SIMPLE_MOVE:
e53a16e7
ILT
549 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
550 bitmap_set_bit (decomposable_context, regno);
551 break;
552 case SIMPLE_MOVE:
553 break;
554 default:
555 gcc_unreachable ();
556 }
557 }
558 }
2b54c30f
ILT
559 else if (MEM_P (x))
560 {
561 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
562
563 /* Any registers used in a MEM do not participate in a
c2c47e8f 564 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
2b54c30f
ILT
565 here, and return -1 to block the parent's recursion. */
566 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
567 return -1;
568 }
e53a16e7
ILT
569
570 return 0;
571}
572
573/* Decompose REGNO into word-sized components. We smash the REG node
574 in place. This ensures that (1) something goes wrong quickly if we
575 fail to make some replacement, and (2) the debug information inside
576 the symbol table is automatically kept up to date. */
577
578static void
579decompose_register (unsigned int regno)
580{
581 rtx reg;
582 unsigned int words, i;
583 rtvec v;
584
585 reg = regno_reg_rtx[regno];
586
587 regno_reg_rtx[regno] = NULL_RTX;
e53a16e7
ILT
588
589 words = GET_MODE_SIZE (GET_MODE (reg));
590 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
591
592 v = rtvec_alloc (words);
593 for (i = 0; i < words; ++i)
594 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
595
596 PUT_CODE (reg, CONCATN);
597 XVEC (reg, 0) = v;
598
599 if (dump_file)
600 {
601 fprintf (dump_file, "; Splitting reg %u ->", regno);
602 for (i = 0; i < words; ++i)
603 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
604 fputc ('\n', dump_file);
605 }
606}
607
608/* Get a SUBREG of a CONCATN. */
609
610static rtx
611simplify_subreg_concatn (enum machine_mode outermode, rtx op,
612 unsigned int byte)
613{
614 unsigned int inner_size;
695ae295 615 enum machine_mode innermode, partmode;
e53a16e7
ILT
616 rtx part;
617 unsigned int final_offset;
618
619 gcc_assert (GET_CODE (op) == CONCATN);
620 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
621
622 innermode = GET_MODE (op);
623 gcc_assert (byte < GET_MODE_SIZE (innermode));
624 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
625
626 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
627 part = XVECEXP (op, 0, byte / inner_size);
695ae295
UB
628 partmode = GET_MODE (part);
629
822a55a0
UB
630 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
631 regular CONST_VECTORs. They have vector or integer modes, depending
632 on the capabilities of the target. Cope with them. */
633 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
634 partmode = GET_MODE_INNER (innermode);
635 else if (partmode == VOIDmode)
695ae295 636 {
822a55a0
UB
637 enum mode_class mclass = GET_MODE_CLASS (innermode);
638 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
695ae295
UB
639 }
640
e53a16e7
ILT
641 final_offset = byte % inner_size;
642 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
643 return NULL_RTX;
644
695ae295 645 return simplify_gen_subreg (outermode, part, partmode, final_offset);
e53a16e7
ILT
646}
647
648/* Wrapper around simplify_gen_subreg which handles CONCATN. */
649
650static rtx
651simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
652 enum machine_mode innermode, unsigned int byte)
653{
0e6c5b58
ILT
654 rtx ret;
655
e53a16e7
ILT
656 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
657 If OP is a SUBREG of a CONCATN, then it must be a simple mode
658 change with the same size and offset 0, or it must extract a
659 part. We shouldn't see anything else here. */
660 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
661 {
662 rtx op2;
663
664 if ((GET_MODE_SIZE (GET_MODE (op))
665 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
666 && SUBREG_BYTE (op) == 0)
667 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
668 GET_MODE (SUBREG_REG (op)), byte);
669
670 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
671 SUBREG_BYTE (op));
672 if (op2 == NULL_RTX)
673 {
674 /* We don't handle paradoxical subregs here. */
675 gcc_assert (GET_MODE_SIZE (outermode)
676 <= GET_MODE_SIZE (GET_MODE (op)));
677 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
678 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
679 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
680 byte + SUBREG_BYTE (op));
681 gcc_assert (op2 != NULL_RTX);
682 return op2;
683 }
684
685 op = op2;
686 gcc_assert (op != NULL_RTX);
687 gcc_assert (innermode == GET_MODE (op));
688 }
0e6c5b58 689
e53a16e7
ILT
690 if (GET_CODE (op) == CONCATN)
691 return simplify_subreg_concatn (outermode, op, byte);
0e6c5b58
ILT
692
693 ret = simplify_gen_subreg (outermode, op, innermode, byte);
694
695 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
696 resolve_simple_move will ask for the high part of the paradoxical
697 subreg, which does not have a value. Just return a zero. */
698 if (ret == NULL_RTX
699 && GET_CODE (op) == SUBREG
700 && SUBREG_BYTE (op) == 0
701 && (GET_MODE_SIZE (innermode)
702 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
703 return CONST0_RTX (outermode);
704
705 gcc_assert (ret != NULL_RTX);
706 return ret;
e53a16e7
ILT
707}
708
709/* Return whether we should resolve X into the registers into which it
710 was decomposed. */
711
712static bool
713resolve_reg_p (rtx x)
714{
715 return GET_CODE (x) == CONCATN;
716}
717
718/* Return whether X is a SUBREG of a register which we need to
719 resolve. */
720
721static bool
722resolve_subreg_p (rtx x)
723{
724 if (GET_CODE (x) != SUBREG)
725 return false;
726 return resolve_reg_p (SUBREG_REG (x));
727}
728
729/* This is called via for_each_rtx. Look for SUBREGs which need to be
730 decomposed. */
731
732static int
733resolve_subreg_use (rtx *px, void *data)
734{
735 rtx insn = (rtx) data;
736 rtx x = *px;
737
738 if (x == NULL_RTX)
739 return 0;
740
741 if (resolve_subreg_p (x))
742 {
743 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
744 SUBREG_BYTE (x));
745
746 /* It is possible for a note to contain a reference which we can
747 decompose. In this case, return 1 to the caller to indicate
748 that the note must be removed. */
749 if (!x)
750 {
30d18db4 751 gcc_assert (!insn);
e53a16e7
ILT
752 return 1;
753 }
754
755 validate_change (insn, px, x, 1);
756 return -1;
757 }
758
759 if (resolve_reg_p (x))
760 {
761 /* Return 1 to the caller to indicate that we found a direct
762 reference to a register which is being decomposed. This can
e0892570
AK
763 happen inside notes, multiword shift or zero-extend
764 instructions. */
e53a16e7
ILT
765 return 1;
766 }
767
768 return 0;
769}
770
b5b8b0ac
AO
771/* This is called via for_each_rtx. Look for SUBREGs which can be
772 decomposed and decomposed REGs that need copying. */
773
774static int
775adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
776{
777 rtx x = *px;
778
779 if (x == NULL_RTX)
780 return 0;
781
782 if (resolve_subreg_p (x))
783 {
784 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
785 SUBREG_BYTE (x));
786
787 if (x)
788 *px = x;
789 else
790 x = copy_rtx (*px);
791 }
792
793 if (resolve_reg_p (x))
794 *px = copy_rtx (x);
795
796 return 0;
797}
798
e53a16e7
ILT
799/* Resolve any decomposed registers which appear in register notes on
800 INSN. */
801
802static void
803resolve_reg_notes (rtx insn)
804{
805 rtx *pnote, note;
806
807 note = find_reg_equal_equiv_note (insn);
808 if (note)
809 {
6fb5fa3c 810 int old_count = num_validated_changes ();
e53a16e7 811 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
4a8cae83 812 remove_note (insn, note);
6fb5fa3c
DB
813 else
814 if (old_count != num_validated_changes ())
815 df_notes_rescan (insn);
e53a16e7
ILT
816 }
817
818 pnote = &REG_NOTES (insn);
819 while (*pnote != NULL_RTX)
820 {
60564289 821 bool del = false;
e53a16e7
ILT
822
823 note = *pnote;
824 switch (REG_NOTE_KIND (note))
825 {
6fb5fa3c
DB
826 case REG_DEAD:
827 case REG_UNUSED:
e53a16e7 828 if (resolve_reg_p (XEXP (note, 0)))
60564289 829 del = true;
e53a16e7
ILT
830 break;
831
832 default:
833 break;
834 }
835
60564289 836 if (del)
e53a16e7
ILT
837 *pnote = XEXP (note, 1);
838 else
839 pnote = &XEXP (note, 1);
840 }
841}
842
2b54c30f 843/* Return whether X can be decomposed into subwords. */
e53a16e7
ILT
844
845static bool
2b54c30f 846can_decompose_p (rtx x)
e53a16e7
ILT
847{
848 if (REG_P (x))
849 {
850 unsigned int regno = REGNO (x);
851
852 if (HARD_REGISTER_NUM_P (regno))
488c8379
RS
853 {
854 unsigned int byte, num_bytes;
855
856 num_bytes = GET_MODE_SIZE (GET_MODE (x));
857 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
858 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
859 return false;
860 return true;
861 }
e53a16e7 862 else
402464a0 863 return !bitmap_bit_p (subreg_context, regno);
e53a16e7
ILT
864 }
865
2b54c30f 866 return true;
e53a16e7
ILT
867}
868
869/* Decompose the registers used in a simple move SET within INSN. If
870 we don't change anything, return INSN, otherwise return the start
871 of the sequence of moves. */
872
873static rtx
874resolve_simple_move (rtx set, rtx insn)
875{
876 rtx src, dest, real_dest, insns;
877 enum machine_mode orig_mode, dest_mode;
878 unsigned int words;
879 bool pushing;
880
881 src = SET_SRC (set);
882 dest = SET_DEST (set);
883 orig_mode = GET_MODE (dest);
884
885 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
af4ba423 886 gcc_assert (words > 1);
e53a16e7
ILT
887
888 start_sequence ();
889
890 /* We have to handle copying from a SUBREG of a decomposed reg where
891 the SUBREG is larger than word size. Rather than assume that we
892 can take a word_mode SUBREG of the destination, we copy to a new
893 register and then copy that to the destination. */
894
895 real_dest = NULL_RTX;
896
897 if (GET_CODE (src) == SUBREG
898 && resolve_reg_p (SUBREG_REG (src))
899 && (SUBREG_BYTE (src) != 0
900 || (GET_MODE_SIZE (orig_mode)
901 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
902 {
903 real_dest = dest;
904 dest = gen_reg_rtx (orig_mode);
905 if (REG_P (real_dest))
906 REG_ATTRS (dest) = REG_ATTRS (real_dest);
907 }
908
909 /* Similarly if we are copying to a SUBREG of a decomposed reg where
910 the SUBREG is larger than word size. */
911
912 if (GET_CODE (dest) == SUBREG
913 && resolve_reg_p (SUBREG_REG (dest))
914 && (SUBREG_BYTE (dest) != 0
915 || (GET_MODE_SIZE (orig_mode)
916 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
917 {
918 rtx reg, minsn, smove;
919
920 reg = gen_reg_rtx (orig_mode);
921 minsn = emit_move_insn (reg, src);
922 smove = single_set (minsn);
923 gcc_assert (smove != NULL_RTX);
924 resolve_simple_move (smove, minsn);
925 src = reg;
926 }
927
928 /* If we didn't have any big SUBREGS of decomposed registers, and
929 neither side of the move is a register we are decomposing, then
930 we don't have to do anything here. */
931
932 if (src == SET_SRC (set)
933 && dest == SET_DEST (set)
934 && !resolve_reg_p (src)
935 && !resolve_subreg_p (src)
936 && !resolve_reg_p (dest)
937 && !resolve_subreg_p (dest))
938 {
939 end_sequence ();
940 return insn;
941 }
942
30d18db4
ILT
943 /* It's possible for the code to use a subreg of a decomposed
944 register while forming an address. We need to handle that before
945 passing the address to emit_move_insn. We pass NULL_RTX as the
946 insn parameter to resolve_subreg_use because we can not validate
947 the insn yet. */
948 if (MEM_P (src) || MEM_P (dest))
949 {
950 int acg;
951
952 if (MEM_P (src))
953 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
954 if (MEM_P (dest))
955 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
956 acg = apply_change_group ();
957 gcc_assert (acg);
958 }
959
e53a16e7
ILT
960 /* If SRC is a register which we can't decompose, or has side
961 effects, we need to move via a temporary register. */
962
2b54c30f 963 if (!can_decompose_p (src)
e53a16e7
ILT
964 || side_effects_p (src)
965 || GET_CODE (src) == ASM_OPERANDS)
966 {
967 rtx reg;
968
969 reg = gen_reg_rtx (orig_mode);
ce5d49a8
ZC
970
971#ifdef AUTO_INC_DEC
972 {
973 rtx move = emit_move_insn (reg, src);
974 if (MEM_P (src))
975 {
976 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
977 if (note)
978 add_reg_note (move, REG_INC, XEXP (note, 0));
979 }
980 }
981#else
e53a16e7 982 emit_move_insn (reg, src);
ce5d49a8 983#endif
e53a16e7
ILT
984 src = reg;
985 }
986
987 /* If DEST is a register which we can't decompose, or has side
988 effects, we need to first move to a temporary register. We
989 handle the common case of pushing an operand directly. We also
990 go through a temporary register if it holds a floating point
991 value. This gives us better code on systems which can't move
992 data easily between integer and floating point registers. */
993
994 dest_mode = orig_mode;
995 pushing = push_operand (dest, dest_mode);
2b54c30f 996 if (!can_decompose_p (dest)
e53a16e7
ILT
997 || (side_effects_p (dest) && !pushing)
998 || (!SCALAR_INT_MODE_P (dest_mode)
999 && !resolve_reg_p (dest)
1000 && !resolve_subreg_p (dest)))
1001 {
1002 if (real_dest == NULL_RTX)
1003 real_dest = dest;
1004 if (!SCALAR_INT_MODE_P (dest_mode))
1005 {
1006 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1007 MODE_INT, 0);
1008 gcc_assert (dest_mode != BLKmode);
1009 }
1010 dest = gen_reg_rtx (dest_mode);
1011 if (REG_P (real_dest))
1012 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1013 }
1014
1015 if (pushing)
1016 {
1017 unsigned int i, j, jinc;
1018
1019 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1020 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1021 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1022
1023 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1024 {
1025 j = 0;
1026 jinc = 1;
1027 }
1028 else
1029 {
1030 j = words - 1;
1031 jinc = -1;
1032 }
1033
1034 for (i = 0; i < words; ++i, j += jinc)
1035 {
1036 rtx temp;
1037
1038 temp = copy_rtx (XEXP (dest, 0));
1039 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1040 j * UNITS_PER_WORD);
1041 emit_move_insn (temp,
1042 simplify_gen_subreg_concatn (word_mode, src,
1043 orig_mode,
1044 j * UNITS_PER_WORD));
1045 }
1046 }
1047 else
1048 {
1049 unsigned int i;
1050
1051 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
c41c1387 1052 emit_clobber (dest);
e53a16e7
ILT
1053
1054 for (i = 0; i < words; ++i)
1055 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1056 dest_mode,
1057 i * UNITS_PER_WORD),
1058 simplify_gen_subreg_concatn (word_mode, src,
1059 orig_mode,
1060 i * UNITS_PER_WORD));
1061 }
1062
1063 if (real_dest != NULL_RTX)
1064 {
1065 rtx mdest, minsn, smove;
1066
1067 if (dest_mode == orig_mode)
1068 mdest = dest;
1069 else
1070 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1071 minsn = emit_move_insn (real_dest, mdest);
1072
ce5d49a8
ZC
1073#ifdef AUTO_INC_DEC
1074 if (MEM_P (real_dest)
1075 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1076 {
1077 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1078 if (note)
1079 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1080 }
1081#endif
1082
e53a16e7
ILT
1083 smove = single_set (minsn);
1084 gcc_assert (smove != NULL_RTX);
1085
1086 resolve_simple_move (smove, minsn);
1087 }
1088
1089 insns = get_insns ();
1090 end_sequence ();
1091
1d65f45c 1092 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
73663bb7 1093
e53a16e7
ILT
1094 emit_insn_before (insns, insn);
1095
82981227 1096 /* If we get here via self-recursion, then INSN is not yet in the insns
6873ecab
SB
1097 chain and delete_insn will fail. We only want to remove INSN from the
1098 current sequence. See PR56738. */
1099 if (in_sequence_p ())
1100 remove_insn (insn);
1101 else
1102 delete_insn (insn);
e53a16e7
ILT
1103
1104 return insns;
1105}
1106
1107/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1108 component registers. Return whether we changed something. */
1109
1110static bool
1111resolve_clobber (rtx pat, rtx insn)
1112{
d70dcf29 1113 rtx reg;
e53a16e7
ILT
1114 enum machine_mode orig_mode;
1115 unsigned int words, i;
7e0c3f57 1116 int ret;
e53a16e7
ILT
1117
1118 reg = XEXP (pat, 0);
9a5a8e58 1119 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
e53a16e7
ILT
1120 return false;
1121
1122 orig_mode = GET_MODE (reg);
1123 words = GET_MODE_SIZE (orig_mode);
1124 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1125
7e0c3f57
ILT
1126 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1127 simplify_gen_subreg_concatn (word_mode, reg,
1128 orig_mode, 0),
1129 0);
6fb5fa3c 1130 df_insn_rescan (insn);
7e0c3f57
ILT
1131 gcc_assert (ret != 0);
1132
e53a16e7
ILT
1133 for (i = words - 1; i > 0; --i)
1134 {
1135 rtx x;
1136
9a5a8e58
ILT
1137 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1138 i * UNITS_PER_WORD);
e53a16e7
ILT
1139 x = gen_rtx_CLOBBER (VOIDmode, x);
1140 emit_insn_after (x, insn);
1141 }
1142
d4fd3465
ILT
1143 resolve_reg_notes (insn);
1144
e53a16e7
ILT
1145 return true;
1146}
1147
1148/* A USE of a decomposed register is no longer meaningful. Return
1149 whether we changed something. */
1150
1151static bool
1152resolve_use (rtx pat, rtx insn)
1153{
1154 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1155 {
1156 delete_insn (insn);
1157 return true;
1158 }
d4fd3465
ILT
1159
1160 resolve_reg_notes (insn);
1161
e53a16e7
ILT
1162 return false;
1163}
1164
b5b8b0ac
AO
1165/* A VAR_LOCATION can be simplified. */
1166
1167static void
1168resolve_debug (rtx insn)
1169{
1170 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1171
1172 df_insn_rescan (insn);
1173
1174 resolve_reg_notes (insn);
1175}
1176
af4ba423
KZ
1177/* Check if INSN is a decomposable multiword-shift or zero-extend and
1178 set the decomposable_context bitmap accordingly. SPEED_P is true
1179 if we are optimizing INSN for speed rather than size. Return true
1180 if INSN is decomposable. */
e0892570 1181
af4ba423
KZ
1182static bool
1183find_decomposable_shift_zext (rtx insn, bool speed_p)
e0892570
AK
1184{
1185 rtx set;
1186 rtx op;
1187 rtx op_operand;
1188
1189 set = single_set (insn);
1190 if (!set)
af4ba423 1191 return false;
e0892570
AK
1192
1193 op = SET_SRC (set);
1194 if (GET_CODE (op) != ASHIFT
1195 && GET_CODE (op) != LSHIFTRT
d7fde18c 1196 && GET_CODE (op) != ASHIFTRT
e0892570 1197 && GET_CODE (op) != ZERO_EXTEND)
af4ba423 1198 return false;
e0892570
AK
1199
1200 op_operand = XEXP (op, 0);
1201 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1202 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1203 || HARD_REGISTER_NUM_P (REGNO (op_operand))
af4ba423
KZ
1204 || GET_MODE (op) != twice_word_mode)
1205 return false;
e0892570
AK
1206
1207 if (GET_CODE (op) == ZERO_EXTEND)
1208 {
1209 if (GET_MODE (op_operand) != word_mode
af4ba423
KZ
1210 || !choices[speed_p].splitting_zext)
1211 return false;
e0892570
AK
1212 }
1213 else /* left or right shift */
1214 {
af4ba423
KZ
1215 bool *splitting = (GET_CODE (op) == ASHIFT
1216 ? choices[speed_p].splitting_ashift
d7fde18c
JJ
1217 : GET_CODE (op) == ASHIFTRT
1218 ? choices[speed_p].splitting_ashiftrt
af4ba423 1219 : choices[speed_p].splitting_lshiftrt);
481683e1 1220 if (!CONST_INT_P (XEXP (op, 1))
af4ba423
KZ
1221 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1222 2 * BITS_PER_WORD - 1)
1223 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1224 return false;
1225
1226 bitmap_set_bit (decomposable_context, REGNO (op_operand));
e0892570
AK
1227 }
1228
1229 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1230
af4ba423 1231 return true;
e0892570
AK
1232}
1233
1234/* Decompose a more than word wide shift (in INSN) of a multiword
1235 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1236 and 'set to zero' insn. Return a pointer to the new insn when a
1237 replacement was done. */
1238
1239static rtx
1240resolve_shift_zext (rtx insn)
1241{
1242 rtx set;
1243 rtx op;
1244 rtx op_operand;
1245 rtx insns;
d7fde18c 1246 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
e0892570
AK
1247 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1248
1249 set = single_set (insn);
1250 if (!set)
1251 return NULL_RTX;
1252
1253 op = SET_SRC (set);
1254 if (GET_CODE (op) != ASHIFT
1255 && GET_CODE (op) != LSHIFTRT
d7fde18c 1256 && GET_CODE (op) != ASHIFTRT
e0892570
AK
1257 && GET_CODE (op) != ZERO_EXTEND)
1258 return NULL_RTX;
1259
1260 op_operand = XEXP (op, 0);
1261
af4ba423
KZ
1262 /* We can tear this operation apart only if the regs were already
1263 torn apart. */
e0892570
AK
1264 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1265 return NULL_RTX;
1266
1267 /* src_reg_num is the number of the word mode register which we
1268 are operating on. For a left shift and a zero_extend on little
1269 endian machines this is register 0. */
d7fde18c
JJ
1270 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1271 ? 1 : 0;
e0892570 1272
acbe5496
AK
1273 if (WORDS_BIG_ENDIAN
1274 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
e0892570
AK
1275 src_reg_num = 1 - src_reg_num;
1276
1277 if (GET_CODE (op) == ZERO_EXTEND)
acbe5496 1278 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
e0892570
AK
1279 else
1280 dest_reg_num = 1 - src_reg_num;
1281
1282 offset1 = UNITS_PER_WORD * dest_reg_num;
1283 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1284 src_offset = UNITS_PER_WORD * src_reg_num;
1285
e0892570
AK
1286 start_sequence ();
1287
1288 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1289 GET_MODE (SET_DEST (set)),
1290 offset1);
d7fde18c
JJ
1291 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1292 GET_MODE (SET_DEST (set)),
1293 offset2);
e0892570
AK
1294 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1295 GET_MODE (op_operand),
1296 src_offset);
d7fde18c
JJ
1297 if (GET_CODE (op) == ASHIFTRT
1298 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1299 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1300 BITS_PER_WORD - 1, NULL_RTX, 0);
1301
e0892570
AK
1302 if (GET_CODE (op) != ZERO_EXTEND)
1303 {
1304 int shift_count = INTVAL (XEXP (op, 1));
1305 if (shift_count > BITS_PER_WORD)
1306 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1307 LSHIFT_EXPR : RSHIFT_EXPR,
1308 word_mode, src_reg,
eb6c3df1 1309 shift_count - BITS_PER_WORD,
d7fde18c 1310 dest_reg, GET_CODE (op) != ASHIFTRT);
e0892570
AK
1311 }
1312
1313 if (dest_reg != src_reg)
1314 emit_move_insn (dest_reg, src_reg);
d7fde18c
JJ
1315 if (GET_CODE (op) != ASHIFTRT)
1316 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1317 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1318 emit_move_insn (dest_upper, copy_rtx (src_reg));
1319 else
1320 emit_move_insn (dest_upper, upper_src);
e0892570
AK
1321 insns = get_insns ();
1322
1323 end_sequence ();
1324
1325 emit_insn_before (insns, insn);
1326
1327 if (dump_file)
1328 {
1329 rtx in;
1330 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1331 for (in = insns; in != insn; in = NEXT_INSN (in))
1332 fprintf (dump_file, "%d ", INSN_UID (in));
1333 fprintf (dump_file, "\n");
1334 }
1335
1336 delete_insn (insn);
1337 return insns;
1338}
1339
af4ba423
KZ
1340/* Print to dump_file a description of what we're doing with shift code CODE.
1341 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1342
1343static void
1344dump_shift_choices (enum rtx_code code, bool *splitting)
1345{
1346 int i;
1347 const char *sep;
1348
1349 fprintf (dump_file,
1350 " Splitting mode %s for %s lowering with shift amounts = ",
1351 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1352 sep = "";
1353 for (i = 0; i < BITS_PER_WORD; i++)
1354 if (splitting[i])
1355 {
1356 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1357 sep = ",";
1358 }
1359 fprintf (dump_file, "\n");
1360}
1361
1362/* Print to dump_file a description of what we're doing when optimizing
1363 for speed or size; SPEED_P says which. DESCRIPTION is a description
1364 of the SPEED_P choice. */
1365
1366static void
1367dump_choices (bool speed_p, const char *description)
1368{
1369 unsigned int i;
1370
1371 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1372
1373 for (i = 0; i < MAX_MACHINE_MODE; i++)
1374 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1375 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1376 choices[speed_p].move_modes_to_split[i]
1377 ? "Splitting"
1378 : "Skipping",
1379 GET_MODE_NAME ((enum machine_mode) i));
1380
1381 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1382 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1383 GET_MODE_NAME (twice_word_mode));
1384
1385 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
d7fde18c
JJ
1386 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1387 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
af4ba423
KZ
1388 fprintf (dump_file, "\n");
1389}
1390
e53a16e7 1391/* Look for registers which are always accessed via word-sized SUBREGs
c2c47e8f
UW
1392 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1393 registers into several word-sized pseudo-registers. */
e53a16e7
ILT
1394
1395static void
c2c47e8f 1396decompose_multiword_subregs (bool decompose_copies)
e53a16e7
ILT
1397{
1398 unsigned int max;
1399 basic_block bb;
af4ba423 1400 bool speed_p;
e53a16e7 1401
af4ba423
KZ
1402 if (dump_file)
1403 {
1404 dump_choices (false, "size");
1405 dump_choices (true, "speed");
1406 }
1407
1408 /* Check if this target even has any modes to consider lowering. */
1409 if (!choices[false].something_to_do && !choices[true].something_to_do)
1410 {
1411 if (dump_file)
1412 fprintf (dump_file, "Nothing to do!\n");
1413 return;
1414 }
6fb5fa3c 1415
e53a16e7
ILT
1416 max = max_reg_num ();
1417
1418 /* First see if there are any multi-word pseudo-registers. If there
1419 aren't, there is nothing we can do. This should speed up this
1420 pass in the normal case, since it should be faster than scanning
1421 all the insns. */
1422 {
1423 unsigned int i;
af4ba423 1424 bool useful_modes_seen = false;
e53a16e7
ILT
1425
1426 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
af4ba423
KZ
1427 if (regno_reg_rtx[i] != NULL)
1428 {
1429 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1430 if (choices[false].move_modes_to_split[(int) mode]
1431 || choices[true].move_modes_to_split[(int) mode])
1432 {
1433 useful_modes_seen = true;
1434 break;
1435 }
1436 }
1437
1438 if (!useful_modes_seen)
e53a16e7 1439 {
af4ba423
KZ
1440 if (dump_file)
1441 fprintf (dump_file, "Nothing to lower in this function.\n");
1442 return;
e53a16e7 1443 }
e53a16e7
ILT
1444 }
1445
8d074192 1446 if (df)
af4ba423
KZ
1447 {
1448 df_set_flags (DF_DEFER_INSN_RESCAN);
1449 run_word_dce ();
1450 }
8d074192 1451
af4ba423
KZ
1452 /* FIXME: It may be possible to change this code to look for each
1453 multi-word pseudo-register and to find each insn which sets or
1454 uses that register. That should be faster than scanning all the
1455 insns. */
e53a16e7
ILT
1456
1457 decomposable_context = BITMAP_ALLOC (NULL);
1458 non_decomposable_context = BITMAP_ALLOC (NULL);
402464a0 1459 subreg_context = BITMAP_ALLOC (NULL);
e53a16e7 1460
9771b263
DN
1461 reg_copy_graph.create (max);
1462 reg_copy_graph.safe_grow_cleared (max);
1463 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
e53a16e7 1464
af4ba423 1465 speed_p = optimize_function_for_speed_p (cfun);
11cd3bed 1466 FOR_EACH_BB_FN (bb, cfun)
e53a16e7
ILT
1467 {
1468 rtx insn;
1469
1470 FOR_BB_INSNS (bb, insn)
1471 {
1472 rtx set;
1473 enum classify_move_insn cmi;
1474 int i, n;
1475
1476 if (!INSN_P (insn)
1477 || GET_CODE (PATTERN (insn)) == CLOBBER
1478 || GET_CODE (PATTERN (insn)) == USE)
1479 continue;
1480
d5785e76
JJ
1481 recog_memoized (insn);
1482
af4ba423 1483 if (find_decomposable_shift_zext (insn, speed_p))
e0892570
AK
1484 continue;
1485
e53a16e7
ILT
1486 extract_insn (insn);
1487
af4ba423 1488 set = simple_move (insn, speed_p);
e53a16e7
ILT
1489
1490 if (!set)
1491 cmi = NOT_SIMPLE_MOVE;
1492 else
1493 {
c2c47e8f
UW
1494 /* We mark pseudo-to-pseudo copies as decomposable during the
1495 second pass only. The first pass is so early that there is
1496 good chance such moves will be optimized away completely by
1497 subsequent optimizations anyway.
1498
1499 However, we call find_pseudo_copy even during the first pass
1500 so as to properly set up the reg_copy_graph. */
4a8cae83 1501 if (find_pseudo_copy (set))
c2c47e8f 1502 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
e53a16e7
ILT
1503 else
1504 cmi = SIMPLE_MOVE;
1505 }
1506
1507 n = recog_data.n_operands;
1508 for (i = 0; i < n; ++i)
1509 {
1510 for_each_rtx (&recog_data.operand[i],
1511 find_decomposable_subregs,
1512 &cmi);
1513
1514 /* We handle ASM_OPERANDS as a special case to support
1515 things like x86 rdtsc which returns a DImode value.
1516 We can decompose the output, which will certainly be
1517 operand 0, but not the inputs. */
1518
1519 if (cmi == SIMPLE_MOVE
1520 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1521 {
1522 gcc_assert (i == 0);
1523 cmi = NOT_SIMPLE_MOVE;
1524 }
1525 }
1526 }
1527 }
1528
1529 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1530 if (!bitmap_empty_p (decomposable_context))
1531 {
73663bb7 1532 sbitmap sub_blocks;
7984c787
SB
1533 unsigned int i;
1534 sbitmap_iterator sbi;
e53a16e7
ILT
1535 bitmap_iterator iter;
1536 unsigned int regno;
1537
1538 propagate_pseudo_copies ();
1539
8b1c6fd7 1540 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
f61e445a 1541 bitmap_clear (sub_blocks);
e53a16e7
ILT
1542
1543 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1544 decompose_register (regno);
1545
11cd3bed 1546 FOR_EACH_BB_FN (bb, cfun)
e53a16e7 1547 {
ba4807a0 1548 rtx insn;
e53a16e7 1549
ba4807a0 1550 FOR_BB_INSNS (bb, insn)
e53a16e7 1551 {
11895e28 1552 rtx pat;
e53a16e7
ILT
1553
1554 if (!INSN_P (insn))
1555 continue;
1556
e53a16e7
ILT
1557 pat = PATTERN (insn);
1558 if (GET_CODE (pat) == CLOBBER)
d4fd3465 1559 resolve_clobber (pat, insn);
e53a16e7 1560 else if (GET_CODE (pat) == USE)
d4fd3465 1561 resolve_use (pat, insn);
b5b8b0ac
AO
1562 else if (DEBUG_INSN_P (insn))
1563 resolve_debug (insn);
e53a16e7
ILT
1564 else
1565 {
1566 rtx set;
1567 int i;
1568
1569 recog_memoized (insn);
1570 extract_insn (insn);
1571
af4ba423 1572 set = simple_move (insn, speed_p);
e53a16e7
ILT
1573 if (set)
1574 {
1575 rtx orig_insn = insn;
73663bb7 1576 bool cfi = control_flow_insn_p (insn);
e53a16e7 1577
7984c787
SB
1578 /* We can end up splitting loads to multi-word pseudos
1579 into separate loads to machine word size pseudos.
1580 When this happens, we first had one load that can
1581 throw, and after resolve_simple_move we'll have a
1582 bunch of loads (at least two). All those loads may
1583 trap if we can have non-call exceptions, so they
1584 all will end the current basic block. We split the
1585 block after the outer loop over all insns, but we
1586 make sure here that we will be able to split the
1587 basic block and still produce the correct control
1588 flow graph for it. */
1589 gcc_assert (!cfi
8f4f502f 1590 || (cfun->can_throw_non_call_exceptions
7984c787
SB
1591 && can_throw_internal (insn)));
1592
e53a16e7
ILT
1593 insn = resolve_simple_move (set, insn);
1594 if (insn != orig_insn)
1595 {
e53a16e7
ILT
1596 recog_memoized (insn);
1597 extract_insn (insn);
73663bb7
ILT
1598
1599 if (cfi)
d7c028c0 1600 bitmap_set_bit (sub_blocks, bb->index);
e53a16e7
ILT
1601 }
1602 }
e0892570
AK
1603 else
1604 {
1605 rtx decomposed_shift;
1606
1607 decomposed_shift = resolve_shift_zext (insn);
1608 if (decomposed_shift != NULL_RTX)
1609 {
e0892570
AK
1610 insn = decomposed_shift;
1611 recog_memoized (insn);
1612 extract_insn (insn);
1613 }
1614 }
e53a16e7
ILT
1615
1616 for (i = recog_data.n_operands - 1; i >= 0; --i)
1617 for_each_rtx (recog_data.operand_loc[i],
1618 resolve_subreg_use,
1619 insn);
1620
1621 resolve_reg_notes (insn);
1622
1623 if (num_validated_changes () > 0)
1624 {
1625 for (i = recog_data.n_dups - 1; i >= 0; --i)
1626 {
1627 rtx *pl = recog_data.dup_loc[i];
1628 int dup_num = recog_data.dup_num[i];
1629 rtx *px = recog_data.operand_loc[dup_num];
1630
1a309dfb 1631 validate_unshare_change (insn, pl, *px, 1);
e53a16e7
ILT
1632 }
1633
1634 i = apply_change_group ();
1635 gcc_assert (i);
e53a16e7
ILT
1636 }
1637 }
e53a16e7
ILT
1638 }
1639 }
1640
7984c787
SB
1641 /* If we had insns to split that caused control flow insns in the middle
1642 of a basic block, split those blocks now. Note that we only handle
1643 the case where splitting a load has caused multiple possibly trapping
1644 loads to appear. */
d4ac4ce2 1645 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
7984c787
SB
1646 {
1647 rtx insn, end;
1648 edge fallthru;
1649
06e28de2 1650 bb = BASIC_BLOCK_FOR_FN (cfun, i);
7984c787
SB
1651 insn = BB_HEAD (bb);
1652 end = BB_END (bb);
1653
1654 while (insn != end)
1655 {
1656 if (control_flow_insn_p (insn))
1657 {
1658 /* Split the block after insn. There will be a fallthru
1659 edge, which is OK so we keep it. We have to create the
1660 exception edges ourselves. */
1661 fallthru = split_block (bb, insn);
1662 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1663 bb = fallthru->dest;
1664 insn = BB_HEAD (bb);
1665 }
1666 else
1667 insn = NEXT_INSN (insn);
1668 }
1669 }
73663bb7 1670
73663bb7 1671 sbitmap_free (sub_blocks);
e53a16e7
ILT
1672 }
1673
1674 {
1675 unsigned int i;
1676 bitmap b;
1677
9771b263 1678 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
e53a16e7
ILT
1679 if (b)
1680 BITMAP_FREE (b);
1681 }
1682
9771b263 1683 reg_copy_graph.release ();
e53a16e7
ILT
1684
1685 BITMAP_FREE (decomposable_context);
1686 BITMAP_FREE (non_decomposable_context);
402464a0 1687 BITMAP_FREE (subreg_context);
e53a16e7
ILT
1688}
1689\f
1690/* Gate function for lower subreg pass. */
1691
1692static bool
1693gate_handle_lower_subreg (void)
1694{
1695 return flag_split_wide_types != 0;
1696}
1697
1698/* Implement first lower subreg pass. */
1699
1700static unsigned int
1701rest_of_handle_lower_subreg (void)
1702{
c2c47e8f 1703 decompose_multiword_subregs (false);
e53a16e7
ILT
1704 return 0;
1705}
1706
1707/* Implement second lower subreg pass. */
1708
1709static unsigned int
1710rest_of_handle_lower_subreg2 (void)
1711{
c2c47e8f 1712 decompose_multiword_subregs (true);
e53a16e7
ILT
1713 return 0;
1714}
1715
27a4cd48
DM
1716namespace {
1717
1718const pass_data pass_data_lower_subreg =
e53a16e7 1719{
27a4cd48
DM
1720 RTL_PASS, /* type */
1721 "subreg1", /* name */
1722 OPTGROUP_NONE, /* optinfo_flags */
1723 true, /* has_gate */
1724 true, /* has_execute */
1725 TV_LOWER_SUBREG, /* tv_id */
1726 0, /* properties_required */
1727 0, /* properties_provided */
1728 0, /* properties_destroyed */
1729 0, /* todo_flags_start */
1730 TODO_verify_flow, /* todo_flags_finish */
e53a16e7
ILT
1731};
1732
27a4cd48
DM
1733class pass_lower_subreg : public rtl_opt_pass
1734{
1735public:
c3284718
RS
1736 pass_lower_subreg (gcc::context *ctxt)
1737 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
27a4cd48
DM
1738 {}
1739
1740 /* opt_pass methods: */
1741 bool gate () { return gate_handle_lower_subreg (); }
1742 unsigned int execute () { return rest_of_handle_lower_subreg (); }
1743
1744}; // class pass_lower_subreg
1745
1746} // anon namespace
1747
1748rtl_opt_pass *
1749make_pass_lower_subreg (gcc::context *ctxt)
1750{
1751 return new pass_lower_subreg (ctxt);
1752}
1753
1754namespace {
1755
1756const pass_data pass_data_lower_subreg2 =
e53a16e7 1757{
27a4cd48
DM
1758 RTL_PASS, /* type */
1759 "subreg2", /* name */
1760 OPTGROUP_NONE, /* optinfo_flags */
1761 true, /* has_gate */
1762 true, /* has_execute */
1763 TV_LOWER_SUBREG, /* tv_id */
1764 0, /* properties_required */
1765 0, /* properties_provided */
1766 0, /* properties_destroyed */
1767 0, /* todo_flags_start */
1768 ( TODO_df_finish | TODO_verify_rtl_sharing
1769 | TODO_verify_flow ), /* todo_flags_finish */
e53a16e7 1770};
27a4cd48
DM
1771
1772class pass_lower_subreg2 : public rtl_opt_pass
1773{
1774public:
c3284718
RS
1775 pass_lower_subreg2 (gcc::context *ctxt)
1776 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
27a4cd48
DM
1777 {}
1778
1779 /* opt_pass methods: */
1780 bool gate () { return gate_handle_lower_subreg (); }
1781 unsigned int execute () { return rest_of_handle_lower_subreg2 (); }
1782
1783}; // class pass_lower_subreg2
1784
1785} // anon namespace
1786
1787rtl_opt_pass *
1788make_pass_lower_subreg2 (gcc::context *ctxt)
1789{
1790 return new pass_lower_subreg2 (ctxt);
1791}