/* Decompose multiword subregs.
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "obstack.h"
#include "basic-block.h"
#include "recog.h"
#include "bitmap.h"
#include "dce.h"
#include "expr.h"
#include "except.h"
#include "regs.h"
#include "tree-pass.h"
#include "df.h"
#include "lower-subreg.h"

#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif


/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
   modes that are twice the width of word_mode.  The latter could be
   generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
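
/* As an illustration of the transformation (assuming a 32-bit target
   where word_mode is SImode), a multi-word copy such as

      (set (reg:DI 100) (reg:DI 101))

   can, once both pseudo-registers have been decomposed, be replaced
   by copies of the individual word-sized halves:

      (set (reg:SI 102) (reg:SI 104))
      (set (reg:SI 103) (reg:SI 105))

   The register numbers above are purely illustrative.  */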

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

96/* Bit N in the bitmap in element M of this array is set if there is a
97 copy from reg M to reg N. */
9771b263 98static vec<bitmap> reg_copy_graph;
e53a16e7 99
100struct target_lower_subreg default_target_lower_subreg;
101#if SWITCHABLE_TARGET
102struct target_lower_subreg *this_target_lower_subreg
103 = &default_target_lower_subreg;
104#endif
105
106#define twice_word_mode \
107 this_target_lower_subreg->x_twice_word_mode
108#define choices \
109 this_target_lower_subreg->x_choices
110
111/* RTXes used while computing costs. */
112struct cost_rtxes {
113 /* Source and target registers. */
114 rtx source;
115 rtx target;
116
117 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
118 rtx zext;
119
120 /* A shift of SOURCE. */
121 rtx shift;
122
123 /* A SET of TARGET. */
124 rtx set;
125};
126
127/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
128 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
129
130static int
131shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
132 enum machine_mode mode, int op1)
133{
134 PUT_CODE (rtxes->shift, code);
135 PUT_MODE (rtxes->shift, mode);
136 PUT_MODE (rtxes->source, mode);
137 XEXP (rtxes->shift, 1) = GEN_INT (op1);
69523a7c 138 return set_src_cost (rtxes->shift, speed_p);
139}
140
141/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
142 to true if it is profitable to split a double-word CODE shift
143 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
144 for speed or size profitability.
145
146 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
147 the cost of moving zero into a word-mode register. WORD_MOVE_COST
148 is the cost of moving between word registers. */
149
150static void
151compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
152 bool *splitting, enum rtx_code code,
153 int word_move_zero_cost, int word_move_cost)
154{
155 int wide_cost, narrow_cost, i;
156
157 for (i = 0; i < BITS_PER_WORD; i++)
158 {
159 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
160 i + BITS_PER_WORD);
161 if (i == 0)
162 narrow_cost = word_move_cost;
163 else
164 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
165
166 if (LOG_COSTS)
167 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
168 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
169 i + BITS_PER_WORD, wide_cost, narrow_cost,
170 word_move_zero_cost);
171
172 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
173 splitting[i] = true;
174 }
175}
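
/* As a concrete illustration (again on a 32-bit target), splitting
   (ashift:DI (reg:DI X) (const_int 40)) amounts to a word_mode shift
   of the low word by 8 bits into the high word of the result plus a
   move of zero into the low word; the comparison above asks whether
   those pieces are cheaper than the original double-word shift.  */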
176
177/* Compute what we should do when optimizing for speed or size; SPEED_P
178 selects which. Use RTXES for computing costs. */
179
180static void
181compute_costs (bool speed_p, struct cost_rtxes *rtxes)
182{
183 unsigned int i;
184 int word_move_zero_cost, word_move_cost;
185
69523a7c 186 PUT_MODE (rtxes->target, word_mode);
af4ba423 187 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
69523a7c 188 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
189
190 SET_SRC (rtxes->set) = rtxes->source;
69523a7c 191 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
192
193 if (LOG_COSTS)
194 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
195 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
196
197 for (i = 0; i < MAX_MACHINE_MODE; i++)
198 {
199 enum machine_mode mode = (enum machine_mode) i;
200 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
201 if (factor > 1)
202 {
203 int mode_move_cost;
204
205 PUT_MODE (rtxes->target, mode);
206 PUT_MODE (rtxes->source, mode);
69523a7c 207 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
208
209 if (LOG_COSTS)
210 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
211 GET_MODE_NAME (mode), mode_move_cost,
212 word_move_cost, factor);
213
214 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
215 {
216 choices[speed_p].move_modes_to_split[i] = true;
217 choices[speed_p].something_to_do = true;
218 }
219 }
220 }
221
222 /* For the moves and shifts, the only case that is checked is one
223 where the mode of the target is an integer mode twice the width
224 of the word_mode.
225
226 If it is not profitable to split a double word move then do not
227 even consider the shifts or the zero extension. */
228 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
229 {
230 int zext_cost;
231
      /* The only case to check here is whether moving the upper part with a
         zero is cheaper than doing the zext itself.  */
af4ba423 234 PUT_MODE (rtxes->source, word_mode);
69523a7c 235 zext_cost = set_src_cost (rtxes->zext, speed_p);
236
237 if (LOG_COSTS)
238 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
239 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
240 zext_cost, word_move_cost, word_move_zero_cost);
241
242 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
243 choices[speed_p].splitting_zext = true;
244
245 compute_splitting_shift (speed_p, rtxes,
246 choices[speed_p].splitting_ashift, ASHIFT,
247 word_move_zero_cost, word_move_cost);
248 compute_splitting_shift (speed_p, rtxes,
249 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
250 word_move_zero_cost, word_move_cost);
251 }
252}
253
254/* Do one-per-target initialisation. This involves determining
255 which operations on the machine are profitable. If none are found,
256 then the pass just returns when called. */
257
258void
259init_lower_subreg (void)
260{
261 struct cost_rtxes rtxes;
262
263 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
264
265 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
266
267 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
268 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
269 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
270 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
271 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
272
273 if (LOG_COSTS)
274 fprintf (stderr, "\nSize costs\n==========\n\n");
275 compute_costs (false, &rtxes);
276
277 if (LOG_COSTS)
278 fprintf (stderr, "\nSpeed costs\n===========\n\n");
279 compute_costs (true, &rtxes);
280}
281
282static bool
283simple_move_operand (rtx x)
284{
285 if (GET_CODE (x) == SUBREG)
286 x = SUBREG_REG (x);
287
288 if (!OBJECT_P (x))
289 return false;
290
291 if (GET_CODE (x) == LABEL_REF
292 || GET_CODE (x) == SYMBOL_REF
293 || GET_CODE (x) == HIGH
294 || GET_CODE (x) == CONST)
295 return false;
296
297 if (MEM_P (x)
298 && (MEM_VOLATILE_P (x)
5bfed9a9 299 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
300 return false;
301
302 return true;
303}
304
305/* If INSN is a single set between two objects that we want to split,
306 return the single set. SPEED_P says whether we are optimizing
307 INSN for speed or size.
308
309 INSN should have been passed to recog and extract_insn before this
310 is called. */
311
312static rtx
af4ba423 313simple_move (rtx insn, bool speed_p)
314{
315 rtx x;
316 rtx set;
317 enum machine_mode mode;
318
319 if (recog_data.n_operands != 2)
320 return NULL_RTX;
321
322 set = single_set (insn);
323 if (!set)
324 return NULL_RTX;
325
326 x = SET_DEST (set);
327 if (x != recog_data.operand[0] && x != recog_data.operand[1])
328 return NULL_RTX;
2b54c30f 329 if (!simple_move_operand (x))
330 return NULL_RTX;
331
332 x = SET_SRC (set);
333 if (x != recog_data.operand[0] && x != recog_data.operand[1])
334 return NULL_RTX;
335 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
336 things like x86 rdtsc which returns a DImode value. */
337 if (GET_CODE (x) != ASM_OPERANDS
338 && !simple_move_operand (x))
339 return NULL_RTX;
340
341 /* We try to decompose in integer modes, to avoid generating
342 inefficient code copying between integer and floating point
343 registers. That means that we can't decompose if this is a
344 non-integer mode for which there is no integer mode of the same
345 size. */
576fe41a 346 mode = GET_MODE (SET_DEST (set));
347 if (!SCALAR_INT_MODE_P (mode)
348 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
349 == BLKmode))
350 return NULL_RTX;
351
352 /* Reject PARTIAL_INT modes. They are used for processor specific
353 purposes and it's probably best not to tamper with them. */
354 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
355 return NULL_RTX;
356
357 if (!choices[speed_p].move_modes_to_split[(int) mode])
358 return NULL_RTX;
359
360 return set;
361}
362
363/* If SET is a copy from one multi-word pseudo-register to another,
364 record that in reg_copy_graph. Return whether it is such a
365 copy. */
366
367static bool
368find_pseudo_copy (rtx set)
369{
370 rtx dest = SET_DEST (set);
371 rtx src = SET_SRC (set);
372 unsigned int rd, rs;
373 bitmap b;
374
375 if (!REG_P (dest) || !REG_P (src))
376 return false;
377
378 rd = REGNO (dest);
379 rs = REGNO (src);
380 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
381 return false;
382
9771b263 383 b = reg_copy_graph[rs];
384 if (b == NULL)
385 {
386 b = BITMAP_ALLOC (NULL);
9771b263 387 reg_copy_graph[rs] = b;
388 }
389
390 bitmap_set_bit (b, rd);
391
392 return true;
393}
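
/* For example, after scanning (set (reg:DI 105) (reg:DI 101)) the bit
   for register 105 is set in the bitmap stored at reg_copy_graph[101]
   (the register numbers are purely illustrative).  */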
394
395/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
396 where they are copied to another register, add the register to
397 which they are copied to DECOMPOSABLE_CONTEXT. Use
398 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
399 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
400
401static void
402propagate_pseudo_copies (void)
403{
404 bitmap queue, propagate;
405
406 queue = BITMAP_ALLOC (NULL);
407 propagate = BITMAP_ALLOC (NULL);
408
409 bitmap_copy (queue, decomposable_context);
410 do
411 {
412 bitmap_iterator iter;
413 unsigned int i;
414
415 bitmap_clear (propagate);
416
417 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
418 {
9771b263 419 bitmap b = reg_copy_graph[i];
420 if (b)
421 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
422 }
423
424 bitmap_and_compl (queue, propagate, decomposable_context);
425 bitmap_ior_into (decomposable_context, propagate);
426 }
427 while (!bitmap_empty_p (queue));
428
429 BITMAP_FREE (queue);
430 BITMAP_FREE (propagate);
431}
432
433/* A pointer to one of these values is passed to
434 find_decomposable_subregs via for_each_rtx. */
435
436enum classify_move_insn
437{
438 /* Not a simple move from one location to another. */
439 NOT_SIMPLE_MOVE,
440 /* A simple move we want to decompose. */
441 DECOMPOSABLE_SIMPLE_MOVE,
442 /* Any other simple move. */
443 SIMPLE_MOVE
444};
445
446/* This is called via for_each_rtx. If we find a SUBREG which we
447 could use to decompose a pseudo-register, set a bit in
448 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
449 not a simple pseudo-register copy, DATA will point at the type of
450 move, and we set a bit in DECOMPOSABLE_CONTEXT or
451 NON_DECOMPOSABLE_CONTEXT as appropriate. */
452
453static int
454find_decomposable_subregs (rtx *px, void *data)
455{
456 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
457 rtx x = *px;
458
459 if (x == NULL_RTX)
460 return 0;
461
462 if (GET_CODE (x) == SUBREG)
463 {
464 rtx inner = SUBREG_REG (x);
465 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
466
467 if (!REG_P (inner))
468 return 0;
469
470 regno = REGNO (inner);
471 if (HARD_REGISTER_NUM_P (regno))
472 return -1;
473
474 outer_size = GET_MODE_SIZE (GET_MODE (x));
475 inner_size = GET_MODE_SIZE (GET_MODE (inner));
476 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
477 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
478
479 /* We only try to decompose single word subregs of multi-word
480 registers. When we find one, we return -1 to avoid iterating
481 over the inner register.
482
483 ??? This doesn't allow, e.g., DImode subregs of TImode values
484 on 32-bit targets. We would need to record the way the
485 pseudo-register was used, and only decompose if all the uses
486 were the same number and size of pieces. Hopefully this
487 doesn't happen much. */
488
489 if (outer_words == 1 && inner_words > 1)
490 {
491 bitmap_set_bit (decomposable_context, regno);
492 return -1;
493 }
494
495 /* If this is a cast from one mode to another, where the modes
496 have the same size, and they are not tieable, then mark this
497 register as non-decomposable. If we decompose it we are
498 likely to mess up whatever the backend is trying to do. */
499 if (outer_words > 1
500 && outer_size == inner_size
501 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
502 {
503 bitmap_set_bit (non_decomposable_context, regno);
402464a0 504 bitmap_set_bit (subreg_context, regno);
505 return -1;
506 }
e53a16e7 507 }
2b54c30f 508 else if (REG_P (x))
509 {
510 unsigned int regno;
511
512 /* We will see an outer SUBREG before we see the inner REG, so
513 when we see a plain REG here it means a direct reference to
514 the register.
515
516 If this is not a simple copy from one location to another,
517 then we can not decompose this register. If this is a simple
c2c47e8f 518 copy we want to decompose, and the mode is right,
519 then we mark the register as decomposable.
520 Otherwise we don't say anything about this register --
521 it could be decomposed, but whether that would be
522 profitable depends upon how it is used elsewhere.
523
524 We only set bits in the bitmap for multi-word
525 pseudo-registers, since those are the only ones we care about
526 and it keeps the size of the bitmaps down. */
527
528 regno = REGNO (x);
529 if (!HARD_REGISTER_NUM_P (regno)
530 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
531 {
532 switch (*pcmi)
533 {
534 case NOT_SIMPLE_MOVE:
535 bitmap_set_bit (non_decomposable_context, regno);
536 break;
c2c47e8f 537 case DECOMPOSABLE_SIMPLE_MOVE:
538 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
539 bitmap_set_bit (decomposable_context, regno);
540 break;
541 case SIMPLE_MOVE:
542 break;
543 default:
544 gcc_unreachable ();
545 }
546 }
547 }
548 else if (MEM_P (x))
549 {
550 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
551
552 /* Any registers used in a MEM do not participate in a
c2c47e8f 553 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
554 here, and return -1 to block the parent's recursion. */
555 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
556 return -1;
557 }
558
559 return 0;
560}
561
562/* Decompose REGNO into word-sized components. We smash the REG node
563 in place. This ensures that (1) something goes wrong quickly if we
564 fail to make some replacement, and (2) the debug information inside
565 the symbol table is automatically kept up to date. */
566
567static void
568decompose_register (unsigned int regno)
569{
570 rtx reg;
571 unsigned int words, i;
572 rtvec v;
573
574 reg = regno_reg_rtx[regno];
575
576 regno_reg_rtx[regno] = NULL_RTX;
577
578 words = GET_MODE_SIZE (GET_MODE (reg));
579 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
580
581 v = rtvec_alloc (words);
582 for (i = 0; i < words; ++i)
583 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
584
585 PUT_CODE (reg, CONCATN);
586 XVEC (reg, 0) = v;
587
588 if (dump_file)
589 {
590 fprintf (dump_file, "; Splitting reg %u ->", regno);
591 for (i = 0; i < words; ++i)
592 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
593 fputc ('\n', dump_file);
594 }
595}
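
/* After decomposition, a DImode pseudo on a 32-bit target is left
   looking something like

      (concatn:DI [(reg:SI 102) (reg:SI 103)])

   where the word_mode pseudo-registers are newly created and the
   register numbers shown are purely illustrative.  */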
596
597/* Get a SUBREG of a CONCATN. */
598
599static rtx
600simplify_subreg_concatn (enum machine_mode outermode, rtx op,
601 unsigned int byte)
602{
603 unsigned int inner_size;
695ae295 604 enum machine_mode innermode, partmode;
605 rtx part;
606 unsigned int final_offset;
607
608 gcc_assert (GET_CODE (op) == CONCATN);
609 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
610
611 innermode = GET_MODE (op);
612 gcc_assert (byte < GET_MODE_SIZE (innermode));
613 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
614
615 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
616 part = XVECEXP (op, 0, byte / inner_size);
617 partmode = GET_MODE (part);
618
619 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
620 regular CONST_VECTORs. They have vector or integer modes, depending
621 on the capabilities of the target. Cope with them. */
622 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
623 partmode = GET_MODE_INNER (innermode);
624 else if (partmode == VOIDmode)
695ae295 625 {
626 enum mode_class mclass = GET_MODE_CLASS (innermode);
627 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
628 }
629
630 final_offset = byte % inner_size;
631 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
632 return NULL_RTX;
633
695ae295 634 return simplify_gen_subreg (outermode, part, partmode, final_offset);
635}
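
/* For example, given OP = (concatn:DI [(reg:SI 102) (reg:SI 103)]) on
   a 32-bit target, asking for the SImode part at byte 4 selects the
   second element and simplifies to (reg:SI 103).  */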
636
637/* Wrapper around simplify_gen_subreg which handles CONCATN. */
638
639static rtx
640simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
641 enum machine_mode innermode, unsigned int byte)
642{
643 rtx ret;
644
645 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
646 If OP is a SUBREG of a CONCATN, then it must be a simple mode
647 change with the same size and offset 0, or it must extract a
648 part. We shouldn't see anything else here. */
649 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
650 {
651 rtx op2;
652
653 if ((GET_MODE_SIZE (GET_MODE (op))
654 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
655 && SUBREG_BYTE (op) == 0)
656 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
657 GET_MODE (SUBREG_REG (op)), byte);
658
659 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
660 SUBREG_BYTE (op));
661 if (op2 == NULL_RTX)
662 {
663 /* We don't handle paradoxical subregs here. */
664 gcc_assert (GET_MODE_SIZE (outermode)
665 <= GET_MODE_SIZE (GET_MODE (op)));
666 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
667 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
668 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
669 byte + SUBREG_BYTE (op));
670 gcc_assert (op2 != NULL_RTX);
671 return op2;
672 }
673
674 op = op2;
675 gcc_assert (op != NULL_RTX);
676 gcc_assert (innermode == GET_MODE (op));
677 }
0e6c5b58 678
679 if (GET_CODE (op) == CONCATN)
680 return simplify_subreg_concatn (outermode, op, byte);
681
682 ret = simplify_gen_subreg (outermode, op, innermode, byte);
683
684 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
685 resolve_simple_move will ask for the high part of the paradoxical
686 subreg, which does not have a value. Just return a zero. */
687 if (ret == NULL_RTX
688 && GET_CODE (op) == SUBREG
689 && SUBREG_BYTE (op) == 0
690 && (GET_MODE_SIZE (innermode)
691 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
692 return CONST0_RTX (outermode);
693
694 gcc_assert (ret != NULL_RTX);
695 return ret;
696}
697
698/* Return whether we should resolve X into the registers into which it
699 was decomposed. */
700
701static bool
702resolve_reg_p (rtx x)
703{
704 return GET_CODE (x) == CONCATN;
705}
706
707/* Return whether X is a SUBREG of a register which we need to
708 resolve. */
709
710static bool
711resolve_subreg_p (rtx x)
712{
713 if (GET_CODE (x) != SUBREG)
714 return false;
715 return resolve_reg_p (SUBREG_REG (x));
716}
717
718/* This is called via for_each_rtx. Look for SUBREGs which need to be
719 decomposed. */
720
721static int
722resolve_subreg_use (rtx *px, void *data)
723{
724 rtx insn = (rtx) data;
725 rtx x = *px;
726
727 if (x == NULL_RTX)
728 return 0;
729
730 if (resolve_subreg_p (x))
731 {
732 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
733 SUBREG_BYTE (x));
734
735 /* It is possible for a note to contain a reference which we can
736 decompose. In this case, return 1 to the caller to indicate
737 that the note must be removed. */
738 if (!x)
739 {
30d18db4 740 gcc_assert (!insn);
741 return 1;
742 }
743
744 validate_change (insn, px, x, 1);
745 return -1;
746 }
747
748 if (resolve_reg_p (x))
749 {
750 /* Return 1 to the caller to indicate that we found a direct
751 reference to a register which is being decomposed. This can
752 happen inside notes, multiword shift or zero-extend
753 instructions. */
754 return 1;
755 }
756
757 return 0;
758}
759
760/* This is called via for_each_rtx. Look for SUBREGs which can be
761 decomposed and decomposed REGs that need copying. */
762
763static int
764adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
765{
766 rtx x = *px;
767
768 if (x == NULL_RTX)
769 return 0;
770
771 if (resolve_subreg_p (x))
772 {
773 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
774 SUBREG_BYTE (x));
775
776 if (x)
777 *px = x;
778 else
779 x = copy_rtx (*px);
780 }
781
782 if (resolve_reg_p (x))
783 *px = copy_rtx (x);
784
785 return 0;
786}
787
788/* Resolve any decomposed registers which appear in register notes on
789 INSN. */
790
791static void
792resolve_reg_notes (rtx insn)
793{
794 rtx *pnote, note;
795
796 note = find_reg_equal_equiv_note (insn);
797 if (note)
798 {
6fb5fa3c 799 int old_count = num_validated_changes ();
e53a16e7 800 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
4a8cae83 801 remove_note (insn, note);
802 else
803 if (old_count != num_validated_changes ())
804 df_notes_rescan (insn);
805 }
806
807 pnote = &REG_NOTES (insn);
808 while (*pnote != NULL_RTX)
809 {
60564289 810 bool del = false;
811
812 note = *pnote;
813 switch (REG_NOTE_KIND (note))
814 {
815 case REG_DEAD:
816 case REG_UNUSED:
e53a16e7 817 if (resolve_reg_p (XEXP (note, 0)))
60564289 818 del = true;
819 break;
820
821 default:
822 break;
823 }
824
60564289 825 if (del)
826 *pnote = XEXP (note, 1);
827 else
828 pnote = &XEXP (note, 1);
829 }
830}
831
2b54c30f 832/* Return whether X can be decomposed into subwords. */
833
834static bool
2b54c30f 835can_decompose_p (rtx x)
836{
837 if (REG_P (x))
838 {
839 unsigned int regno = REGNO (x);
840
841 if (HARD_REGISTER_NUM_P (regno))
842 {
843 unsigned int byte, num_bytes;
844
845 num_bytes = GET_MODE_SIZE (GET_MODE (x));
846 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
847 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
848 return false;
849 return true;
850 }
e53a16e7 851 else
402464a0 852 return !bitmap_bit_p (subreg_context, regno);
853 }
854
2b54c30f 855 return true;
856}
857
858/* Decompose the registers used in a simple move SET within INSN. If
859 we don't change anything, return INSN, otherwise return the start
860 of the sequence of moves. */
861
862static rtx
863resolve_simple_move (rtx set, rtx insn)
864{
865 rtx src, dest, real_dest, insns;
866 enum machine_mode orig_mode, dest_mode;
867 unsigned int words;
868 bool pushing;
869
870 src = SET_SRC (set);
871 dest = SET_DEST (set);
872 orig_mode = GET_MODE (dest);
873
874 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
af4ba423 875 gcc_assert (words > 1);
876
877 start_sequence ();
878
879 /* We have to handle copying from a SUBREG of a decomposed reg where
880 the SUBREG is larger than word size. Rather than assume that we
881 can take a word_mode SUBREG of the destination, we copy to a new
882 register and then copy that to the destination. */
883
884 real_dest = NULL_RTX;
885
886 if (GET_CODE (src) == SUBREG
887 && resolve_reg_p (SUBREG_REG (src))
888 && (SUBREG_BYTE (src) != 0
889 || (GET_MODE_SIZE (orig_mode)
890 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
891 {
892 real_dest = dest;
893 dest = gen_reg_rtx (orig_mode);
894 if (REG_P (real_dest))
895 REG_ATTRS (dest) = REG_ATTRS (real_dest);
896 }
897
898 /* Similarly if we are copying to a SUBREG of a decomposed reg where
899 the SUBREG is larger than word size. */
900
901 if (GET_CODE (dest) == SUBREG
902 && resolve_reg_p (SUBREG_REG (dest))
903 && (SUBREG_BYTE (dest) != 0
904 || (GET_MODE_SIZE (orig_mode)
905 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
906 {
907 rtx reg, minsn, smove;
908
909 reg = gen_reg_rtx (orig_mode);
910 minsn = emit_move_insn (reg, src);
911 smove = single_set (minsn);
912 gcc_assert (smove != NULL_RTX);
913 resolve_simple_move (smove, minsn);
914 src = reg;
915 }
916
917 /* If we didn't have any big SUBREGS of decomposed registers, and
918 neither side of the move is a register we are decomposing, then
919 we don't have to do anything here. */
920
921 if (src == SET_SRC (set)
922 && dest == SET_DEST (set)
923 && !resolve_reg_p (src)
924 && !resolve_subreg_p (src)
925 && !resolve_reg_p (dest)
926 && !resolve_subreg_p (dest))
927 {
928 end_sequence ();
929 return insn;
930 }
931
932 /* It's possible for the code to use a subreg of a decomposed
933 register while forming an address. We need to handle that before
934 passing the address to emit_move_insn. We pass NULL_RTX as the
935 insn parameter to resolve_subreg_use because we can not validate
936 the insn yet. */
937 if (MEM_P (src) || MEM_P (dest))
938 {
939 int acg;
940
941 if (MEM_P (src))
942 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
943 if (MEM_P (dest))
944 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
945 acg = apply_change_group ();
946 gcc_assert (acg);
947 }
948
949 /* If SRC is a register which we can't decompose, or has side
950 effects, we need to move via a temporary register. */
951
2b54c30f 952 if (!can_decompose_p (src)
953 || side_effects_p (src)
954 || GET_CODE (src) == ASM_OPERANDS)
955 {
956 rtx reg;
957
958 reg = gen_reg_rtx (orig_mode);
959 emit_move_insn (reg, src);
960 src = reg;
961 }
962
963 /* If DEST is a register which we can't decompose, or has side
964 effects, we need to first move to a temporary register. We
965 handle the common case of pushing an operand directly. We also
966 go through a temporary register if it holds a floating point
967 value. This gives us better code on systems which can't move
968 data easily between integer and floating point registers. */
969
970 dest_mode = orig_mode;
971 pushing = push_operand (dest, dest_mode);
2b54c30f 972 if (!can_decompose_p (dest)
973 || (side_effects_p (dest) && !pushing)
974 || (!SCALAR_INT_MODE_P (dest_mode)
975 && !resolve_reg_p (dest)
976 && !resolve_subreg_p (dest)))
977 {
978 if (real_dest == NULL_RTX)
979 real_dest = dest;
980 if (!SCALAR_INT_MODE_P (dest_mode))
981 {
982 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
983 MODE_INT, 0);
984 gcc_assert (dest_mode != BLKmode);
985 }
986 dest = gen_reg_rtx (dest_mode);
987 if (REG_P (real_dest))
988 REG_ATTRS (dest) = REG_ATTRS (real_dest);
989 }
990
991 if (pushing)
992 {
993 unsigned int i, j, jinc;
994
995 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
996 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
997 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
998
999 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1000 {
1001 j = 0;
1002 jinc = 1;
1003 }
1004 else
1005 {
1006 j = words - 1;
1007 jinc = -1;
1008 }
1009
1010 for (i = 0; i < words; ++i, j += jinc)
1011 {
1012 rtx temp;
1013
1014 temp = copy_rtx (XEXP (dest, 0));
1015 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1016 j * UNITS_PER_WORD);
1017 emit_move_insn (temp,
1018 simplify_gen_subreg_concatn (word_mode, src,
1019 orig_mode,
1020 j * UNITS_PER_WORD));
1021 }
1022 }
1023 else
1024 {
1025 unsigned int i;
1026
1027 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
c41c1387 1028 emit_clobber (dest);
1029
1030 for (i = 0; i < words; ++i)
1031 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1032 dest_mode,
1033 i * UNITS_PER_WORD),
1034 simplify_gen_subreg_concatn (word_mode, src,
1035 orig_mode,
1036 i * UNITS_PER_WORD));
1037 }
1038
1039 if (real_dest != NULL_RTX)
1040 {
1041 rtx mdest, minsn, smove;
1042
1043 if (dest_mode == orig_mode)
1044 mdest = dest;
1045 else
1046 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1047 minsn = emit_move_insn (real_dest, mdest);
1048
1049 smove = single_set (minsn);
1050 gcc_assert (smove != NULL_RTX);
1051
1052 resolve_simple_move (smove, minsn);
1053 }
1054
1055 insns = get_insns ();
1056 end_sequence ();
1057
1d65f45c 1058 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
73663bb7 1059
1060 emit_insn_before (insns, insn);
1061
1062 delete_insn (insn);
1063
1064 return insns;
1065}
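
/* To give one illustrative case: with (reg:DI 100) decomposed as
   described above, a store such as

      (set (mem:DI (reg:SI 200)) (reg:DI 100))

   is resolved here into two word_mode stores, one per half of the
   decomposed register, with the address of the second store offset
   by UNITS_PER_WORD.  */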
1066
1067/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1068 component registers. Return whether we changed something. */
1069
1070static bool
1071resolve_clobber (rtx pat, rtx insn)
1072{
d70dcf29 1073 rtx reg;
1074 enum machine_mode orig_mode;
1075 unsigned int words, i;
7e0c3f57 1076 int ret;
1077
1078 reg = XEXP (pat, 0);
9a5a8e58 1079 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1080 return false;
1081
1082 orig_mode = GET_MODE (reg);
1083 words = GET_MODE_SIZE (orig_mode);
1084 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1085
1086 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1087 simplify_gen_subreg_concatn (word_mode, reg,
1088 orig_mode, 0),
1089 0);
6fb5fa3c 1090 df_insn_rescan (insn);
1091 gcc_assert (ret != 0);
1092
1093 for (i = words - 1; i > 0; --i)
1094 {
1095 rtx x;
1096
1097 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1098 i * UNITS_PER_WORD);
1099 x = gen_rtx_CLOBBER (VOIDmode, x);
1100 emit_insn_after (x, insn);
1101 }
1102
1103 resolve_reg_notes (insn);
1104
1105 return true;
1106}
1107
1108/* A USE of a decomposed register is no longer meaningful. Return
1109 whether we changed something. */
1110
1111static bool
1112resolve_use (rtx pat, rtx insn)
1113{
1114 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1115 {
1116 delete_insn (insn);
1117 return true;
1118 }
1119
1120 resolve_reg_notes (insn);
1121
1122 return false;
1123}
1124
1125/* A VAR_LOCATION can be simplified. */
1126
1127static void
1128resolve_debug (rtx insn)
1129{
1130 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1131
1132 df_insn_rescan (insn);
1133
1134 resolve_reg_notes (insn);
1135}
1136
1137/* Check if INSN is a decomposable multiword-shift or zero-extend and
1138 set the decomposable_context bitmap accordingly. SPEED_P is true
1139 if we are optimizing INSN for speed rather than size. Return true
1140 if INSN is decomposable. */
e0892570 1141
1142static bool
1143find_decomposable_shift_zext (rtx insn, bool speed_p)
1144{
1145 rtx set;
1146 rtx op;
1147 rtx op_operand;
1148
1149 set = single_set (insn);
1150 if (!set)
af4ba423 1151 return false;
1152
1153 op = SET_SRC (set);
1154 if (GET_CODE (op) != ASHIFT
1155 && GET_CODE (op) != LSHIFTRT
1156 && GET_CODE (op) != ZERO_EXTEND)
af4ba423 1157 return false;
1158
1159 op_operand = XEXP (op, 0);
1160 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1161 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1162 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1163 || GET_MODE (op) != twice_word_mode)
1164 return false;
1165
1166 if (GET_CODE (op) == ZERO_EXTEND)
1167 {
1168 if (GET_MODE (op_operand) != word_mode
1169 || !choices[speed_p].splitting_zext)
1170 return false;
1171 }
1172 else /* left or right shift */
1173 {
1174 bool *splitting = (GET_CODE (op) == ASHIFT
1175 ? choices[speed_p].splitting_ashift
1176 : choices[speed_p].splitting_lshiftrt);
481683e1 1177 if (!CONST_INT_P (XEXP (op, 1))
1178 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1179 2 * BITS_PER_WORD - 1)
1180 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1181 return false;
1182
1183 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1184 }
1185
1186 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1187
af4ba423 1188 return true;
1189}
1190
/* Decompose a wider-than-word shift (in INSN) of a multiword pseudo,
   or a multiword zero-extend of a word_mode pseudo, into a move and
   a 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */
1195
1196static rtx
1197resolve_shift_zext (rtx insn)
1198{
1199 rtx set;
1200 rtx op;
1201 rtx op_operand;
1202 rtx insns;
1203 rtx src_reg, dest_reg, dest_zero;
1204 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1205
1206 set = single_set (insn);
1207 if (!set)
1208 return NULL_RTX;
1209
1210 op = SET_SRC (set);
1211 if (GET_CODE (op) != ASHIFT
1212 && GET_CODE (op) != LSHIFTRT
1213 && GET_CODE (op) != ZERO_EXTEND)
1214 return NULL_RTX;
1215
1216 op_operand = XEXP (op, 0);
1217
1218 /* We can tear this operation apart only if the regs were already
1219 torn apart. */
1220 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1221 return NULL_RTX;
1222
1223 /* src_reg_num is the number of the word mode register which we
1224 are operating on. For a left shift and a zero_extend on little
1225 endian machines this is register 0. */
1226 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
1227
1228 if (WORDS_BIG_ENDIAN
1229 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1230 src_reg_num = 1 - src_reg_num;
1231
1232 if (GET_CODE (op) == ZERO_EXTEND)
acbe5496 1233 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1234 else
1235 dest_reg_num = 1 - src_reg_num;
1236
1237 offset1 = UNITS_PER_WORD * dest_reg_num;
1238 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1239 src_offset = UNITS_PER_WORD * src_reg_num;
1240
1241 start_sequence ();
1242
1243 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1244 GET_MODE (SET_DEST (set)),
1245 offset1);
1246 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1247 GET_MODE (SET_DEST (set)),
1248 offset2);
1249 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1250 GET_MODE (op_operand),
1251 src_offset);
1252 if (GET_CODE (op) != ZERO_EXTEND)
1253 {
1254 int shift_count = INTVAL (XEXP (op, 1));
1255 if (shift_count > BITS_PER_WORD)
1256 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1257 LSHIFT_EXPR : RSHIFT_EXPR,
1258 word_mode, src_reg,
eb6c3df1 1259 shift_count - BITS_PER_WORD,
1260 dest_reg, 1);
1261 }
1262
1263 if (dest_reg != src_reg)
1264 emit_move_insn (dest_reg, src_reg);
1265 emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1266 insns = get_insns ();
1267
1268 end_sequence ();
1269
1270 emit_insn_before (insns, insn);
1271
1272 if (dump_file)
1273 {
1274 rtx in;
1275 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1276 for (in = insns; in != insn; in = NEXT_INSN (in))
1277 fprintf (dump_file, "%d ", INSN_UID (in));
1278 fprintf (dump_file, "\n");
1279 }
1280
1281 delete_insn (insn);
1282 return insns;
1283}
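
/* As an example of the above, on a little-endian 32-bit target

      (set (reg:DI 100) (zero_extend:DI (reg:SI 101)))

   becomes, once reg 100 has been decomposed, a copy of reg 101 into
   the low word of the result plus a move of (const_int 0) into the
   high word (register numbers again purely illustrative).  */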
1284
1285/* Print to dump_file a description of what we're doing with shift code CODE.
1286 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1287
1288static void
1289dump_shift_choices (enum rtx_code code, bool *splitting)
1290{
1291 int i;
1292 const char *sep;
1293
1294 fprintf (dump_file,
1295 " Splitting mode %s for %s lowering with shift amounts = ",
1296 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1297 sep = "";
1298 for (i = 0; i < BITS_PER_WORD; i++)
1299 if (splitting[i])
1300 {
1301 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1302 sep = ",";
1303 }
1304 fprintf (dump_file, "\n");
1305}
1306
1307/* Print to dump_file a description of what we're doing when optimizing
1308 for speed or size; SPEED_P says which. DESCRIPTION is a description
1309 of the SPEED_P choice. */
1310
1311static void
1312dump_choices (bool speed_p, const char *description)
1313{
1314 unsigned int i;
1315
1316 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1317
1318 for (i = 0; i < MAX_MACHINE_MODE; i++)
1319 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1320 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1321 choices[speed_p].move_modes_to_split[i]
1322 ? "Splitting"
1323 : "Skipping",
1324 GET_MODE_NAME ((enum machine_mode) i));
1325
1326 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1327 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1328 GET_MODE_NAME (twice_word_mode));
1329
1330 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1332 fprintf (dump_file, "\n");
1333}
1334
e53a16e7 1335/* Look for registers which are always accessed via word-sized SUBREGs
   or, if DECOMPOSE_COPIES is true, via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */
1338
1339static void
c2c47e8f 1340decompose_multiword_subregs (bool decompose_copies)
1341{
1342 unsigned int max;
1343 basic_block bb;
af4ba423 1344 bool speed_p;
e53a16e7 1345
1346 if (dump_file)
1347 {
1348 dump_choices (false, "size");
1349 dump_choices (true, "speed");
1350 }
1351
1352 /* Check if this target even has any modes to consider lowering. */
1353 if (!choices[false].something_to_do && !choices[true].something_to_do)
1354 {
1355 if (dump_file)
1356 fprintf (dump_file, "Nothing to do!\n");
1357 return;
1358 }
6fb5fa3c 1359
1360 max = max_reg_num ();
1361
1362 /* First see if there are any multi-word pseudo-registers. If there
1363 aren't, there is nothing we can do. This should speed up this
1364 pass in the normal case, since it should be faster than scanning
1365 all the insns. */
1366 {
1367 unsigned int i;
af4ba423 1368 bool useful_modes_seen = false;
1369
1370 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1371 if (regno_reg_rtx[i] != NULL)
1372 {
1373 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1374 if (choices[false].move_modes_to_split[(int) mode]
1375 || choices[true].move_modes_to_split[(int) mode])
1376 {
1377 useful_modes_seen = true;
1378 break;
1379 }
1380 }
1381
1382 if (!useful_modes_seen)
e53a16e7 1383 {
1384 if (dump_file)
1385 fprintf (dump_file, "Nothing to lower in this function.\n");
1386 return;
e53a16e7 1387 }
1388 }
1389
8d074192 1390 if (df)
1391 {
1392 df_set_flags (DF_DEFER_INSN_RESCAN);
1393 run_word_dce ();
1394 }
8d074192 1395
1396 /* FIXME: It may be possible to change this code to look for each
1397 multi-word pseudo-register and to find each insn which sets or
1398 uses that register. That should be faster than scanning all the
1399 insns. */
1400
1401 decomposable_context = BITMAP_ALLOC (NULL);
1402 non_decomposable_context = BITMAP_ALLOC (NULL);
402464a0 1403 subreg_context = BITMAP_ALLOC (NULL);
e53a16e7 1404
1405 reg_copy_graph.create (max);
1406 reg_copy_graph.safe_grow_cleared (max);
1407 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
e53a16e7 1408
af4ba423 1409 speed_p = optimize_function_for_speed_p (cfun);
1410 FOR_EACH_BB (bb)
1411 {
1412 rtx insn;
1413
1414 FOR_BB_INSNS (bb, insn)
1415 {
1416 rtx set;
1417 enum classify_move_insn cmi;
1418 int i, n;
1419
1420 if (!INSN_P (insn)
1421 || GET_CODE (PATTERN (insn)) == CLOBBER
1422 || GET_CODE (PATTERN (insn)) == USE)
1423 continue;
1424
1425 recog_memoized (insn);
1426
af4ba423 1427 if (find_decomposable_shift_zext (insn, speed_p))
1428 continue;
1429
1430 extract_insn (insn);
1431
af4ba423 1432 set = simple_move (insn, speed_p);
1433
1434 if (!set)
1435 cmi = NOT_SIMPLE_MOVE;
1436 else
1437 {
1438 /* We mark pseudo-to-pseudo copies as decomposable during the
1439 second pass only. The first pass is so early that there is
1440 good chance such moves will be optimized away completely by
1441 subsequent optimizations anyway.
1442
1443 However, we call find_pseudo_copy even during the first pass
1444 so as to properly set up the reg_copy_graph. */
4a8cae83 1445 if (find_pseudo_copy (set))
c2c47e8f 1446 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1447 else
1448 cmi = SIMPLE_MOVE;
1449 }
1450
1451 n = recog_data.n_operands;
1452 for (i = 0; i < n; ++i)
1453 {
1454 for_each_rtx (&recog_data.operand[i],
1455 find_decomposable_subregs,
1456 &cmi);
1457
1458 /* We handle ASM_OPERANDS as a special case to support
1459 things like x86 rdtsc which returns a DImode value.
1460 We can decompose the output, which will certainly be
1461 operand 0, but not the inputs. */
1462
1463 if (cmi == SIMPLE_MOVE
1464 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1465 {
1466 gcc_assert (i == 0);
1467 cmi = NOT_SIMPLE_MOVE;
1468 }
1469 }
1470 }
1471 }
1472
1473 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1474 if (!bitmap_empty_p (decomposable_context))
1475 {
73663bb7 1476 sbitmap sub_blocks;
1477 unsigned int i;
1478 sbitmap_iterator sbi;
1479 bitmap_iterator iter;
1480 unsigned int regno;
1481
1482 propagate_pseudo_copies ();
1483
73663bb7 1484 sub_blocks = sbitmap_alloc (last_basic_block);
f61e445a 1485 bitmap_clear (sub_blocks);
1486
1487 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1488 decompose_register (regno);
1489
1490 FOR_EACH_BB (bb)
1491 {
ba4807a0 1492 rtx insn;
e53a16e7 1493
ba4807a0 1494 FOR_BB_INSNS (bb, insn)
e53a16e7 1495 {
11895e28 1496 rtx pat;
1497
1498 if (!INSN_P (insn))
1499 continue;
1500
1501 pat = PATTERN (insn);
1502 if (GET_CODE (pat) == CLOBBER)
d4fd3465 1503 resolve_clobber (pat, insn);
e53a16e7 1504 else if (GET_CODE (pat) == USE)
d4fd3465 1505 resolve_use (pat, insn);
1506 else if (DEBUG_INSN_P (insn))
1507 resolve_debug (insn);
1508 else
1509 {
1510 rtx set;
1511 int i;
1512
1513 recog_memoized (insn);
1514 extract_insn (insn);
1515
af4ba423 1516 set = simple_move (insn, speed_p);
1517 if (set)
1518 {
1519 rtx orig_insn = insn;
73663bb7 1520 bool cfi = control_flow_insn_p (insn);
e53a16e7 1521
1522 /* We can end up splitting loads to multi-word pseudos
1523 into separate loads to machine word size pseudos.
1524 When this happens, we first had one load that can
1525 throw, and after resolve_simple_move we'll have a
1526 bunch of loads (at least two). All those loads may
1527 trap if we can have non-call exceptions, so they
1528 all will end the current basic block. We split the
1529 block after the outer loop over all insns, but we
1530 make sure here that we will be able to split the
1531 basic block and still produce the correct control
1532 flow graph for it. */
1533 gcc_assert (!cfi
8f4f502f 1534 || (cfun->can_throw_non_call_exceptions
1535 && can_throw_internal (insn)));
1536
1537 insn = resolve_simple_move (set, insn);
1538 if (insn != orig_insn)
1539 {
1540 recog_memoized (insn);
1541 extract_insn (insn);
1542
1543 if (cfi)
d7c028c0 1544 bitmap_set_bit (sub_blocks, bb->index);
1545 }
1546 }
1547 else
1548 {
1549 rtx decomposed_shift;
1550
1551 decomposed_shift = resolve_shift_zext (insn);
1552 if (decomposed_shift != NULL_RTX)
1553 {
1554 insn = decomposed_shift;
1555 recog_memoized (insn);
1556 extract_insn (insn);
1557 }
1558 }
1559
1560 for (i = recog_data.n_operands - 1; i >= 0; --i)
1561 for_each_rtx (recog_data.operand_loc[i],
1562 resolve_subreg_use,
1563 insn);
1564
1565 resolve_reg_notes (insn);
1566
1567 if (num_validated_changes () > 0)
1568 {
1569 for (i = recog_data.n_dups - 1; i >= 0; --i)
1570 {
1571 rtx *pl = recog_data.dup_loc[i];
1572 int dup_num = recog_data.dup_num[i];
1573 rtx *px = recog_data.operand_loc[dup_num];
1574
1a309dfb 1575 validate_unshare_change (insn, pl, *px, 1);
1576 }
1577
1578 i = apply_change_group ();
1579 gcc_assert (i);
1580 }
1581 }
1582 }
1583 }
1584
1585 /* If we had insns to split that caused control flow insns in the middle
1586 of a basic block, split those blocks now. Note that we only handle
1587 the case where splitting a load has caused multiple possibly trapping
1588 loads to appear. */
d4ac4ce2 1589 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1590 {
1591 rtx insn, end;
1592 edge fallthru;
1593
1594 bb = BASIC_BLOCK (i);
1595 insn = BB_HEAD (bb);
1596 end = BB_END (bb);
1597
1598 while (insn != end)
1599 {
1600 if (control_flow_insn_p (insn))
1601 {
1602 /* Split the block after insn. There will be a fallthru
1603 edge, which is OK so we keep it. We have to create the
1604 exception edges ourselves. */
1605 fallthru = split_block (bb, insn);
1606 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1607 bb = fallthru->dest;
1608 insn = BB_HEAD (bb);
1609 }
1610 else
1611 insn = NEXT_INSN (insn);
1612 }
1613 }
73663bb7 1614
73663bb7 1615 sbitmap_free (sub_blocks);
1616 }
1617
1618 {
1619 unsigned int i;
1620 bitmap b;
1621
9771b263 1622 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1623 if (b)
1624 BITMAP_FREE (b);
1625 }
1626
9771b263 1627 reg_copy_graph.release ();
1628
1629 BITMAP_FREE (decomposable_context);
1630 BITMAP_FREE (non_decomposable_context);
402464a0 1631 BITMAP_FREE (subreg_context);
1632}
1633\f
1634/* Gate function for lower subreg pass. */
1635
1636static bool
1637gate_handle_lower_subreg (void)
1638{
1639 return flag_split_wide_types != 0;
1640}
1641
1642/* Implement first lower subreg pass. */
1643
1644static unsigned int
1645rest_of_handle_lower_subreg (void)
1646{
c2c47e8f 1647 decompose_multiword_subregs (false);
1648 return 0;
1649}
1650
1651/* Implement second lower subreg pass. */
1652
1653static unsigned int
1654rest_of_handle_lower_subreg2 (void)
1655{
c2c47e8f 1656 decompose_multiword_subregs (true);
1657 return 0;
1658}
1659
8ddbbcae 1660struct rtl_opt_pass pass_lower_subreg =
e53a16e7 1661{
1662 {
1663 RTL_PASS,
00b251a0 1664 "subreg1", /* name */
2b4e6bf1 1665 OPTGROUP_NONE, /* optinfo_flags */
1666 gate_handle_lower_subreg, /* gate */
1667 rest_of_handle_lower_subreg, /* execute */
1668 NULL, /* sub */
1669 NULL, /* next */
1670 0, /* static_pass_number */
1671 TV_LOWER_SUBREG, /* tv_id */
1672 0, /* properties_required */
1673 0, /* properties_provided */
1674 0, /* properties_destroyed */
1675 0, /* todo_flags_start */
73663bb7 1676 TODO_ggc_collect |
1677 TODO_verify_flow /* todo_flags_finish */
1678 }
1679};
1680
8ddbbcae 1681struct rtl_opt_pass pass_lower_subreg2 =
e53a16e7 1682{
1683 {
1684 RTL_PASS,
e53a16e7 1685 "subreg2", /* name */
2b4e6bf1 1686 OPTGROUP_NONE, /* optinfo_flags */
1687 gate_handle_lower_subreg, /* gate */
1688 rest_of_handle_lower_subreg2, /* execute */
1689 NULL, /* sub */
1690 NULL, /* next */
1691 0, /* static_pass_number */
1692 TV_LOWER_SUBREG, /* tv_id */
1693 0, /* properties_required */
1694 0, /* properties_provided */
1695 0, /* properties_destroyed */
1696 0, /* todo_flags_start */
a36b8a1e 1697 TODO_df_finish | TODO_verify_rtl_sharing |
73663bb7 1698 TODO_ggc_collect |
1699 TODO_verify_flow /* todo_flags_finish */
1700 }
e53a16e7 1701};