]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
gcc/
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
1a6a0f2a 1/* Decompose multiword subregs.
08b31038 2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
1a6a0f2a 4 Contributed by Richard Henderson <rth@redhat.com>
5 Ian Lance Taylor <iant@google.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
8c4c00c1 11Software Foundation; either version 3, or (at your option) any later
1a6a0f2a 12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
8c4c00c1 20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
1a6a0f2a 22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "machmode.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tm_p.h"
30#include "timevar.h"
31#include "flags.h"
32#include "insn-config.h"
33#include "obstack.h"
34#include "basic-block.h"
35#include "recog.h"
36#include "bitmap.h"
0e8e9be3 37#include "dce.h"
1a6a0f2a 38#include "expr.h"
db1c50be 39#include "except.h"
1a6a0f2a 40#include "regs.h"
41#include "tree-pass.h"
3072d30e 42#include "df.h"
c7944dce 43#include "lower-subreg.h"
1a6a0f2a 44
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined: 1 if
   the target defined it, 0 otherwise.  This lets it be used as an
   ordinary value in C expressions rather than only in #ifdef tests.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif

/* Instantiate the VEC(bitmap,heap) type used for reg_copy_graph.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);
54
/* Overview of the pass.

   Multi-word pseudo-registers are decomposed into individual
   word-sized pseudo-registers when that is both possible and
   profitable.  It is possible when every use of the multi-word
   register is either through a SUBREG or is a copy of the register to
   another location.  Breaking the register apart permits more CSE and
   better register allocation.  It is profitable if the machine does
   not have move instructions that handle the wide mode directly.

   The pass only splits moves whose modes are wider than word_mode,
   and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs whose integer modes are
   twice the width of word_mode.  The latter could be generalized if
   there were a need, but the trend in architectures is not to need
   this.

   Two preprocessor defines are useful to maintainers:

     #define LOG_COSTS 1

   prints the actual cost estimates used for each mode wider than
   word mode, and the cost estimates for zero extension and the
   shifts.  This can be useful when port maintainers are tuning insn
   rtx costs.

     #define FORCE_LOWERING 1

   runs the pass with every transformation forced on.  This can be
   useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
1a6a0f2a 85
86/* Bit N in this bitmap is set if regno N is used in a context in
87 which we can decompose it. */
88static bitmap decomposable_context;
89
90/* Bit N in this bitmap is set if regno N is used in a context in
91 which it can not be decomposed. */
92static bitmap non_decomposable_context;
93
5277d36e 94/* Bit N in this bitmap is set if regno N is used in a subreg
95 which changes the mode but not the size. This typically happens
96 when the register accessed as a floating-point value; we want to
97 avoid generating accesses to its subwords in integer modes. */
98static bitmap subreg_context;
99
1a6a0f2a 100/* Bit N in the bitmap in element M of this array is set if there is a
101 copy from reg M to reg N. */
102static VEC(bitmap,heap) *reg_copy_graph;
103
c7944dce 104struct target_lower_subreg default_target_lower_subreg;
105#if SWITCHABLE_TARGET
106struct target_lower_subreg *this_target_lower_subreg
107 = &default_target_lower_subreg;
108#endif
109
110#define twice_word_mode \
111 this_target_lower_subreg->x_twice_word_mode
112#define choices \
113 this_target_lower_subreg->x_choices
114
115/* RTXes used while computing costs. */
116struct cost_rtxes {
117 /* Source and target registers. */
118 rtx source;
119 rtx target;
120
121 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
122 rtx zext;
123
124 /* A shift of SOURCE. */
125 rtx shift;
126
127 /* A SET of TARGET. */
128 rtx set;
129};
130
131/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
132 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
133
134static int
135shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
136 enum machine_mode mode, int op1)
137{
c7944dce 138 PUT_CODE (rtxes->shift, code);
139 PUT_MODE (rtxes->shift, mode);
140 PUT_MODE (rtxes->source, mode);
141 XEXP (rtxes->shift, 1) = GEN_INT (op1);
a6d935b7 142 return set_src_cost (rtxes->shift, speed_p);
c7944dce 143}
144
145/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
146 to true if it is profitable to split a double-word CODE shift
147 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
148 for speed or size profitability.
149
150 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
151 the cost of moving zero into a word-mode register. WORD_MOVE_COST
152 is the cost of moving between word registers. */
153
154static void
155compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
156 bool *splitting, enum rtx_code code,
157 int word_move_zero_cost, int word_move_cost)
158{
159 int wide_cost, narrow_cost, i;
160
161 for (i = 0; i < BITS_PER_WORD; i++)
162 {
163 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
164 i + BITS_PER_WORD);
165 if (i == 0)
166 narrow_cost = word_move_cost;
167 else
168 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
169
170 if (LOG_COSTS)
171 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
172 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
173 i + BITS_PER_WORD, wide_cost, narrow_cost,
174 word_move_zero_cost);
175
176 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
177 splitting[i] = true;
178 }
179}
180
181/* Compute what we should do when optimizing for speed or size; SPEED_P
182 selects which. Use RTXES for computing costs. */
183
184static void
185compute_costs (bool speed_p, struct cost_rtxes *rtxes)
186{
187 unsigned int i;
188 int word_move_zero_cost, word_move_cost;
189
a6d935b7 190 PUT_MODE (rtxes->target, word_mode);
c7944dce 191 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
a6d935b7 192 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 193
194 SET_SRC (rtxes->set) = rtxes->source;
a6d935b7 195 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 196
197 if (LOG_COSTS)
198 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
199 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
200
201 for (i = 0; i < MAX_MACHINE_MODE; i++)
202 {
203 enum machine_mode mode = (enum machine_mode) i;
204 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
205 if (factor > 1)
206 {
207 int mode_move_cost;
208
209 PUT_MODE (rtxes->target, mode);
210 PUT_MODE (rtxes->source, mode);
a6d935b7 211 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 212
213 if (LOG_COSTS)
214 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
215 GET_MODE_NAME (mode), mode_move_cost,
216 word_move_cost, factor);
217
218 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
219 {
220 choices[speed_p].move_modes_to_split[i] = true;
221 choices[speed_p].something_to_do = true;
222 }
223 }
224 }
225
226 /* For the moves and shifts, the only case that is checked is one
227 where the mode of the target is an integer mode twice the width
228 of the word_mode.
229
230 If it is not profitable to split a double word move then do not
231 even consider the shifts or the zero extension. */
232 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
233 {
234 int zext_cost;
235
236 /* The only case here to check to see if moving the upper part with a
237 zero is cheaper than doing the zext itself. */
c7944dce 238 PUT_MODE (rtxes->source, word_mode);
a6d935b7 239 zext_cost = set_src_cost (rtxes->zext, speed_p);
c7944dce 240
241 if (LOG_COSTS)
242 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
243 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
244 zext_cost, word_move_cost, word_move_zero_cost);
245
246 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
247 choices[speed_p].splitting_zext = true;
248
249 compute_splitting_shift (speed_p, rtxes,
250 choices[speed_p].splitting_ashift, ASHIFT,
251 word_move_zero_cost, word_move_cost);
252 compute_splitting_shift (speed_p, rtxes,
253 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
254 word_move_zero_cost, word_move_cost);
255 }
256}
257
258/* Do one-per-target initialisation. This involves determining
259 which operations on the machine are profitable. If none are found,
260 then the pass just returns when called. */
261
262void
263init_lower_subreg (void)
264{
265 struct cost_rtxes rtxes;
266
267 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
268
269 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
270
271 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
272 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
273 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
274 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
275 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
276
277 if (LOG_COSTS)
278 fprintf (stderr, "\nSize costs\n==========\n\n");
279 compute_costs (false, &rtxes);
280
281 if (LOG_COSTS)
282 fprintf (stderr, "\nSpeed costs\n===========\n\n");
283 compute_costs (true, &rtxes);
284}
67c3f580 285
286static bool
287simple_move_operand (rtx x)
288{
289 if (GET_CODE (x) == SUBREG)
290 x = SUBREG_REG (x);
291
292 if (!OBJECT_P (x))
293 return false;
294
295 if (GET_CODE (x) == LABEL_REF
296 || GET_CODE (x) == SYMBOL_REF
ab9eaa97 297 || GET_CODE (x) == HIGH
298 || GET_CODE (x) == CONST)
67c3f580 299 return false;
300
301 if (MEM_P (x)
302 && (MEM_VOLATILE_P (x)
303 || mode_dependent_address_p (XEXP (x, 0))))
304 return false;
305
306 return true;
307}
308
c7944dce 309/* If INSN is a single set between two objects that we want to split,
310 return the single set. SPEED_P says whether we are optimizing
311 INSN for speed or size.
312
313 INSN should have been passed to recog and extract_insn before this
314 is called. */
1a6a0f2a 315
316static rtx
c7944dce 317simple_move (rtx insn, bool speed_p)
1a6a0f2a 318{
319 rtx x;
320 rtx set;
321 enum machine_mode mode;
322
323 if (recog_data.n_operands != 2)
324 return NULL_RTX;
325
326 set = single_set (insn);
327 if (!set)
328 return NULL_RTX;
329
330 x = SET_DEST (set);
331 if (x != recog_data.operand[0] && x != recog_data.operand[1])
332 return NULL_RTX;
67c3f580 333 if (!simple_move_operand (x))
1a6a0f2a 334 return NULL_RTX;
335
336 x = SET_SRC (set);
337 if (x != recog_data.operand[0] && x != recog_data.operand[1])
338 return NULL_RTX;
67c3f580 339 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
340 things like x86 rdtsc which returns a DImode value. */
341 if (GET_CODE (x) != ASM_OPERANDS
342 && !simple_move_operand (x))
1a6a0f2a 343 return NULL_RTX;
344
345 /* We try to decompose in integer modes, to avoid generating
346 inefficient code copying between integer and floating point
347 registers. That means that we can't decompose if this is a
348 non-integer mode for which there is no integer mode of the same
349 size. */
350 mode = GET_MODE (SET_SRC (set));
351 if (!SCALAR_INT_MODE_P (mode)
352 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
353 == BLKmode))
354 return NULL_RTX;
355
5e016dfc 356 /* Reject PARTIAL_INT modes. They are used for processor specific
357 purposes and it's probably best not to tamper with them. */
358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
359 return NULL_RTX;
360
c7944dce 361 if (!choices[speed_p].move_modes_to_split[(int) mode])
362 return NULL_RTX;
363
1a6a0f2a 364 return set;
365}
366
367/* If SET is a copy from one multi-word pseudo-register to another,
368 record that in reg_copy_graph. Return whether it is such a
369 copy. */
370
371static bool
372find_pseudo_copy (rtx set)
373{
374 rtx dest = SET_DEST (set);
375 rtx src = SET_SRC (set);
376 unsigned int rd, rs;
377 bitmap b;
378
379 if (!REG_P (dest) || !REG_P (src))
380 return false;
381
382 rd = REGNO (dest);
383 rs = REGNO (src);
384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
385 return false;
386
1a6a0f2a 387 b = VEC_index (bitmap, reg_copy_graph, rs);
388 if (b == NULL)
389 {
390 b = BITMAP_ALLOC (NULL);
391 VEC_replace (bitmap, reg_copy_graph, rs, b);
392 }
393
394 bitmap_set_bit (b, rd);
395
396 return true;
397}
398
399/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
400 where they are copied to another register, add the register to
401 which they are copied to DECOMPOSABLE_CONTEXT. Use
402 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
404
405static void
406propagate_pseudo_copies (void)
407{
408 bitmap queue, propagate;
409
410 queue = BITMAP_ALLOC (NULL);
411 propagate = BITMAP_ALLOC (NULL);
412
413 bitmap_copy (queue, decomposable_context);
414 do
415 {
416 bitmap_iterator iter;
417 unsigned int i;
418
419 bitmap_clear (propagate);
420
421 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
422 {
423 bitmap b = VEC_index (bitmap, reg_copy_graph, i);
424 if (b)
425 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
426 }
427
428 bitmap_and_compl (queue, propagate, decomposable_context);
429 bitmap_ior_into (decomposable_context, propagate);
430 }
431 while (!bitmap_empty_p (queue));
432
433 BITMAP_FREE (queue);
434 BITMAP_FREE (propagate);
435}
436
/* Classification of the insn being scanned.  find_decomposable_subregs
   receives a pointer to one of these values through for_each_rtx.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move between two pseudo-registers.  */
  SIMPLE_PSEUDO_REG_MOVE,
  /* The insn is a simple move that involves a non-pseudo-register.  */
  SIMPLE_MOVE
};
449
450/* This is called via for_each_rtx. If we find a SUBREG which we
451 could use to decompose a pseudo-register, set a bit in
452 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
453 not a simple pseudo-register copy, DATA will point at the type of
454 move, and we set a bit in DECOMPOSABLE_CONTEXT or
455 NON_DECOMPOSABLE_CONTEXT as appropriate. */
456
457static int
458find_decomposable_subregs (rtx *px, void *data)
459{
460 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
461 rtx x = *px;
462
463 if (x == NULL_RTX)
464 return 0;
465
466 if (GET_CODE (x) == SUBREG)
467 {
468 rtx inner = SUBREG_REG (x);
469 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
470
471 if (!REG_P (inner))
472 return 0;
473
474 regno = REGNO (inner);
475 if (HARD_REGISTER_NUM_P (regno))
476 return -1;
477
478 outer_size = GET_MODE_SIZE (GET_MODE (x));
479 inner_size = GET_MODE_SIZE (GET_MODE (inner));
480 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
481 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
482
483 /* We only try to decompose single word subregs of multi-word
484 registers. When we find one, we return -1 to avoid iterating
485 over the inner register.
486
487 ??? This doesn't allow, e.g., DImode subregs of TImode values
488 on 32-bit targets. We would need to record the way the
489 pseudo-register was used, and only decompose if all the uses
490 were the same number and size of pieces. Hopefully this
491 doesn't happen much. */
492
493 if (outer_words == 1 && inner_words > 1)
494 {
495 bitmap_set_bit (decomposable_context, regno);
496 return -1;
497 }
4e7a1eb8 498
499 /* If this is a cast from one mode to another, where the modes
500 have the same size, and they are not tieable, then mark this
501 register as non-decomposable. If we decompose it we are
502 likely to mess up whatever the backend is trying to do. */
503 if (outer_words > 1
504 && outer_size == inner_size
505 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
506 {
507 bitmap_set_bit (non_decomposable_context, regno);
5277d36e 508 bitmap_set_bit (subreg_context, regno);
4e7a1eb8 509 return -1;
510 }
1a6a0f2a 511 }
67c3f580 512 else if (REG_P (x))
1a6a0f2a 513 {
514 unsigned int regno;
515
516 /* We will see an outer SUBREG before we see the inner REG, so
517 when we see a plain REG here it means a direct reference to
518 the register.
519
520 If this is not a simple copy from one location to another,
521 then we can not decompose this register. If this is a simple
1e5b92fa 522 copy from one pseudo-register to another, and the mode is right
523 then we mark the register as decomposable.
524 Otherwise we don't say anything about this register --
525 it could be decomposed, but whether that would be
1a6a0f2a 526 profitable depends upon how it is used elsewhere.
527
528 We only set bits in the bitmap for multi-word
529 pseudo-registers, since those are the only ones we care about
530 and it keeps the size of the bitmaps down. */
531
532 regno = REGNO (x);
533 if (!HARD_REGISTER_NUM_P (regno)
534 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
535 {
536 switch (*pcmi)
537 {
538 case NOT_SIMPLE_MOVE:
539 bitmap_set_bit (non_decomposable_context, regno);
540 break;
541 case SIMPLE_PSEUDO_REG_MOVE:
542 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
543 bitmap_set_bit (decomposable_context, regno);
544 break;
545 case SIMPLE_MOVE:
546 break;
547 default:
548 gcc_unreachable ();
549 }
550 }
551 }
67c3f580 552 else if (MEM_P (x))
553 {
554 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
555
556 /* Any registers used in a MEM do not participate in a
557 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE. Do our own recursion
558 here, and return -1 to block the parent's recursion. */
559 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
560 return -1;
561 }
1a6a0f2a 562
563 return 0;
564}
565
566/* Decompose REGNO into word-sized components. We smash the REG node
567 in place. This ensures that (1) something goes wrong quickly if we
568 fail to make some replacement, and (2) the debug information inside
569 the symbol table is automatically kept up to date. */
570
571static void
572decompose_register (unsigned int regno)
573{
574 rtx reg;
575 unsigned int words, i;
576 rtvec v;
577
578 reg = regno_reg_rtx[regno];
579
580 regno_reg_rtx[regno] = NULL_RTX;
1a6a0f2a 581
582 words = GET_MODE_SIZE (GET_MODE (reg));
583 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
584
585 v = rtvec_alloc (words);
586 for (i = 0; i < words; ++i)
587 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
588
589 PUT_CODE (reg, CONCATN);
590 XVEC (reg, 0) = v;
591
592 if (dump_file)
593 {
594 fprintf (dump_file, "; Splitting reg %u ->", regno);
595 for (i = 0; i < words; ++i)
596 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
597 fputc ('\n', dump_file);
598 }
599}
600
601/* Get a SUBREG of a CONCATN. */
602
603static rtx
604simplify_subreg_concatn (enum machine_mode outermode, rtx op,
605 unsigned int byte)
606{
607 unsigned int inner_size;
50bdfec8 608 enum machine_mode innermode, partmode;
1a6a0f2a 609 rtx part;
610 unsigned int final_offset;
611
612 gcc_assert (GET_CODE (op) == CONCATN);
613 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
614
615 innermode = GET_MODE (op);
616 gcc_assert (byte < GET_MODE_SIZE (innermode));
617 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
618
619 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
620 part = XVECEXP (op, 0, byte / inner_size);
50bdfec8 621 partmode = GET_MODE (part);
622
598ffe59 623 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
624 regular CONST_VECTORs. They have vector or integer modes, depending
625 on the capabilities of the target. Cope with them. */
626 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
627 partmode = GET_MODE_INNER (innermode);
628 else if (partmode == VOIDmode)
50bdfec8 629 {
598ffe59 630 enum mode_class mclass = GET_MODE_CLASS (innermode);
631 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
50bdfec8 632 }
633
1a6a0f2a 634 final_offset = byte % inner_size;
635 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
636 return NULL_RTX;
637
50bdfec8 638 return simplify_gen_subreg (outermode, part, partmode, final_offset);
1a6a0f2a 639}
640
641/* Wrapper around simplify_gen_subreg which handles CONCATN. */
642
643static rtx
644simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
645 enum machine_mode innermode, unsigned int byte)
646{
3fa57b79 647 rtx ret;
648
1a6a0f2a 649 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
650 If OP is a SUBREG of a CONCATN, then it must be a simple mode
651 change with the same size and offset 0, or it must extract a
652 part. We shouldn't see anything else here. */
653 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
654 {
655 rtx op2;
656
657 if ((GET_MODE_SIZE (GET_MODE (op))
658 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
659 && SUBREG_BYTE (op) == 0)
660 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
661 GET_MODE (SUBREG_REG (op)), byte);
662
663 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
664 SUBREG_BYTE (op));
665 if (op2 == NULL_RTX)
666 {
667 /* We don't handle paradoxical subregs here. */
668 gcc_assert (GET_MODE_SIZE (outermode)
669 <= GET_MODE_SIZE (GET_MODE (op)));
670 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
671 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
672 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
673 byte + SUBREG_BYTE (op));
674 gcc_assert (op2 != NULL_RTX);
675 return op2;
676 }
677
678 op = op2;
679 gcc_assert (op != NULL_RTX);
680 gcc_assert (innermode == GET_MODE (op));
681 }
3fa57b79 682
1a6a0f2a 683 if (GET_CODE (op) == CONCATN)
684 return simplify_subreg_concatn (outermode, op, byte);
3fa57b79 685
686 ret = simplify_gen_subreg (outermode, op, innermode, byte);
687
688 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
689 resolve_simple_move will ask for the high part of the paradoxical
690 subreg, which does not have a value. Just return a zero. */
691 if (ret == NULL_RTX
692 && GET_CODE (op) == SUBREG
693 && SUBREG_BYTE (op) == 0
694 && (GET_MODE_SIZE (innermode)
695 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
696 return CONST0_RTX (outermode);
697
698 gcc_assert (ret != NULL_RTX);
699 return ret;
1a6a0f2a 700}
701
702/* Return whether we should resolve X into the registers into which it
703 was decomposed. */
704
705static bool
706resolve_reg_p (rtx x)
707{
708 return GET_CODE (x) == CONCATN;
709}
710
711/* Return whether X is a SUBREG of a register which we need to
712 resolve. */
713
714static bool
715resolve_subreg_p (rtx x)
716{
717 if (GET_CODE (x) != SUBREG)
718 return false;
719 return resolve_reg_p (SUBREG_REG (x));
720}
721
722/* This is called via for_each_rtx. Look for SUBREGs which need to be
723 decomposed. */
724
725static int
726resolve_subreg_use (rtx *px, void *data)
727{
728 rtx insn = (rtx) data;
729 rtx x = *px;
730
731 if (x == NULL_RTX)
732 return 0;
733
734 if (resolve_subreg_p (x))
735 {
736 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
737 SUBREG_BYTE (x));
738
739 /* It is possible for a note to contain a reference which we can
740 decompose. In this case, return 1 to the caller to indicate
741 that the note must be removed. */
742 if (!x)
743 {
ccd1ec59 744 gcc_assert (!insn);
1a6a0f2a 745 return 1;
746 }
747
748 validate_change (insn, px, x, 1);
749 return -1;
750 }
751
752 if (resolve_reg_p (x))
753 {
754 /* Return 1 to the caller to indicate that we found a direct
755 reference to a register which is being decomposed. This can
9cf5d19e 756 happen inside notes, multiword shift or zero-extend
757 instructions. */
1a6a0f2a 758 return 1;
759 }
760
761 return 0;
762}
763
9845d120 764/* This is called via for_each_rtx. Look for SUBREGs which can be
765 decomposed and decomposed REGs that need copying. */
766
767static int
768adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
769{
770 rtx x = *px;
771
772 if (x == NULL_RTX)
773 return 0;
774
775 if (resolve_subreg_p (x))
776 {
777 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
778 SUBREG_BYTE (x));
779
780 if (x)
781 *px = x;
782 else
783 x = copy_rtx (*px);
784 }
785
786 if (resolve_reg_p (x))
787 *px = copy_rtx (x);
788
789 return 0;
790}
791
1a6a0f2a 792/* Resolve any decomposed registers which appear in register notes on
793 INSN. */
794
795static void
796resolve_reg_notes (rtx insn)
797{
798 rtx *pnote, note;
799
800 note = find_reg_equal_equiv_note (insn);
801 if (note)
802 {
3072d30e 803 int old_count = num_validated_changes ();
1a6a0f2a 804 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
1e5b92fa 805 remove_note (insn, note);
3072d30e 806 else
807 if (old_count != num_validated_changes ())
808 df_notes_rescan (insn);
1a6a0f2a 809 }
810
811 pnote = &REG_NOTES (insn);
812 while (*pnote != NULL_RTX)
813 {
9ce37fa7 814 bool del = false;
1a6a0f2a 815
816 note = *pnote;
817 switch (REG_NOTE_KIND (note))
818 {
3072d30e 819 case REG_DEAD:
820 case REG_UNUSED:
1a6a0f2a 821 if (resolve_reg_p (XEXP (note, 0)))
9ce37fa7 822 del = true;
1a6a0f2a 823 break;
824
825 default:
826 break;
827 }
828
9ce37fa7 829 if (del)
1a6a0f2a 830 *pnote = XEXP (note, 1);
831 else
832 pnote = &XEXP (note, 1);
833 }
834}
835
67c3f580 836/* Return whether X can be decomposed into subwords. */
1a6a0f2a 837
838static bool
67c3f580 839can_decompose_p (rtx x)
1a6a0f2a 840{
841 if (REG_P (x))
842 {
843 unsigned int regno = REGNO (x);
844
845 if (HARD_REGISTER_NUM_P (regno))
5f961ca4 846 {
847 unsigned int byte, num_bytes;
848
849 num_bytes = GET_MODE_SIZE (GET_MODE (x));
850 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
851 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
852 return false;
853 return true;
854 }
1a6a0f2a 855 else
5277d36e 856 return !bitmap_bit_p (subreg_context, regno);
1a6a0f2a 857 }
858
67c3f580 859 return true;
1a6a0f2a 860}
861
862/* Decompose the registers used in a simple move SET within INSN. If
863 we don't change anything, return INSN, otherwise return the start
864 of the sequence of moves. */
865
866static rtx
867resolve_simple_move (rtx set, rtx insn)
868{
869 rtx src, dest, real_dest, insns;
870 enum machine_mode orig_mode, dest_mode;
871 unsigned int words;
872 bool pushing;
873
874 src = SET_SRC (set);
875 dest = SET_DEST (set);
876 orig_mode = GET_MODE (dest);
877
878 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
c7944dce 879 gcc_assert (words > 1);
1a6a0f2a 880
881 start_sequence ();
882
883 /* We have to handle copying from a SUBREG of a decomposed reg where
884 the SUBREG is larger than word size. Rather than assume that we
885 can take a word_mode SUBREG of the destination, we copy to a new
886 register and then copy that to the destination. */
887
888 real_dest = NULL_RTX;
889
890 if (GET_CODE (src) == SUBREG
891 && resolve_reg_p (SUBREG_REG (src))
892 && (SUBREG_BYTE (src) != 0
893 || (GET_MODE_SIZE (orig_mode)
894 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
895 {
896 real_dest = dest;
897 dest = gen_reg_rtx (orig_mode);
898 if (REG_P (real_dest))
899 REG_ATTRS (dest) = REG_ATTRS (real_dest);
900 }
901
902 /* Similarly if we are copying to a SUBREG of a decomposed reg where
903 the SUBREG is larger than word size. */
904
905 if (GET_CODE (dest) == SUBREG
906 && resolve_reg_p (SUBREG_REG (dest))
907 && (SUBREG_BYTE (dest) != 0
908 || (GET_MODE_SIZE (orig_mode)
909 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
910 {
911 rtx reg, minsn, smove;
912
913 reg = gen_reg_rtx (orig_mode);
914 minsn = emit_move_insn (reg, src);
915 smove = single_set (minsn);
916 gcc_assert (smove != NULL_RTX);
917 resolve_simple_move (smove, minsn);
918 src = reg;
919 }
920
921 /* If we didn't have any big SUBREGS of decomposed registers, and
922 neither side of the move is a register we are decomposing, then
923 we don't have to do anything here. */
924
925 if (src == SET_SRC (set)
926 && dest == SET_DEST (set)
927 && !resolve_reg_p (src)
928 && !resolve_subreg_p (src)
929 && !resolve_reg_p (dest)
930 && !resolve_subreg_p (dest))
931 {
932 end_sequence ();
933 return insn;
934 }
935
ccd1ec59 936 /* It's possible for the code to use a subreg of a decomposed
937 register while forming an address. We need to handle that before
938 passing the address to emit_move_insn. We pass NULL_RTX as the
939 insn parameter to resolve_subreg_use because we can not validate
940 the insn yet. */
941 if (MEM_P (src) || MEM_P (dest))
942 {
943 int acg;
944
945 if (MEM_P (src))
946 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
947 if (MEM_P (dest))
948 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
949 acg = apply_change_group ();
950 gcc_assert (acg);
951 }
952
1a6a0f2a 953 /* If SRC is a register which we can't decompose, or has side
954 effects, we need to move via a temporary register. */
955
67c3f580 956 if (!can_decompose_p (src)
1a6a0f2a 957 || side_effects_p (src)
958 || GET_CODE (src) == ASM_OPERANDS)
959 {
960 rtx reg;
961
962 reg = gen_reg_rtx (orig_mode);
963 emit_move_insn (reg, src);
964 src = reg;
965 }
966
967 /* If DEST is a register which we can't decompose, or has side
968 effects, we need to first move to a temporary register. We
969 handle the common case of pushing an operand directly. We also
970 go through a temporary register if it holds a floating point
971 value. This gives us better code on systems which can't move
972 data easily between integer and floating point registers. */
973
974 dest_mode = orig_mode;
975 pushing = push_operand (dest, dest_mode);
67c3f580 976 if (!can_decompose_p (dest)
1a6a0f2a 977 || (side_effects_p (dest) && !pushing)
978 || (!SCALAR_INT_MODE_P (dest_mode)
979 && !resolve_reg_p (dest)
980 && !resolve_subreg_p (dest)))
981 {
982 if (real_dest == NULL_RTX)
983 real_dest = dest;
984 if (!SCALAR_INT_MODE_P (dest_mode))
985 {
986 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
987 MODE_INT, 0);
988 gcc_assert (dest_mode != BLKmode);
989 }
990 dest = gen_reg_rtx (dest_mode);
991 if (REG_P (real_dest))
992 REG_ATTRS (dest) = REG_ATTRS (real_dest);
993 }
994
995 if (pushing)
996 {
997 unsigned int i, j, jinc;
998
999 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1000 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1001 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1002
1003 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1004 {
1005 j = 0;
1006 jinc = 1;
1007 }
1008 else
1009 {
1010 j = words - 1;
1011 jinc = -1;
1012 }
1013
1014 for (i = 0; i < words; ++i, j += jinc)
1015 {
1016 rtx temp;
1017
1018 temp = copy_rtx (XEXP (dest, 0));
1019 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1020 j * UNITS_PER_WORD);
1021 emit_move_insn (temp,
1022 simplify_gen_subreg_concatn (word_mode, src,
1023 orig_mode,
1024 j * UNITS_PER_WORD));
1025 }
1026 }
1027 else
1028 {
1029 unsigned int i;
1030
1031 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
18b42941 1032 emit_clobber (dest);
1a6a0f2a 1033
1034 for (i = 0; i < words; ++i)
1035 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1036 dest_mode,
1037 i * UNITS_PER_WORD),
1038 simplify_gen_subreg_concatn (word_mode, src,
1039 orig_mode,
1040 i * UNITS_PER_WORD));
1041 }
1042
1043 if (real_dest != NULL_RTX)
1044 {
1045 rtx mdest, minsn, smove;
1046
1047 if (dest_mode == orig_mode)
1048 mdest = dest;
1049 else
1050 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1051 minsn = emit_move_insn (real_dest, mdest);
1052
1053 smove = single_set (minsn);
1054 gcc_assert (smove != NULL_RTX);
1055
1056 resolve_simple_move (smove, minsn);
1057 }
1058
1059 insns = get_insns ();
1060 end_sequence ();
1061
e38def9c 1062 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
97bb2849 1063
1a6a0f2a 1064 emit_insn_before (insns, insn);
1065
1a6a0f2a 1066 delete_insn (insn);
1067
1068 return insns;
1069}
1070
1071/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1072 component registers. Return whether we changed something. */
1073
1074static bool
1075resolve_clobber (rtx pat, rtx insn)
1076{
e29831db 1077 rtx reg;
1a6a0f2a 1078 enum machine_mode orig_mode;
1079 unsigned int words, i;
ab9eaa97 1080 int ret;
1a6a0f2a 1081
1082 reg = XEXP (pat, 0);
2289a5f2 1083 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1a6a0f2a 1084 return false;
1085
1086 orig_mode = GET_MODE (reg);
1087 words = GET_MODE_SIZE (orig_mode);
1088 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1089
ab9eaa97 1090 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1091 simplify_gen_subreg_concatn (word_mode, reg,
1092 orig_mode, 0),
1093 0);
3072d30e 1094 df_insn_rescan (insn);
ab9eaa97 1095 gcc_assert (ret != 0);
1096
1a6a0f2a 1097 for (i = words - 1; i > 0; --i)
1098 {
1099 rtx x;
1100
2289a5f2 1101 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1102 i * UNITS_PER_WORD);
1a6a0f2a 1103 x = gen_rtx_CLOBBER (VOIDmode, x);
1104 emit_insn_after (x, insn);
1105 }
1106
db2200eb 1107 resolve_reg_notes (insn);
1108
1a6a0f2a 1109 return true;
1110}
1111
1112/* A USE of a decomposed register is no longer meaningful. Return
1113 whether we changed something. */
1114
1115static bool
1116resolve_use (rtx pat, rtx insn)
1117{
1118 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1119 {
1120 delete_insn (insn);
1121 return true;
1122 }
db2200eb 1123
1124 resolve_reg_notes (insn);
1125
1a6a0f2a 1126 return false;
1127}
1128
9845d120 1129/* A VAR_LOCATION can be simplified. */
1130
1131static void
1132resolve_debug (rtx insn)
1133{
1134 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1135
1136 df_insn_rescan (insn);
1137
1138 resolve_reg_notes (insn);
1139}
1140
c7944dce 1141/* Check if INSN is a decomposable multiword-shift or zero-extend and
1142 set the decomposable_context bitmap accordingly. SPEED_P is true
1143 if we are optimizing INSN for speed rather than size. Return true
1144 if INSN is decomposable. */
9cf5d19e 1145
c7944dce 1146static bool
1147find_decomposable_shift_zext (rtx insn, bool speed_p)
9cf5d19e 1148{
1149 rtx set;
1150 rtx op;
1151 rtx op_operand;
1152
1153 set = single_set (insn);
1154 if (!set)
c7944dce 1155 return false;
9cf5d19e 1156
1157 op = SET_SRC (set);
1158 if (GET_CODE (op) != ASHIFT
1159 && GET_CODE (op) != LSHIFTRT
1160 && GET_CODE (op) != ZERO_EXTEND)
c7944dce 1161 return false;
9cf5d19e 1162
1163 op_operand = XEXP (op, 0);
1164 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1165 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1166 || HARD_REGISTER_NUM_P (REGNO (op_operand))
c7944dce 1167 || GET_MODE (op) != twice_word_mode)
1168 return false;
9cf5d19e 1169
1170 if (GET_CODE (op) == ZERO_EXTEND)
1171 {
1172 if (GET_MODE (op_operand) != word_mode
c7944dce 1173 || !choices[speed_p].splitting_zext)
1174 return false;
9cf5d19e 1175 }
1176 else /* left or right shift */
1177 {
c7944dce 1178 bool *splitting = (GET_CODE (op) == ASHIFT
1179 ? choices[speed_p].splitting_ashift
1180 : choices[speed_p].splitting_lshiftrt);
971ba038 1181 if (!CONST_INT_P (XEXP (op, 1))
c7944dce 1182 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1183 2 * BITS_PER_WORD - 1)
1184 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1185 return false;
1186
1187 bitmap_set_bit (decomposable_context, REGNO (op_operand));
9cf5d19e 1188 }
1189
1190 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1191
c7944dce 1192 return true;
9cf5d19e 1193}
1194
1195/* Decompose a more than word wide shift (in INSN) of a multiword
1196 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1197 and 'set to zero' insn. Return a pointer to the new insn when a
1198 replacement was done. */
1199
1200static rtx
1201resolve_shift_zext (rtx insn)
1202{
1203 rtx set;
1204 rtx op;
1205 rtx op_operand;
1206 rtx insns;
1207 rtx src_reg, dest_reg, dest_zero;
1208 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1209
1210 set = single_set (insn);
1211 if (!set)
1212 return NULL_RTX;
1213
1214 op = SET_SRC (set);
1215 if (GET_CODE (op) != ASHIFT
1216 && GET_CODE (op) != LSHIFTRT
1217 && GET_CODE (op) != ZERO_EXTEND)
1218 return NULL_RTX;
1219
1220 op_operand = XEXP (op, 0);
1221
c7944dce 1222 /* We can tear this operation apart only if the regs were already
1223 torn apart. */
9cf5d19e 1224 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1225 return NULL_RTX;
1226
1227 /* src_reg_num is the number of the word mode register which we
1228 are operating on. For a left shift and a zero_extend on little
1229 endian machines this is register 0. */
1230 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
1231
4d8ad352 1232 if (WORDS_BIG_ENDIAN
1233 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
9cf5d19e 1234 src_reg_num = 1 - src_reg_num;
1235
1236 if (GET_CODE (op) == ZERO_EXTEND)
4d8ad352 1237 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
9cf5d19e 1238 else
1239 dest_reg_num = 1 - src_reg_num;
1240
1241 offset1 = UNITS_PER_WORD * dest_reg_num;
1242 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1243 src_offset = UNITS_PER_WORD * src_reg_num;
1244
9cf5d19e 1245 start_sequence ();
1246
1247 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1248 GET_MODE (SET_DEST (set)),
1249 offset1);
1250 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1251 GET_MODE (SET_DEST (set)),
1252 offset2);
1253 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1254 GET_MODE (op_operand),
1255 src_offset);
1256 if (GET_CODE (op) != ZERO_EXTEND)
1257 {
1258 int shift_count = INTVAL (XEXP (op, 1));
1259 if (shift_count > BITS_PER_WORD)
1260 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1261 LSHIFT_EXPR : RSHIFT_EXPR,
1262 word_mode, src_reg,
f5ff0b21 1263 shift_count - BITS_PER_WORD,
9cf5d19e 1264 dest_reg, 1);
1265 }
1266
1267 if (dest_reg != src_reg)
1268 emit_move_insn (dest_reg, src_reg);
1269 emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1270 insns = get_insns ();
1271
1272 end_sequence ();
1273
1274 emit_insn_before (insns, insn);
1275
1276 if (dump_file)
1277 {
1278 rtx in;
1279 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1280 for (in = insns; in != insn; in = NEXT_INSN (in))
1281 fprintf (dump_file, "%d ", INSN_UID (in));
1282 fprintf (dump_file, "\n");
1283 }
1284
1285 delete_insn (insn);
1286 return insns;
1287}
1288
c7944dce 1289/* Print to dump_file a description of what we're doing with shift code CODE.
1290 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1291
1292static void
1293dump_shift_choices (enum rtx_code code, bool *splitting)
1294{
1295 int i;
1296 const char *sep;
1297
1298 fprintf (dump_file,
1299 " Splitting mode %s for %s lowering with shift amounts = ",
1300 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1301 sep = "";
1302 for (i = 0; i < BITS_PER_WORD; i++)
1303 if (splitting[i])
1304 {
1305 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1306 sep = ",";
1307 }
1308 fprintf (dump_file, "\n");
1309}
1310
1311/* Print to dump_file a description of what we're doing when optimizing
1312 for speed or size; SPEED_P says which. DESCRIPTION is a description
1313 of the SPEED_P choice. */
1314
1315static void
1316dump_choices (bool speed_p, const char *description)
1317{
1318 unsigned int i;
1319
1320 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1321
1322 for (i = 0; i < MAX_MACHINE_MODE; i++)
1323 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1324 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1325 choices[speed_p].move_modes_to_split[i]
1326 ? "Splitting"
1327 : "Skipping",
1328 GET_MODE_NAME ((enum machine_mode) i));
1329
1330 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1331 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1332 GET_MODE_NAME (twice_word_mode));
1333
1334 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1335 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1336 fprintf (dump_file, "\n");
1337}
1338
1a6a0f2a 1339/* Look for registers which are always accessed via word-sized SUBREGs
1340 or via copies. Decompose these registers into several word-sized
1341 pseudo-registers. */
1342
/* Driver for the pass.  It runs in three stages: (1) cheap early-outs,
   (2) an analysis scan of all insns that fills decomposable_context,
   non_decomposable_context and reg_copy_graph, and (3) if anything is
   left in decomposable_context, a rewrite scan that resolves each
   insn, followed by splitting of basic blocks that ended up with a
   control-flow insn in their middle.  All pass-local bitmaps are
   freed before returning.  */
1343static void
3072d30e 1344decompose_multiword_subregs (void)
1a6a0f2a 1345{
1346 unsigned int max;
1347 basic_block bb;
c7944dce 1348 bool speed_p;
1a6a0f2a 1349
/* With a dump file active, report the choice tables for both the
   size (false) and speed (true) settings.  */
c7944dce 1350 if (dump_file)
1351 {
1352 dump_choices (false, "size");
1353 dump_choices (true, "speed");
1354 }
1355
1356 /* Check if this target even has any modes to consider lowering. */
1357 if (!choices[false].something_to_do && !choices[true].something_to_do)
1358 {
1359 if (dump_file)
1360 fprintf (dump_file, "Nothing to do!\n");
1361 return;
1362 }
3072d30e 1363
1a6a0f2a 1364 max = max_reg_num ();
1365
1366 /* First see if there are any multi-word pseudo-registers. If there
1367 aren't, there is nothing we can do. This should speed up this
1368 pass in the normal case, since it should be faster than scanning
1369 all the insns. */
1370 {
1371 unsigned int i;
c7944dce 1372 bool useful_modes_seen = false;
1a6a0f2a 1373
/* Only pseudo registers (numbers FIRST_PSEUDO_REGISTER .. max - 1)
   are examined; one register whose mode is marked in
   move_modes_to_split for either setting is enough to continue.  */
1374 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
c7944dce 1375 if (regno_reg_rtx[i] != NULL)
1376 {
1377 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1378 if (choices[false].move_modes_to_split[(int) mode]
1379 || choices[true].move_modes_to_split[(int) mode])
1380 {
1381 useful_modes_seen = true;
1382 break;
1383 }
1384 }
1385
1386 if (!useful_modes_seen)
1a6a0f2a 1387 {
c7944dce 1388 if (dump_file)
1389 fprintf (dump_file, "Nothing to lower in this function.\n");
1390 return;
1a6a0f2a 1391 }
1a6a0f2a 1392 }
1393
/* When df is available, request deferred insn rescanning and run
   run_word_dce before the analysis scan.  */
0e8e9be3 1394 if (df)
c7944dce 1395 {
1396 df_set_flags (DF_DEFER_INSN_RESCAN);
1397 run_word_dce ();
1398 }
0e8e9be3 1399
c7944dce 1400 /* FIXME: It may be possible to change this code to look for each
1401 multi-word pseudo-register and to find each insn which sets or
1402 uses that register. That should be faster than scanning all the
1403 insns. */
1a6a0f2a 1404
/* Allocate the three context bitmaps and a reg_copy_graph vector
   holding MAX entries, all cleared to zero.  */
1405 decomposable_context = BITMAP_ALLOC (NULL);
1406 non_decomposable_context = BITMAP_ALLOC (NULL);
5277d36e 1407 subreg_context = BITMAP_ALLOC (NULL);
1a6a0f2a 1408
1409 reg_copy_graph = VEC_alloc (bitmap, heap, max);
1410 VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
1411 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
1412
/* Analysis scan.  It uses one function-wide speed_p for every insn;
   insns whose whole pattern is a CLOBBER or a USE are skipped.  */
c7944dce 1413 speed_p = optimize_function_for_speed_p (cfun);
1a6a0f2a 1414 FOR_EACH_BB (bb)
1415 {
1416 rtx insn;
1417
1418 FOR_BB_INSNS (bb, insn)
1419 {
1420 rtx set;
1421 enum classify_move_insn cmi;
1422 int i, n;
1423
1424 if (!INSN_P (insn)
1425 || GET_CODE (PATTERN (insn)) == CLOBBER
1426 || GET_CODE (PATTERN (insn)) == USE)
1427 continue;
1428
08b31038 1429 recog_memoized (insn);
1430
/* A decomposable shift or zero_extend has already been recorded in
   decomposable_context by the call itself; nothing more to analyse
   for this insn.  */
c7944dce 1431 if (find_decomposable_shift_zext (insn, speed_p))
9cf5d19e 1432 continue;
1433
1a6a0f2a 1434 extract_insn (insn);
1435
/* Classify the insn; CMI is handed to find_decomposable_subregs as
   its callback data for every operand below.  */
c7944dce 1436 set = simple_move (insn, speed_p);
1a6a0f2a 1437
1438 if (!set)
1439 cmi = NOT_SIMPLE_MOVE;
1440 else
1441 {
1e5b92fa 1442 if (find_pseudo_copy (set))
1a6a0f2a 1443 cmi = SIMPLE_PSEUDO_REG_MOVE;
1a6a0f2a 1444 else
1445 cmi = SIMPLE_MOVE;
1446 }
1447
1448 n = recog_data.n_operands;
1449 for (i = 0; i < n; ++i)
1450 {
1451 for_each_rtx (&recog_data.operand[i],
1452 find_decomposable_subregs,
1453 &cmi);
1454
1455 /* We handle ASM_OPERANDS as a special case to support
1456 things like x86 rdtsc which returns a DImode value.
1457 We can decompose the output, which will certainly be
1458 operand 0, but not the inputs. */
1459
1460 if (cmi == SIMPLE_MOVE
1461 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1462 {
1463 gcc_assert (i == 0);
1464 cmi = NOT_SIMPLE_MOVE;
1465 }
1466 }
1467 }
1468 }
1469
/* Remove from decomposable_context every register that is also in
   non_decomposable_context; the rewrite below only runs if something
   remains.  */
1470 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1471 if (!bitmap_empty_p (decomposable_context))
1472 {
97bb2849 1473 sbitmap sub_blocks;
db1c50be 1474 unsigned int i;
1475 sbitmap_iterator sbi;
1a6a0f2a 1476 bitmap_iterator iter;
1477 unsigned int regno;
1478
1479 propagate_pseudo_copies ();
1480
/* SUB_BLOCKS collects the indices of basic blocks that must be split
   once the rewrite scan is finished.  */
97bb2849 1481 sub_blocks = sbitmap_alloc (last_basic_block);
1482 sbitmap_zero (sub_blocks);
1a6a0f2a 1483
1484 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1485 decompose_register (regno);
1486
/* Rewrite scan over every insn of every basic block.  */
1487 FOR_EACH_BB (bb)
1488 {
201f6961 1489 rtx insn;
/* NOTE(review): this declaration shadows the function-scope speed_p
   and is computed per basic block, whereas the analysis scan above
   used optimize_function_for_speed_p (cfun).  Confirm that
   simple_move is allowed to answer differently in the two scans.  */
c7944dce 1490 bool speed_p;
1a6a0f2a 1491
c7944dce 1492 speed_p = optimize_bb_for_speed_p (bb);
201f6961 1493 FOR_BB_INSNS (bb, insn)
1a6a0f2a 1494 {
da7a04f1 1495 rtx pat;
1a6a0f2a 1496
1497 if (!INSN_P (insn))
1498 continue;
1499
/* Dispatch on the kind of insn: bare CLOBBER, bare USE, debug insn,
   or anything else (handled through recog_data below).  */
1a6a0f2a 1500 pat = PATTERN (insn);
1501 if (GET_CODE (pat) == CLOBBER)
db2200eb 1502 resolve_clobber (pat, insn);
1a6a0f2a 1503 else if (GET_CODE (pat) == USE)
db2200eb 1504 resolve_use (pat, insn);
9845d120 1505 else if (DEBUG_INSN_P (insn))
1506 resolve_debug (insn);
1a6a0f2a 1507 else
1508 {
1509 rtx set;
1510 int i;
1511
1512 recog_memoized (insn);
1513 extract_insn (insn);
1514
c7944dce 1515 set = simple_move (insn, speed_p);
1a6a0f2a 1516 if (set)
1517 {
1518 rtx orig_insn = insn;
97bb2849 1519 bool cfi = control_flow_insn_p (insn);
1a6a0f2a 1520
db1c50be 1521 /* We can end up splitting loads to multi-word pseudos
1522 into separate loads to machine word size pseudos.
1523 When this happens, we first had one load that can
1524 throw, and after resolve_simple_move we'll have a
1525 bunch of loads (at least two). All those loads may
1526 trap if we can have non-call exceptions, so they
1527 all will end the current basic block. We split the
1528 block after the outer loop over all insns, but we
1529 make sure here that we will be able to split the
1530 basic block and still produce the correct control
1531 flow graph for it. */
1532 gcc_assert (!cfi
cbeb677e 1533 || (cfun->can_throw_non_call_exceptions
db1c50be 1534 && can_throw_internal (insn)));
1535
/* If resolve_simple_move handed back a different insn, refresh
   recog_data for it so that the operand loop below works on the
   replacement, and remember the block when the original insn was a
   control-flow insn.  */
1a6a0f2a 1536 insn = resolve_simple_move (set, insn);
1537 if (insn != orig_insn)
1538 {
1a6a0f2a 1539 recog_memoized (insn);
1540 extract_insn (insn);
97bb2849 1541
1542 if (cfi)
1543 SET_BIT (sub_blocks, bb->index);
1a6a0f2a 1544 }
1545 }
9cf5d19e 1546 else
1547 {
1548 rtx decomposed_shift;
1549
/* Not a simple move: try the shift/zero_extend decomposition and,
   on success, continue with the insn it returned.  */
1550 decomposed_shift = resolve_shift_zext (insn);
1551 if (decomposed_shift != NULL_RTX)
1552 {
9cf5d19e 1553 insn = decomposed_shift;
1554 recog_memoized (insn);
1555 extract_insn (insn);
1556 }
1557 }
1a6a0f2a 1558
/* Walk the operands from last to first, applying resolve_subreg_use
   to each operand location.  */
1559 for (i = recog_data.n_operands - 1; i >= 0; --i)
1560 for_each_rtx (recog_data.operand_loc[i],
1561 resolve_subreg_use,
1562 insn);
1563
1564 resolve_reg_notes (insn);
1565
/* If changes are pending, also queue a change that copies each
   operand into its duplicate locations, then commit the whole group;
   a failure to apply it is fatal.  */
1566 if (num_validated_changes () > 0)
1567 {
1568 for (i = recog_data.n_dups - 1; i >= 0; --i)
1569 {
1570 rtx *pl = recog_data.dup_loc[i];
1571 int dup_num = recog_data.dup_num[i];
1572 rtx *px = recog_data.operand_loc[dup_num];
1573
c47adb48 1574 validate_unshare_change (insn, pl, *px, 1);
1a6a0f2a 1575 }
1576
1577 i = apply_change_group ();
1578 gcc_assert (i);
1a6a0f2a 1579 }
1580 }
1a6a0f2a 1581 }
1582 }
1583
db1c50be 1584 /* If we had insns to split that caused control flow insns in the middle
1585 of a basic block, split those blocks now. Note that we only handle
1586 the case where splitting a load has caused multiple possibly trapping
1587 loads to appear. */
1588 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
1589 {
1590 rtx insn, end;
1591 edge fallthru;
1592
1593 bb = BASIC_BLOCK (i);
1594 insn = BB_HEAD (bb);
1595 end = BB_END (bb);
1596
/* END is the last insn of the original block; after each split the
   walk continues at the head of the new fallthru block until END is
   reached.  */
1597 while (insn != end)
1598 {
1599 if (control_flow_insn_p (insn))
1600 {
1601 /* Split the block after insn. There will be a fallthru
1602 edge, which is OK so we keep it. We have to create the
1603 exception edges ourselves. */
1604 fallthru = split_block (bb, insn);
1605 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1606 bb = fallthru->dest;
1607 insn = BB_HEAD (bb);
1608 }
1609 else
1610 insn = NEXT_INSN (insn);
1611 }
1612 }
97bb2849 1613
97bb2849 1614 sbitmap_free (sub_blocks);
1a6a0f2a 1615 }
1616
/* Tear-down: free every non-null bitmap stored in reg_copy_graph,
   then the vector itself, then the three context bitmaps.  */
1617 {
1618 unsigned int i;
1619 bitmap b;
1620
48148244 1621 FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
1a6a0f2a 1622 if (b)
1623 BITMAP_FREE (b);
1624 }
1625
48e1416a 1626 VEC_free (bitmap, heap, reg_copy_graph);
1a6a0f2a 1627
1628 BITMAP_FREE (decomposable_context);
1629 BITMAP_FREE (non_decomposable_context);
5277d36e 1630 BITMAP_FREE (subreg_context);
1a6a0f2a 1631}
1632\f
1633/* Gate function for lower subreg pass. */
1634
1635static bool
1636gate_handle_lower_subreg (void)
1637{
1638 return flag_split_wide_types != 0;
1639}
1640
/* Execute hook of the first lower-subreg pass: decompose multiword
   subregs and report no additional TODO flags.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  const unsigned int todo_flags = 0;

  decompose_multiword_subregs ();

  return todo_flags;
}
1649
/* Execute hook of the second lower-subreg pass: decompose multiword
   subregs and report no additional TODO flags.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  const unsigned int todo_flags = 0;

  decompose_multiword_subregs ();

  return todo_flags;
}
1658
20099e35 1659struct rtl_opt_pass pass_lower_subreg =
1a6a0f2a 1660{
20099e35 1661 {
1662 RTL_PASS,
b85ccd2c 1663 "subreg1", /* name */
1a6a0f2a 1664 gate_handle_lower_subreg, /* gate */
1665 rest_of_handle_lower_subreg, /* execute */
1666 NULL, /* sub */
1667 NULL, /* next */
1668 0, /* static_pass_number */
1669 TV_LOWER_SUBREG, /* tv_id */
1670 0, /* properties_required */
1671 0, /* properties_provided */
1672 0, /* properties_destroyed */
1673 0, /* todo_flags_start */
97bb2849 1674 TODO_ggc_collect |
20099e35 1675 TODO_verify_flow /* todo_flags_finish */
1676 }
1a6a0f2a 1677};
1678
20099e35 1679struct rtl_opt_pass pass_lower_subreg2 =
1a6a0f2a 1680{
20099e35 1681 {
1682 RTL_PASS,
1a6a0f2a 1683 "subreg2", /* name */
1684 gate_handle_lower_subreg, /* gate */
1685 rest_of_handle_lower_subreg2, /* execute */
1686 NULL, /* sub */
1687 NULL, /* next */
1688 0, /* static_pass_number */
1689 TV_LOWER_SUBREG, /* tv_id */
1690 0, /* properties_required */
1691 0, /* properties_provided */
1692 0, /* properties_destroyed */
1693 0, /* todo_flags_start */
0806b508 1694 TODO_df_finish | TODO_verify_rtl_sharing |
97bb2849 1695 TODO_ggc_collect |
20099e35 1696 TODO_verify_flow /* todo_flags_finish */
1697 }
1a6a0f2a 1698};