]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
gcc/
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
1a6a0f2a 1/* Decompose multiword subregs.
08b31038 2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
1a6a0f2a 4 Contributed by Richard Henderson <rth@redhat.com>
5 Ian Lance Taylor <iant@google.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
8c4c00c1 11Software Foundation; either version 3, or (at your option) any later
1a6a0f2a 12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
8c4c00c1 20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
1a6a0f2a 22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "machmode.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tm_p.h"
30#include "timevar.h"
31#include "flags.h"
32#include "insn-config.h"
33#include "obstack.h"
34#include "basic-block.h"
35#include "recog.h"
36#include "bitmap.h"
0e8e9be3 37#include "dce.h"
1a6a0f2a 38#include "expr.h"
db1c50be 39#include "except.h"
1a6a0f2a 40#include "regs.h"
41#include "tree-pass.h"
3072d30e 42#include "df.h"
c7944dce 43#include "lower-subreg.h"
1a6a0f2a 44
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined: 1 when
   the target defines it, 0 otherwise.  This lets it be used in an
   ordinary C expression (it is compared against WORDS_BIG_ENDIAN when
   splitting a push) instead of only in #ifdef tests.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif

/* Instantiate the heap-allocated vector-of-bitmap type; it is used for
   reg_copy_graph in this file.  */
DEF_VEC_P (bitmap);
DEF_VEC_ALLOC_P (bitmap,heap);
54
55/* Decompose multi-word pseudo-registers into individual
c7944dce 56 pseudo-registers when possible and profitable. This is possible
57 when all the uses of a multi-word register are via SUBREG, or are
58 copies of the register to another location. Breaking apart the
59 register permits more CSE and permits better register allocation.
60 This is profitable if the machine does not have move instructions
61 to do this.
62
63 This pass only splits moves with modes that are wider than
64 word_mode and ASHIFTs, LSHIFTRTs and ZERO_EXTENDs with integer
65 modes that are twice the width of word_mode. The latter could be
66 generalized if there was a need to do this, but the trend in
67 architectures is to not need this.
68
69 There are two useful preprocessor defines for use by maintainers:
70
71 #define LOG_COSTS 1
72
73 if you wish to see the actual cost estimates that are being used
74 for each mode wider than word mode and the cost estimates for zero
75 extension and the shifts. This can be useful when port maintainers
76 are tuning insn rtx costs.
77
78 #define FORCE_LOWERING 1
79
80 if you wish to test the pass with all the transformation forced on.
81 This can be useful for finding bugs in the transformations. */
82
/* Maintainer switches; both are off here.  LOG_COSTS gates the fprintf
   calls to stderr in the cost computation, and FORCE_LOWERING makes
   every profitability test in the cost computation succeed.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0
1a6a0f2a 85
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static VEC(bitmap,heap) *reg_copy_graph;

/* Per-target state of this pass.  init_lower_subreg clears and fills
   the structure that this_target_lower_subreg refers to.  The pointer
   variable is only defined here when SWITCHABLE_TARGET is nonzero;
   otherwise the name is presumably provided by lower-subreg.h (TODO
   confirm).  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the fields of the current target's state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
114
/* RTXes used while computing costs.  They are built once by
   init_lower_subreg and then modified in place (mode, code, operands)
   by the costing routines before each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
130
131/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
132 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
133
134static int
135shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
136 enum machine_mode mode, int op1)
137{
138 PUT_MODE (rtxes->target, mode);
139 PUT_CODE (rtxes->shift, code);
140 PUT_MODE (rtxes->shift, mode);
141 PUT_MODE (rtxes->source, mode);
142 XEXP (rtxes->shift, 1) = GEN_INT (op1);
143 SET_SRC (rtxes->set) = rtxes->shift;
144 return insn_rtx_cost (rtxes->set, speed_p);
145}
146
147/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
148 to true if it is profitable to split a double-word CODE shift
149 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
150 for speed or size profitability.
151
152 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
153 the cost of moving zero into a word-mode register. WORD_MOVE_COST
154 is the cost of moving between word registers. */
155
156static void
157compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
158 bool *splitting, enum rtx_code code,
159 int word_move_zero_cost, int word_move_cost)
160{
161 int wide_cost, narrow_cost, i;
162
163 for (i = 0; i < BITS_PER_WORD; i++)
164 {
165 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
166 i + BITS_PER_WORD);
167 if (i == 0)
168 narrow_cost = word_move_cost;
169 else
170 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
171
172 if (LOG_COSTS)
173 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
174 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
175 i + BITS_PER_WORD, wide_cost, narrow_cost,
176 word_move_zero_cost);
177
178 if (FORCE_LOWERING || wide_cost >= narrow_cost + word_move_zero_cost)
179 splitting[i] = true;
180 }
181}
182
183/* Compute what we should do when optimizing for speed or size; SPEED_P
184 selects which. Use RTXES for computing costs. */
185
186static void
187compute_costs (bool speed_p, struct cost_rtxes *rtxes)
188{
189 unsigned int i;
190 int word_move_zero_cost, word_move_cost;
191
192 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
193 word_move_zero_cost = insn_rtx_cost (rtxes->set, speed_p);
194
195 SET_SRC (rtxes->set) = rtxes->source;
196 word_move_cost = insn_rtx_cost (rtxes->set, speed_p);
197
198 if (LOG_COSTS)
199 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
200 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
201
202 for (i = 0; i < MAX_MACHINE_MODE; i++)
203 {
204 enum machine_mode mode = (enum machine_mode) i;
205 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
206 if (factor > 1)
207 {
208 int mode_move_cost;
209
210 PUT_MODE (rtxes->target, mode);
211 PUT_MODE (rtxes->source, mode);
212 mode_move_cost = insn_rtx_cost (rtxes->set, speed_p);
213
214 if (LOG_COSTS)
215 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
216 GET_MODE_NAME (mode), mode_move_cost,
217 word_move_cost, factor);
218
219 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
220 {
221 choices[speed_p].move_modes_to_split[i] = true;
222 choices[speed_p].something_to_do = true;
223 }
224 }
225 }
226
227 /* For the moves and shifts, the only case that is checked is one
228 where the mode of the target is an integer mode twice the width
229 of the word_mode.
230
231 If it is not profitable to split a double word move then do not
232 even consider the shifts or the zero extension. */
233 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
234 {
235 int zext_cost;
236
237 /* The only case here to check to see if moving the upper part with a
238 zero is cheaper than doing the zext itself. */
239 PUT_MODE (rtxes->target, twice_word_mode);
240 PUT_MODE (rtxes->source, word_mode);
241 SET_SRC (rtxes->set) = rtxes->zext;
242 zext_cost = insn_rtx_cost (rtxes->set, speed_p);
243
244 if (LOG_COSTS)
245 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
246 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
247 zext_cost, word_move_cost, word_move_zero_cost);
248
249 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
250 choices[speed_p].splitting_zext = true;
251
252 compute_splitting_shift (speed_p, rtxes,
253 choices[speed_p].splitting_ashift, ASHIFT,
254 word_move_zero_cost, word_move_cost);
255 compute_splitting_shift (speed_p, rtxes,
256 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
257 word_move_zero_cost, word_move_cost);
258 }
259}
260
261/* Do one-per-target initialisation. This involves determining
262 which operations on the machine are profitable. If none are found,
263 then the pass just returns when called. */
264
265void
266init_lower_subreg (void)
267{
268 struct cost_rtxes rtxes;
269
270 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
271
272 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
273
274 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
275 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
276 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
277 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
278 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
279
280 if (LOG_COSTS)
281 fprintf (stderr, "\nSize costs\n==========\n\n");
282 compute_costs (false, &rtxes);
283
284 if (LOG_COSTS)
285 fprintf (stderr, "\nSpeed costs\n===========\n\n");
286 compute_costs (true, &rtxes);
287}
67c3f580 288
289static bool
290simple_move_operand (rtx x)
291{
292 if (GET_CODE (x) == SUBREG)
293 x = SUBREG_REG (x);
294
295 if (!OBJECT_P (x))
296 return false;
297
298 if (GET_CODE (x) == LABEL_REF
299 || GET_CODE (x) == SYMBOL_REF
ab9eaa97 300 || GET_CODE (x) == HIGH
301 || GET_CODE (x) == CONST)
67c3f580 302 return false;
303
304 if (MEM_P (x)
305 && (MEM_VOLATILE_P (x)
306 || mode_dependent_address_p (XEXP (x, 0))))
307 return false;
308
309 return true;
310}
311
c7944dce 312/* If INSN is a single set between two objects that we want to split,
313 return the single set. SPEED_P says whether we are optimizing
314 INSN for speed or size.
315
316 INSN should have been passed to recog and extract_insn before this
317 is called. */
1a6a0f2a 318
319static rtx
c7944dce 320simple_move (rtx insn, bool speed_p)
1a6a0f2a 321{
322 rtx x;
323 rtx set;
324 enum machine_mode mode;
325
326 if (recog_data.n_operands != 2)
327 return NULL_RTX;
328
329 set = single_set (insn);
330 if (!set)
331 return NULL_RTX;
332
333 x = SET_DEST (set);
334 if (x != recog_data.operand[0] && x != recog_data.operand[1])
335 return NULL_RTX;
67c3f580 336 if (!simple_move_operand (x))
1a6a0f2a 337 return NULL_RTX;
338
339 x = SET_SRC (set);
340 if (x != recog_data.operand[0] && x != recog_data.operand[1])
341 return NULL_RTX;
67c3f580 342 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
343 things like x86 rdtsc which returns a DImode value. */
344 if (GET_CODE (x) != ASM_OPERANDS
345 && !simple_move_operand (x))
1a6a0f2a 346 return NULL_RTX;
347
348 /* We try to decompose in integer modes, to avoid generating
349 inefficient code copying between integer and floating point
350 registers. That means that we can't decompose if this is a
351 non-integer mode for which there is no integer mode of the same
352 size. */
353 mode = GET_MODE (SET_SRC (set));
354 if (!SCALAR_INT_MODE_P (mode)
355 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
356 == BLKmode))
357 return NULL_RTX;
358
5e016dfc 359 /* Reject PARTIAL_INT modes. They are used for processor specific
360 purposes and it's probably best not to tamper with them. */
361 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
362 return NULL_RTX;
363
c7944dce 364 if (!choices[speed_p].move_modes_to_split[(int) mode])
365 return NULL_RTX;
366
1a6a0f2a 367 return set;
368}
369
370/* If SET is a copy from one multi-word pseudo-register to another,
371 record that in reg_copy_graph. Return whether it is such a
372 copy. */
373
374static bool
375find_pseudo_copy (rtx set)
376{
377 rtx dest = SET_DEST (set);
378 rtx src = SET_SRC (set);
379 unsigned int rd, rs;
380 bitmap b;
381
382 if (!REG_P (dest) || !REG_P (src))
383 return false;
384
385 rd = REGNO (dest);
386 rs = REGNO (src);
387 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
388 return false;
389
1a6a0f2a 390 b = VEC_index (bitmap, reg_copy_graph, rs);
391 if (b == NULL)
392 {
393 b = BITMAP_ALLOC (NULL);
394 VEC_replace (bitmap, reg_copy_graph, rs, b);
395 }
396
397 bitmap_set_bit (b, rd);
398
399 return true;
400}
401
402/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
403 where they are copied to another register, add the register to
404 which they are copied to DECOMPOSABLE_CONTEXT. Use
405 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
406 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
407
408static void
409propagate_pseudo_copies (void)
410{
411 bitmap queue, propagate;
412
413 queue = BITMAP_ALLOC (NULL);
414 propagate = BITMAP_ALLOC (NULL);
415
416 bitmap_copy (queue, decomposable_context);
417 do
418 {
419 bitmap_iterator iter;
420 unsigned int i;
421
422 bitmap_clear (propagate);
423
424 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
425 {
426 bitmap b = VEC_index (bitmap, reg_copy_graph, i);
427 if (b)
428 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
429 }
430
431 bitmap_and_compl (queue, propagate, decomposable_context);
432 bitmap_ior_into (decomposable_context, propagate);
433 }
434 while (!bitmap_empty_p (queue));
435
436 BITMAP_FREE (queue);
437 BITMAP_FREE (propagate);
438}
439
/* A pointer to one of these values is passed to
   find_decomposable_subregs via for_each_rtx.  It tells the callback
   what kind of insn the rtx being walked belongs to, which determines
   how a plain multi-word pseudo-register reference is recorded.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move from one pseudo-register to another.  */
  SIMPLE_PSEUDO_REG_MOVE,
  /* A simple move involving a non-pseudo-register.  */
  SIMPLE_MOVE
};
452
453/* This is called via for_each_rtx. If we find a SUBREG which we
454 could use to decompose a pseudo-register, set a bit in
455 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
456 not a simple pseudo-register copy, DATA will point at the type of
457 move, and we set a bit in DECOMPOSABLE_CONTEXT or
458 NON_DECOMPOSABLE_CONTEXT as appropriate. */
459
460static int
461find_decomposable_subregs (rtx *px, void *data)
462{
463 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
464 rtx x = *px;
465
466 if (x == NULL_RTX)
467 return 0;
468
469 if (GET_CODE (x) == SUBREG)
470 {
471 rtx inner = SUBREG_REG (x);
472 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
473
474 if (!REG_P (inner))
475 return 0;
476
477 regno = REGNO (inner);
478 if (HARD_REGISTER_NUM_P (regno))
479 return -1;
480
481 outer_size = GET_MODE_SIZE (GET_MODE (x));
482 inner_size = GET_MODE_SIZE (GET_MODE (inner));
483 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
484 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
485
486 /* We only try to decompose single word subregs of multi-word
487 registers. When we find one, we return -1 to avoid iterating
488 over the inner register.
489
490 ??? This doesn't allow, e.g., DImode subregs of TImode values
491 on 32-bit targets. We would need to record the way the
492 pseudo-register was used, and only decompose if all the uses
493 were the same number and size of pieces. Hopefully this
494 doesn't happen much. */
495
496 if (outer_words == 1 && inner_words > 1)
497 {
498 bitmap_set_bit (decomposable_context, regno);
499 return -1;
500 }
4e7a1eb8 501
502 /* If this is a cast from one mode to another, where the modes
503 have the same size, and they are not tieable, then mark this
504 register as non-decomposable. If we decompose it we are
505 likely to mess up whatever the backend is trying to do. */
506 if (outer_words > 1
507 && outer_size == inner_size
508 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
509 {
510 bitmap_set_bit (non_decomposable_context, regno);
5277d36e 511 bitmap_set_bit (subreg_context, regno);
4e7a1eb8 512 return -1;
513 }
1a6a0f2a 514 }
67c3f580 515 else if (REG_P (x))
1a6a0f2a 516 {
517 unsigned int regno;
518
519 /* We will see an outer SUBREG before we see the inner REG, so
520 when we see a plain REG here it means a direct reference to
521 the register.
522
523 If this is not a simple copy from one location to another,
524 then we can not decompose this register. If this is a simple
1e5b92fa 525 copy from one pseudo-register to another, and the mode is right
526 then we mark the register as decomposable.
527 Otherwise we don't say anything about this register --
528 it could be decomposed, but whether that would be
1a6a0f2a 529 profitable depends upon how it is used elsewhere.
530
531 We only set bits in the bitmap for multi-word
532 pseudo-registers, since those are the only ones we care about
533 and it keeps the size of the bitmaps down. */
534
535 regno = REGNO (x);
536 if (!HARD_REGISTER_NUM_P (regno)
537 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
538 {
539 switch (*pcmi)
540 {
541 case NOT_SIMPLE_MOVE:
542 bitmap_set_bit (non_decomposable_context, regno);
543 break;
544 case SIMPLE_PSEUDO_REG_MOVE:
545 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
546 bitmap_set_bit (decomposable_context, regno);
547 break;
548 case SIMPLE_MOVE:
549 break;
550 default:
551 gcc_unreachable ();
552 }
553 }
554 }
67c3f580 555 else if (MEM_P (x))
556 {
557 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
558
559 /* Any registers used in a MEM do not participate in a
560 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE. Do our own recursion
561 here, and return -1 to block the parent's recursion. */
562 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
563 return -1;
564 }
1a6a0f2a 565
566 return 0;
567}
568
569/* Decompose REGNO into word-sized components. We smash the REG node
570 in place. This ensures that (1) something goes wrong quickly if we
571 fail to make some replacement, and (2) the debug information inside
572 the symbol table is automatically kept up to date. */
573
574static void
575decompose_register (unsigned int regno)
576{
577 rtx reg;
578 unsigned int words, i;
579 rtvec v;
580
581 reg = regno_reg_rtx[regno];
582
583 regno_reg_rtx[regno] = NULL_RTX;
1a6a0f2a 584
585 words = GET_MODE_SIZE (GET_MODE (reg));
586 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
587
588 v = rtvec_alloc (words);
589 for (i = 0; i < words; ++i)
590 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
591
592 PUT_CODE (reg, CONCATN);
593 XVEC (reg, 0) = v;
594
595 if (dump_file)
596 {
597 fprintf (dump_file, "; Splitting reg %u ->", regno);
598 for (i = 0; i < words; ++i)
599 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
600 fputc ('\n', dump_file);
601 }
602}
603
604/* Get a SUBREG of a CONCATN. */
605
606static rtx
607simplify_subreg_concatn (enum machine_mode outermode, rtx op,
608 unsigned int byte)
609{
610 unsigned int inner_size;
50bdfec8 611 enum machine_mode innermode, partmode;
1a6a0f2a 612 rtx part;
613 unsigned int final_offset;
614
615 gcc_assert (GET_CODE (op) == CONCATN);
616 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
617
618 innermode = GET_MODE (op);
619 gcc_assert (byte < GET_MODE_SIZE (innermode));
620 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
621
622 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
623 part = XVECEXP (op, 0, byte / inner_size);
50bdfec8 624 partmode = GET_MODE (part);
625
598ffe59 626 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
627 regular CONST_VECTORs. They have vector or integer modes, depending
628 on the capabilities of the target. Cope with them. */
629 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
630 partmode = GET_MODE_INNER (innermode);
631 else if (partmode == VOIDmode)
50bdfec8 632 {
598ffe59 633 enum mode_class mclass = GET_MODE_CLASS (innermode);
634 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
50bdfec8 635 }
636
1a6a0f2a 637 final_offset = byte % inner_size;
638 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
639 return NULL_RTX;
640
50bdfec8 641 return simplify_gen_subreg (outermode, part, partmode, final_offset);
1a6a0f2a 642}
643
644/* Wrapper around simplify_gen_subreg which handles CONCATN. */
645
646static rtx
647simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
648 enum machine_mode innermode, unsigned int byte)
649{
3fa57b79 650 rtx ret;
651
1a6a0f2a 652 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
653 If OP is a SUBREG of a CONCATN, then it must be a simple mode
654 change with the same size and offset 0, or it must extract a
655 part. We shouldn't see anything else here. */
656 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
657 {
658 rtx op2;
659
660 if ((GET_MODE_SIZE (GET_MODE (op))
661 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
662 && SUBREG_BYTE (op) == 0)
663 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
664 GET_MODE (SUBREG_REG (op)), byte);
665
666 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
667 SUBREG_BYTE (op));
668 if (op2 == NULL_RTX)
669 {
670 /* We don't handle paradoxical subregs here. */
671 gcc_assert (GET_MODE_SIZE (outermode)
672 <= GET_MODE_SIZE (GET_MODE (op)));
673 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
674 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
675 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
676 byte + SUBREG_BYTE (op));
677 gcc_assert (op2 != NULL_RTX);
678 return op2;
679 }
680
681 op = op2;
682 gcc_assert (op != NULL_RTX);
683 gcc_assert (innermode == GET_MODE (op));
684 }
3fa57b79 685
1a6a0f2a 686 if (GET_CODE (op) == CONCATN)
687 return simplify_subreg_concatn (outermode, op, byte);
3fa57b79 688
689 ret = simplify_gen_subreg (outermode, op, innermode, byte);
690
691 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
692 resolve_simple_move will ask for the high part of the paradoxical
693 subreg, which does not have a value. Just return a zero. */
694 if (ret == NULL_RTX
695 && GET_CODE (op) == SUBREG
696 && SUBREG_BYTE (op) == 0
697 && (GET_MODE_SIZE (innermode)
698 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
699 return CONST0_RTX (outermode);
700
701 gcc_assert (ret != NULL_RTX);
702 return ret;
1a6a0f2a 703}
704
705/* Return whether we should resolve X into the registers into which it
706 was decomposed. */
707
708static bool
709resolve_reg_p (rtx x)
710{
711 return GET_CODE (x) == CONCATN;
712}
713
714/* Return whether X is a SUBREG of a register which we need to
715 resolve. */
716
717static bool
718resolve_subreg_p (rtx x)
719{
720 if (GET_CODE (x) != SUBREG)
721 return false;
722 return resolve_reg_p (SUBREG_REG (x));
723}
724
725/* This is called via for_each_rtx. Look for SUBREGs which need to be
726 decomposed. */
727
728static int
729resolve_subreg_use (rtx *px, void *data)
730{
731 rtx insn = (rtx) data;
732 rtx x = *px;
733
734 if (x == NULL_RTX)
735 return 0;
736
737 if (resolve_subreg_p (x))
738 {
739 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
740 SUBREG_BYTE (x));
741
742 /* It is possible for a note to contain a reference which we can
743 decompose. In this case, return 1 to the caller to indicate
744 that the note must be removed. */
745 if (!x)
746 {
ccd1ec59 747 gcc_assert (!insn);
1a6a0f2a 748 return 1;
749 }
750
751 validate_change (insn, px, x, 1);
752 return -1;
753 }
754
755 if (resolve_reg_p (x))
756 {
757 /* Return 1 to the caller to indicate that we found a direct
758 reference to a register which is being decomposed. This can
9cf5d19e 759 happen inside notes, multiword shift or zero-extend
760 instructions. */
1a6a0f2a 761 return 1;
762 }
763
764 return 0;
765}
766
9845d120 767/* This is called via for_each_rtx. Look for SUBREGs which can be
768 decomposed and decomposed REGs that need copying. */
769
770static int
771adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
772{
773 rtx x = *px;
774
775 if (x == NULL_RTX)
776 return 0;
777
778 if (resolve_subreg_p (x))
779 {
780 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
781 SUBREG_BYTE (x));
782
783 if (x)
784 *px = x;
785 else
786 x = copy_rtx (*px);
787 }
788
789 if (resolve_reg_p (x))
790 *px = copy_rtx (x);
791
792 return 0;
793}
794
1a6a0f2a 795/* Resolve any decomposed registers which appear in register notes on
796 INSN. */
797
798static void
799resolve_reg_notes (rtx insn)
800{
801 rtx *pnote, note;
802
803 note = find_reg_equal_equiv_note (insn);
804 if (note)
805 {
3072d30e 806 int old_count = num_validated_changes ();
1a6a0f2a 807 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
1e5b92fa 808 remove_note (insn, note);
3072d30e 809 else
810 if (old_count != num_validated_changes ())
811 df_notes_rescan (insn);
1a6a0f2a 812 }
813
814 pnote = &REG_NOTES (insn);
815 while (*pnote != NULL_RTX)
816 {
9ce37fa7 817 bool del = false;
1a6a0f2a 818
819 note = *pnote;
820 switch (REG_NOTE_KIND (note))
821 {
3072d30e 822 case REG_DEAD:
823 case REG_UNUSED:
1a6a0f2a 824 if (resolve_reg_p (XEXP (note, 0)))
9ce37fa7 825 del = true;
1a6a0f2a 826 break;
827
828 default:
829 break;
830 }
831
9ce37fa7 832 if (del)
1a6a0f2a 833 *pnote = XEXP (note, 1);
834 else
835 pnote = &XEXP (note, 1);
836 }
837}
838
67c3f580 839/* Return whether X can be decomposed into subwords. */
1a6a0f2a 840
841static bool
67c3f580 842can_decompose_p (rtx x)
1a6a0f2a 843{
844 if (REG_P (x))
845 {
846 unsigned int regno = REGNO (x);
847
848 if (HARD_REGISTER_NUM_P (regno))
5f961ca4 849 {
850 unsigned int byte, num_bytes;
851
852 num_bytes = GET_MODE_SIZE (GET_MODE (x));
853 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
854 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
855 return false;
856 return true;
857 }
1a6a0f2a 858 else
5277d36e 859 return !bitmap_bit_p (subreg_context, regno);
1a6a0f2a 860 }
861
67c3f580 862 return true;
1a6a0f2a 863}
864
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.

   The replacement moves are built inside a new insn sequence.  When
   anything changed, the sequence is emitted before INSN and INSN is
   deleted.  The function calls itself on helper moves it creates, so
   those are resolved as well.  */

static rtx
resolve_simple_move (rtx set, rtx insn)
{
  rtx src, dest, real_dest, insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of words moved, rounding a partial word up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  /* REAL_DEST, when set, is the destination the caller asked for; DEST
     then names a temporary that is copied to REAL_DEST at the end.  */
  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
          || (GET_MODE_SIZE (orig_mode)
              != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, minsn, smove;

      /* Load the source into a fresh register first, resolve that
         move, and continue with the fresh register as the source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
        for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
      if (MEM_P (dest))
        for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
      /* Commit the address changes queued by resolve_subreg_use.  */
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);
      emit_move_insn (reg, src);
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
          && !resolve_reg_p (dest)
          && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
        real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          /* Use the integer mode of the same size for the temporary.  */
          dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
                                     MODE_INT, 0);
          gcc_assert (dest_mode != BLKmode);
        }
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
        REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose whether the words are pushed in ascending or descending
         word order.  J and JINC are unsigned, so a JINC of -1 steps J
         downwards through wraparound.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
        {
          j = 0;
          jinc = 1;
        }
      else
        {
          j = words - 1;
          jinc = -1;
        }

      for (i = 0; i < words; ++i, j += jinc)
        {
          rtx temp;

          temp = copy_rtx (XEXP (dest, 0));
          temp = adjust_automodify_address_nv (dest, word_mode, temp,
                                               j * UNITS_PER_WORD);
          emit_move_insn (temp,
                          simplify_gen_subreg_concatn (word_mode, src,
                                                       orig_mode,
                                                       j * UNITS_PER_WORD));
        }
    }
  else
    {
      unsigned int i;

      /* A pseudo-register destination gets a CLOBBER ahead of the
         word-by-word stores into it.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
        emit_clobber (dest);

      for (i = 0; i < words; ++i)
        emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
                                                     dest_mode,
                                                     i * UNITS_PER_WORD),
                        simplify_gen_subreg_concatn (word_mode, src,
                                                     orig_mode,
                                                     i * UNITS_PER_WORD));
    }

  /* If we went through a temporary, copy it to the real destination
     and resolve that copy as well.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, minsn, smove;

      if (dest_mode == orig_mode)
        mdest = dest;
      else
        mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  /* Carry INSN's EH region note over to the replacement insns.  */
  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  delete_insn (insn);

  return insns;
}
1073
1074/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1075 component registers. Return whether we changed something. */
1076
1077static bool
1078resolve_clobber (rtx pat, rtx insn)
1079{
e29831db 1080 rtx reg;
1a6a0f2a 1081 enum machine_mode orig_mode;
1082 unsigned int words, i;
ab9eaa97 1083 int ret;
1a6a0f2a 1084
1085 reg = XEXP (pat, 0);
2289a5f2 1086 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1a6a0f2a 1087 return false;
1088
1089 orig_mode = GET_MODE (reg);
1090 words = GET_MODE_SIZE (orig_mode);
1091 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1092
ab9eaa97 1093 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1094 simplify_gen_subreg_concatn (word_mode, reg,
1095 orig_mode, 0),
1096 0);
3072d30e 1097 df_insn_rescan (insn);
ab9eaa97 1098 gcc_assert (ret != 0);
1099
1a6a0f2a 1100 for (i = words - 1; i > 0; --i)
1101 {
1102 rtx x;
1103
2289a5f2 1104 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1105 i * UNITS_PER_WORD);
1a6a0f2a 1106 x = gen_rtx_CLOBBER (VOIDmode, x);
1107 emit_insn_after (x, insn);
1108 }
1109
db2200eb 1110 resolve_reg_notes (insn);
1111
1a6a0f2a 1112 return true;
1113}
1114
1115/* A USE of a decomposed register is no longer meaningful. Return
1116 whether we changed something. */
1117
1118static bool
1119resolve_use (rtx pat, rtx insn)
1120{
1121 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1122 {
1123 delete_insn (insn);
1124 return true;
1125 }
db2200eb 1126
1127 resolve_reg_notes (insn);
1128
1a6a0f2a 1129 return false;
1130}
1131
9845d120 1132/* A VAR_LOCATION can be simplified. */
1133
1134static void
1135resolve_debug (rtx insn)
1136{
1137 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1138
1139 df_insn_rescan (insn);
1140
1141 resolve_reg_notes (insn);
1142}
1143
c7944dce 1144/* Check if INSN is a decomposable multiword-shift or zero-extend and
1145 set the decomposable_context bitmap accordingly. SPEED_P is true
1146 if we are optimizing INSN for speed rather than size. Return true
1147 if INSN is decomposable. */
9cf5d19e 1148
c7944dce 1149static bool
1150find_decomposable_shift_zext (rtx insn, bool speed_p)
9cf5d19e 1151{
1152 rtx set;
1153 rtx op;
1154 rtx op_operand;
1155
1156 set = single_set (insn);
1157 if (!set)
c7944dce 1158 return false;
9cf5d19e 1159
1160 op = SET_SRC (set);
1161 if (GET_CODE (op) != ASHIFT
1162 && GET_CODE (op) != LSHIFTRT
1163 && GET_CODE (op) != ZERO_EXTEND)
c7944dce 1164 return false;
9cf5d19e 1165
1166 op_operand = XEXP (op, 0);
1167 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1168 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1169 || HARD_REGISTER_NUM_P (REGNO (op_operand))
c7944dce 1170 || GET_MODE (op) != twice_word_mode)
1171 return false;
9cf5d19e 1172
1173 if (GET_CODE (op) == ZERO_EXTEND)
1174 {
1175 if (GET_MODE (op_operand) != word_mode
c7944dce 1176 || !choices[speed_p].splitting_zext)
1177 return false;
9cf5d19e 1178 }
1179 else /* left or right shift */
1180 {
c7944dce 1181 bool *splitting = (GET_CODE (op) == ASHIFT
1182 ? choices[speed_p].splitting_ashift
1183 : choices[speed_p].splitting_lshiftrt);
971ba038 1184 if (!CONST_INT_P (XEXP (op, 1))
c7944dce 1185 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1186 2 * BITS_PER_WORD - 1)
1187 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1188 return false;
1189
1190 bitmap_set_bit (decomposable_context, REGNO (op_operand));
9cf5d19e 1191 }
1192
1193 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1194
c7944dce 1195 return true;
9cf5d19e 1196}
1197
1198/* Decompose a more than word wide shift (in INSN) of a multiword
1199 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1200 and 'set to zero' insn. Return a pointer to the new insn when a
1201 replacement was done. */
1202
1203static rtx
1204resolve_shift_zext (rtx insn)
1205{
1206 rtx set;
1207 rtx op;
1208 rtx op_operand;
1209 rtx insns;
1210 rtx src_reg, dest_reg, dest_zero;
1211 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1212
1213 set = single_set (insn);
1214 if (!set)
1215 return NULL_RTX;
1216
1217 op = SET_SRC (set);
1218 if (GET_CODE (op) != ASHIFT
1219 && GET_CODE (op) != LSHIFTRT
1220 && GET_CODE (op) != ZERO_EXTEND)
1221 return NULL_RTX;
1222
1223 op_operand = XEXP (op, 0);
1224
c7944dce 1225 /* We can tear this operation apart only if the regs were already
1226 torn apart. */
9cf5d19e 1227 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1228 return NULL_RTX;
1229
1230 /* src_reg_num is the number of the word mode register which we
1231 are operating on. For a left shift and a zero_extend on little
1232 endian machines this is register 0. */
1233 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0;
1234
4d8ad352 1235 if (WORDS_BIG_ENDIAN
1236 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
9cf5d19e 1237 src_reg_num = 1 - src_reg_num;
1238
1239 if (GET_CODE (op) == ZERO_EXTEND)
4d8ad352 1240 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
9cf5d19e 1241 else
1242 dest_reg_num = 1 - src_reg_num;
1243
1244 offset1 = UNITS_PER_WORD * dest_reg_num;
1245 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1246 src_offset = UNITS_PER_WORD * src_reg_num;
1247
9cf5d19e 1248 start_sequence ();
1249
1250 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1251 GET_MODE (SET_DEST (set)),
1252 offset1);
1253 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1254 GET_MODE (SET_DEST (set)),
1255 offset2);
1256 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1257 GET_MODE (op_operand),
1258 src_offset);
1259 if (GET_CODE (op) != ZERO_EXTEND)
1260 {
1261 int shift_count = INTVAL (XEXP (op, 1));
1262 if (shift_count > BITS_PER_WORD)
1263 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1264 LSHIFT_EXPR : RSHIFT_EXPR,
1265 word_mode, src_reg,
f5ff0b21 1266 shift_count - BITS_PER_WORD,
9cf5d19e 1267 dest_reg, 1);
1268 }
1269
1270 if (dest_reg != src_reg)
1271 emit_move_insn (dest_reg, src_reg);
1272 emit_move_insn (dest_zero, CONST0_RTX (word_mode));
1273 insns = get_insns ();
1274
1275 end_sequence ();
1276
1277 emit_insn_before (insns, insn);
1278
1279 if (dump_file)
1280 {
1281 rtx in;
1282 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1283 for (in = insns; in != insn; in = NEXT_INSN (in))
1284 fprintf (dump_file, "%d ", INSN_UID (in));
1285 fprintf (dump_file, "\n");
1286 }
1287
1288 delete_insn (insn);
1289 return insns;
1290}
1291
c7944dce 1292/* Print to dump_file a description of what we're doing with shift code CODE.
1293 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1294
1295static void
1296dump_shift_choices (enum rtx_code code, bool *splitting)
1297{
1298 int i;
1299 const char *sep;
1300
1301 fprintf (dump_file,
1302 " Splitting mode %s for %s lowering with shift amounts = ",
1303 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1304 sep = "";
1305 for (i = 0; i < BITS_PER_WORD; i++)
1306 if (splitting[i])
1307 {
1308 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1309 sep = ",";
1310 }
1311 fprintf (dump_file, "\n");
1312}
1313
1314/* Print to dump_file a description of what we're doing when optimizing
1315 for speed or size; SPEED_P says which. DESCRIPTION is a description
1316 of the SPEED_P choice. */
1317
1318static void
1319dump_choices (bool speed_p, const char *description)
1320{
1321 unsigned int i;
1322
1323 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1324
1325 for (i = 0; i < MAX_MACHINE_MODE; i++)
1326 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1327 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1328 choices[speed_p].move_modes_to_split[i]
1329 ? "Splitting"
1330 : "Skipping",
1331 GET_MODE_NAME ((enum machine_mode) i));
1332
1333 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1334 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1335 GET_MODE_NAME (twice_word_mode));
1336
1337 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1338 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_ashift);
1339 fprintf (dump_file, "\n");
1340}
1341
1a6a0f2a 1342/* Look for registers which are always accessed via word-sized SUBREGs
1343 or via copies. Decompose these registers into several word-sized
1344 pseudo-registers. */
1345
1346static void
3072d30e 1347decompose_multiword_subregs (void)
1a6a0f2a 1348{
1349 unsigned int max;
1350 basic_block bb;
c7944dce 1351 bool speed_p;
1a6a0f2a 1352
c7944dce 1353 if (dump_file)
1354 {
1355 dump_choices (false, "size");
1356 dump_choices (true, "speed");
1357 }
1358
1359 /* Check if this target even has any modes to consider lowering. */
1360 if (!choices[false].something_to_do && !choices[true].something_to_do)
1361 {
1362 if (dump_file)
1363 fprintf (dump_file, "Nothing to do!\n");
1364 return;
1365 }
3072d30e 1366
1a6a0f2a 1367 max = max_reg_num ();
1368
1369 /* First see if there are any multi-word pseudo-registers. If there
1370 aren't, there is nothing we can do. This should speed up this
1371 pass in the normal case, since it should be faster than scanning
1372 all the insns. */
1373 {
1374 unsigned int i;
c7944dce 1375 bool useful_modes_seen = false;
1a6a0f2a 1376
1377 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
c7944dce 1378 if (regno_reg_rtx[i] != NULL)
1379 {
1380 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1381 if (choices[false].move_modes_to_split[(int) mode]
1382 || choices[true].move_modes_to_split[(int) mode])
1383 {
1384 useful_modes_seen = true;
1385 break;
1386 }
1387 }
1388
1389 if (!useful_modes_seen)
1a6a0f2a 1390 {
c7944dce 1391 if (dump_file)
1392 fprintf (dump_file, "Nothing to lower in this function.\n");
1393 return;
1a6a0f2a 1394 }
1a6a0f2a 1395 }
1396
0e8e9be3 1397 if (df)
c7944dce 1398 {
1399 df_set_flags (DF_DEFER_INSN_RESCAN);
1400 run_word_dce ();
1401 }
0e8e9be3 1402
c7944dce 1403 /* FIXME: It may be possible to change this code to look for each
1404 multi-word pseudo-register and to find each insn which sets or
1405 uses that register. That should be faster than scanning all the
1406 insns. */
1a6a0f2a 1407
1408 decomposable_context = BITMAP_ALLOC (NULL);
1409 non_decomposable_context = BITMAP_ALLOC (NULL);
5277d36e 1410 subreg_context = BITMAP_ALLOC (NULL);
1a6a0f2a 1411
1412 reg_copy_graph = VEC_alloc (bitmap, heap, max);
1413 VEC_safe_grow (bitmap, heap, reg_copy_graph, max);
1414 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max);
1415
c7944dce 1416 speed_p = optimize_function_for_speed_p (cfun);
1a6a0f2a 1417 FOR_EACH_BB (bb)
1418 {
1419 rtx insn;
1420
1421 FOR_BB_INSNS (bb, insn)
1422 {
1423 rtx set;
1424 enum classify_move_insn cmi;
1425 int i, n;
1426
1427 if (!INSN_P (insn)
1428 || GET_CODE (PATTERN (insn)) == CLOBBER
1429 || GET_CODE (PATTERN (insn)) == USE)
1430 continue;
1431
08b31038 1432 recog_memoized (insn);
1433
c7944dce 1434 if (find_decomposable_shift_zext (insn, speed_p))
9cf5d19e 1435 continue;
1436
1a6a0f2a 1437 extract_insn (insn);
1438
c7944dce 1439 set = simple_move (insn, speed_p);
1a6a0f2a 1440
1441 if (!set)
1442 cmi = NOT_SIMPLE_MOVE;
1443 else
1444 {
1e5b92fa 1445 if (find_pseudo_copy (set))
1a6a0f2a 1446 cmi = SIMPLE_PSEUDO_REG_MOVE;
1a6a0f2a 1447 else
1448 cmi = SIMPLE_MOVE;
1449 }
1450
1451 n = recog_data.n_operands;
1452 for (i = 0; i < n; ++i)
1453 {
1454 for_each_rtx (&recog_data.operand[i],
1455 find_decomposable_subregs,
1456 &cmi);
1457
1458 /* We handle ASM_OPERANDS as a special case to support
1459 things like x86 rdtsc which returns a DImode value.
1460 We can decompose the output, which will certainly be
1461 operand 0, but not the inputs. */
1462
1463 if (cmi == SIMPLE_MOVE
1464 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1465 {
1466 gcc_assert (i == 0);
1467 cmi = NOT_SIMPLE_MOVE;
1468 }
1469 }
1470 }
1471 }
1472
1473 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1474 if (!bitmap_empty_p (decomposable_context))
1475 {
97bb2849 1476 sbitmap sub_blocks;
db1c50be 1477 unsigned int i;
1478 sbitmap_iterator sbi;
1a6a0f2a 1479 bitmap_iterator iter;
1480 unsigned int regno;
1481
1482 propagate_pseudo_copies ();
1483
97bb2849 1484 sub_blocks = sbitmap_alloc (last_basic_block);
1485 sbitmap_zero (sub_blocks);
1a6a0f2a 1486
1487 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1488 decompose_register (regno);
1489
1490 FOR_EACH_BB (bb)
1491 {
201f6961 1492 rtx insn;
c7944dce 1493 bool speed_p;
1a6a0f2a 1494
c7944dce 1495 speed_p = optimize_bb_for_speed_p (bb);
201f6961 1496 FOR_BB_INSNS (bb, insn)
1a6a0f2a 1497 {
da7a04f1 1498 rtx pat;
1a6a0f2a 1499
1500 if (!INSN_P (insn))
1501 continue;
1502
1a6a0f2a 1503 pat = PATTERN (insn);
1504 if (GET_CODE (pat) == CLOBBER)
db2200eb 1505 resolve_clobber (pat, insn);
1a6a0f2a 1506 else if (GET_CODE (pat) == USE)
db2200eb 1507 resolve_use (pat, insn);
9845d120 1508 else if (DEBUG_INSN_P (insn))
1509 resolve_debug (insn);
1a6a0f2a 1510 else
1511 {
1512 rtx set;
1513 int i;
1514
1515 recog_memoized (insn);
1516 extract_insn (insn);
1517
c7944dce 1518 set = simple_move (insn, speed_p);
1a6a0f2a 1519 if (set)
1520 {
1521 rtx orig_insn = insn;
97bb2849 1522 bool cfi = control_flow_insn_p (insn);
1a6a0f2a 1523
db1c50be 1524 /* We can end up splitting loads to multi-word pseudos
1525 into separate loads to machine word size pseudos.
1526 When this happens, we first had one load that can
1527 throw, and after resolve_simple_move we'll have a
1528 bunch of loads (at least two). All those loads may
1529 trap if we can have non-call exceptions, so they
1530 all will end the current basic block. We split the
1531 block after the outer loop over all insns, but we
1532 make sure here that we will be able to split the
1533 basic block and still produce the correct control
1534 flow graph for it. */
1535 gcc_assert (!cfi
cbeb677e 1536 || (cfun->can_throw_non_call_exceptions
db1c50be 1537 && can_throw_internal (insn)));
1538
1a6a0f2a 1539 insn = resolve_simple_move (set, insn);
1540 if (insn != orig_insn)
1541 {
1a6a0f2a 1542 recog_memoized (insn);
1543 extract_insn (insn);
97bb2849 1544
1545 if (cfi)
1546 SET_BIT (sub_blocks, bb->index);
1a6a0f2a 1547 }
1548 }
9cf5d19e 1549 else
1550 {
1551 rtx decomposed_shift;
1552
1553 decomposed_shift = resolve_shift_zext (insn);
1554 if (decomposed_shift != NULL_RTX)
1555 {
9cf5d19e 1556 insn = decomposed_shift;
1557 recog_memoized (insn);
1558 extract_insn (insn);
1559 }
1560 }
1a6a0f2a 1561
1562 for (i = recog_data.n_operands - 1; i >= 0; --i)
1563 for_each_rtx (recog_data.operand_loc[i],
1564 resolve_subreg_use,
1565 insn);
1566
1567 resolve_reg_notes (insn);
1568
1569 if (num_validated_changes () > 0)
1570 {
1571 for (i = recog_data.n_dups - 1; i >= 0; --i)
1572 {
1573 rtx *pl = recog_data.dup_loc[i];
1574 int dup_num = recog_data.dup_num[i];
1575 rtx *px = recog_data.operand_loc[dup_num];
1576
c47adb48 1577 validate_unshare_change (insn, pl, *px, 1);
1a6a0f2a 1578 }
1579
1580 i = apply_change_group ();
1581 gcc_assert (i);
1a6a0f2a 1582 }
1583 }
1a6a0f2a 1584 }
1585 }
1586
db1c50be 1587 /* If we had insns to split that caused control flow insns in the middle
1588 of a basic block, split those blocks now. Note that we only handle
1589 the case where splitting a load has caused multiple possibly trapping
1590 loads to appear. */
1591 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi)
1592 {
1593 rtx insn, end;
1594 edge fallthru;
1595
1596 bb = BASIC_BLOCK (i);
1597 insn = BB_HEAD (bb);
1598 end = BB_END (bb);
1599
1600 while (insn != end)
1601 {
1602 if (control_flow_insn_p (insn))
1603 {
1604 /* Split the block after insn. There will be a fallthru
1605 edge, which is OK so we keep it. We have to create the
1606 exception edges ourselves. */
1607 fallthru = split_block (bb, insn);
1608 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1609 bb = fallthru->dest;
1610 insn = BB_HEAD (bb);
1611 }
1612 else
1613 insn = NEXT_INSN (insn);
1614 }
1615 }
97bb2849 1616
97bb2849 1617 sbitmap_free (sub_blocks);
1a6a0f2a 1618 }
1619
1620 {
1621 unsigned int i;
1622 bitmap b;
1623
48148244 1624 FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b)
1a6a0f2a 1625 if (b)
1626 BITMAP_FREE (b);
1627 }
1628
48e1416a 1629 VEC_free (bitmap, heap, reg_copy_graph);
1a6a0f2a 1630
1631 BITMAP_FREE (decomposable_context);
1632 BITMAP_FREE (non_decomposable_context);
5277d36e 1633 BITMAP_FREE (subreg_context);
1a6a0f2a 1634}
1635\f
1636/* Gate function for lower subreg pass. */
1637
1638static bool
1639gate_handle_lower_subreg (void)
1640{
1641 return flag_split_wide_types != 0;
1642}
1643
/* Execute function of the first lower subreg pass.  Runs the
   decomposition and always returns 0.  */

static unsigned int
rest_of_handle_lower_subreg (void)
{
  unsigned int status = 0;

  decompose_multiword_subregs ();

  return status;
}
1652
/* Execute function of the second lower subreg pass.  Runs the same
   decomposition as the first pass and always returns 0.  */

static unsigned int
rest_of_handle_lower_subreg2 (void)
{
  unsigned int status = 0;

  decompose_multiword_subregs ();

  return status;
}
1661
20099e35 1662struct rtl_opt_pass pass_lower_subreg =
1a6a0f2a 1663{
20099e35 1664 {
1665 RTL_PASS,
b85ccd2c 1666 "subreg1", /* name */
1a6a0f2a 1667 gate_handle_lower_subreg, /* gate */
1668 rest_of_handle_lower_subreg, /* execute */
1669 NULL, /* sub */
1670 NULL, /* next */
1671 0, /* static_pass_number */
1672 TV_LOWER_SUBREG, /* tv_id */
1673 0, /* properties_required */
1674 0, /* properties_provided */
1675 0, /* properties_destroyed */
1676 0, /* todo_flags_start */
97bb2849 1677 TODO_ggc_collect |
20099e35 1678 TODO_verify_flow /* todo_flags_finish */
1679 }
1a6a0f2a 1680};
1681
20099e35 1682struct rtl_opt_pass pass_lower_subreg2 =
1a6a0f2a 1683{
20099e35 1684 {
1685 RTL_PASS,
1a6a0f2a 1686 "subreg2", /* name */
1687 gate_handle_lower_subreg, /* gate */
1688 rest_of_handle_lower_subreg2, /* execute */
1689 NULL, /* sub */
1690 NULL, /* next */
1691 0, /* static_pass_number */
1692 TV_LOWER_SUBREG, /* tv_id */
1693 0, /* properties_required */
1694 0, /* properties_provided */
1695 0, /* properties_destroyed */
1696 0, /* todo_flags_start */
0806b508 1697 TODO_df_finish | TODO_verify_rtl_sharing |
97bb2849 1698 TODO_ggc_collect |
20099e35 1699 TODO_verify_flow /* todo_flags_finish */
1700 }
1a6a0f2a 1701};