/* Source: gcc/lower-subreg.c, from the thirdparty/gcc.git mirror at
   git.ipfire.org (web-view navigation text removed).  */
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42 #include "lower-subreg.h"
43 #include "rtl-iter.h"
44
45 #ifdef STACK_GROWS_DOWNWARD
46 # undef STACK_GROWS_DOWNWARD
47 # define STACK_GROWS_DOWNWARD 1
48 #else
49 # define STACK_GROWS_DOWNWARD 0
50 #endif
51
52
53 /* Decompose multi-word pseudo-registers into individual
54 pseudo-registers when possible and profitable. This is possible
55 when all the uses of a multi-word register are via SUBREG, or are
56 copies of the register to another location. Breaking apart the
57 register permits more CSE and permits better register allocation.
58 This is profitable if the machine does not have move instructions
59 to do this.
60
61 This pass only splits moves with modes that are wider than
62 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
63 integer modes that are twice the width of word_mode. The latter
64 could be generalized if there was a need to do this, but the trend in
65 architectures is to not need this.
66
67 There are two useful preprocessor defines for use by maintainers:
68
69 #define LOG_COSTS 1
70
71 if you wish to see the actual cost estimates that are being used
72 for each mode wider than word mode and the cost estimates for zero
73 extension and the shifts. This can be useful when port maintainers
74 are tuning insn rtx costs.
75
76 #define FORCE_LOWERING 1
77
78 if you wish to test the pass with all the transformation forced on.
79 This can be useful for finding bugs in the transformations. */
80
81 #define LOG_COSTS 0
82 #define FORCE_LOWERING 0
83
84 /* Bit N in this bitmap is set if regno N is used in a context in
85 which we can decompose it. */
86 static bitmap decomposable_context;
87
88 /* Bit N in this bitmap is set if regno N is used in a context in
89 which it can not be decomposed. */
90 static bitmap non_decomposable_context;
91
92 /* Bit N in this bitmap is set if regno N is used in a subreg
93 which changes the mode but not the size. This typically happens
94 when the register accessed as a floating-point value; we want to
95 avoid generating accesses to its subwords in integer modes. */
96 static bitmap subreg_context;
97
98 /* Bit N in the bitmap in element M of this array is set if there is a
99 copy from reg M to reg N. */
100 static vec<bitmap> reg_copy_graph;
101
102 struct target_lower_subreg default_target_lower_subreg;
103 #if SWITCHABLE_TARGET
104 struct target_lower_subreg *this_target_lower_subreg
105 = &default_target_lower_subreg;
106 #endif
107
108 #define twice_word_mode \
109 this_target_lower_subreg->x_twice_word_mode
110 #define choices \
111 this_target_lower_subreg->x_choices
112
/* RTXes used while computing costs.  These are scratch expressions:
   shift_cost and compute_costs retarget them in place (via PUT_CODE,
   PUT_MODE and operand assignment) so that each cost query does not
   allocate fresh RTL.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
128
129 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
130 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
131
132 static int
133 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
134 enum machine_mode mode, int op1)
135 {
136 PUT_CODE (rtxes->shift, code);
137 PUT_MODE (rtxes->shift, mode);
138 PUT_MODE (rtxes->source, mode);
139 XEXP (rtxes->shift, 1) = GEN_INT (op1);
140 return set_src_cost (rtxes->shift, speed_p);
141 }
142
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the full shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of producing the low word of the split form: a shift by
	 BITS_PER_WORD is just a word move of the high part.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the upper word: zero for logical shifts,
	 a sign-fill (word shift by BITS_PER_WORD - 1) for ASHIFTRT,
	 or just a move when the whole result is the sign word.  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      /* Split when the wide form is no cheaper than the two-part form.  */
      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
185
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  Results are recorded
   in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      enum machine_mode mode = (enum machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      /* Only multi-word modes are candidates for splitting.  */
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  /* Split when one multi-word move is no cheaper than FACTOR
	     individual word moves.  */
	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
265
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  Builds the scratch rtxes
   used by compute_costs and fills in the target-specific `choices'
   tables for both the size (false) and speed (true) variants.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Use pseudo register numbers so the cost hooks do not treat these
     as references to specific hard registers.  */
  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
293
294 static bool
295 simple_move_operand (rtx x)
296 {
297 if (GET_CODE (x) == SUBREG)
298 x = SUBREG_REG (x);
299
300 if (!OBJECT_P (x))
301 return false;
302
303 if (GET_CODE (x) == LABEL_REF
304 || GET_CODE (x) == SYMBOL_REF
305 || GET_CODE (x) == HIGH
306 || GET_CODE (x) == CONST)
307 return false;
308
309 if (MEM_P (x)
310 && (MEM_VOLATILE_P (x)
311 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
312 return false;
313
314 return true;
315 }
316
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called, since we consult recog_data below.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  enum machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the insn's actual operands; anything
     else means the move is buried in a more complex pattern.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, only modes the cost analysis said were profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
374
375 /* If SET is a copy from one multi-word pseudo-register to another,
376 record that in reg_copy_graph. Return whether it is such a
377 copy. */
378
379 static bool
380 find_pseudo_copy (rtx set)
381 {
382 rtx dest = SET_DEST (set);
383 rtx src = SET_SRC (set);
384 unsigned int rd, rs;
385 bitmap b;
386
387 if (!REG_P (dest) || !REG_P (src))
388 return false;
389
390 rd = REGNO (dest);
391 rs = REGNO (src);
392 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
393 return false;
394
395 b = reg_copy_graph[rs];
396 if (b == NULL)
397 {
398 b = BITMAP_ALLOC (NULL);
399 reg_copy_graph[rs] = b;
400 }
401
402 bitmap_set_bit (b, rd);
403
404 return true;
405 }
406
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.
   This is a fixed-point iteration over the reg_copy_graph.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Seed the worklist with everything already known decomposable.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Collect every register copied from a register on the worklist,
	 excluding those known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* The next worklist is only the newly discovered registers;
	 fold the rest into the result.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
444
/* Classification of the insn being scanned; a pointer to one of these
   values is passed to find_decomposable_subregs via for_each_rtx.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
457
/* This is called via for_each_rtx.  If we find a SUBREG which we
   could use to decompose a pseudo-register, set a bit in
   DECOMPOSABLE_CONTEXT.  If we find an unadorned register which is
   not a simple pseudo-register copy, DATA will point at the type of
   move, and we set a bit in DECOMPOSABLE_CONTEXT or
   NON_DECOMPOSABLE_CONTEXT as appropriate.
   Returns -1 to stop recursing into the current subexpression,
   0 to continue the walk.  */

static int
find_decomposable_subregs (rtx *px, void *data)
{
  enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
  rtx x = *px;

  if (x == NULL_RTX)
    return 0;

  if (GET_CODE (x) == SUBREG)
    {
      rtx inner = SUBREG_REG (x);
      unsigned int regno, outer_size, inner_size, outer_words, inner_words;

      if (!REG_P (inner))
	return 0;

      regno = REGNO (inner);
      /* Hard registers are never decomposed; skip the inner REG too.  */
      if (HARD_REGISTER_NUM_P (regno))
	return -1;

      outer_size = GET_MODE_SIZE (GET_MODE (x));
      inner_size = GET_MODE_SIZE (GET_MODE (inner));
      /* Round sizes up to whole words.  */
      outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

      /* We only try to decompose single word subregs of multi-word
	 registers.  When we find one, we return -1 to avoid iterating
	 over the inner register.

	 ??? This doesn't allow, e.g., DImode subregs of TImode values
	 on 32-bit targets.  We would need to record the way the
	 pseudo-register was used, and only decompose if all the uses
	 were the same number and size of pieces.  Hopefully this
	 doesn't happen much.  */

      if (outer_words == 1 && inner_words > 1)
	{
	  bitmap_set_bit (decomposable_context, regno);
	  return -1;
	}

      /* If this is a cast from one mode to another, where the modes
	 have the same size, and they are not tieable, then mark this
	 register as non-decomposable.  If we decompose it we are
	 likely to mess up whatever the backend is trying to do.  */
      if (outer_words > 1
	  && outer_size == inner_size
	  && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	{
	  bitmap_set_bit (non_decomposable_context, regno);
	  bitmap_set_bit (subreg_context, regno);
	  return -1;
	}
    }
  else if (REG_P (x))
    {
      unsigned int regno;

      /* We will see an outer SUBREG before we see the inner REG, so
	 when we see a plain REG here it means a direct reference to
	 the register.

	 If this is not a simple copy from one location to another,
	 then we can not decompose this register.  If this is a simple
	 copy we want to decompose, and the mode is right,
	 then we mark the register as decomposable.
	 Otherwise we don't say anything about this register --
	 it could be decomposed, but whether that would be
	 profitable depends upon how it is used elsewhere.

	 We only set bits in the bitmap for multi-word
	 pseudo-registers, since those are the only ones we care about
	 and it keeps the size of the bitmaps down.  */

      regno = REGNO (x);
      if (!HARD_REGISTER_NUM_P (regno)
	  && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	{
	  switch (*pcmi)
	    {
	    case NOT_SIMPLE_MOVE:
	      bitmap_set_bit (non_decomposable_context, regno);
	      break;
	    case DECOMPOSABLE_SIMPLE_MOVE:
	      /* Only decompose if the word pieces can live in the same
		 registers as the original mode.  */
	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		bitmap_set_bit (decomposable_context, regno);
	      break;
	    case SIMPLE_MOVE:
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
    }
  else if (MEM_P (x))
    {
      enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

      /* Any registers used in a MEM do not participate in a
	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	 here, and return -1 to block the parent's recursion.  */
      for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
      return -1;
    }

  return 0;
}
573
/* Decompose REGNO into word-sized components.  We smash the REG node
   in place, turning it into a CONCATN of fresh word-mode pseudos.
   This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  /* The old REG node no longer represents a single register.  */
  regno_reg_rtx[regno] = NULL_RTX;

  /* Number of words, rounding up.  */
  words = GET_MODE_SIZE (GET_MODE (reg));
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Smash REG into a CONCATN of the new word-sized pseudos.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}
608
/* Get a SUBREG of a CONCATN.  OP must be a CONCATN; return the
   OUTERMODE value found at byte offset BYTE within it, or NULL_RTX if
   the requested piece straddles two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (enum machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  enum machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* All elements of a CONCATN are the same size, so the element
     containing BYTE is found by simple division.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece does not lie entirely within one
     element of the CONCATN.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
648
/* Wrapper around simplify_gen_subreg which handles CONCATN.  Return
   the OUTERMODE value at byte offset BYTE of OP (whose mode is
   INNERMODE), looking through SUBREGs of CONCATNs as needed.  */

static rtx
simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
			     enum machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is a pure mode change: recurse
	 directly on the underlying CONCATN.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to extract the part OP denotes.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The piece OP names straddles CONCATN elements; combine the
	     two byte offsets and extract from the CONCATN directly.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
709
710 /* Return whether we should resolve X into the registers into which it
711 was decomposed. */
712
713 static bool
714 resolve_reg_p (rtx x)
715 {
716 return GET_CODE (x) == CONCATN;
717 }
718
719 /* Return whether X is a SUBREG of a register which we need to
720 resolve. */
721
722 static bool
723 resolve_subreg_p (rtx x)
724 {
725 if (GET_CODE (x) != SUBREG)
726 return false;
727 return resolve_reg_p (SUBREG_REG (x));
728 }
729
/* Look for SUBREGs in *LOC which need to be decomposed, replacing
   each (as a queued change against INSN) with the component register
   it denotes.  Return true only when the caller must delete the
   containing note: either a reference that cannot be simplified
   (INSN must then be NULL) or a direct reference to a decomposed
   register.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller applies the change group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
767
/* Resolve any decomposed registers which appear in register notes on
   INSN.  REG_EQUAL/REG_EQUIV notes are rewritten or removed;
   REG_DEAD and REG_UNUSED notes for decomposed registers are
   deleted outright.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      /* Pass NULL_RTX so an unsimplifiable reference reports failure
	 rather than asserting; then the note must go.  */
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Walk the remaining notes, unlinking those that mention a
     decomposed register.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
811
/* Return whether X can be decomposed into subwords.  For a hard
   register this means every word-sized piece maps onto a valid hard
   register; for a pseudo it means the register was not flagged as
   used in a mode-changing subreg (subreg_context).  Non-registers
   are always considered decomposable here.  */

static bool
can_decompose_p (rtx x)
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
	{
	  unsigned int byte, num_bytes;

	  /* Check every word of the register individually.  */
	  num_bytes = GET_MODE_SIZE (GET_MODE (x));
	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
	      return false;
	  return true;
	}
      else
	return !bitmap_bit_p (subreg_context, regno);
    }

  return true;
}
837
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  Note that this function calls itself
   recursively for the temporary-register copies it introduces.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  enum machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces, rounding up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      /* Remember the true destination; the final copy to it happens
	 at the end of this function.  */
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh register first, resolving that move
	 recursively, then continue with the register as the source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      {
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    /* Keep any auto-increment note with the move that now
	       performs the memory access.  */
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the integer mode of the same size for the temporary.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which to push the words so that the
	 lowest-addressed word ends up where memory layout expects it.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  /* Push word J of the source through a copy of the
	     auto-modifying destination address.  */
	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Tell the register allocator the whole pseudo dies here before
	 we write it piecewise.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      /* Copy the temporary into the real destination, resolving that
	 move recursively as well.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1078
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  enum machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only clobbers of registers we decomposed (or of subregs of them)
     need rewriting.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-mode pieces, rounding up in case the mode size is
     not an exact multiple of the word size.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Re-point the existing CLOBBER at word 0 of the decomposition.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER for each remaining word.  Iterating
     downward with emit_insn_after leaves the new clobbers in
     ascending word order after INSN.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  /* Rewrite any register notes that refer to decomposed registers.  */
  resolve_reg_notes (insn);

  return true;
}
1119
1120 /* A USE of a decomposed register is no longer meaningful. Return
1121 whether we changed something. */
1122
1123 static bool
1124 resolve_use (rtx pat, rtx_insn *insn)
1125 {
1126 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1127 {
1128 delete_insn (insn);
1129 return true;
1130 }
1131
1132 resolve_reg_notes (insn);
1133
1134 return false;
1135 }
1136
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern and rewrite
     references to decomposed registers in place.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* If the subreg could not be simplified, keep *LOC as is but
	     fall through with a copy so the resolve_reg_p check below
	     can still unshare a decomposed inner register.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* Unshare decomposed registers: debug insns must not share rtl
	 that later substitutions could modify.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1165
1166 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1167 set the decomposable_context bitmap accordingly. SPEED_P is true
1168 if we are optimizing INSN for speed rather than size. Return true
1169 if INSN is decomposable. */
1170
1171 static bool
1172 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1173 {
1174 rtx set;
1175 rtx op;
1176 rtx op_operand;
1177
1178 set = single_set (insn);
1179 if (!set)
1180 return false;
1181
1182 op = SET_SRC (set);
1183 if (GET_CODE (op) != ASHIFT
1184 && GET_CODE (op) != LSHIFTRT
1185 && GET_CODE (op) != ASHIFTRT
1186 && GET_CODE (op) != ZERO_EXTEND)
1187 return false;
1188
1189 op_operand = XEXP (op, 0);
1190 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1191 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1192 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1193 || GET_MODE (op) != twice_word_mode)
1194 return false;
1195
1196 if (GET_CODE (op) == ZERO_EXTEND)
1197 {
1198 if (GET_MODE (op_operand) != word_mode
1199 || !choices[speed_p].splitting_zext)
1200 return false;
1201 }
1202 else /* left or right shift */
1203 {
1204 bool *splitting = (GET_CODE (op) == ASHIFT
1205 ? choices[speed_p].splitting_ashift
1206 : GET_CODE (op) == ASHIFTRT
1207 ? choices[speed_p].splitting_ashiftrt
1208 : choices[speed_p].splitting_lshiftrt);
1209 if (!CONST_INT_P (XEXP (op, 1))
1210 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1211 2 * BITS_PER_WORD - 1)
1212 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1213 return false;
1214
1215 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1216 }
1217
1218 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1219
1220 return true;
1221 }
1222
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets word numbering within a double-word operand
     is reversed.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of: the destination word receiving the (shifted)
     source, the other destination word, and the source word within
     its operand.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift the upper word must be filled with
     copies of the source sign bit (unless the count is
     2 * BITS_PER_WORD - 1, which is handled as a plain copy below).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD degenerates into a word move;
	 larger counts need a residual word-mode shift of the source.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* Move the source word into place, then set the other word: zero for
     logical shifts and zero-extend, sign bits for arithmetic shifts.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1328
1329 /* Print to dump_file a description of what we're doing with shift code CODE.
1330 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1331
1332 static void
1333 dump_shift_choices (enum rtx_code code, bool *splitting)
1334 {
1335 int i;
1336 const char *sep;
1337
1338 fprintf (dump_file,
1339 " Splitting mode %s for %s lowering with shift amounts = ",
1340 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1341 sep = "";
1342 for (i = 0; i < BITS_PER_WORD; i++)
1343 if (splitting[i])
1344 {
1345 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1346 sep = ",";
1347 }
1348 fprintf (dump_file, "\n");
1349 }
1350
1351 /* Print to dump_file a description of what we're doing when optimizing
1352 for speed or size; SPEED_P says which. DESCRIPTION is a description
1353 of the SPEED_P choice. */
1354
1355 static void
1356 dump_choices (bool speed_p, const char *description)
1357 {
1358 unsigned int i;
1359
1360 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1361
1362 for (i = 0; i < MAX_MACHINE_MODE; i++)
1363 if (GET_MODE_SIZE ((enum machine_mode) i) > UNITS_PER_WORD)
1364 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1365 choices[speed_p].move_modes_to_split[i]
1366 ? "Splitting"
1367 : "Skipping",
1368 GET_MODE_NAME ((enum machine_mode) i));
1369
1370 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1371 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1372 GET_MODE_NAME (twice_word_mode));
1373
1374 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1375 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1376 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1377 fprintf (dump_file, "\n");
1378 }
1379
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* When dataflow info is available, defer insn rescans and run a
     word-level DCE first so we don't bother lowering dead code.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);
  /* Phase 1: scan every insn, classify it, and record decomposable and
     non-decomposable register references in the context bitmaps.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  /* CLOBBERs and USEs are handled in phase 2; they don't
	     constrain decomposability here.  */
	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      for_each_rtx (&recog_data.operand[i],
			    find_decomposable_subregs,
			    &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register qualifies only if it never appears in a
     non-decomposable context.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Blocks that end up with a control flow insn in their middle
	 and must be split after the rewrite loop.  */
      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every reference to a decomposed register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep match_dup locations identical to the operands
			 they duplicate after the substitutions above.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy bitmaps built by find_pseudo_copy.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1676 \f
/* Implement first lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
  {
    /* First pass: pseudo-to-pseudo copies are not treated as
       decomposable (DECOMPOSE_COPIES is false).  */
    decompose_multiword_subregs (false);
    return 0;
  }

}; // class pass_lower_subreg

} // anon namespace
1712
/* Create an instance of the first lower-subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1718
/* Implement second lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
  {
    /* Second pass: pseudo-to-pseudo copies are also treated as
       decomposable (DECOMPOSE_COPIES is true).  */
    decompose_multiword_subregs (true);
    return 0;
  }

}; // class pass_lower_subreg2

} // anon namespace
1754
/* Create an instance of the second lower-subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}