]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
2015-10-30 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "expmed.h"
32 #include "insn-config.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35 #include "cfgrtl.h"
36 #include "cfgbuild.h"
37 #include "dce.h"
38 #include "expr.h"
39 #include "tree-pass.h"
40 #include "lower-subreg.h"
41 #include "rtl-iter.h"
42
43
44 /* Decompose multi-word pseudo-registers into individual
45 pseudo-registers when possible and profitable. This is possible
46 when all the uses of a multi-word register are via SUBREG, or are
47 copies of the register to another location. Breaking apart the
48 register permits more CSE and permits better register allocation.
49 This is profitable if the machine does not have move instructions
50 to do this.
51
52 This pass only splits moves with modes that are wider than
53 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
54 integer modes that are twice the width of word_mode. The latter
55 could be generalized if there was a need to do this, but the trend in
56 architectures is to not need this.
57
58 There are two useful preprocessor defines for use by maintainers:
59
60 #define LOG_COSTS 1
61
62 if you wish to see the actual cost estimates that are being used
63 for each mode wider than word mode and the cost estimates for zero
64 extension and the shifts. This can be useful when port maintainers
65 are tuning insn rtx costs.
66
67 #define FORCE_LOWERING 1
68
69 if you wish to test the pass with all the transformation forced on.
70 This can be useful for finding bugs in the transformations. */
71
72 #define LOG_COSTS 0
73 #define FORCE_LOWERING 0
74
75 /* Bit N in this bitmap is set if regno N is used in a context in
76 which we can decompose it. */
77 static bitmap decomposable_context;
78
/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
81 static bitmap non_decomposable_context;
82
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
87 static bitmap subreg_context;
88
89 /* Bit N in the bitmap in element M of this array is set if there is a
90 copy from reg M to reg N. */
91 static vec<bitmap> reg_copy_graph;
92
93 struct target_lower_subreg default_target_lower_subreg;
94 #if SWITCHABLE_TARGET
95 struct target_lower_subreg *this_target_lower_subreg
96 = &default_target_lower_subreg;
97 #endif
98
99 #define twice_word_mode \
100 this_target_lower_subreg->x_twice_word_mode
101 #define choices \
102 this_target_lower_subreg->x_choices
103
/* RTXes used while computing costs.  These are scratch expressions,
   built once in init_lower_subreg and then retargeted in place (via
   PUT_CODE/PUT_MODE) by the cost routines instead of allocating fresh
   RTL for every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
119
/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    machine_mode mode, int op1)
{
  /* Retarget the shared scratch shift expression in place rather than
     building new RTL for every cost query.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, mode, speed_p);
}
133
134 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
135 to true if it is profitable to split a double-word CODE shift
136 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
137 for speed or size profitability.
138
139 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
140 the cost of moving zero into a word-mode register. WORD_MOVE_COST
141 is the cost of moving between word registers. */
142
143 static void
144 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
145 bool *splitting, enum rtx_code code,
146 int word_move_zero_cost, int word_move_cost)
147 {
148 int wide_cost, narrow_cost, upper_cost, i;
149
150 for (i = 0; i < BITS_PER_WORD; i++)
151 {
152 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
153 i + BITS_PER_WORD);
154 if (i == 0)
155 narrow_cost = word_move_cost;
156 else
157 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
158
159 if (code != ASHIFTRT)
160 upper_cost = word_move_zero_cost;
161 else if (i == BITS_PER_WORD - 1)
162 upper_cost = word_move_cost;
163 else
164 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
165 BITS_PER_WORD - 1);
166
167 if (LOG_COSTS)
168 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
169 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
170 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
171
172 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
173 splitting[i] = true;
174 }
175 }
176
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: loading zero into a word-mode register, and a
     word-mode register-to-register move.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether splitting a move
     into FACTOR word-mode moves costs no more than one wide move.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here is to check whether moving the upper part
	 with a zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Record, per shift count, whether splitting each kind of
	 double-word shift is profitable.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
256
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch RTL once; the cost routines re-mode these
     expressions in place for every query.  The register numbers used
     are scratch pseudos just past the virtual register range.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
284
285 static bool
286 simple_move_operand (rtx x)
287 {
288 if (GET_CODE (x) == SUBREG)
289 x = SUBREG_REG (x);
290
291 if (!OBJECT_P (x))
292 return false;
293
294 if (GET_CODE (x) == LABEL_REF
295 || GET_CODE (x) == SYMBOL_REF
296 || GET_CODE (x) == HIGH
297 || GET_CODE (x) == CONST)
298 return false;
299
300 if (MEM_P (x)
301 && (MEM_VOLATILE_P (x)
302 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
303 return false;
304
305 return true;
306 }
307
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands; anything
     else means the insn does more than a plain copy.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only split moves in modes the cost analysis marked profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
365
366 /* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
369
370 static bool
371 find_pseudo_copy (rtx set)
372 {
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
377
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
380
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
385
386 b = reg_copy_graph[rs];
387 if (b == NULL)
388 {
389 b = BITMAP_ALLOC (NULL);
390 reg_copy_graph[rs] = b;
391 }
392
393 bitmap_set_bit (b, rd);
394
395 return true;
396 }
397
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Standard work-list fixpoint: QUEUE holds registers whose copy
     edges have not yet been followed.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    /* Add the copy destinations of register I, except those
	       already known to be non-decomposable.  */
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Only newly discovered registers need to be re-queued.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
435
/* A pointer to one of these values is passed to
   find_decomposable_subregs to say how the insn being scanned uses
   the registers it mentions.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
448
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; don't look
		 inside this subreg.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Sizes in bytes and in words (rounded up) of the subreg
	     and the register underneath it.  */
	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the inner register
	     so we don't iterate over it.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's
	     recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
565
566 /* Decompose REGNO into word-sized components. We smash the REG node
567 in place. This ensures that (1) something goes wrong quickly if we
568 fail to make some replacement, and (2) the debug information inside
569 the symbol table is automatically kept up to date. */
570
571 static void
572 decompose_register (unsigned int regno)
573 {
574 rtx reg;
575 unsigned int words, i;
576 rtvec v;
577
578 reg = regno_reg_rtx[regno];
579
580 regno_reg_rtx[regno] = NULL_RTX;
581
582 words = GET_MODE_SIZE (GET_MODE (reg));
583 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
584
585 v = rtvec_alloc (words);
586 for (i = 0; i < words; ++i)
587 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
588
589 PUT_CODE (reg, CONCATN);
590 XVEC (reg, 0) = v;
591
592 if (dump_file)
593 {
594 fprintf (dump_file, "; Splitting reg %u ->", regno);
595 for (i = 0; i < words; ++i)
596 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
597 fputc ('\n', dump_file);
598 }
599 }
600
/* Get a SUBREG of a CONCATN.  Return NULL_RTX if the requested piece
   does not lie entirely within one part of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Size in bytes of each part of the CONCATN; BYTE selects which
     part the subreg starts in.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the piece would straddle a part boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
640
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, offset-0 subreg: peel it off and recurse on the
	 underlying CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The subreg crosses a part boundary of the CONCATN; fold
	     the two byte offsets together and extract directly.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
701
702 /* Return whether we should resolve X into the registers into which it
703 was decomposed. */
704
705 static bool
706 resolve_reg_p (rtx x)
707 {
708 return GET_CODE (x) == CONCATN;
709 }
710
711 /* Return whether X is a SUBREG of a register which we need to
712 resolve. */
713
714 static bool
715 resolve_subreg_p (rtx x)
716 {
717 if (GET_CODE (x) != SUBREG)
718 return false;
719 return resolve_reg_p (SUBREG_REG (x));
720 }
721
/* Look for SUBREGs in *LOC which need to be decomposed.  Return true
   if *LOC contains a reference we could not rewrite (the caller must
   then handle it, e.g. by deleting the containing note); successful
   rewrites are queued via validate_change as part of a change group
   for a later apply_change_group.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement (last argument 1 means "in group").  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
759
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* A REG_EQUAL/REG_EQUIV note: rewrite decomposed subregs inside it,
     or drop the note entirely if it contains a reference we cannot
     rewrite.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD/REG_UNUSED notes that mention a decomposed
     register; they no longer describe a live register.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      /* Unlink deleted notes; otherwise advance down the list.  */
      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
803
804 /* Return whether X can be decomposed into subwords. */
805
806 static bool
807 can_decompose_p (rtx x)
808 {
809 if (REG_P (x))
810 {
811 unsigned int regno = REGNO (x);
812
813 if (HARD_REGISTER_NUM_P (regno))
814 {
815 unsigned int byte, num_bytes;
816
817 num_bytes = GET_MODE_SIZE (GET_MODE (x));
818 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
819 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
820 return false;
821 return true;
822 }
823 else
824 return !bitmap_bit_p (subreg_context, regno);
825 }
826
827 return true;
828 }
829
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces in the move, rounding up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh register first and recursively resolve
	 that simpler move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any auto-increment note over to the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Stage through an integer mode of the same size.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words are pushed.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the full destination pseudo first, since the stores
	 below each set only one word of it.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we staged through a temporary, emit the final copy into the
     real destination and resolve that move too.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1068
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only registers (or subregs of registers) that decompose_register
     split apart need rewriting.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word_mode pieces, rounding the mode size up to a whole
     number of words.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite the existing CLOBBER in place so it clobbers the first
     word-mode piece.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER for each remaining word.  Iterating
     downward keeps the new insns in ascending word order, because
     each one is emitted immediately after INSN.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1109
1110 /* A USE of a decomposed register is no longer meaningful. Return
1111 whether we changed something. */
1112
1113 static bool
1114 resolve_use (rtx pat, rtx_insn *insn)
1115 {
1116 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1117 {
1118 delete_insn (insn);
1119 return true;
1120 }
1121
1122 resolve_reg_notes (insn);
1123
1124 return false;
1125 }
1126
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern and rewrite
     references to decomposed registers.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to replace a subreg of a concatn with the matching
	     word-mode piece.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* No simplification possible; take an unshared copy and
	       fall through to the resolve_reg_p check below.  */
	    x = copy_rtx (*loc);
	}
      if (resolve_reg_p (x))
	/* Unshare the concatn so later modifications don't alias
	   other uses.  */
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1155
1156 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1157 set the decomposable_context bitmap accordingly. SPEED_P is true
1158 if we are optimizing INSN for speed rather than size. Return true
1159 if INSN is decomposable. */
1160
1161 static bool
1162 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1163 {
1164 rtx set;
1165 rtx op;
1166 rtx op_operand;
1167
1168 set = single_set (insn);
1169 if (!set)
1170 return false;
1171
1172 op = SET_SRC (set);
1173 if (GET_CODE (op) != ASHIFT
1174 && GET_CODE (op) != LSHIFTRT
1175 && GET_CODE (op) != ASHIFTRT
1176 && GET_CODE (op) != ZERO_EXTEND)
1177 return false;
1178
1179 op_operand = XEXP (op, 0);
1180 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1181 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1182 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1183 || GET_MODE (op) != twice_word_mode)
1184 return false;
1185
1186 if (GET_CODE (op) == ZERO_EXTEND)
1187 {
1188 if (GET_MODE (op_operand) != word_mode
1189 || !choices[speed_p].splitting_zext)
1190 return false;
1191 }
1192 else /* left or right shift */
1193 {
1194 bool *splitting = (GET_CODE (op) == ASHIFT
1195 ? choices[speed_p].splitting_ashift
1196 : GET_CODE (op) == ASHIFTRT
1197 ? choices[speed_p].splitting_ashiftrt
1198 : choices[speed_p].splitting_lshiftrt);
1199 if (!CONST_INT_P (XEXP (op, 1))
1200 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1201 2 * BITS_PER_WORD - 1)
1202 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1203 return false;
1204
1205 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1206 }
1207
1208 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1209
1210 return true;
1211 }
1212
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering is reversed, but only
     when the source really is a multiword value (a ZERO_EXTEND source
     is a single word).  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the significant destination word, the other
     destination word, and the source word respectively.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  /* DEST_REG receives the (possibly shifted) source word; DEST_UPPER
     is the other destination word, which becomes zero, a copy of the
     sign word, or the sign fill, depending on the operation.  */
  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                            GET_MODE (SET_DEST (set)),
                                            offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift (except by all bits but one) the
     upper word is the source word shifted by BITS_PER_WORD - 1,
     i.e. a word filled with the sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is just a word move; larger
	 counts need a residual word-mode shift.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting by all bits but one: both words equal the sign word.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1318
1319 /* Print to dump_file a description of what we're doing with shift code CODE.
1320 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1321
1322 static void
1323 dump_shift_choices (enum rtx_code code, bool *splitting)
1324 {
1325 int i;
1326 const char *sep;
1327
1328 fprintf (dump_file,
1329 " Splitting mode %s for %s lowering with shift amounts = ",
1330 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1331 sep = "";
1332 for (i = 0; i < BITS_PER_WORD; i++)
1333 if (splitting[i])
1334 {
1335 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1336 sep = ",";
1337 }
1338 fprintf (dump_file, "\n");
1339 }
1340
1341 /* Print to dump_file a description of what we're doing when optimizing
1342 for speed or size; SPEED_P says which. DESCRIPTION is a description
1343 of the SPEED_P choice. */
1344
1345 static void
1346 dump_choices (bool speed_p, const char *description)
1347 {
1348 unsigned int i;
1349
1350 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1351
1352 for (i = 0; i < MAX_MACHINE_MODE; i++)
1353 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1354 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1355 choices[speed_p].move_modes_to_split[i]
1356 ? "Splitting"
1357 : "Skipping",
1358 GET_MODE_NAME ((machine_mode) i));
1359
1360 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1361 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1362 GET_MODE_NAME (twice_word_mode));
1363
1364 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1365 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1366 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1367 fprintf (dump_file, "\n");
1368 }
1369
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* DF is only initialized for the second invocation of this pass;
     run word-level dead code elimination first in that case.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn, classifying moves and recording which
     pseudos may (decomposable_context) or may not
     (non_decomposable_context) be split.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Registers seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that references a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  /* Remember blocks that acquired a control flow
			     insn in their middle; they are split below.  */
			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep match_dup operands in sync with the operands
			 they duplicate before committing the changes.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy bitmaps and the other scratch
     bitmaps allocated above.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1664 \f
/* Implement first lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* The first pass does not decompose pseudo-to-pseudo copies.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1700
/* Factory for the first lower-subreg pass; called by the pass-manager
   machinery (see passes.def).  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1706
/* Implement second lower subreg pass.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* The second pass also decomposes pseudo-to-pseudo copies.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1742
/* Factory for the second lower-subreg pass; called by the pass-manager
   machinery (see passes.def).  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}