]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
2015-06-17 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "alias.h"
27 #include "symtab.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "obstack.h"
34 #include "predict.h"
35 #include "hard-reg-set.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "cfgrtl.h"
40 #include "cfgbuild.h"
41 #include "basic-block.h"
42 #include "recog.h"
43 #include "bitmap.h"
44 #include "dce.h"
45 #include "expmed.h"
46 #include "dojump.h"
47 #include "explow.h"
48 #include "calls.h"
49 #include "emit-rtl.h"
50 #include "varasm.h"
51 #include "stmt.h"
52 #include "expr.h"
53 #include "except.h"
54 #include "regs.h"
55 #include "tree-pass.h"
56 #include "df.h"
57 #include "lower-subreg.h"
58 #include "rtl-iter.h"
59
60
61 /* Decompose multi-word pseudo-registers into individual
62 pseudo-registers when possible and profitable. This is possible
63 when all the uses of a multi-word register are via SUBREG, or are
64 copies of the register to another location. Breaking apart the
65 register permits more CSE and permits better register allocation.
66 This is profitable if the machine does not have move instructions
67 to do this.
68
69 This pass only splits moves with modes that are wider than
70 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
71 integer modes that are twice the width of word_mode. The latter
72 could be generalized if there was a need to do this, but the trend in
73 architectures is to not need this.
74
75 There are two useful preprocessor defines for use by maintainers:
76
77 #define LOG_COSTS 1
78
79 if you wish to see the actual cost estimates that are being used
80 for each mode wider than word mode and the cost estimates for zero
81 extension and the shifts. This can be useful when port maintainers
82 are tuning insn rtx costs.
83
84 #define FORCE_LOWERING 1
85
86 if you wish to test the pass with all the transformation forced on.
87 This can be useful for finding bugs in the transformations. */
88
/* Maintainer knobs (see the comment above): LOG_COSTS prints every
   cost computation to stderr; FORCE_LOWERING forces all the
   transformations on regardless of cost.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost decisions computed by init_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* RTXes used while computing costs.  These are scratch nodes that are
   mutated in place (PUT_MODE etc.) for each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
136
137 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
138 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
139
140 static int
141 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
142 machine_mode mode, int op1)
143 {
144 PUT_CODE (rtxes->shift, code);
145 PUT_MODE (rtxes->shift, mode);
146 PUT_MODE (rtxes->source, mode);
147 XEXP (rtxes->shift, 1) = GEN_INT (op1);
148 return set_src_cost (rtxes->shift, speed_p);
149 }
150
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the whole shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of the word-mode shift in the split form; a shift count
	 of exactly BITS_PER_WORD (i == 0) degenerates to a plain
	 word move.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other word of the split result: zero
	 for the logical shifts; for ASHIFTRT it is a copy when the
	 count reaches the last bit, otherwise a shift by
	 BITS_PER_WORD - 1 (which replicates the sign bit).  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
193
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: moving zero into a word-mode register, and moving
     between two word-mode registers.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether a move in that
     mode should be split into FACTOR word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
273
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch rtxes reused for every cost query.  The register
     numbers only need to be outside the range of real operands.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Fill in the size (speed_p == false) and speed (speed_p == true)
     halves of the choices array.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
301
302 static bool
303 simple_move_operand (rtx x)
304 {
305 if (GET_CODE (x) == SUBREG)
306 x = SUBREG_REG (x);
307
308 if (!OBJECT_P (x))
309 return false;
310
311 if (GET_CODE (x) == LABEL_REF
312 || GET_CODE (x) == SYMBOL_REF
313 || GET_CODE (x) == HIGH
314 || GET_CODE (x) == CONST)
315 return false;
316
317 if (MEM_P (x)
318 && (MEM_VOLATILE_P (x)
319 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
320 return false;
321
322 return true;
323 }
324
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called, so that recog_data describes its operands.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* Anything other than exactly two operands is not a simple move.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands themselves
     and acceptable to simple_move_operand.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the cost analysis must have decided this mode is worth
     splitting on this target.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
382
383 /* If SET is a copy from one multi-word pseudo-register to another,
384 record that in reg_copy_graph. Return whether it is such a
385 copy. */
386
387 static bool
388 find_pseudo_copy (rtx set)
389 {
390 rtx dest = SET_DEST (set);
391 rtx src = SET_SRC (set);
392 unsigned int rd, rs;
393 bitmap b;
394
395 if (!REG_P (dest) || !REG_P (src))
396 return false;
397
398 rd = REGNO (dest);
399 rs = REGNO (src);
400 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
401 return false;
402
403 b = reg_copy_graph[rs];
404 if (b == NULL)
405 {
406 b = BITMAP_ALLOC (NULL);
407 reg_copy_graph[rs] = b;
408 }
409
410 bitmap_set_bit (b, rd);
411
412 return true;
413 }
414
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Iterate to a fixed point: each pass pushes decomposability one
     copy edge further along reg_copy_graph.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* PROPAGATE collects the copy targets of everything in QUEUE,
	 minus the registers known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* The next queue is only what was newly discovered this pass.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
452
/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
465
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; don't look at the
		 inner REG either.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's
	     recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
582
583 /* Decompose REGNO into word-sized components. We smash the REG node
584 in place. This ensures that (1) something goes wrong quickly if we
585 fail to make some replacement, and (2) the debug information inside
586 the symbol table is automatically kept up to date. */
587
588 static void
589 decompose_register (unsigned int regno)
590 {
591 rtx reg;
592 unsigned int words, i;
593 rtvec v;
594
595 reg = regno_reg_rtx[regno];
596
597 regno_reg_rtx[regno] = NULL_RTX;
598
599 words = GET_MODE_SIZE (GET_MODE (reg));
600 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
601
602 v = rtvec_alloc (words);
603 for (i = 0; i < words; ++i)
604 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
605
606 PUT_CODE (reg, CONCATN);
607 XVEC (reg, 0) = v;
608
609 if (dump_file)
610 {
611 fprintf (dump_file, "; Splitting reg %u ->", regno);
612 for (i = 0; i < words; ++i)
613 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
614 fputc ('\n', dump_file);
615 }
616 }
617
/* Get a SUBREG of a CONCATN: return the OUTERMODE value found BYTE
   bytes into the decomposed value OP.  Return NULL_RTX if the
   requested piece would straddle two parts of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* All parts of a CONCATN are the same size, so this picks the part
     containing BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Reject a piece that crosses a part boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
657
/* Wrapper around simplify_gen_subreg which handles CONCATN: return an
   OUTERMODE rtx for the piece of OP (of mode INNERMODE) starting at
   BYTE, looking through CONCATNs created by decompose_register.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size mode change at offset 0: just look through it.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve OP itself to a part first.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* OP straddles parts; combine its offset with BYTE and
	     extract directly from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
718
/* Return whether we should resolve X into the registers into which it
   was decomposed, i.e. whether decompose_register has smashed X into
   a CONCATN.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
727
728 /* Return whether X is a SUBREG of a register which we need to
729 resolve. */
730
731 static bool
732 resolve_subreg_p (rtx x)
733 {
734 if (GET_CODE (x) != SUBREG)
735 return false;
736 return resolve_reg_p (SUBREG_REG (x));
737 }
738
/* Look for SUBREGs in *LOC which need to be decomposed, queueing a
   replacement on INSN via validate_change for each one found.
   Return true if *LOC contains a reference we cannot resolve, which
   tells the caller the containing note must be removed; INSN is
   NULL_RTX when *LOC comes from a note.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller applies the change
	     group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
776
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* A REG_EQUAL/REG_EQUIV note that mentions something we cannot
     resolve is simply dropped; if its contents changed, let df know.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD/REG_UNUSED notes that refer to a decomposed
     register; the per-word pseudos get their own notes from df.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
820
821 /* Return whether X can be decomposed into subwords. */
822
823 static bool
824 can_decompose_p (rtx x)
825 {
826 if (REG_P (x))
827 {
828 unsigned int regno = REGNO (x);
829
830 if (HARD_REGISTER_NUM_P (regno))
831 {
832 unsigned int byte, num_bytes;
833
834 num_bytes = GET_MODE_SIZE (GET_MODE (x));
835 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
836 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
837 return false;
838 return true;
839 }
840 else
841 return !bitmap_bit_p (subreg_context, regno);
842 }
843
844 return true;
845 }
846
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh register first, then recursively resolve
	 that intermediate move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      /* Preserve any REG_INC note from INSN on the new load.  */
      {
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which words are pushed so that they end
	 up at the right addresses.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so df sees a full
	 definition before the word-by-word stores.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we routed the move through a temporary, copy it on to the real
     destination now, and resolve that move recursively.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1087
1088 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1089 component registers. Return whether we changed something. */
1090
static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only clobbers of decomposed registers (or of subregs of such
     registers) need rewriting.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-sized pieces, rounding the mode size up.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite this CLOBBER in place so it clobbers only the first
     word-sized piece.  With in_group == 0, validate_change applies
     the change immediately; it is expected to always succeed here
     (asserted below, after the df rescan).  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER for each remaining piece.  Each new insn
     is inserted immediately after INSN, so iterating downward leaves
     the clobbers in ascending word order after the rewritten insn.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1128
1129 /* A USE of a decomposed register is no longer meaningful. Return
1130 whether we changed something. */
1131
1132 static bool
1133 resolve_use (rtx pat, rtx_insn *insn)
1134 {
1135 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1136 {
1137 delete_insn (insn);
1138 return true;
1139 }
1140
1141 resolve_reg_notes (insn);
1142
1143 return false;
1144 }
1145
/* Rewrite references to decomposed registers inside a debug insn's
   VAR_LOCATION so the debug information stays valid.  */
1147
static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern, rewriting
     references to registers we have decomposed.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* If the subreg simplified, install the result; otherwise
	     continue with a copy.  NOTE(review): the copy made in the
	     else arm only feeds the resolve_reg_p test below and is
	     never stored back through LOC -- presumably the intent is
	     to avoid mutating shared rtl; confirm against history.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* A direct reference to a decomposed register is replaced by an
	 unshared copy so later rewrites cannot corrupt debug info.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  /* The pattern may have changed; have df re-scan the insn.  */
  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1174
1175 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1176 set the decomposable_context bitmap accordingly. SPEED_P is true
1177 if we are optimizing INSN for speed rather than size. Return true
1178 if INSN is decomposable. */
1179
1180 static bool
1181 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1182 {
1183 rtx set;
1184 rtx op;
1185 rtx op_operand;
1186
1187 set = single_set (insn);
1188 if (!set)
1189 return false;
1190
1191 op = SET_SRC (set);
1192 if (GET_CODE (op) != ASHIFT
1193 && GET_CODE (op) != LSHIFTRT
1194 && GET_CODE (op) != ASHIFTRT
1195 && GET_CODE (op) != ZERO_EXTEND)
1196 return false;
1197
1198 op_operand = XEXP (op, 0);
1199 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1200 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1201 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1202 || GET_MODE (op) != twice_word_mode)
1203 return false;
1204
1205 if (GET_CODE (op) == ZERO_EXTEND)
1206 {
1207 if (GET_MODE (op_operand) != word_mode
1208 || !choices[speed_p].splitting_zext)
1209 return false;
1210 }
1211 else /* left or right shift */
1212 {
1213 bool *splitting = (GET_CODE (op) == ASHIFT
1214 ? choices[speed_p].splitting_ashift
1215 : GET_CODE (op) == ASHIFTRT
1216 ? choices[speed_p].splitting_ashiftrt
1217 : choices[speed_p].splitting_lshiftrt);
1218 if (!CONST_INT_P (XEXP (op, 1))
1219 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1220 2 * BITS_PER_WORD - 1)
1221 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1222 return false;
1223
1224 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1225 }
1226
1227 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1228
1229 return true;
1230 }
1231
1232 /* Decompose a more than word wide shift (in INSN) of a multiword
1233 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1234 and 'set to zero' insn. Return a pointer to the new insn when a
1235 replacement was done. */
1236
static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword value
     is reversed, but only when the source really is multiword (a
     zero_extend source is a single word).  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word that receives the (possibly
     shifted) source word; offset2 addresses the other destination word,
     which is filled with zeros or sign bits below.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift (other than the full 2*BITS_PER_WORD-1
     case handled specially below), the upper destination word is the
     sign of the source word: source >> (BITS_PER_WORD - 1).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      /* A shift by exactly BITS_PER_WORD degenerates into a plain word
	 move (emitted below); only larger counts need a residual
	 word-mode shift.  Passing dest_reg as the target lets
	 expand_shift reuse it, making the later move a no-op.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zeros for logical shifts and
     zero_extend, a copy of the (already sign-only) source word for an
     ashiftrt by 2*BITS_PER_WORD-1, otherwise the sign word computed
     above.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1337
1338 /* Print to dump_file a description of what we're doing with shift code CODE.
1339 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1340
1341 static void
1342 dump_shift_choices (enum rtx_code code, bool *splitting)
1343 {
1344 int i;
1345 const char *sep;
1346
1347 fprintf (dump_file,
1348 " Splitting mode %s for %s lowering with shift amounts = ",
1349 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1350 sep = "";
1351 for (i = 0; i < BITS_PER_WORD; i++)
1352 if (splitting[i])
1353 {
1354 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1355 sep = ",";
1356 }
1357 fprintf (dump_file, "\n");
1358 }
1359
1360 /* Print to dump_file a description of what we're doing when optimizing
1361 for speed or size; SPEED_P says which. DESCRIPTION is a description
1362 of the SPEED_P choice. */
1363
1364 static void
1365 dump_choices (bool speed_p, const char *description)
1366 {
1367 unsigned int i;
1368
1369 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1370
1371 for (i = 0; i < MAX_MACHINE_MODE; i++)
1372 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1373 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1374 choices[speed_p].move_modes_to_split[i]
1375 ? "Splitting"
1376 : "Skipping",
1377 GET_MODE_NAME ((machine_mode) i));
1378
1379 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1380 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1381 GET_MODE_NAME (twice_word_mode));
1382
1383 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1384 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1385 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1386 fprintf (dump_file, "\n");
1387 }
1388
/* Look for registers which are always accessed via word-sized SUBREGs
   or -- if DECOMPOSE_COPIES is true -- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */
1392
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Defer df rescans while we rewrite, and run a word-level DCE first
     so dead word-sized pieces do not inhibit decomposition.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn, classifying moves and recording which
     pseudos are used in decomposable vs. non-decomposable contexts.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in any non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* If operand rewrites are pending, mirror them into any
		     match_dup locations and commit the whole group.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the global context sets.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1683 \f
1684 /* Implement first lower subreg pass. */
1685
1686 namespace {
1687
/* Metadata for the first lower-subreg RTL pass ("subreg1"); it runs
   decompose_multiword_subregs without lowering pseudo copies and needs
   no TODO flags.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1700
1701 class pass_lower_subreg : public rtl_opt_pass
1702 {
1703 public:
1704 pass_lower_subreg (gcc::context *ctxt)
1705 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1706 {}
1707
1708 /* opt_pass methods: */
1709 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1710 virtual unsigned int execute (function *)
1711 {
1712 decompose_multiword_subregs (false);
1713 return 0;
1714 }
1715
1716 }; // class pass_lower_subreg
1717
1718 } // anon namespace
1719
/* Factory returning a fresh pass_lower_subreg object for CTXT.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1725
1726 /* Implement second lower subreg pass. */
1727
1728 namespace {
1729
/* Metadata for the second lower-subreg RTL pass ("subreg2"); it also
   lowers pseudo copies and requests TODO_df_finish so deferred df
   rescans are completed after the pass.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1742
1743 class pass_lower_subreg2 : public rtl_opt_pass
1744 {
1745 public:
1746 pass_lower_subreg2 (gcc::context *ctxt)
1747 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1748 {}
1749
1750 /* opt_pass methods: */
1751 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1752 virtual unsigned int execute (function *)
1753 {
1754 decompose_multiword_subregs (true);
1755 return 0;
1756 }
1757
1758 }; // class pass_lower_subreg2
1759
1760 } // anon namespace
1761
/* Factory returning a fresh pass_lower_subreg2 object for CTXT.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}