/* gcc/lower-subreg.c — from the thirdparty/gcc.git mirror (git.ipfire.org),
   snapshot circa 2015-07-10.  */
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "cfghooks.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "df.h"
30 #include "alias.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "obstack.h"
35 #include "cfgrtl.h"
36 #include "cfgbuild.h"
37 #include "recog.h"
38 #include "dce.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "calls.h"
43 #include "emit-rtl.h"
44 #include "varasm.h"
45 #include "stmt.h"
46 #include "expr.h"
47 #include "except.h"
48 #include "regs.h"
49 #include "tree-pass.h"
50 #include "lower-subreg.h"
51 #include "rtl-iter.h"
52
53
54 /* Decompose multi-word pseudo-registers into individual
55 pseudo-registers when possible and profitable. This is possible
56 when all the uses of a multi-word register are via SUBREG, or are
57 copies of the register to another location. Breaking apart the
58 register permits more CSE and permits better register allocation.
59 This is profitable if the machine does not have move instructions
60 to do this.
61
62 This pass only splits moves with modes that are wider than
63 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
64 integer modes that are twice the width of word_mode. The latter
65 could be generalized if there was a need to do this, but the trend in
66 architectures is to not need this.
67
68 There are two useful preprocessor defines for use by maintainers:
69
70 #define LOG_COSTS 1
71
72 if you wish to see the actual cost estimates that are being used
73 for each mode wider than word mode and the cost estimates for zero
74 extension and the shifts. This can be useful when port maintainers
75 are tuning insn rtx costs.
76
77 #define FORCE_LOWERING 1
78
79 if you wish to test the pass with all the transformation forced on.
80 This can be useful for finding bugs in the transformations. */
81
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/choice data; with SWITCHABLE_TARGET the pointer is
   swapped when the target changes.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthand accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
113
/* RTXes used while computing costs.  These are scratch expressions:
   the cost routines below mutate their codes and modes in place
   (PUT_CODE/PUT_MODE) before each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
129
130 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
131 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
132
133 static int
134 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
135 machine_mode mode, int op1)
136 {
137 PUT_CODE (rtxes->shift, code);
138 PUT_MODE (rtxes->shift, mode);
139 PUT_MODE (rtxes->source, mode);
140 XEXP (rtxes->shift, 1) = GEN_INT (op1);
141 return set_src_cost (rtxes->shift, mode, speed_p);
142 }
143
144 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
145 to true if it is profitable to split a double-word CODE shift
146 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
147 for speed or size profitability.
148
149 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
150 the cost of moving zero into a word-mode register. WORD_MOVE_COST
151 is the cost of moving between word registers. */
152
153 static void
154 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
155 bool *splitting, enum rtx_code code,
156 int word_move_zero_cost, int word_move_cost)
157 {
158 int wide_cost, narrow_cost, upper_cost, i;
159
160 for (i = 0; i < BITS_PER_WORD; i++)
161 {
162 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
163 i + BITS_PER_WORD);
164 if (i == 0)
165 narrow_cost = word_move_cost;
166 else
167 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
168
169 if (code != ASHIFTRT)
170 upper_cost = word_move_zero_cost;
171 else if (i == BITS_PER_WORD - 1)
172 upper_cost = word_move_cost;
173 else
174 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
175 BITS_PER_WORD - 1);
176
177 if (LOG_COSTS)
178 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
179 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
180 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
181
182 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
183 splitting[i] = true;
184 }
185 }
186
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   recorded in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      /* Only multi-word modes are candidates for splitting.  */
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  /* Split when a full-mode move costs at least as much as the
	     equivalent sequence of word moves.  */
	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count splitting tables for each shift
	 code we know how to lower.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
266
267 /* Do one-per-target initialisation. This involves determining
268 which operations on the machine are profitable. If none are found,
269 then the pass just returns when called. */
270
271 void
272 init_lower_subreg (void)
273 {
274 struct cost_rtxes rtxes;
275
276 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
277
278 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
279
280 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
281 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
282 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
283 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
284 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
285
286 if (LOG_COSTS)
287 fprintf (stderr, "\nSize costs\n==========\n\n");
288 compute_costs (false, &rtxes);
289
290 if (LOG_COSTS)
291 fprintf (stderr, "\nSpeed costs\n===========\n\n");
292 compute_costs (true, &rtxes);
293 }
294
295 static bool
296 simple_move_operand (rtx x)
297 {
298 if (GET_CODE (x) == SUBREG)
299 x = SUBREG_REG (x);
300
301 if (!OBJECT_P (x))
302 return false;
303
304 if (GET_CODE (x) == LABEL_REF
305 || GET_CODE (x) == SYMBOL_REF
306 || GET_CODE (x) == HIGH
307 || GET_CODE (x) == CONST)
308 return false;
309
310 if (MEM_P (x)
311 && (MEM_VOLATILE_P (x)
312 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
313 return false;
314
315 return true;
316 }
317
318 /* If INSN is a single set between two objects that we want to split,
319 return the single set. SPEED_P says whether we are optimizing
320 INSN for speed or size.
321
322 INSN should have been passed to recog and extract_insn before this
323 is called. */
324
325 static rtx
326 simple_move (rtx_insn *insn, bool speed_p)
327 {
328 rtx x;
329 rtx set;
330 machine_mode mode;
331
332 if (recog_data.n_operands != 2)
333 return NULL_RTX;
334
335 set = single_set (insn);
336 if (!set)
337 return NULL_RTX;
338
339 x = SET_DEST (set);
340 if (x != recog_data.operand[0] && x != recog_data.operand[1])
341 return NULL_RTX;
342 if (!simple_move_operand (x))
343 return NULL_RTX;
344
345 x = SET_SRC (set);
346 if (x != recog_data.operand[0] && x != recog_data.operand[1])
347 return NULL_RTX;
348 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
349 things like x86 rdtsc which returns a DImode value. */
350 if (GET_CODE (x) != ASM_OPERANDS
351 && !simple_move_operand (x))
352 return NULL_RTX;
353
354 /* We try to decompose in integer modes, to avoid generating
355 inefficient code copying between integer and floating point
356 registers. That means that we can't decompose if this is a
357 non-integer mode for which there is no integer mode of the same
358 size. */
359 mode = GET_MODE (SET_DEST (set));
360 if (!SCALAR_INT_MODE_P (mode)
361 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
362 == BLKmode))
363 return NULL_RTX;
364
365 /* Reject PARTIAL_INT modes. They are used for processor specific
366 purposes and it's probably best not to tamper with them. */
367 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
368 return NULL_RTX;
369
370 if (!choices[speed_p].move_modes_to_split[(int) mode])
371 return NULL_RTX;
372
373 return set;
374 }
375
376 /* If SET is a copy from one multi-word pseudo-register to another,
377 record that in reg_copy_graph. Return whether it is such a
378 copy. */
379
380 static bool
381 find_pseudo_copy (rtx set)
382 {
383 rtx dest = SET_DEST (set);
384 rtx src = SET_SRC (set);
385 unsigned int rd, rs;
386 bitmap b;
387
388 if (!REG_P (dest) || !REG_P (src))
389 return false;
390
391 rd = REGNO (dest);
392 rs = REGNO (src);
393 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
394 return false;
395
396 b = reg_copy_graph[rs];
397 if (b == NULL)
398 {
399 b = BITMAP_ALLOC (NULL);
400 reg_copy_graph[rs] = b;
401 }
402
403 bitmap_set_bit (b, rd);
404
405 return true;
406 }
407
408 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
409 where they are copied to another register, add the register to
410 which they are copied to DECOMPOSABLE_CONTEXT. Use
411 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
412 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
413
414 static void
415 propagate_pseudo_copies (void)
416 {
417 bitmap queue, propagate;
418
419 queue = BITMAP_ALLOC (NULL);
420 propagate = BITMAP_ALLOC (NULL);
421
422 bitmap_copy (queue, decomposable_context);
423 do
424 {
425 bitmap_iterator iter;
426 unsigned int i;
427
428 bitmap_clear (propagate);
429
430 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
431 {
432 bitmap b = reg_copy_graph[i];
433 if (b)
434 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
435 }
436
437 bitmap_and_compl (queue, propagate, decomposable_context);
438 bitmap_ior_into (decomposable_context, propagate);
439 }
440 while (!bitmap_empty_p (queue));
441
442 BITMAP_FREE (queue);
443 BITMAP_FREE (propagate);
444 }
445
/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
458
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed, so there is no
		 need to look inside this subreg.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
575
576 /* Decompose REGNO into word-sized components. We smash the REG node
577 in place. This ensures that (1) something goes wrong quickly if we
578 fail to make some replacement, and (2) the debug information inside
579 the symbol table is automatically kept up to date. */
580
581 static void
582 decompose_register (unsigned int regno)
583 {
584 rtx reg;
585 unsigned int words, i;
586 rtvec v;
587
588 reg = regno_reg_rtx[regno];
589
590 regno_reg_rtx[regno] = NULL_RTX;
591
592 words = GET_MODE_SIZE (GET_MODE (reg));
593 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
594
595 v = rtvec_alloc (words);
596 for (i = 0; i < words; ++i)
597 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
598
599 PUT_CODE (reg, CONCATN);
600 XVEC (reg, 0) = v;
601
602 if (dump_file)
603 {
604 fprintf (dump_file, "; Splitting reg %u ->", regno);
605 for (i = 0; i < words; ++i)
606 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
607 fputc ('\n', dump_file);
608 }
609 }
610
/* Get a SUBREG of a CONCATN.  OP is the CONCATN, OUTERMODE the mode
   wanted and BYTE the byte offset into OP.  Return NULL_RTX if the
   requested piece would straddle two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Size of each CONCATN element, and the element holding BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Offset of the requested piece within the chosen element; fail if
     it does not fit entirely inside that element.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
650
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Simple same-size mode change: strip the SUBREG and recurse on
	 the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve the outer SUBREG against the CONCATN
	 first.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The outer SUBREG straddled two CONCATN elements; fold both
	     byte offsets together and retry against the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
711
712 /* Return whether we should resolve X into the registers into which it
713 was decomposed. */
714
715 static bool
716 resolve_reg_p (rtx x)
717 {
718 return GET_CODE (x) == CONCATN;
719 }
720
721 /* Return whether X is a SUBREG of a register which we need to
722 resolve. */
723
724 static bool
725 resolve_subreg_p (rtx x)
726 {
727 if (GET_CODE (x) != SUBREG)
728 return false;
729 return resolve_reg_p (SUBREG_REG (x));
730 }
731
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   insn being changed, or NULL_RTX when *LOC comes from a note.  Return
   true if we found a reference that cannot be replaced in place, in
   which case the caller must remove the containing note.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller is responsible for
	     applying the change group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
769
770 /* Resolve any decomposed registers which appear in register notes on
771 INSN. */
772
773 static void
774 resolve_reg_notes (rtx_insn *insn)
775 {
776 rtx *pnote, note;
777
778 note = find_reg_equal_equiv_note (insn);
779 if (note)
780 {
781 int old_count = num_validated_changes ();
782 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
783 remove_note (insn, note);
784 else
785 if (old_count != num_validated_changes ())
786 df_notes_rescan (insn);
787 }
788
789 pnote = &REG_NOTES (insn);
790 while (*pnote != NULL_RTX)
791 {
792 bool del = false;
793
794 note = *pnote;
795 switch (REG_NOTE_KIND (note))
796 {
797 case REG_DEAD:
798 case REG_UNUSED:
799 if (resolve_reg_p (XEXP (note, 0)))
800 del = true;
801 break;
802
803 default:
804 break;
805 }
806
807 if (del)
808 *pnote = XEXP (note, 1);
809 else
810 pnote = &XEXP (note, 1);
811 }
812 }
813
814 /* Return whether X can be decomposed into subwords. */
815
816 static bool
817 can_decompose_p (rtx x)
818 {
819 if (REG_P (x))
820 {
821 unsigned int regno = REGNO (x);
822
823 if (HARD_REGISTER_NUM_P (regno))
824 {
825 unsigned int byte, num_bytes;
826
827 num_bytes = GET_MODE_SIZE (GET_MODE (x));
828 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
829 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
830 return false;
831 return true;
832 }
833 else
834 return !bitmap_bit_p (subreg_context, regno);
835 }
836
837 return true;
838 }
839
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces in the move, rounding up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh register and recursively resolve that
	 simple move; the fresh register then becomes the source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any REG_INC note over to the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the integer mode of the same size for the temporary.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words are pushed, depending on
	 the direction of stack growth versus word endianness.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Mark the whole destination as clobbered before setting it
	 piece by piece.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      /* Copy the temporary into the real destination, and resolve
	 that move recursively as well.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1078
1079 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1080 component registers. Return whether we changed something. */
1081
static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only rewrite clobbers of registers (or subregs of registers) that
     were decomposed into word-sized pseudos.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-sized pieces, rounding up for any partial word.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Replace the clobbered operand in place with word 0 of the
     decomposition.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit separate CLOBBERs after INSN for each of the remaining
     word-sized pieces.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1119
1120 /* A USE of a decomposed register is no longer meaningful. Return
1121 whether we changed something. */
1122
1123 static bool
1124 resolve_use (rtx pat, rtx_insn *insn)
1125 {
1126 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1127 {
1128 delete_insn (insn);
1129 return true;
1130 }
1131
1132 resolve_reg_notes (insn);
1133
1134 return false;
1135 }
1136
1137 /* A VAR_LOCATION can be simplified. */
1138
static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern, rewriting
     references to decomposed registers.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to express a SUBREG of a decomposed register as a
	     (concatenation of) its word-sized pieces.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Simplification failed; keep a copy so the check below
	       operates on an unshared rtx.  */
	    x = copy_rtx (*loc);
	}
      /* Unshare a direct reference to a decomposed register.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1165
1166 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1167 set the decomposable_context bitmap accordingly. SPEED_P is true
1168 if we are optimizing INSN for speed rather than size. Return true
1169 if INSN is decomposable. */
1170
static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  op = SET_SRC (set);
  /* Only shifts and zero-extensions are candidates.  */
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  op_operand = XEXP (op, 0);
  /* Source and destination must both be pseudo registers, and the
     operation must be performed in the double-word mode.  */
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Only word_mode -> twice_word_mode extensions are splittable,
	 and only if the cost analysis said so.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the per-shift-code table of profitable shift amounts.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      /* The shift count must be a constant in [BITS_PER_WORD,
	 2 * BITS_PER_WORD - 1] that was chosen for splitting.  */
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1222
1223 /* Decompose a more than word wide shift (in INSN) of a multiword
1224 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1225 and 'set to zero' insn. Return a pointer to the new insn when a
1226 replacement was done. */
1227
static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* Word order is reversed on big endian targets when the source is
     wider than a word.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* Byte offsets of the destination word that receives the shifted
     value (offset1), the other destination word (offset2), and the
     source word we read from.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift the upper word is filled with copies
     of the source's sign bit, obtained by shifting right by
     BITS_PER_WORD - 1.  The full 2*BITS_PER_WORD-1 case is handled
     separately below by copying the already-shifted low word.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      /* A shift count of exactly BITS_PER_WORD is a pure word move;
	 only the excess over BITS_PER_WORD needs a real shift.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zero for shifts/zero-extend,
     sign copies for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1328
1329 /* Print to dump_file a description of what we're doing with shift code CODE.
1330 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1331
1332 static void
1333 dump_shift_choices (enum rtx_code code, bool *splitting)
1334 {
1335 int i;
1336 const char *sep;
1337
1338 fprintf (dump_file,
1339 " Splitting mode %s for %s lowering with shift amounts = ",
1340 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1341 sep = "";
1342 for (i = 0; i < BITS_PER_WORD; i++)
1343 if (splitting[i])
1344 {
1345 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1346 sep = ",";
1347 }
1348 fprintf (dump_file, "\n");
1349 }
1350
1351 /* Print to dump_file a description of what we're doing when optimizing
1352 for speed or size; SPEED_P says which. DESCRIPTION is a description
1353 of the SPEED_P choice. */
1354
1355 static void
1356 dump_choices (bool speed_p, const char *description)
1357 {
1358 unsigned int i;
1359
1360 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1361
1362 for (i = 0; i < MAX_MACHINE_MODE; i++)
1363 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1364 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1365 choices[speed_p].move_modes_to_split[i]
1366 ? "Splitting"
1367 : "Skipping",
1368 GET_MODE_NAME ((machine_mode) i));
1369
1370 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1371 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1372 GET_MODE_NAME (twice_word_mode));
1373
1374 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1375 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1376 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1377 fprintf (dump_file, "\n");
1378 }
1379
1380 /* Look for registers which are always accessed via word-sized SUBREGs
1381 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1382 registers into several word-sized pseudo-registers. */
1383
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level dead code elimination first so we do not decompose
     registers whose only uses are dead.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan all insns, classifying each as a simple move (or
     not) and recording which pseudos are (non-)decomposable.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo is only decomposable if no context forbade it.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* Blocks that may need splitting because resolving a move
	 introduced additional possibly-trapping insns.  */
      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they duplicate before committing.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the graph itself.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1674 \f
1675 /* Implement first lower subreg pass. */
1676
1677 namespace {
1678
/* Pass metadata for the first ("subreg1") lowering pass.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1691
class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* First pass: do not decompose pseudo-to-pseudo copies.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg
1708
1709 } // anon namespace
1710
/* Factory for the first lower-subreg pass; caller owns the result.  */
rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1716
1717 /* Implement second lower subreg pass. */
1718
1719 namespace {
1720
/* Pass metadata for the second ("subreg2") lowering pass; unlike the
   first pass it requests DF finalization on completion.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1733
class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  /* Second pass: also decompose pseudo-to-pseudo copies.  */
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2
1750
1751 } // anon namespace
1752
/* Factory for the second lower-subreg pass; caller owns the result.  */
rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}