]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
always define AUTO_INC_DEC
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "alias.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "obstack.h"
34 #include "cfgrtl.h"
35 #include "cfgbuild.h"
36 #include "recog.h"
37 #include "dce.h"
38 #include "expmed.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "calls.h"
42 #include "emit-rtl.h"
43 #include "varasm.h"
44 #include "stmt.h"
45 #include "expr.h"
46 #include "except.h"
47 #include "regs.h"
48 #include "tree-pass.h"
49 #include "lower-subreg.h"
50 #include "rtl-iter.h"
51
52
53 /* Decompose multi-word pseudo-registers into individual
54 pseudo-registers when possible and profitable. This is possible
55 when all the uses of a multi-word register are via SUBREG, or are
56 copies of the register to another location. Breaking apart the
57 register permits more CSE and permits better register allocation.
58 This is profitable if the machine does not have move instructions
59 to do this.
60
61 This pass only splits moves with modes that are wider than
62 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
63 integer modes that are twice the width of word_mode. The latter
64 could be generalized if there was a need to do this, but the trend in
65 architectures is to not need this.
66
67 There are two useful preprocessor defines for use by maintainers:
68
69 #define LOG_COSTS 1
70
71 if you wish to see the actual cost estimates that are being used
72 for each mode wider than word mode and the cost estimates for zero
73 extension and the shifts. This can be useful when port maintainers
74 are tuning insn rtx costs.
75
76 #define FORCE_LOWERING 1
77
78 if you wish to test the pass with all the transformation forced on.
79 This can be useful for finding bugs in the transformations. */
80
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/profitability state; switchable when the target
   supports SWITCHABLE_TARGET.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the per-target state set up by init_lower_subreg.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

/* RTXes used while computing costs.  These are scratch nodes whose
   codes and modes are mutated in place by the cost routines.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
128
129 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
130 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
131
132 static int
133 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
134 machine_mode mode, int op1)
135 {
136 PUT_CODE (rtxes->shift, code);
137 PUT_MODE (rtxes->shift, mode);
138 PUT_MODE (rtxes->source, mode);
139 XEXP (rtxes->shift, 1) = GEN_INT (op1);
140 return set_src_cost (rtxes->shift, mode, speed_p);
141 }
142
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the whole shift in the double-word mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of producing the result word that comes from shifting the
	 surviving source word by I bits; when I is zero this
	 degenerates to a plain word move.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other result word: a zero for logical
	 shifts, a sign copy for arithmetic right shifts (which is a
	 plain move when the count already is BITS_PER_WORD - 1).  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
185
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading zero into a word register and of a word-to-word
     register move: the building blocks of every split sequence.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether a move is at
     least as cheap done as FACTOR separate word moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count profitability tables for each of
	 the three double-word shift kinds we handle.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
265
266 /* Do one-per-target initialisation. This involves determining
267 which operations on the machine are profitable. If none are found,
268 then the pass just returns when called. */
269
270 void
271 init_lower_subreg (void)
272 {
273 struct cost_rtxes rtxes;
274
275 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
276
277 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
278
279 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
280 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
281 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
282 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
283 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
284
285 if (LOG_COSTS)
286 fprintf (stderr, "\nSize costs\n==========\n\n");
287 compute_costs (false, &rtxes);
288
289 if (LOG_COSTS)
290 fprintf (stderr, "\nSpeed costs\n===========\n\n");
291 compute_costs (true, &rtxes);
292 }
293
294 static bool
295 simple_move_operand (rtx x)
296 {
297 if (GET_CODE (x) == SUBREG)
298 x = SUBREG_REG (x);
299
300 if (!OBJECT_P (x))
301 return false;
302
303 if (GET_CODE (x) == LABEL_REF
304 || GET_CODE (x) == SYMBOL_REF
305 || GET_CODE (x) == HIGH
306 || GET_CODE (x) == CONST)
307 return false;
308
309 if (MEM_P (x)
310 && (MEM_VOLATILE_P (x)
311 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
312 return false;
313
314 return true;
315 }
316
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two recognized operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Each side of the set must itself be one of the two recognized
     operands, not buried inside a larger expression.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only handle modes the cost analysis found profitable to split.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
374
375 /* If SET is a copy from one multi-word pseudo-register to another,
376 record that in reg_copy_graph. Return whether it is such a
377 copy. */
378
379 static bool
380 find_pseudo_copy (rtx set)
381 {
382 rtx dest = SET_DEST (set);
383 rtx src = SET_SRC (set);
384 unsigned int rd, rs;
385 bitmap b;
386
387 if (!REG_P (dest) || !REG_P (src))
388 return false;
389
390 rd = REGNO (dest);
391 rs = REGNO (src);
392 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
393 return false;
394
395 b = reg_copy_graph[rs];
396 if (b == NULL)
397 {
398 b = BITMAP_ALLOC (NULL);
399 reg_copy_graph[rs] = b;
400 }
401
402 bitmap_set_bit (b, rd);
403
404 return true;
405 }
406
407 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
408 where they are copied to another register, add the register to
409 which they are copied to DECOMPOSABLE_CONTEXT. Use
410 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
411 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
412
413 static void
414 propagate_pseudo_copies (void)
415 {
416 bitmap queue, propagate;
417
418 queue = BITMAP_ALLOC (NULL);
419 propagate = BITMAP_ALLOC (NULL);
420
421 bitmap_copy (queue, decomposable_context);
422 do
423 {
424 bitmap_iterator iter;
425 unsigned int i;
426
427 bitmap_clear (propagate);
428
429 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
430 {
431 bitmap b = reg_copy_graph[i];
432 if (b)
433 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
434 }
435
436 bitmap_and_compl (queue, propagate, decomposable_context);
437 bitmap_ior_into (decomposable_context, propagate);
438 }
439 while (!bitmap_empty_p (queue));
440
441 BITMAP_FREE (queue);
442 BITMAP_FREE (propagate);
443 }
444
/* Classification of the insn currently being scanned; a pointer to
   one of these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
457
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; no need to look
		 at the inner REG either.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the remaining subrtxes
	     to avoid iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's
	     recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
574
575 /* Decompose REGNO into word-sized components. We smash the REG node
576 in place. This ensures that (1) something goes wrong quickly if we
577 fail to make some replacement, and (2) the debug information inside
578 the symbol table is automatically kept up to date. */
579
580 static void
581 decompose_register (unsigned int regno)
582 {
583 rtx reg;
584 unsigned int words, i;
585 rtvec v;
586
587 reg = regno_reg_rtx[regno];
588
589 regno_reg_rtx[regno] = NULL_RTX;
590
591 words = GET_MODE_SIZE (GET_MODE (reg));
592 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
593
594 v = rtvec_alloc (words);
595 for (i = 0; i < words; ++i)
596 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
597
598 PUT_CODE (reg, CONCATN);
599 XVEC (reg, 0) = v;
600
601 if (dump_file)
602 {
603 fprintf (dump_file, "; Splitting reg %u ->", regno);
604 for (i = 0; i < words; ++i)
605 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
606 fputc ('\n', dump_file);
607 }
608 }
609
/* Get a SUBREG of a CONCATN: return an OUTERMODE rtx for the piece of
   OP found at byte offset BYTE, or NULL_RTX if the requested piece
   straddles two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* Size in bytes of each element of the CONCATN, and the element
     containing the requested byte.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece is not fully contained in one part.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
649
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, offset-0 mode change: recurse on the CONCATN
	 directly, using its own mode.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve the outer SUBREG first.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The outer SUBREG straddled CONCATN elements; fold the two
	     byte offsets together and extract from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
710
711 /* Return whether we should resolve X into the registers into which it
712 was decomposed. */
713
714 static bool
715 resolve_reg_p (rtx x)
716 {
717 return GET_CODE (x) == CONCATN;
718 }
719
720 /* Return whether X is a SUBREG of a register which we need to
721 resolve. */
722
723 static bool
724 resolve_subreg_p (rtx x)
725 {
726 if (GET_CODE (x) != SUBREG)
727 return false;
728 return resolve_reg_p (SUBREG_REG (x));
729 }
730
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   containing insn, or NULL_RTX when *LOC comes from a note.  Return
   true if a reference was found that cannot be replaced in place, in
   which case the caller must discard the note.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      /* Only notes may pass a NULL insn here.  */
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller commits it with
	     apply_change_group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
768
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* A REG_EQUAL/REG_EQUIV note either gets its subregs resolved in
     place or, if that is impossible, is dropped entirely.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Walk the note chain with a pointer-to-pointer so deleted notes
     can be unlinked without a separate "previous" variable.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  /* Liveness notes for a decomposed register no longer apply.  */
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
812
813 /* Return whether X can be decomposed into subwords. */
814
815 static bool
816 can_decompose_p (rtx x)
817 {
818 if (REG_P (x))
819 {
820 unsigned int regno = REGNO (x);
821
822 if (HARD_REGISTER_NUM_P (regno))
823 {
824 unsigned int byte, num_bytes;
825
826 num_bytes = GET_MODE_SIZE (GET_MODE (x));
827 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
828 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
829 return false;
830 return true;
831 }
832 else
833 return !bitmap_bit_p (subreg_context, regno);
834 }
835
836 return true;
837 }
838
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces; only multi-word moves reach here.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC to a fresh register first, and recursively resolve
	 that intermediate move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#if AUTO_INC_DEC
      {
	/* Preserve any auto-inc/dec note on the original insn by
	   copying it to the new move.  */
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the same-sized integer mode for the temporary.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Push the words in whichever order matches the stack layout.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the destination first so the register allocator knows
	 the whole pseudo dies here before the piecewise stores.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we copied through a temporary, emit the final move into the
     real destination and resolve that move recursively too.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#if AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1079
1080 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1081 component registers. Return whether we changed something. */
1082
1083 static bool
1084 resolve_clobber (rtx pat, rtx_insn *insn)
1085 {
1086 rtx reg;
1087 machine_mode orig_mode;
1088 unsigned int words, i;
1089 int ret;
1090
1091 reg = XEXP (pat, 0);
1092 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1093 return false;
1094
1095 orig_mode = GET_MODE (reg);
1096 words = GET_MODE_SIZE (orig_mode);
1097 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1098
1099 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1100 simplify_gen_subreg_concatn (word_mode, reg,
1101 orig_mode, 0),
1102 0);
1103 df_insn_rescan (insn);
1104 gcc_assert (ret != 0);
1105
1106 for (i = words - 1; i > 0; --i)
1107 {
1108 rtx x;
1109
1110 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1111 i * UNITS_PER_WORD);
1112 x = gen_rtx_CLOBBER (VOIDmode, x);
1113 emit_insn_after (x, insn);
1114 }
1115
1116 resolve_reg_notes (insn);
1117
1118 return true;
1119 }
1120
1121 /* A USE of a decomposed register is no longer meaningful. Return
1122 whether we changed something. */
1123
1124 static bool
1125 resolve_use (rtx pat, rtx_insn *insn)
1126 {
1127 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1128 {
1129 delete_insn (insn);
1130 return true;
1131 }
1132
1133 resolve_reg_notes (insn);
1134
1135 return false;
1136 }
1137
/* A VAR_LOCATION can be simplified.  Rewrite the location expression
   of debug insn INSN so that it no longer refers to decomposed
   registers, or to SUBREGs of them, in shared form.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the pattern with write access, so we can
     replace locations in place.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold the SUBREG of a decomposed register down to
	     the component word(s) it selects.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* NOTE(review): when simplification fails, this copy feeds
	       only the resolve_reg_p check below, which gives the same
	       answer for the copy as for the original — confirm whether
	       the unshared copy was meant to be stored back.  */
	    x = copy_rtx (*loc);
	}
      /* Unshare any reference to a decomposed register so that later
	 rewriting cannot corrupt a shared rtx.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  /* The pattern may have changed; let the dataflow machinery re-read it.  */
  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1166
1167 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1168 set the decomposable_context bitmap accordingly. SPEED_P is true
1169 if we are optimizing INSN for speed rather than size. Return true
1170 if INSN is decomposable. */
1171
1172 static bool
1173 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1174 {
1175 rtx set;
1176 rtx op;
1177 rtx op_operand;
1178
1179 set = single_set (insn);
1180 if (!set)
1181 return false;
1182
1183 op = SET_SRC (set);
1184 if (GET_CODE (op) != ASHIFT
1185 && GET_CODE (op) != LSHIFTRT
1186 && GET_CODE (op) != ASHIFTRT
1187 && GET_CODE (op) != ZERO_EXTEND)
1188 return false;
1189
1190 op_operand = XEXP (op, 0);
1191 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1192 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1193 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1194 || GET_MODE (op) != twice_word_mode)
1195 return false;
1196
1197 if (GET_CODE (op) == ZERO_EXTEND)
1198 {
1199 if (GET_MODE (op_operand) != word_mode
1200 || !choices[speed_p].splitting_zext)
1201 return false;
1202 }
1203 else /* left or right shift */
1204 {
1205 bool *splitting = (GET_CODE (op) == ASHIFT
1206 ? choices[speed_p].splitting_ashift
1207 : GET_CODE (op) == ASHIFTRT
1208 ? choices[speed_p].splitting_ashiftrt
1209 : choices[speed_p].splitting_lshiftrt);
1210 if (!CONST_INT_P (XEXP (op, 1))
1211 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1212 2 * BITS_PER_WORD - 1)
1213 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1214 return false;
1215
1216 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1217 }
1218
1219 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1220
1221 return true;
1222 }
1223
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* Big-endian word numbering is reversed for a multiword source
     (not for the word_mode source of a zero_extend).  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* OFFSET1 addresses the destination word that receives the (shifted)
     source; OFFSET2 the other destination word; SRC_OFFSET the source
     word being read.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift by less than the full 2*BITS_PER_WORD-1,
     the "upper" destination word is the source word shifted right
     arithmetically by BITS_PER_WORD-1 (i.e. replicated sign bit —
     expand_shift with unsignedp == 0).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      /* Shift amounts larger than a word need a residual word-mode
	 shift of SHIFT_COUNT - BITS_PER_WORD on the surviving word.  */
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  /* Move the (possibly shifted) source word into place, then fill the
     other destination word: zero for logical shifts and zero_extend,
     the sign-mask word for arithmetic right shifts.  */
  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shift by 2*BITS_PER_WORD-1: both words equal the sign-mask.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    /* Reached exactly when UPPER_SRC was computed above.  */
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1329
1330 /* Print to dump_file a description of what we're doing with shift code CODE.
1331 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1332
1333 static void
1334 dump_shift_choices (enum rtx_code code, bool *splitting)
1335 {
1336 int i;
1337 const char *sep;
1338
1339 fprintf (dump_file,
1340 " Splitting mode %s for %s lowering with shift amounts = ",
1341 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1342 sep = "";
1343 for (i = 0; i < BITS_PER_WORD; i++)
1344 if (splitting[i])
1345 {
1346 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1347 sep = ",";
1348 }
1349 fprintf (dump_file, "\n");
1350 }
1351
1352 /* Print to dump_file a description of what we're doing when optimizing
1353 for speed or size; SPEED_P says which. DESCRIPTION is a description
1354 of the SPEED_P choice. */
1355
1356 static void
1357 dump_choices (bool speed_p, const char *description)
1358 {
1359 unsigned int i;
1360
1361 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1362
1363 for (i = 0; i < MAX_MACHINE_MODE; i++)
1364 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1365 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1366 choices[speed_p].move_modes_to_split[i]
1367 ? "Splitting"
1368 : "Skipping",
1369 GET_MODE_NAME ((machine_mode) i));
1370
1371 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1372 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1373 GET_MODE_NAME (twice_word_mode));
1374
1375 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1376 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1377 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1378 fprintf (dump_file, "\n");
1379 }
1380
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level dead code elimination first, deferring insn rescans
     so dataflow info is updated in bulk.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn, classifying each register reference as
     decomposable or not and building the pseudo-copy graph.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  /* CLOBBERs and USEs do not constrain decomposability here;
	     they are rewritten in phase 2.  */
	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* Any register also seen in a non-decomposable context must not be
     decomposed.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      /* Create the word-sized replacement pseudos.  */
      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Replace remaining SUBREG uses operand by operand.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* Keep matching duplicate operands in sync with the
		     operands they duplicate, then commit all queued
		     changes at once.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy bitmaps and the global contexts.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1675 \f
/* Implement first lower subreg pass.  */

namespace {

/* Pass metadata for "subreg1", the early lowering pass; it does not
   mark pseudo-to-pseudo copies as decomposable (see
   decompose_multiword_subregs).  */

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* FALSE: copies between pseudos are not decomposed in this
	 early pass.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1711
1712 rtl_opt_pass *
1713 make_pass_lower_subreg (gcc::context *ctxt)
1714 {
1715 return new pass_lower_subreg (ctxt);
1716 }
1717
/* Implement second lower subreg pass.  */

namespace {

/* Pass metadata for "subreg2", the later lowering pass; unlike
   "subreg1" it also decomposes pseudo-to-pseudo copies and requests a
   dataflow finish.  */

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* TRUE: this late pass also decomposes pseudo-to-pseudo
	 copies.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1753
1754 rtl_opt_pass *
1755 make_pass_lower_subreg2 (gcc::context *ctxt)
1756 {
1757 return new pass_lower_subreg2 (ctxt);
1758 }