/* gcc/lower-subreg.c — mirrored from git.ipfire.org thirdparty/gcc.git
   (blob view captured 2015-10-15).  */
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "double-int.h"
30 #include "input.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "wide-int.h"
34 #include "inchash.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "obstack.h"
41 #include "predict.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfgbuild.h"
48 #include "basic-block.h"
49 #include "recog.h"
50 #include "bitmap.h"
51 #include "dce.h"
52 #include "hashtab.h"
53 #include "statistics.h"
54 #include "real.h"
55 #include "fixed-value.h"
56 #include "expmed.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "calls.h"
60 #include "emit-rtl.h"
61 #include "varasm.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "except.h"
65 #include "regs.h"
66 #include "tree-pass.h"
67 #include "df.h"
68 #include "lower-subreg.h"
69 #include "rtl-iter.h"
70
/* Normalize STACK_GROWS_DOWNWARD so it can be used as a value in
   expressions (e.g. compared with WORDS_BIG_ENDIAN below): targets
   define it with an empty expansion, so remap "defined" to 1 and
   "not defined" to 0.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif
77
78
79 /* Decompose multi-word pseudo-registers into individual
80 pseudo-registers when possible and profitable. This is possible
81 when all the uses of a multi-word register are via SUBREG, or are
82 copies of the register to another location. Breaking apart the
83 register permits more CSE and permits better register allocation.
84 This is profitable if the machine does not have move instructions
85 to do this.
86
87 This pass only splits moves with modes that are wider than
88 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
89 integer modes that are twice the width of word_mode. The latter
90 could be generalized if there was a need to do this, but the trend in
91 architectures is to not need this.
92
93 There are two useful preprocessor defines for use by maintainers:
94
95 #define LOG_COSTS 1
96
97 if you wish to see the actual cost estimates that are being used
98 for each mode wider than word mode and the cost estimates for zero
99 extension and the shifts. This can be useful when port maintainers
100 are tuning insn rtx costs.
101
102 #define FORCE_LOWERING 1
103
104 if you wish to test the pass with all the transformation forced on.
105 This can be useful for finding bugs in the transformations. */
106
/* Both maintainer knobs (documented above) default to off in
   committed code.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0
109
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target cost/choice data computed by init_lower_subreg.  With
   SWITCHABLE_TARGET there can be several instances, selected through
   this_target_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the current target's copies of these fields.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
138
/* RTXes used while computing costs.  These are scratch nodes that the
   cost routines mutate in place (codes, modes, operands) instead of
   allocating fresh RTL for every cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
154
155 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
156 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
157
158 static int
159 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
160 machine_mode mode, int op1)
161 {
162 PUT_CODE (rtxes->shift, code);
163 PUT_MODE (rtxes->shift, mode);
164 PUT_MODE (rtxes->source, mode);
165 XEXP (rtxes->shift, 1) = GEN_INT (op1);
166 return set_src_cost (rtxes->shift, speed_p);
167 }
168
169 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
170 to true if it is profitable to split a double-word CODE shift
171 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
172 for speed or size profitability.
173
174 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
175 the cost of moving zero into a word-mode register. WORD_MOVE_COST
176 is the cost of moving between word registers. */
177
178 static void
179 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
180 bool *splitting, enum rtx_code code,
181 int word_move_zero_cost, int word_move_cost)
182 {
183 int wide_cost, narrow_cost, upper_cost, i;
184
185 for (i = 0; i < BITS_PER_WORD; i++)
186 {
187 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
188 i + BITS_PER_WORD);
189 if (i == 0)
190 narrow_cost = word_move_cost;
191 else
192 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
193
194 if (code != ASHIFTRT)
195 upper_cost = word_move_zero_cost;
196 else if (i == BITS_PER_WORD - 1)
197 upper_cost = word_move_cost;
198 else
199 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
200 BITS_PER_WORD - 1);
201
202 if (LOG_COSTS)
203 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
204 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
205 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
206
207 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
208 splitting[i] = true;
209 }
210 }
211
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  Results are stored
   in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: loading zero into a word-mode register, and
     copying one word-mode register to another.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, split a move in that mode if
     it costs at least as much as the equivalent number of word
     moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Record, per shift count, whether splitting each kind of
	 double-word shift is profitable.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
291
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  /* Clear all previously computed choices for this target.  This must
     happen before twice_word_mode is set, since that macro expands to
     a field of *this_target_lower_subreg.  */
  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch rtxes that the cost routines mutate in place.
     The REGs use pseudo register numbers so they look like ordinary
     pseudos to the cost model.  */
  rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
  rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
  rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
319
320 static bool
321 simple_move_operand (rtx x)
322 {
323 if (GET_CODE (x) == SUBREG)
324 x = SUBREG_REG (x);
325
326 if (!OBJECT_P (x))
327 return false;
328
329 if (GET_CODE (x) == LABEL_REF
330 || GET_CODE (x) == SYMBOL_REF
331 || GET_CODE (x) == HIGH
332 || GET_CODE (x) == CONST)
333 return false;
334
335 if (MEM_P (x)
336 && (MEM_VOLATILE_P (x)
337 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
338 return false;
339
340 return true;
341 }
342
343 /* If INSN is a single set between two objects that we want to split,
344 return the single set. SPEED_P says whether we are optimizing
345 INSN for speed or size.
346
347 INSN should have been passed to recog and extract_insn before this
348 is called. */
349
350 static rtx
351 simple_move (rtx_insn *insn, bool speed_p)
352 {
353 rtx x;
354 rtx set;
355 machine_mode mode;
356
357 if (recog_data.n_operands != 2)
358 return NULL_RTX;
359
360 set = single_set (insn);
361 if (!set)
362 return NULL_RTX;
363
364 x = SET_DEST (set);
365 if (x != recog_data.operand[0] && x != recog_data.operand[1])
366 return NULL_RTX;
367 if (!simple_move_operand (x))
368 return NULL_RTX;
369
370 x = SET_SRC (set);
371 if (x != recog_data.operand[0] && x != recog_data.operand[1])
372 return NULL_RTX;
373 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
374 things like x86 rdtsc which returns a DImode value. */
375 if (GET_CODE (x) != ASM_OPERANDS
376 && !simple_move_operand (x))
377 return NULL_RTX;
378
379 /* We try to decompose in integer modes, to avoid generating
380 inefficient code copying between integer and floating point
381 registers. That means that we can't decompose if this is a
382 non-integer mode for which there is no integer mode of the same
383 size. */
384 mode = GET_MODE (SET_DEST (set));
385 if (!SCALAR_INT_MODE_P (mode)
386 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
387 == BLKmode))
388 return NULL_RTX;
389
390 /* Reject PARTIAL_INT modes. They are used for processor specific
391 purposes and it's probably best not to tamper with them. */
392 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393 return NULL_RTX;
394
395 if (!choices[speed_p].move_modes_to_split[(int) mode])
396 return NULL_RTX;
397
398 return set;
399 }
400
401 /* If SET is a copy from one multi-word pseudo-register to another,
402 record that in reg_copy_graph. Return whether it is such a
403 copy. */
404
405 static bool
406 find_pseudo_copy (rtx set)
407 {
408 rtx dest = SET_DEST (set);
409 rtx src = SET_SRC (set);
410 unsigned int rd, rs;
411 bitmap b;
412
413 if (!REG_P (dest) || !REG_P (src))
414 return false;
415
416 rd = REGNO (dest);
417 rs = REGNO (src);
418 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
419 return false;
420
421 b = reg_copy_graph[rs];
422 if (b == NULL)
423 {
424 b = BITMAP_ALLOC (NULL);
425 reg_copy_graph[rs] = b;
426 }
427
428 bitmap_set_bit (b, rd);
429
430 return true;
431 }
432
433 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
434 where they are copied to another register, add the register to
435 which they are copied to DECOMPOSABLE_CONTEXT. Use
436 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
437 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
438
439 static void
440 propagate_pseudo_copies (void)
441 {
442 bitmap queue, propagate;
443
444 queue = BITMAP_ALLOC (NULL);
445 propagate = BITMAP_ALLOC (NULL);
446
447 bitmap_copy (queue, decomposable_context);
448 do
449 {
450 bitmap_iterator iter;
451 unsigned int i;
452
453 bitmap_clear (propagate);
454
455 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
456 {
457 bitmap b = reg_copy_graph[i];
458 if (b)
459 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
460 }
461
462 bitmap_and_compl (queue, propagate, decomposable_context);
463 bitmap_ior_into (decomposable_context, propagate);
464 }
465 while (!bitmap_empty_p (queue));
466
467 BITMAP_FREE (queue);
468 BITMAP_FREE (propagate);
469 }
470
/* Classification of the insn being scanned; a pointer to one of these
   values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
483
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* We only track pseudos; don't scan inside the SUBREG of
		 a hard register.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
600
601 /* Decompose REGNO into word-sized components. We smash the REG node
602 in place. This ensures that (1) something goes wrong quickly if we
603 fail to make some replacement, and (2) the debug information inside
604 the symbol table is automatically kept up to date. */
605
606 static void
607 decompose_register (unsigned int regno)
608 {
609 rtx reg;
610 unsigned int words, i;
611 rtvec v;
612
613 reg = regno_reg_rtx[regno];
614
615 regno_reg_rtx[regno] = NULL_RTX;
616
617 words = GET_MODE_SIZE (GET_MODE (reg));
618 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
619
620 v = rtvec_alloc (words);
621 for (i = 0; i < words; ++i)
622 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
623
624 PUT_CODE (reg, CONCATN);
625 XVEC (reg, 0) = v;
626
627 if (dump_file)
628 {
629 fprintf (dump_file, "; Splitting reg %u ->", regno);
630 for (i = 0; i < words; ++i)
631 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
632 fputc ('\n', dump_file);
633 }
634 }
635
/* Get a SUBREG of a CONCATN.  OP must be a CONCATN; return the
   OUTERMODE-sized piece of it starting at byte offset BYTE, or
   NULL_RTX if the requested piece would straddle two elements of the
   CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* All elements of a CONCATN have the same size; find the element
     containing BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Punt if the piece does not lie entirely within one element.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
675
/* Wrapper around simplify_gen_subreg which handles CONCATN, including
   OP being a SUBREG of a CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is just a mode change: drop it
	 and recurse on the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The SUBREG straddled CONCATN elements; combine the two
	     byte offsets and extract directly from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
736
/* Return whether we should resolve X into the registers into which it
   was decomposed.  decompose_register smashes such a REG into a
   CONCATN in place, so the rtx code is the marker.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
745
746 /* Return whether X is a SUBREG of a register which we need to
747 resolve. */
748
749 static bool
750 resolve_subreg_p (rtx x)
751 {
752 if (GET_CODE (x) != SUBREG)
753 return false;
754 return resolve_reg_p (SUBREG_REG (x));
755 }
756
/* Look for SUBREGs in *LOC which need to be decomposed, and queue
   replacements with references to the component registers.  INSN is
   the insn being changed, or NULL_RTX when *LOC comes from a note.
   The queued changes are committed by the caller via
   apply_change_group.  Return true if the caller must instead remove
   the containing note.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement (in-group mode); it is validated and
	     committed later by apply_change_group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
794
795 /* Resolve any decomposed registers which appear in register notes on
796 INSN. */
797
798 static void
799 resolve_reg_notes (rtx_insn *insn)
800 {
801 rtx *pnote, note;
802
803 note = find_reg_equal_equiv_note (insn);
804 if (note)
805 {
806 int old_count = num_validated_changes ();
807 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
808 remove_note (insn, note);
809 else
810 if (old_count != num_validated_changes ())
811 df_notes_rescan (insn);
812 }
813
814 pnote = &REG_NOTES (insn);
815 while (*pnote != NULL_RTX)
816 {
817 bool del = false;
818
819 note = *pnote;
820 switch (REG_NOTE_KIND (note))
821 {
822 case REG_DEAD:
823 case REG_UNUSED:
824 if (resolve_reg_p (XEXP (note, 0)))
825 del = true;
826 break;
827
828 default:
829 break;
830 }
831
832 if (del)
833 *pnote = XEXP (note, 1);
834 else
835 pnote = &XEXP (note, 1);
836 }
837 }
838
839 /* Return whether X can be decomposed into subwords. */
840
841 static bool
842 can_decompose_p (rtx x)
843 {
844 if (REG_P (x))
845 {
846 unsigned int regno = REGNO (x);
847
848 if (HARD_REGISTER_NUM_P (regno))
849 {
850 unsigned int byte, num_bytes;
851
852 num_bytes = GET_MODE_SIZE (GET_MODE (x));
853 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
854 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
855 return false;
856 return true;
857 }
858 else
859 return !bitmap_bit_p (subreg_context, regno);
860 }
861
862 return true;
863 }
864
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh temporary and resolve that move
	 recursively; the copy into DEST proceeds from the
	 temporary.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      {
	/* Keep any auto-inc note attached to the move that now
	   performs the memory access.  */
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Do the word-by-word copy in the integer mode of the same
	     size; the final copy to REAL_DEST restores the original
	     mode.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order so the pushes land in memory in the
	 layout the target expects.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole pseudo before the word-by-word writes begin
	 — presumably so dataflow sees a full definition rather than a
	 series of partial ones (NOTE(review): confirm intent).  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      /* Copy the temporary into the real destination and resolve that
	 move recursively as well.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1105
1106 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1107 component registers. Return whether we changed something. */
1108
static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  /* Only rewrite the CLOBBER when its operand is a register (or a
     subreg of one) that we have decomposed into word-mode pieces.  */
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  /* Round the byte size up to a whole number of words.  */
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Replace the clobbered operand in place with the first word-sized
     piece; the remaining pieces get their own CLOBBER insns below.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER after INSN for each remaining word.
     Iterating downward and always inserting right after INSN leaves
     the new clobbers in ascending word order.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1146
1147 /* A USE of a decomposed register is no longer meaningful. Return
1148 whether we changed something. */
1149
1150 static bool
1151 resolve_use (rtx pat, rtx_insn *insn)
1152 {
1153 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1154 {
1155 delete_insn (insn);
1156 return true;
1157 }
1158
1159 resolve_reg_notes (insn);
1160
1161 return false;
1162 }
1163
1164 /* A VAR_LOCATION can be simplified. */
1165
static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern, replacing
     references to decomposed registers as we go.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Simplification failed; fall through to the register test
	       below with a copy of the original.  NOTE(review): *loc
	       itself is intentionally left untouched on this path.  */
	    x = copy_rtx (*loc);
	}
      if (resolve_reg_p (x))
	/* Store an unshared copy so later substitution of the
	   decomposed register cannot corrupt shared RTL.  */
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1192
1193 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1194 set the decomposable_context bitmap accordingly. SPEED_P is true
1195 if we are optimizing INSN for speed rather than size. Return true
1196 if INSN is decomposable. */
1197
1198 static bool
1199 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1200 {
1201 rtx set;
1202 rtx op;
1203 rtx op_operand;
1204
1205 set = single_set (insn);
1206 if (!set)
1207 return false;
1208
1209 op = SET_SRC (set);
1210 if (GET_CODE (op) != ASHIFT
1211 && GET_CODE (op) != LSHIFTRT
1212 && GET_CODE (op) != ASHIFTRT
1213 && GET_CODE (op) != ZERO_EXTEND)
1214 return false;
1215
1216 op_operand = XEXP (op, 0);
1217 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1218 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1219 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1220 || GET_MODE (op) != twice_word_mode)
1221 return false;
1222
1223 if (GET_CODE (op) == ZERO_EXTEND)
1224 {
1225 if (GET_MODE (op_operand) != word_mode
1226 || !choices[speed_p].splitting_zext)
1227 return false;
1228 }
1229 else /* left or right shift */
1230 {
1231 bool *splitting = (GET_CODE (op) == ASHIFT
1232 ? choices[speed_p].splitting_ashift
1233 : GET_CODE (op) == ASHIFTRT
1234 ? choices[speed_p].splitting_ashiftrt
1235 : choices[speed_p].splitting_lshiftrt);
1236 if (!CONST_INT_P (XEXP (op, 1))
1237 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1238 2 * BITS_PER_WORD - 1)
1239 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1240 return false;
1241
1242 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1243 }
1244
1245 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1246
1247 return true;
1248 }
1249
1250 /* Decompose a more than word wide shift (in INSN) of a multiword
1251 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1252 and 'set to zero' insn. Return a pointer to the new insn when a
1253 replacement was done. */
1254
static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word order inside a multi-word value is
     reversed, so flip the index (only when the source really is
     multi-word; a word-mode zero_extend source has just one word).  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  /* dest_reg_num is the word of the destination that receives the
     (possibly shifted) source word; the other word gets zeros or
     sign-bit copies.  */
  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift by less than 2*BITS_PER_WORD-1, the
     upper result word is the source word shifted down to replicate
     the sign bit.  (The -1 case is handled by copying src_reg.)  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* If the shift amount exceeds a full word, the remaining shift is
     done within the single surviving source word.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the other destination word: zeros for logical shifts and
     zero_extend, sign-bit copies for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1355
1356 /* Print to dump_file a description of what we're doing with shift code CODE.
1357 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1358
1359 static void
1360 dump_shift_choices (enum rtx_code code, bool *splitting)
1361 {
1362 int i;
1363 const char *sep;
1364
1365 fprintf (dump_file,
1366 " Splitting mode %s for %s lowering with shift amounts = ",
1367 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1368 sep = "";
1369 for (i = 0; i < BITS_PER_WORD; i++)
1370 if (splitting[i])
1371 {
1372 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1373 sep = ",";
1374 }
1375 fprintf (dump_file, "\n");
1376 }
1377
1378 /* Print to dump_file a description of what we're doing when optimizing
1379 for speed or size; SPEED_P says which. DESCRIPTION is a description
1380 of the SPEED_P choice. */
1381
1382 static void
1383 dump_choices (bool speed_p, const char *description)
1384 {
1385 unsigned int i;
1386
1387 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1388
1389 for (i = 0; i < MAX_MACHINE_MODE; i++)
1390 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1391 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1392 choices[speed_p].move_modes_to_split[i]
1393 ? "Splitting"
1394 : "Skipping",
1395 GET_MODE_NAME ((machine_mode) i));
1396
1397 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1398 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1399 GET_MODE_NAME (twice_word_mode));
1400
1401 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1402 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1403 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1404 fprintf (dump_file, "\n");
1405 }
1406
1407 /* Look for registers which are always accessed via word-sized SUBREGs
1408 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1409 registers into several word-sized pseudo-registers. */
1410
static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* df is only set up on the later invocations of this pass; run a
     word-level dead code elimination first in that case.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan all insns to classify each pseudo as decomposable
     or not, filling in the context bitmaps and the copy graph.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 pseudo in terms of its word-mode pieces.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands they must match before committing the
			 whole group of changes.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1701 \f
1702 /* Implement first lower subreg pass. */
1703
namespace {

/* Pass metadata for the first lower-subreg pass ("subreg1").  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is in effect.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* false: do not treat pseudo-to-pseudo copies as decomposable
	 during this early pass.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace
1737
/* Factory for the first lower-subreg pass; the caller takes ownership
   of the returned object.  */
rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1743
1744 /* Implement second lower subreg pass. */
1745
namespace {

/* Pass metadata for the second lower-subreg pass ("subreg2"); unlike
   the first pass it requests a df finalization at the end.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is in effect.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* true: this later pass also decomposes pseudo-to-pseudo
	 copies.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace
1779
/* Factory for the second lower-subreg pass; the caller takes ownership
   of the returned object.  */
rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}