]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
[17/77] Add an int_mode_for_size helper function
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43
44
45 /* Decompose multi-word pseudo-registers into individual
46 pseudo-registers when possible and profitable. This is possible
47 when all the uses of a multi-word register are via SUBREG, or are
48 copies of the register to another location. Breaking apart the
49 register permits more CSE and permits better register allocation.
50 This is profitable if the machine does not have move instructions
51 to do this.
52
53 This pass only splits moves with modes that are wider than
54 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
55 integer modes that are twice the width of word_mode. The latter
56 could be generalized if there was a need to do this, but the trend in
57 architectures is to not need this.
58
59 There are two useful preprocessor defines for use by maintainers:
60
61 #define LOG_COSTS 1
62
63 if you wish to see the actual cost estimates that are being used
64 for each mode wider than word mode and the cost estimates for zero
65 extension and the shifts. This can be useful when port maintainers
66 are tuning insn rtx costs.
67
68 #define FORCE_LOWERING 1
69
70 if you wish to test the pass with all the transformation forced on.
71 This can be useful for finding bugs in the transformations. */
72
73 #define LOG_COSTS 0
74 #define FORCE_LOWERING 0
75
/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Used to propagate decomposability along
   pseudo-to-pseudo copy chains.  */
static vec<bitmap> reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Convenience accessors for the per-target state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
104
/* Scratch RTXes used while computing costs.  These are built once in
   init_lower_subreg and then destructively retargeted (via PUT_MODE,
   PUT_CODE etc.) for each cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
120
121 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
122 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
123
124 static int
125 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
126 machine_mode mode, int op1)
127 {
128 PUT_CODE (rtxes->shift, code);
129 PUT_MODE (rtxes->shift, mode);
130 PUT_MODE (rtxes->source, mode);
131 XEXP (rtxes->shift, 1) = GEN_INT (op1);
132 return set_src_cost (rtxes->shift, mode, speed_p);
133 }
134
135 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
136 to true if it is profitable to split a double-word CODE shift
137 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
138 for speed or size profitability.
139
140 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
141 the cost of moving zero into a word-mode register. WORD_MOVE_COST
142 is the cost of moving between word registers. */
143
144 static void
145 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
146 bool *splitting, enum rtx_code code,
147 int word_move_zero_cost, int word_move_cost)
148 {
149 int wide_cost, narrow_cost, upper_cost, i;
150
151 for (i = 0; i < BITS_PER_WORD; i++)
152 {
153 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
154 i + BITS_PER_WORD);
155 if (i == 0)
156 narrow_cost = word_move_cost;
157 else
158 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
159
160 if (code != ASHIFTRT)
161 upper_cost = word_move_zero_cost;
162 else if (i == BITS_PER_WORD - 1)
163 upper_cost = word_move_cost;
164 else
165 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
166 BITS_PER_WORD - 1);
167
168 if (LOG_COSTS)
169 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
170 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
171 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
172
173 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
174 splitting[i] = true;
175 }
176 }
177
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The decisions are
   recorded in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of setting a word-mode register to zero.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* Decide, for every multi-word mode, whether a move in that mode is
     at least as expensive as the equivalent sequence of word moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count profitability tables.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
257
258 /* Do one-per-target initialisation. This involves determining
259 which operations on the machine are profitable. If none are found,
260 then the pass just returns when called. */
261
262 void
263 init_lower_subreg (void)
264 {
265 struct cost_rtxes rtxes;
266
267 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
268
269 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
270
271 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
272 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
273 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
274 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
275 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
276
277 if (LOG_COSTS)
278 fprintf (stderr, "\nSize costs\n==========\n\n");
279 compute_costs (false, &rtxes);
280
281 if (LOG_COSTS)
282 fprintf (stderr, "\nSpeed costs\n===========\n\n");
283 compute_costs (true, &rtxes);
284 }
285
286 static bool
287 simple_move_operand (rtx x)
288 {
289 if (GET_CODE (x) == SUBREG)
290 x = SUBREG_REG (x);
291
292 if (!OBJECT_P (x))
293 return false;
294
295 if (GET_CODE (x) == LABEL_REF
296 || GET_CODE (x) == SYMBOL_REF
297 || GET_CODE (x) == HIGH
298 || GET_CODE (x) == CONST)
299 return false;
300
301 if (MEM_P (x)
302 && (MEM_VOLATILE_P (x)
303 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
304 return false;
305
306 return true;
307 }
308
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called, so that recog_data describes its operands.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the cost analysis must have decided that splitting moves
     in this mode is profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
365
366 /* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
369
370 static bool
371 find_pseudo_copy (rtx set)
372 {
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
377
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
380
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
385
386 b = reg_copy_graph[rs];
387 if (b == NULL)
388 {
389 b = BITMAP_ALLOC (NULL);
390 reg_copy_graph[rs] = b;
391 }
392
393 bitmap_set_bit (b, rd);
394
395 return true;
396 }
397
398 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
399 where they are copied to another register, add the register to
400 which they are copied to DECOMPOSABLE_CONTEXT. Use
401 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
402 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
403
404 static void
405 propagate_pseudo_copies (void)
406 {
407 auto_bitmap queue, propagate;
408
409 bitmap_copy (queue, decomposable_context);
410 do
411 {
412 bitmap_iterator iter;
413 unsigned int i;
414
415 bitmap_clear (propagate);
416
417 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
418 {
419 bitmap b = reg_copy_graph[i];
420 if (b)
421 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
422 }
423
424 bitmap_and_compl (queue, propagate, decomposable_context);
425 bitmap_ior_into (decomposable_context, propagate);
426 }
427 while (!bitmap_empty_p (queue));
428 }
429
/* A pointer to one of these values is passed to
   find_decomposable_subregs to say how the insn containing the
   current rtx uses its registers.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
442
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* We never decompose hard registers; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* Sizes rounded up to whole words.  */
	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we cannot decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  /* Only worthwhile if the word pieces can live in the
		     same registers as the whole value.  */
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's
	     recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
559
560 /* Decompose REGNO into word-sized components. We smash the REG node
561 in place. This ensures that (1) something goes wrong quickly if we
562 fail to make some replacement, and (2) the debug information inside
563 the symbol table is automatically kept up to date. */
564
565 static void
566 decompose_register (unsigned int regno)
567 {
568 rtx reg;
569 unsigned int words, i;
570 rtvec v;
571
572 reg = regno_reg_rtx[regno];
573
574 regno_reg_rtx[regno] = NULL_RTX;
575
576 words = GET_MODE_SIZE (GET_MODE (reg));
577 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
578
579 v = rtvec_alloc (words);
580 for (i = 0; i < words; ++i)
581 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
582
583 PUT_CODE (reg, CONCATN);
584 XVEC (reg, 0) = v;
585
586 if (dump_file)
587 {
588 fprintf (dump_file, "; Splitting reg %u ->", regno);
589 for (i = 0; i < words; ++i)
590 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
591 fputc ('\n', dump_file);
592 }
593 }
594
/* Get a SUBREG of a CONCATN.  OP must be a CONCATN; return the
   OUTERMODE value found at byte offset BYTE within it, or NULL_RTX if
   the requested piece straddles parts or does not fit.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
    return NULL_RTX;

  /* All parts of a CONCATN have the same size, so the containing part
     can be found by simple division.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      /* E.g. a VOIDmode CONST_INT part; synthesize a mode of the
	 part's size in the same class as the whole.  */
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* The requested piece must lie entirely within the one part.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
635
/* Wrapper around simplify_gen_subreg which handles CONCATN.  Returns
   the OUTERMODE piece of OP (whose mode is INNERMODE) at byte offset
   BYTE, looking through CONCATNs and SUBREGs of CONCATNs.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size mode change at offset 0: just look through the
	 SUBREG and recurse on the CONCATN itself.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to resolve the SUBREG to a single part of the
	 CONCATN.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The SUBREG straddled parts of the CONCATN; combine the two
	     byte offsets and extract directly from the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
691
692 /* Return whether we should resolve X into the registers into which it
693 was decomposed. */
694
695 static bool
696 resolve_reg_p (rtx x)
697 {
698 return GET_CODE (x) == CONCATN;
699 }
700
701 /* Return whether X is a SUBREG of a register which we need to
702 resolve. */
703
704 static bool
705 resolve_subreg_p (rtx x)
706 {
707 if (GET_CODE (x) != SUBREG)
708 return false;
709 return resolve_reg_p (SUBREG_REG (x));
710 }
711
/* Look for SUBREGs in *LOC which need to be decomposed, and queue the
   replacements on INSN via validate_change.  Return true when *LOC
   contains something the caller must handle itself (see below);
   otherwise false.  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller is responsible for calling
	     apply_change_group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
749
750 /* Resolve any decomposed registers which appear in register notes on
751 INSN. */
752
753 static void
754 resolve_reg_notes (rtx_insn *insn)
755 {
756 rtx *pnote, note;
757
758 note = find_reg_equal_equiv_note (insn);
759 if (note)
760 {
761 int old_count = num_validated_changes ();
762 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
763 remove_note (insn, note);
764 else
765 if (old_count != num_validated_changes ())
766 df_notes_rescan (insn);
767 }
768
769 pnote = &REG_NOTES (insn);
770 while (*pnote != NULL_RTX)
771 {
772 bool del = false;
773
774 note = *pnote;
775 switch (REG_NOTE_KIND (note))
776 {
777 case REG_DEAD:
778 case REG_UNUSED:
779 if (resolve_reg_p (XEXP (note, 0)))
780 del = true;
781 break;
782
783 default:
784 break;
785 }
786
787 if (del)
788 *pnote = XEXP (note, 1);
789 else
790 pnote = &XEXP (note, 1);
791 }
792 }
793
794 /* Return whether X can be decomposed into subwords. */
795
796 static bool
797 can_decompose_p (rtx x)
798 {
799 if (REG_P (x))
800 {
801 unsigned int regno = REGNO (x);
802
803 if (HARD_REGISTER_NUM_P (regno))
804 {
805 unsigned int byte, num_bytes;
806
807 num_bytes = GET_MODE_SIZE (GET_MODE (x));
808 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
809 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
810 return false;
811 return true;
812 }
813 else
814 return !bitmap_bit_p (subreg_context, regno);
815 }
816
817 return true;
818 }
819
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Number of word-sized pieces, rounding up.  */
  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* First move the source into a fresh register, recursively
	 resolving that simpler move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Preserve any auto-inc side effect on the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Use the integer mode of the same size; simple_move has
	     already checked that it exists.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words hit the stack so that the
	 value ends up laid out correctly in memory.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so df knows the old value
	 of the multi-word pseudo is dead.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we went through a temporary destination, finish with a single
     move into the real one, itself recursively resolved.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

  /* smove is given by the next statement.  */
      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1058
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  /* Number of word-sized pieces, rounding up.  */
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Replace the operand of the existing CLOBBER with the first word.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit separate CLOBBERs for the remaining words after INSN.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1099
1100 /* A USE of a decomposed register is no longer meaningful. Return
1101 whether we changed something. */
1102
1103 static bool
1104 resolve_use (rtx pat, rtx_insn *insn)
1105 {
1106 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1107 {
1108 delete_insn (insn);
1109 return true;
1110 }
1111
1112 resolve_reg_notes (insn);
1113
1114 return false;
1115 }
1116
/* A VAR_LOCATION can be simplified.  Rewrite references to decomposed
   registers inside a debug insn in place.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx location of the debug insn's pattern.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold a subreg of a decomposed register down to the
	     word-mode piece it refers to.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Simplification failed; fall through with a copy so the
	       resolve_reg_p check below can still inspect it.  */
	    x = copy_rtx (*loc);
	}
      if (resolve_reg_p (x))
	/* Store an unshared copy of the concatn replacement.  */
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1145
1146 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1147 set the decomposable_context bitmap accordingly. SPEED_P is true
1148 if we are optimizing INSN for speed rather than size. Return true
1149 if INSN is decomposable. */
1150
1151 static bool
1152 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1153 {
1154 rtx set;
1155 rtx op;
1156 rtx op_operand;
1157
1158 set = single_set (insn);
1159 if (!set)
1160 return false;
1161
1162 op = SET_SRC (set);
1163 if (GET_CODE (op) != ASHIFT
1164 && GET_CODE (op) != LSHIFTRT
1165 && GET_CODE (op) != ASHIFTRT
1166 && GET_CODE (op) != ZERO_EXTEND)
1167 return false;
1168
1169 op_operand = XEXP (op, 0);
1170 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1171 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1172 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1173 || GET_MODE (op) != twice_word_mode)
1174 return false;
1175
1176 if (GET_CODE (op) == ZERO_EXTEND)
1177 {
1178 if (GET_MODE (op_operand) != word_mode
1179 || !choices[speed_p].splitting_zext)
1180 return false;
1181 }
1182 else /* left or right shift */
1183 {
1184 bool *splitting = (GET_CODE (op) == ASHIFT
1185 ? choices[speed_p].splitting_ashift
1186 : GET_CODE (op) == ASHIFTRT
1187 ? choices[speed_p].splitting_ashiftrt
1188 : choices[speed_p].splitting_lshiftrt);
1189 if (!CONST_INT_P (XEXP (op, 1))
1190 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1191 2 * BITS_PER_WORD - 1)
1192 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1193 return false;
1194
1195 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1196 }
1197
1198 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1199
1200 return true;
1201 }
1202
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering of a multiword value is
     reversed, but only if the operand really is multiword.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word that receives the (possibly
     shifted) source; offset2 addresses the other destination word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift that isn't by exactly
     2 * BITS_PER_WORD - 1, the upper word must be filled with copies
     of the sign bit: shift the source word right by BITS_PER_WORD - 1
     with a signed (unsignedp == 0) shift.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is just a word move; larger
	 counts need a residual word-mode shift.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting right by every bit but one leaves only sign copies, so
       the upper word equals the shifted lower word.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1308
1309 /* Print to dump_file a description of what we're doing with shift code CODE.
1310 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1311
1312 static void
1313 dump_shift_choices (enum rtx_code code, bool *splitting)
1314 {
1315 int i;
1316 const char *sep;
1317
1318 fprintf (dump_file,
1319 " Splitting mode %s for %s lowering with shift amounts = ",
1320 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1321 sep = "";
1322 for (i = 0; i < BITS_PER_WORD; i++)
1323 if (splitting[i])
1324 {
1325 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1326 sep = ",";
1327 }
1328 fprintf (dump_file, "\n");
1329 }
1330
1331 /* Print to dump_file a description of what we're doing when optimizing
1332 for speed or size; SPEED_P says which. DESCRIPTION is a description
1333 of the SPEED_P choice. */
1334
1335 static void
1336 dump_choices (bool speed_p, const char *description)
1337 {
1338 unsigned int i;
1339
1340 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1341
1342 for (i = 0; i < MAX_MACHINE_MODE; i++)
1343 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1344 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1345 choices[speed_p].move_modes_to_split[i]
1346 ? "Splitting"
1347 : "Skipping",
1348 GET_MODE_NAME ((machine_mode) i));
1349
1350 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1351 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1352 GET_MODE_NAME (twice_word_mode));
1353
1354 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1355 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1356 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1357 fprintf (dump_file, "\n");
1358 }
1359
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn, classifying moves and recording in the
     context bitmaps which pseudos may or may not be decomposed.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo is only decomposed if no insn required it whole.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Rewrite any remaining subreg uses among the
		     extracted operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matching (dup) operands in sync with the
			 operands they duplicate before committing the
			 whole change group.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1651 \f
1652 /* Implement first lower subreg pass. */
1653
1654 namespace {
1655
/* Metadata for the "subreg1" pass; see pass_lower_subreg below.  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1668
1669 class pass_lower_subreg : public rtl_opt_pass
1670 {
1671 public:
1672 pass_lower_subreg (gcc::context *ctxt)
1673 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1674 {}
1675
1676 /* opt_pass methods: */
1677 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1678 virtual unsigned int execute (function *)
1679 {
1680 decompose_multiword_subregs (false);
1681 return 0;
1682 }
1683
1684 }; // class pass_lower_subreg
1685
1686 } // anon namespace
1687
1688 rtl_opt_pass *
1689 make_pass_lower_subreg (gcc::context *ctxt)
1690 {
1691 return new pass_lower_subreg (ctxt);
1692 }
1693
1694 /* Implement second lower subreg pass. */
1695
1696 namespace {
1697
/* Metadata for the "subreg2" pass; unlike "subreg1" it requests a df
   finish via TODO_df_finish.  See pass_lower_subreg2 below.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1710
1711 class pass_lower_subreg2 : public rtl_opt_pass
1712 {
1713 public:
1714 pass_lower_subreg2 (gcc::context *ctxt)
1715 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1716 {}
1717
1718 /* opt_pass methods: */
1719 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1720 virtual unsigned int execute (function *)
1721 {
1722 decompose_multiword_subregs (true);
1723 return 0;
1724 }
1725
1726 }; // class pass_lower_subreg2
1727
1728 } // anon namespace
1729
1730 rtl_opt_pass *
1731 make_pass_lower_subreg2 (gcc::context *ctxt)
1732 {
1733 return new pass_lower_subreg2 (ctxt);
1734 }