1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2021 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44
45
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
 50      register permits more CSE and better register allocation.
 51      This is profitable if the machine does not have move instructions
 52      that handle the wider modes directly.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
 57      could be generalized if there were a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
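/* As a concrete illustration (the register numbers below are purely
   hypothetical): on a 32-bit target where word_mode is SImode, a move such as

       (set (reg:DI 100) (reg:DI 101))

   whose DImode pseudos are only ever accessed word by word can be lowered
   to two word_mode moves of the decomposed halves, e.g.

       (set (reg:SI 200) (reg:SI 202))
       (set (reg:SI 201) (reg:SI 203))  */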
73
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76
77 /* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79 static bitmap decomposable_context;
80
81 /* Bit N in this bitmap is set if regno N is used in a context in
82 which it cannot be decomposed. */
83 static bitmap non_decomposable_context;
84
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86 which changes the mode but not the size. This typically happens
 87    when the register is accessed as a floating-point value; we want to
88 avoid generating accesses to its subwords in integer modes. */
89 static bitmap subreg_context;
90
91 /* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
93 static vec<bitmap> reg_copy_graph;
94
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99 #endif
100
101 #define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104 this_target_lower_subreg->x_choices
105
106 /* Return true if MODE is a mode we know how to lower. When returning true,
 107    store its size in bytes in *BYTES and its size in words in *WORDS.  */
108
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112 {
113 if (!GET_MODE_SIZE (mode).is_constant (bytes))
114 return false;
115 *words = CEIL (*bytes, UNITS_PER_WORD);
116 return true;
117 }
118
119 /* RTXes used while computing costs. */
120 struct cost_rtxes {
121 /* Source and target registers. */
122 rtx source;
123 rtx target;
124
125 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
126 rtx zext;
127
128 /* A shift of SOURCE. */
129 rtx shift;
130
131 /* A SET of TARGET. */
132 rtx set;
133 };
134
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
137
138 static int
139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 machine_mode mode, int op1)
141 {
142 PUT_CODE (rtxes->shift, code);
143 PUT_MODE (rtxes->shift, mode);
144 PUT_MODE (rtxes->source, mode);
145 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146 return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150 to true if it is profitable to split a double-word CODE shift
151 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
152 for speed or size profitability.
153
154 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
155 the cost of moving zero into a word-mode register. WORD_MOVE_COST
156 is the cost of moving between word registers. */
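/* For example (with made-up numbers): if a double-word LSHIFTRT by 33 has
   an rtx cost of 8 on a 32-bit target, while a word_mode shift by 1 costs 4
   and loading zero into a word register costs 2, then 8 >= 4 + 2 and the
   shift amount is recorded as profitable to split.  The real values come
   from the target's rtx costs.  */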
157
158 static void
159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 bool *splitting, enum rtx_code code,
161 int word_move_zero_cost, int word_move_cost)
162 {
163 int wide_cost, narrow_cost, upper_cost, i;
164
165 for (i = 0; i < BITS_PER_WORD; i++)
166 {
167 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 i + BITS_PER_WORD);
169 if (i == 0)
170 narrow_cost = word_move_cost;
171 else
172 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173
174 if (code != ASHIFTRT)
175 upper_cost = word_move_zero_cost;
176 else if (i == BITS_PER_WORD - 1)
177 upper_cost = word_move_cost;
178 else
179 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 BITS_PER_WORD - 1);
181
182 if (LOG_COSTS)
183 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186
187 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 splitting[i] = true;
189 }
190 }
191
192 /* Compute what we should do when optimizing for speed or size; SPEED_P
193 selects which. Use RTXES for computing costs. */
194
195 static void
196 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197 {
198 unsigned int i;
199 int word_move_zero_cost, word_move_cost;
200
201 PUT_MODE (rtxes->target, word_mode);
202 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
203 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
204
205 SET_SRC (rtxes->set) = rtxes->source;
206 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
207
208 if (LOG_COSTS)
209 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211
212 for (i = 0; i < MAX_MACHINE_MODE; i++)
213 {
214 machine_mode mode = (machine_mode) i;
215 unsigned int size, factor;
216 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
217 {
218 unsigned int mode_move_cost;
219
220 PUT_MODE (rtxes->target, mode);
221 PUT_MODE (rtxes->source, mode);
222 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
223
224 if (LOG_COSTS)
225 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226 GET_MODE_NAME (mode), mode_move_cost,
227 word_move_cost, factor);
228
229 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230 {
231 choices[speed_p].move_modes_to_split[i] = true;
232 choices[speed_p].something_to_do = true;
233 }
234 }
235 }
236
237 /* For the moves and shifts, the only case that is checked is one
238 where the mode of the target is an integer mode twice the width
239 of the word_mode.
240
241 If it is not profitable to split a double word move then do not
242 even consider the shifts or the zero extension. */
243 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244 {
245 int zext_cost;
246
 247       /* The only case to check here is whether moving the upper part with a
 248          zero is cheaper than doing the zext itself.  */
249 PUT_MODE (rtxes->source, word_mode);
250 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
251
252 if (LOG_COSTS)
253 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255 zext_cost, word_move_cost, word_move_zero_cost);
256
257 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258 choices[speed_p].splitting_zext = true;
259
260 compute_splitting_shift (speed_p, rtxes,
261 choices[speed_p].splitting_ashift, ASHIFT,
262 word_move_zero_cost, word_move_cost);
263 compute_splitting_shift (speed_p, rtxes,
264 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265 word_move_zero_cost, word_move_cost);
266 compute_splitting_shift (speed_p, rtxes,
267 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268 word_move_zero_cost, word_move_cost);
269 }
270 }
271
272 /* Do one-per-target initialisation. This involves determining
 273    which operations are profitable to split on this target.  If none are
 274    found, the pass simply returns when called.  */
275
276 void
277 init_lower_subreg (void)
278 {
279 struct cost_rtxes rtxes;
280
281 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282
283 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
284
285 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
287 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
288 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290
291 if (LOG_COSTS)
292 fprintf (stderr, "\nSize costs\n==========\n\n");
293 compute_costs (false, &rtxes);
294
295 if (LOG_COSTS)
296 fprintf (stderr, "\nSpeed costs\n===========\n\n");
297 compute_costs (true, &rtxes);
298 }
299
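/* Return true if X is an operand that we can treat as part of a simple
   move: an OBJECT_P rtx (or a SUBREG of one) that is not a symbolic
   constant and is not a volatile or mode-dependent memory reference.  */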
300 static bool
301 simple_move_operand (rtx x)
302 {
303 if (GET_CODE (x) == SUBREG)
304 x = SUBREG_REG (x);
305
306 if (!OBJECT_P (x))
307 return false;
308
309 if (GET_CODE (x) == LABEL_REF
310 || GET_CODE (x) == SYMBOL_REF
311 || GET_CODE (x) == HIGH
312 || GET_CODE (x) == CONST)
313 return false;
314
315 if (MEM_P (x)
316 && (MEM_VOLATILE_P (x)
317 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318 return false;
319
320 return true;
321 }
322
323 /* If X is an operator that can be treated as a simple move that we
324 can split, then return the operand that is operated on. */
325
326 static rtx
327 operand_for_swap_move_operator (rtx x)
328 {
 329   /* A word-sized rotate of a register pair is equivalent to swapping
330 the registers in the register pair. */
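  /* For example, on a 32-bit target (rotate:DI (reg:DI X) (const_int 32))
     simply exchanges the two SImode halves of X, so it can be handled as a
     move with the word-sized pieces swapped.  */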
331 if (GET_CODE (x) == ROTATE
332 && GET_MODE (x) == twice_word_mode
333 && simple_move_operand (XEXP (x, 0))
334 && CONST_INT_P (XEXP (x, 1))
335 && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
336 return XEXP (x, 0);
337
338 return NULL_RTX;
339 }
340
341 /* If INSN is a single set between two objects that we want to split,
342 return the single set. SPEED_P says whether we are optimizing
343 INSN for speed or size.
344
345 INSN should have been passed to recog and extract_insn before this
346 is called. */
347
348 static rtx
349 simple_move (rtx_insn *insn, bool speed_p)
350 {
351 rtx x, op;
352 rtx set;
353 machine_mode mode;
354
355 if (recog_data.n_operands != 2)
356 return NULL_RTX;
357
358 set = single_set (insn);
359 if (!set)
360 return NULL_RTX;
361
362 x = SET_DEST (set);
363 if (x != recog_data.operand[0] && x != recog_data.operand[1])
364 return NULL_RTX;
365 if (!simple_move_operand (x))
366 return NULL_RTX;
367
368 x = SET_SRC (set);
369 if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
370 x = op;
371
372 if (x != recog_data.operand[0] && x != recog_data.operand[1])
373 return NULL_RTX;
374 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
375 things like x86 rdtsc which returns a DImode value. */
376 if (GET_CODE (x) != ASM_OPERANDS
377 && !simple_move_operand (x))
378 return NULL_RTX;
379
380 /* We try to decompose in integer modes, to avoid generating
381 inefficient code copying between integer and floating point
382 registers. That means that we can't decompose if this is a
383 non-integer mode for which there is no integer mode of the same
384 size. */
385 mode = GET_MODE (SET_DEST (set));
386 scalar_int_mode int_mode;
387 if (!SCALAR_INT_MODE_P (mode)
388 && (!int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists (&int_mode)
389 || !targetm.modes_tieable_p (mode, int_mode)))
390 return NULL_RTX;
391
 392   /* Reject PARTIAL_INT modes.  They are used for processor-specific
393 purposes and it's probably best not to tamper with them. */
394 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
395 return NULL_RTX;
396
397 if (!choices[speed_p].move_modes_to_split[(int) mode])
398 return NULL_RTX;
399
400 return set;
401 }
402
403 /* If SET is a copy from one multi-word pseudo-register to another,
404 record that in reg_copy_graph. Return whether it is such a
405 copy. */
406
407 static bool
408 find_pseudo_copy (rtx set)
409 {
410 rtx dest = SET_DEST (set);
411 rtx src = SET_SRC (set);
412 rtx op;
413 unsigned int rd, rs;
414 bitmap b;
415
416 if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
417 src = op;
418
419 if (!REG_P (dest) || !REG_P (src))
420 return false;
421
422 rd = REGNO (dest);
423 rs = REGNO (src);
424 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
425 return false;
426
427 b = reg_copy_graph[rs];
428 if (b == NULL)
429 {
430 b = BITMAP_ALLOC (NULL);
431 reg_copy_graph[rs] = b;
432 }
433
434 bitmap_set_bit (b, rd);
435
436 return true;
437 }
438
439 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
440 where they are copied to another register, add the register to
441 which they are copied to DECOMPOSABLE_CONTEXT. Use
442 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
443 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
444
445 static void
446 propagate_pseudo_copies (void)
447 {
448 auto_bitmap queue, propagate;
449
450 bitmap_copy (queue, decomposable_context);
451 do
452 {
453 bitmap_iterator iter;
454 unsigned int i;
455
456 bitmap_clear (propagate);
457
458 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
459 {
460 bitmap b = reg_copy_graph[i];
461 if (b)
462 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
463 }
464
465 bitmap_and_compl (queue, propagate, decomposable_context);
466 bitmap_ior_into (decomposable_context, propagate);
467 }
468 while (!bitmap_empty_p (queue));
469 }
470
471 /* A pointer to one of these values is passed to
472 find_decomposable_subregs. */
473
474 enum classify_move_insn
475 {
476 /* Not a simple move from one location to another. */
477 NOT_SIMPLE_MOVE,
478 /* A simple move we want to decompose. */
479 DECOMPOSABLE_SIMPLE_MOVE,
480 /* Any other simple move. */
481 SIMPLE_MOVE
482 };
483
484 /* If we find a SUBREG in *LOC which we could use to decompose a
485 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
486 unadorned register which is not a simple pseudo-register copy,
 487    PCMI points at the type of move, and we set a bit in
488 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
489
490 static void
491 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
492 {
493 subrtx_var_iterator::array_type array;
494 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
495 {
496 rtx x = *iter;
497 if (GET_CODE (x) == SUBREG)
498 {
499 rtx inner = SUBREG_REG (x);
500 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
501
502 if (!REG_P (inner))
503 continue;
504
505 regno = REGNO (inner);
506 if (HARD_REGISTER_NUM_P (regno))
507 {
508 iter.skip_subrtxes ();
509 continue;
510 }
511
512 if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
513 || !interesting_mode_p (GET_MODE (inner), &inner_size,
514 &inner_words))
515 continue;
516
 517        /* We only try to decompose single-word subregs of multi-word
 518           registers.  When we find one, we skip the subrtxes of the inner
 519           register to avoid iterating over it.
520
521 ??? This doesn't allow, e.g., DImode subregs of TImode values
522 on 32-bit targets. We would need to record the way the
523 pseudo-register was used, and only decompose if all the uses
524 were the same number and size of pieces. Hopefully this
525 doesn't happen much. */
526
527 if (outer_words == 1
528 && inner_words > 1
 529              /* Don't decompose floating-point subregs of
 530                 multi-word pseudos if the floating-point mode does
 531                 not have word size, because otherwise we'd generate
 532                 a subreg with that floating mode from a differently
 533                 sized integral pseudo, which is not allowed by
 534                 validate_subreg.  */
535 && (!FLOAT_MODE_P (GET_MODE (x))
536 || outer_size == UNITS_PER_WORD))
537 {
538 bitmap_set_bit (decomposable_context, regno);
539 iter.skip_subrtxes ();
540 continue;
541 }
542
543 /* If this is a cast from one mode to another, where the modes
544 have the same size, and they are not tieable, then mark this
545 register as non-decomposable. If we decompose it we are
546 likely to mess up whatever the backend is trying to do. */
547 if (outer_words > 1
548 && outer_size == inner_size
549 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
550 {
551 bitmap_set_bit (non_decomposable_context, regno);
552 bitmap_set_bit (subreg_context, regno);
553 iter.skip_subrtxes ();
554 continue;
555 }
556 }
557 else if (REG_P (x))
558 {
559 unsigned int regno, size, words;
560
561 /* We will see an outer SUBREG before we see the inner REG, so
562 when we see a plain REG here it means a direct reference to
563 the register.
564
565 If this is not a simple copy from one location to another,
566 then we cannot decompose this register. If this is a simple
567 copy we want to decompose, and the mode is right,
568 then we mark the register as decomposable.
569 Otherwise we don't say anything about this register --
570 it could be decomposed, but whether that would be
571 profitable depends upon how it is used elsewhere.
572
573 We only set bits in the bitmap for multi-word
574 pseudo-registers, since those are the only ones we care about
575 and it keeps the size of the bitmaps down. */
576
577 regno = REGNO (x);
578 if (!HARD_REGISTER_NUM_P (regno)
579 && interesting_mode_p (GET_MODE (x), &size, &words)
580 && words > 1)
581 {
582 switch (*pcmi)
583 {
584 case NOT_SIMPLE_MOVE:
585 bitmap_set_bit (non_decomposable_context, regno);
586 break;
587 case DECOMPOSABLE_SIMPLE_MOVE:
588 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
589 bitmap_set_bit (decomposable_context, regno);
590 break;
591 case SIMPLE_MOVE:
592 break;
593 default:
594 gcc_unreachable ();
595 }
596 }
597 }
598 else if (MEM_P (x))
599 {
600 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
601
602 /* Any registers used in a MEM do not participate in a
603 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
 604          here, and skip the subrtxes to block the parent's iteration.  */
605 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
606 iter.skip_subrtxes ();
607 }
608 }
609 }
610
611 /* Decompose REGNO into word-sized components. We smash the REG node
612 in place. This ensures that (1) something goes wrong quickly if we
613 fail to make some replacement, and (2) the debug information inside
614 the symbol table is automatically kept up to date. */
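/* For example, on a 32-bit target a DImode pseudo (reg:DI R) becomes,
   in place, something of the form

       (concatn:DI [(reg:SI N) (reg:SI N+1)])

   where the SImode pseudos are freshly allocated word-sized registers;
   the register numbers shown are purely illustrative.  */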
615
616 static void
617 decompose_register (unsigned int regno)
618 {
619 rtx reg;
620 unsigned int size, words, i;
621 rtvec v;
622
623 reg = regno_reg_rtx[regno];
624
625 regno_reg_rtx[regno] = NULL_RTX;
626
627 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
628 gcc_unreachable ();
629
630 v = rtvec_alloc (words);
631 for (i = 0; i < words; ++i)
632 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
633
634 PUT_CODE (reg, CONCATN);
635 XVEC (reg, 0) = v;
636
637 if (dump_file)
638 {
639 fprintf (dump_file, "; Splitting reg %u ->", regno);
640 for (i = 0; i < words; ++i)
641 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
642 fputc ('\n', dump_file);
643 }
644 }
645
646 /* Get a SUBREG of a CONCATN. */
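/* For example, with 32-bit words, asking for the SImode piece at byte 4 of
   (concatn:DI [A B]) yields the second element B, and byte 0 yields A.
   Requests that do not fit within a single element return NULL_RTX.  */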
647
648 static rtx
649 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
650 {
651 unsigned int outer_size, outer_words, inner_size, inner_words;
652 machine_mode innermode, partmode;
653 rtx part;
654 unsigned int final_offset;
655 unsigned int byte;
656
657 innermode = GET_MODE (op);
658 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
659 || !interesting_mode_p (innermode, &inner_size, &inner_words))
660 gcc_unreachable ();
661
662 /* Must be constant if interesting_mode_p passes. */
663 byte = orig_byte.to_constant ();
664 gcc_assert (GET_CODE (op) == CONCATN);
665 gcc_assert (byte % outer_size == 0);
666
667 gcc_assert (byte < inner_size);
668 if (outer_size > inner_size)
669 return NULL_RTX;
670
671 inner_size /= XVECLEN (op, 0);
672 part = XVECEXP (op, 0, byte / inner_size);
673 partmode = GET_MODE (part);
674
675 final_offset = byte % inner_size;
676 if (final_offset + outer_size > inner_size)
677 return NULL_RTX;
678
679 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
680 regular CONST_VECTORs. They have vector or integer modes, depending
681 on the capabilities of the target. Cope with them. */
682 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
683 partmode = GET_MODE_INNER (innermode);
684 else if (partmode == VOIDmode)
685 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
686 GET_MODE_CLASS (innermode), 0).require ();
687
688 return simplify_gen_subreg (outermode, part, partmode, final_offset);
689 }
690
691 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
692
693 static rtx
694 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
695 machine_mode innermode, unsigned int byte)
696 {
697 rtx ret;
698
699 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
700 If OP is a SUBREG of a CONCATN, then it must be a simple mode
701 change with the same size and offset 0, or it must extract a
702 part. We shouldn't see anything else here. */
703 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
704 {
705 rtx op2;
706
707 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
708 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
709 && known_eq (SUBREG_BYTE (op), 0))
710 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
711 GET_MODE (SUBREG_REG (op)), byte);
712
713 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
714 SUBREG_BYTE (op));
715 if (op2 == NULL_RTX)
716 {
717 /* We don't handle paradoxical subregs here. */
718 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
719 gcc_assert (!paradoxical_subreg_p (op));
720 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
721 byte + SUBREG_BYTE (op));
722 gcc_assert (op2 != NULL_RTX);
723 return op2;
724 }
725
726 op = op2;
727 gcc_assert (op != NULL_RTX);
728 gcc_assert (innermode == GET_MODE (op));
729 }
730
731 if (GET_CODE (op) == CONCATN)
732 return simplify_subreg_concatn (outermode, op, byte);
733
734 ret = simplify_gen_subreg (outermode, op, innermode, byte);
735
736 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
737 resolve_simple_move will ask for the high part of the paradoxical
738 subreg, which does not have a value. Just return a zero. */
739 if (ret == NULL_RTX
740 && paradoxical_subreg_p (op))
741 return CONST0_RTX (outermode);
742
743 gcc_assert (ret != NULL_RTX);
744 return ret;
745 }
746
747 /* Return whether we should resolve X into the registers into which it
748 was decomposed. */
749
750 static bool
751 resolve_reg_p (rtx x)
752 {
753 return GET_CODE (x) == CONCATN;
754 }
755
756 /* Return whether X is a SUBREG of a register which we need to
757 resolve. */
758
759 static bool
760 resolve_subreg_p (rtx x)
761 {
762 if (GET_CODE (x) != SUBREG)
763 return false;
764 return resolve_reg_p (SUBREG_REG (x));
765 }
766
767 /* Look for SUBREGs in *LOC which need to be decomposed. */
768
769 static bool
770 resolve_subreg_use (rtx *loc, rtx insn)
771 {
772 subrtx_ptr_iterator::array_type array;
773 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
774 {
775 rtx *loc = *iter;
776 rtx x = *loc;
777 if (resolve_subreg_p (x))
778 {
779 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
780 SUBREG_BYTE (x));
781
782 /* It is possible for a note to contain a reference which we can
 783          decompose.  In this case, return true to the caller to indicate
784 that the note must be removed. */
785 if (!x)
786 {
787 gcc_assert (!insn);
788 return true;
789 }
790
791 validate_change (insn, loc, x, 1);
792 iter.skip_subrtxes ();
793 }
794 else if (resolve_reg_p (x))
 795       /* Return true to the caller to indicate that we found a direct
796 reference to a register which is being decomposed. This can
797 happen inside notes, multiword shift or zero-extend
798 instructions. */
799 return true;
800 }
801
802 return false;
803 }
804
805 /* Resolve any decomposed registers which appear in register notes on
806 INSN. */
807
808 static void
809 resolve_reg_notes (rtx_insn *insn)
810 {
811 rtx *pnote, note;
812
813 note = find_reg_equal_equiv_note (insn);
814 if (note)
815 {
816 int old_count = num_validated_changes ();
817 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
818 remove_note (insn, note);
819 else
820 if (old_count != num_validated_changes ())
821 df_notes_rescan (insn);
822 }
823
824 pnote = &REG_NOTES (insn);
825 while (*pnote != NULL_RTX)
826 {
827 bool del = false;
828
829 note = *pnote;
830 switch (REG_NOTE_KIND (note))
831 {
832 case REG_DEAD:
833 case REG_UNUSED:
834 if (resolve_reg_p (XEXP (note, 0)))
835 del = true;
836 break;
837
838 default:
839 break;
840 }
841
842 if (del)
843 *pnote = XEXP (note, 1);
844 else
845 pnote = &XEXP (note, 1);
846 }
847 }
848
849 /* Return whether X can be decomposed into subwords. */
850
851 static bool
852 can_decompose_p (rtx x)
853 {
854 if (REG_P (x))
855 {
856 unsigned int regno = REGNO (x);
857
858 if (HARD_REGISTER_NUM_P (regno))
859 {
860 unsigned int byte, num_bytes, num_words;
861
862 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
863 return false;
864 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
865 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
866 return false;
867 return true;
868 }
869 else
870 return !bitmap_bit_p (subreg_context, regno);
871 }
872
873 return true;
874 }
875
 876 /* OPND is a concatn operand that is used with a simple move operator.
877 Return a new rtx with the concatn's operands swapped. */
878
879 static rtx
880 resolve_operand_for_swap_move_operator (rtx opnd)
881 {
882 gcc_assert (GET_CODE (opnd) == CONCATN);
883 rtx concatn = copy_rtx (opnd);
884 rtx op0 = XVECEXP (concatn, 0, 0);
885 rtx op1 = XVECEXP (concatn, 0, 1);
886 XVECEXP (concatn, 0, 0) = op1;
887 XVECEXP (concatn, 0, 1) = op0;
888 return concatn;
889 }
890
891 /* Decompose the registers used in a simple move SET within INSN. If
892 we don't change anything, return INSN, otherwise return the start
893 of the sequence of moves. */
894
895 static rtx_insn *
896 resolve_simple_move (rtx set, rtx_insn *insn)
897 {
898 rtx src, dest, real_dest, src_op;
899 rtx_insn *insns;
900 machine_mode orig_mode, dest_mode;
901 unsigned int orig_size, words;
902 bool pushing;
903
904 src = SET_SRC (set);
905 dest = SET_DEST (set);
906 orig_mode = GET_MODE (dest);
907
908 if (!interesting_mode_p (orig_mode, &orig_size, &words))
909 gcc_unreachable ();
910 gcc_assert (words > 1);
911
912 start_sequence ();
913
914 /* We have to handle copying from a SUBREG of a decomposed reg where
915 the SUBREG is larger than word size. Rather than assume that we
916 can take a word_mode SUBREG of the destination, we copy to a new
917 register and then copy that to the destination. */
918
919 real_dest = NULL_RTX;
920
921 if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
922 {
923 if (resolve_reg_p (dest))
924 {
925 /* DEST is a CONCATN, so swap its operands and strip
926 SRC's operator. */
927 dest = resolve_operand_for_swap_move_operator (dest);
928 src = src_op;
929 }
930 else if (resolve_reg_p (src_op))
931 {
932 /* SRC is an operation on a CONCATN, so strip the operator and
933 swap the CONCATN's operands. */
934 src = resolve_operand_for_swap_move_operator (src_op);
935 }
936 }
937
938 if (GET_CODE (src) == SUBREG
939 && resolve_reg_p (SUBREG_REG (src))
940 && (maybe_ne (SUBREG_BYTE (src), 0)
941 || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
942 {
943 real_dest = dest;
944 dest = gen_reg_rtx (orig_mode);
945 if (REG_P (real_dest))
946 REG_ATTRS (dest) = REG_ATTRS (real_dest);
947 }
948
949 /* Similarly if we are copying to a SUBREG of a decomposed reg where
950 the SUBREG is larger than word size. */
951
952 if (GET_CODE (dest) == SUBREG
953 && resolve_reg_p (SUBREG_REG (dest))
954 && (maybe_ne (SUBREG_BYTE (dest), 0)
955 || maybe_ne (orig_size,
956 GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
957 {
958 rtx reg, smove;
959 rtx_insn *minsn;
960
961 reg = gen_reg_rtx (orig_mode);
962 minsn = emit_move_insn (reg, src);
963 smove = single_set (minsn);
964 gcc_assert (smove != NULL_RTX);
965 resolve_simple_move (smove, minsn);
966 src = reg;
967 }
968
969 /* If we didn't have any big SUBREGS of decomposed registers, and
970 neither side of the move is a register we are decomposing, then
971 we don't have to do anything here. */
972
973 if (src == SET_SRC (set)
974 && dest == SET_DEST (set)
975 && !resolve_reg_p (src)
976 && !resolve_subreg_p (src)
977 && !resolve_reg_p (dest)
978 && !resolve_subreg_p (dest))
979 {
980 end_sequence ();
981 return insn;
982 }
983
984 /* It's possible for the code to use a subreg of a decomposed
985 register while forming an address. We need to handle that before
986 passing the address to emit_move_insn. We pass NULL_RTX as the
987 insn parameter to resolve_subreg_use because we cannot validate
988 the insn yet. */
989 if (MEM_P (src) || MEM_P (dest))
990 {
991 int acg;
992
993 if (MEM_P (src))
994 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
995 if (MEM_P (dest))
996 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
997 acg = apply_change_group ();
998 gcc_assert (acg);
999 }
1000
1001 /* If SRC is a register which we can't decompose, or has side
1002 effects, we need to move via a temporary register. */
1003
1004 if (!can_decompose_p (src)
1005 || side_effects_p (src)
1006 || GET_CODE (src) == ASM_OPERANDS)
1007 {
1008 rtx reg;
1009
1010 reg = gen_reg_rtx (orig_mode);
1011
1012 if (AUTO_INC_DEC)
1013 {
1014 rtx_insn *move = emit_move_insn (reg, src);
1015 if (MEM_P (src))
1016 {
1017 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1018 if (note)
1019 add_reg_note (move, REG_INC, XEXP (note, 0));
1020 }
1021 }
1022 else
1023 emit_move_insn (reg, src);
1024
1025 src = reg;
1026 }
1027
1028 /* If DEST is a register which we can't decompose, or has side
1029 effects, we need to first move to a temporary register. We
1030 handle the common case of pushing an operand directly. We also
1031 go through a temporary register if it holds a floating point
1032 value. This gives us better code on systems which can't move
1033 data easily between integer and floating point registers. */
1034
1035 dest_mode = orig_mode;
1036 pushing = push_operand (dest, dest_mode);
1037 if (!can_decompose_p (dest)
1038 || (side_effects_p (dest) && !pushing)
1039 || (!SCALAR_INT_MODE_P (dest_mode)
1040 && !resolve_reg_p (dest)
1041 && !resolve_subreg_p (dest)))
1042 {
1043 if (real_dest == NULL_RTX)
1044 real_dest = dest;
1045 if (!SCALAR_INT_MODE_P (dest_mode))
1046 dest_mode = int_mode_for_mode (dest_mode).require ();
1047 dest = gen_reg_rtx (dest_mode);
1048 if (REG_P (real_dest))
1049 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1050 }
1051
1052 if (pushing)
1053 {
1054 unsigned int i, j, jinc;
1055
1056 gcc_assert (orig_size % UNITS_PER_WORD == 0);
1057 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1058 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1059
1060 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1061 {
1062 j = 0;
1063 jinc = 1;
1064 }
1065 else
1066 {
1067 j = words - 1;
1068 jinc = -1;
1069 }
1070
1071 for (i = 0; i < words; ++i, j += jinc)
1072 {
1073 rtx temp;
1074
1075 temp = copy_rtx (XEXP (dest, 0));
1076 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1077 j * UNITS_PER_WORD);
1078 emit_move_insn (temp,
1079 simplify_gen_subreg_concatn (word_mode, src,
1080 orig_mode,
1081 j * UNITS_PER_WORD));
1082 }
1083 }
1084 else
1085 {
1086 unsigned int i;
1087
1088 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1089 emit_clobber (dest);
1090
1091 for (i = 0; i < words; ++i)
1092 {
1093 rtx t = simplify_gen_subreg_concatn (word_mode, dest,
1094 dest_mode,
1095 i * UNITS_PER_WORD);
1096 /* simplify_gen_subreg_concatn can return (const_int 0) for
1097 some sub-objects of paradoxical subregs. As a source operand,
1098 that's fine. As a destination it must be avoided. Those are
1099 supposed to be don't care bits, so we can just drop that store
1100 on the floor. */
1101 if (t != CONST0_RTX (word_mode))
1102 emit_move_insn (t,
1103 simplify_gen_subreg_concatn (word_mode, src,
1104 orig_mode,
1105 i * UNITS_PER_WORD));
1106 }
1107 }
1108
1109 if (real_dest != NULL_RTX)
1110 {
1111 rtx mdest, smove;
1112 rtx_insn *minsn;
1113
1114 if (dest_mode == orig_mode)
1115 mdest = dest;
1116 else
1117 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1118 minsn = emit_move_insn (real_dest, mdest);
1119
1120 if (AUTO_INC_DEC && MEM_P (real_dest)
1121 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1122 {
1123 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1124 if (note)
1125 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1126 }
1127
1128 smove = single_set (minsn);
1129 gcc_assert (smove != NULL_RTX);
1130
1131 resolve_simple_move (smove, minsn);
1132 }
1133
1134 insns = get_insns ();
1135 end_sequence ();
1136
1137 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1138
1139 emit_insn_before (insns, insn);
1140
1141 /* If we get here via self-recursion, then INSN is not yet in the insns
1142 chain and delete_insn will fail. We only want to remove INSN from the
1143 current sequence. See PR56738. */
1144 if (in_sequence_p ())
1145 remove_insn (insn);
1146 else
1147 delete_insn (insn);
1148
1149 return insns;
1150 }
1151
1152 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1153 component registers. Return whether we changed something. */
1154
1155 static bool
1156 resolve_clobber (rtx pat, rtx_insn *insn)
1157 {
1158 rtx reg;
1159 machine_mode orig_mode;
1160 unsigned int orig_size, words, i;
1161 int ret;
1162
1163 reg = XEXP (pat, 0);
1164 /* For clobbers we can look through paradoxical subregs which
1165 we do not handle in simplify_gen_subreg_concatn. */
1166 if (paradoxical_subreg_p (reg))
1167 reg = SUBREG_REG (reg);
1168 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1169 return false;
1170
1171 orig_mode = GET_MODE (reg);
1172 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1173 gcc_unreachable ();
1174
1175 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1176 simplify_gen_subreg_concatn (word_mode, reg,
1177 orig_mode, 0),
1178 0);
1179 df_insn_rescan (insn);
1180 gcc_assert (ret != 0);
1181
1182 for (i = words - 1; i > 0; --i)
1183 {
1184 rtx x;
1185
1186 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1187 i * UNITS_PER_WORD);
1188 x = gen_rtx_CLOBBER (VOIDmode, x);
1189 emit_insn_after (x, insn);
1190 }
1191
1192 resolve_reg_notes (insn);
1193
1194 return true;
1195 }
1196
1197 /* A USE of a decomposed register is no longer meaningful. Return
1198 whether we changed something. */
1199
1200 static bool
1201 resolve_use (rtx pat, rtx_insn *insn)
1202 {
1203 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1204 {
1205 delete_insn (insn);
1206 return true;
1207 }
1208
1209 resolve_reg_notes (insn);
1210
1211 return false;
1212 }
1213
 1214 /* Simplify a debug insn by resolving any decomposed registers that
      appear in its VAR_LOCATION.  */
1215
1216 static void
1217 resolve_debug (rtx_insn *insn)
1218 {
1219 subrtx_ptr_iterator::array_type array;
1220 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1221 {
1222 rtx *loc = *iter;
1223 rtx x = *loc;
1224 if (resolve_subreg_p (x))
1225 {
1226 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1227 SUBREG_BYTE (x));
1228
1229 if (x)
1230 *loc = x;
1231 else
1232 x = copy_rtx (*loc);
1233 }
1234 if (resolve_reg_p (x))
1235 *loc = copy_rtx (x);
1236 }
1237
1238 df_insn_rescan (insn);
1239
1240 resolve_reg_notes (insn);
1241 }
1242
1243 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1244 set the decomposable_context bitmap accordingly. SPEED_P is true
1245 if we are optimizing INSN for speed rather than size. Return true
1246 if INSN is decomposable. */
1247
1248 static bool
1249 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1250 {
1251 rtx set;
1252 rtx op;
1253 rtx op_operand;
1254
1255 set = single_set (insn);
1256 if (!set)
1257 return false;
1258
1259 op = SET_SRC (set);
1260 if (GET_CODE (op) != ASHIFT
1261 && GET_CODE (op) != LSHIFTRT
1262 && GET_CODE (op) != ASHIFTRT
1263 && GET_CODE (op) != ZERO_EXTEND)
1264 return false;
1265
1266 op_operand = XEXP (op, 0);
1267 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1268 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1269 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1270 || GET_MODE (op) != twice_word_mode)
1271 return false;
1272
1273 if (GET_CODE (op) == ZERO_EXTEND)
1274 {
1275 if (GET_MODE (op_operand) != word_mode
1276 || !choices[speed_p].splitting_zext)
1277 return false;
1278 }
1279 else /* left or right shift */
1280 {
1281 bool *splitting = (GET_CODE (op) == ASHIFT
1282 ? choices[speed_p].splitting_ashift
1283 : GET_CODE (op) == ASHIFTRT
1284 ? choices[speed_p].splitting_ashiftrt
1285 : choices[speed_p].splitting_lshiftrt);
1286 if (!CONST_INT_P (XEXP (op, 1))
1287 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1288 2 * BITS_PER_WORD - 1)
1289 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1290 return false;
1291
1292 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1293 }
1294
1295 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1296
1297 return true;
1298 }
1299
1300 /* Decompose a more than word wide shift (in INSN) of a multiword
1301 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1302 and 'set to zero' insn. Return a pointer to the new insn when a
1303 replacement was done. */
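/* For example, on a 32-bit little-endian target where the DImode pseudos
   have already been decomposed,

       (set (reg:DI D) (lshiftrt:DI (reg:DI S) (const_int 40)))

   becomes a word_mode shift of S's high word right by 8 into D's low word,
   followed by setting D's high word to zero.  */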
1304
1305 static rtx_insn *
1306 resolve_shift_zext (rtx_insn *insn)
1307 {
1308 rtx set;
1309 rtx op;
1310 rtx op_operand;
1311 rtx_insn *insns;
1312 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1313 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1314 scalar_int_mode inner_mode;
1315
1316 set = single_set (insn);
1317 if (!set)
1318 return NULL;
1319
1320 op = SET_SRC (set);
1321 if (GET_CODE (op) != ASHIFT
1322 && GET_CODE (op) != LSHIFTRT
1323 && GET_CODE (op) != ASHIFTRT
1324 && GET_CODE (op) != ZERO_EXTEND)
1325 return NULL;
1326
1327 op_operand = XEXP (op, 0);
1328 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1329 return NULL;
1330
1331 /* We can tear this operation apart only if the regs were already
1332 torn apart. */
1333 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1334 return NULL;
1335
1336 /* src_reg_num is the number of the word mode register which we
1337 are operating on. For a left shift and a zero_extend on little
1338 endian machines this is register 0. */
1339 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1340 ? 1 : 0;
1341
1342 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1343 src_reg_num = 1 - src_reg_num;
1344
1345 if (GET_CODE (op) == ZERO_EXTEND)
1346 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1347 else
1348 dest_reg_num = 1 - src_reg_num;
1349
1350 offset1 = UNITS_PER_WORD * dest_reg_num;
1351 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1352 src_offset = UNITS_PER_WORD * src_reg_num;
1353
1354 start_sequence ();
1355
1356 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1357 GET_MODE (SET_DEST (set)),
1358 offset1);
1359 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1360 GET_MODE (SET_DEST (set)),
1361 offset2);
1362 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1363 GET_MODE (op_operand),
1364 src_offset);
1365 if (GET_CODE (op) == ASHIFTRT
1366 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1367 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1368 BITS_PER_WORD - 1, NULL_RTX, 0);
1369
1370 if (GET_CODE (op) != ZERO_EXTEND)
1371 {
1372 int shift_count = INTVAL (XEXP (op, 1));
1373 if (shift_count > BITS_PER_WORD)
1374 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1375 LSHIFT_EXPR : RSHIFT_EXPR,
1376 word_mode, src_reg,
1377 shift_count - BITS_PER_WORD,
1378 dest_reg, GET_CODE (op) != ASHIFTRT);
1379 }
1380
1381 if (dest_reg != src_reg)
1382 emit_move_insn (dest_reg, src_reg);
1383 if (GET_CODE (op) != ASHIFTRT)
1384 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1385 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1386 emit_move_insn (dest_upper, copy_rtx (src_reg));
1387 else
1388 emit_move_insn (dest_upper, upper_src);
1389 insns = get_insns ();
1390
1391 end_sequence ();
1392
1393 emit_insn_before (insns, insn);
1394
1395 if (dump_file)
1396 {
1397 rtx_insn *in;
1398 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1399 for (in = insns; in != insn; in = NEXT_INSN (in))
1400 fprintf (dump_file, "%d ", INSN_UID (in));
1401 fprintf (dump_file, "\n");
1402 }
1403
1404 delete_insn (insn);
1405 return insns;
1406 }
1407
1408 /* Print to dump_file a description of what we're doing with shift code CODE.
1409 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1410
1411 static void
1412 dump_shift_choices (enum rtx_code code, bool *splitting)
1413 {
1414 int i;
1415 const char *sep;
1416
1417 fprintf (dump_file,
1418 " Splitting mode %s for %s lowering with shift amounts = ",
1419 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1420 sep = "";
1421 for (i = 0; i < BITS_PER_WORD; i++)
1422 if (splitting[i])
1423 {
1424 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1425 sep = ",";
1426 }
1427 fprintf (dump_file, "\n");
1428 }
1429
1430 /* Print to dump_file a description of what we're doing when optimizing
1431 for speed or size; SPEED_P says which. DESCRIPTION is a description
1432 of the SPEED_P choice. */
1433
1434 static void
1435 dump_choices (bool speed_p, const char *description)
1436 {
1437 unsigned int size, factor, i;
1438
1439 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1440
1441 for (i = 0; i < MAX_MACHINE_MODE; i++)
1442 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1443 && factor > 1)
1444 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1445 choices[speed_p].move_modes_to_split[i]
1446 ? "Splitting"
1447 : "Skipping",
1448 GET_MODE_NAME ((machine_mode) i));
1449
1450 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1451 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1452 GET_MODE_NAME (twice_word_mode));
1453
1454 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1455 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1456 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1457 fprintf (dump_file, "\n");
1458 }
1459
1460 /* Look for registers which are always accessed via word-sized SUBREGs
 1461    or, if DECOMPOSE_COPIES is true, via copies.  Decompose these
1462 registers into several word-sized pseudo-registers. */
1463
1464 static void
1465 decompose_multiword_subregs (bool decompose_copies)
1466 {
1467 unsigned int max;
1468 basic_block bb;
1469 bool speed_p;
1470
1471 if (dump_file)
1472 {
1473 dump_choices (false, "size");
1474 dump_choices (true, "speed");
1475 }
1476
1477 /* Check if this target even has any modes to consider lowering. */
1478 if (!choices[false].something_to_do && !choices[true].something_to_do)
1479 {
1480 if (dump_file)
1481 fprintf (dump_file, "Nothing to do!\n");
1482 return;
1483 }
1484
1485 max = max_reg_num ();
1486
1487 /* First see if there are any multi-word pseudo-registers. If there
1488 aren't, there is nothing we can do. This should speed up this
1489 pass in the normal case, since it should be faster than scanning
1490 all the insns. */
1491 {
1492 unsigned int i;
1493 bool useful_modes_seen = false;
1494
1495 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1496 if (regno_reg_rtx[i] != NULL)
1497 {
1498 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1499 if (choices[false].move_modes_to_split[(int) mode]
1500 || choices[true].move_modes_to_split[(int) mode])
1501 {
1502 useful_modes_seen = true;
1503 break;
1504 }
1505 }
1506
1507 if (!useful_modes_seen)
1508 {
1509 if (dump_file)
1510 fprintf (dump_file, "Nothing to lower in this function.\n");
1511 return;
1512 }
1513 }
1514
1515 if (df)
1516 {
1517 df_set_flags (DF_DEFER_INSN_RESCAN);
1518 run_word_dce ();
1519 }
1520
1521 /* FIXME: It may be possible to change this code to look for each
1522 multi-word pseudo-register and to find each insn which sets or
1523 uses that register. That should be faster than scanning all the
1524 insns. */
1525
1526 decomposable_context = BITMAP_ALLOC (NULL);
1527 non_decomposable_context = BITMAP_ALLOC (NULL);
1528 subreg_context = BITMAP_ALLOC (NULL);
1529
1530 reg_copy_graph.create (max);
1531 reg_copy_graph.safe_grow_cleared (max, true);
1532 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1533
1534 speed_p = optimize_function_for_speed_p (cfun);
1535 FOR_EACH_BB_FN (bb, cfun)
1536 {
1537 rtx_insn *insn;
1538
1539 FOR_BB_INSNS (bb, insn)
1540 {
1541 rtx set;
1542 enum classify_move_insn cmi;
1543 int i, n;
1544
1545 if (!INSN_P (insn)
1546 || GET_CODE (PATTERN (insn)) == CLOBBER
1547 || GET_CODE (PATTERN (insn)) == USE)
1548 continue;
1549
1550 recog_memoized (insn);
1551
1552 if (find_decomposable_shift_zext (insn, speed_p))
1553 continue;
1554
1555 extract_insn (insn);
1556
1557 set = simple_move (insn, speed_p);
1558
1559 if (!set)
1560 cmi = NOT_SIMPLE_MOVE;
1561 else
1562 {
1563 /* We mark pseudo-to-pseudo copies as decomposable during the
 1564            second pass only.  The first pass is so early that there is a
 1565            good chance such moves will be optimized away completely by
1566 subsequent optimizations anyway.
1567
1568 However, we call find_pseudo_copy even during the first pass
1569 so as to properly set up the reg_copy_graph. */
1570 if (find_pseudo_copy (set))
 1571            cmi = decompose_copies ? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1572 else
1573 cmi = SIMPLE_MOVE;
1574 }
1575
1576 n = recog_data.n_operands;
1577 for (i = 0; i < n; ++i)
1578 {
1579 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1580
1581 /* We handle ASM_OPERANDS as a special case to support
1582 things like x86 rdtsc which returns a DImode value.
1583 We can decompose the output, which will certainly be
1584 operand 0, but not the inputs. */
1585
1586 if (cmi == SIMPLE_MOVE
1587 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1588 {
1589 gcc_assert (i == 0);
1590 cmi = NOT_SIMPLE_MOVE;
1591 }
1592 }
1593 }
1594 }
1595
1596 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1597 if (!bitmap_empty_p (decomposable_context))
1598 {
1599 unsigned int i;
1600 sbitmap_iterator sbi;
1601 bitmap_iterator iter;
1602 unsigned int regno;
1603
1604 propagate_pseudo_copies ();
1605
1606 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1607 bitmap_clear (sub_blocks);
1608
1609 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1610 decompose_register (regno);
1611
1612 FOR_EACH_BB_FN (bb, cfun)
1613 {
1614 rtx_insn *insn;
1615
1616 FOR_BB_INSNS (bb, insn)
1617 {
1618 rtx pat;
1619
1620 if (!INSN_P (insn))
1621 continue;
1622
1623 pat = PATTERN (insn);
1624 if (GET_CODE (pat) == CLOBBER)
1625 resolve_clobber (pat, insn);
1626 else if (GET_CODE (pat) == USE)
1627 resolve_use (pat, insn);
1628 else if (DEBUG_INSN_P (insn))
1629 resolve_debug (insn);
1630 else
1631 {
1632 rtx set;
1633 int i;
1634
1635 recog_memoized (insn);
1636 extract_insn (insn);
1637
1638 set = simple_move (insn, speed_p);
1639 if (set)
1640 {
1641 rtx_insn *orig_insn = insn;
1642 bool cfi = control_flow_insn_p (insn);
1643
1644 /* We can end up splitting loads to multi-word pseudos
1645 into separate loads to machine word size pseudos.
1646 When this happens, we first had one load that can
1647 throw, and after resolve_simple_move we'll have a
1648 bunch of loads (at least two). All those loads may
1649 trap if we can have non-call exceptions, so they
1650 all will end the current basic block. We split the
1651 block after the outer loop over all insns, but we
1652 make sure here that we will be able to split the
1653 basic block and still produce the correct control
1654 flow graph for it. */
1655 gcc_assert (!cfi
1656 || (cfun->can_throw_non_call_exceptions
1657 && can_throw_internal (insn)));
1658
1659 insn = resolve_simple_move (set, insn);
1660 if (insn != orig_insn)
1661 {
1662 recog_memoized (insn);
1663 extract_insn (insn);
1664
1665 if (cfi)
1666 bitmap_set_bit (sub_blocks, bb->index);
1667 }
1668 }
1669 else
1670 {
1671 rtx_insn *decomposed_shift;
1672
1673 decomposed_shift = resolve_shift_zext (insn);
1674 if (decomposed_shift != NULL_RTX)
1675 {
1676 insn = decomposed_shift;
1677 recog_memoized (insn);
1678 extract_insn (insn);
1679 }
1680 }
1681
1682 for (i = recog_data.n_operands - 1; i >= 0; --i)
1683 resolve_subreg_use (recog_data.operand_loc[i], insn);
1684
1685 resolve_reg_notes (insn);
1686
1687 if (num_validated_changes () > 0)
1688 {
1689 for (i = recog_data.n_dups - 1; i >= 0; --i)
1690 {
1691 rtx *pl = recog_data.dup_loc[i];
1692 int dup_num = recog_data.dup_num[i];
1693 rtx *px = recog_data.operand_loc[dup_num];
1694
1695 validate_unshare_change (insn, pl, *px, 1);
1696 }
1697
1698 i = apply_change_group ();
1699 gcc_assert (i);
1700 }
1701 }
1702 }
1703 }
1704
1705 /* If we had insns to split that caused control flow insns in the middle
1706 of a basic block, split those blocks now. Note that we only handle
1707 the case where splitting a load has caused multiple possibly trapping
1708 loads to appear. */
1709 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1710 {
1711 rtx_insn *insn, *end;
1712 edge fallthru;
1713
1714 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1715 insn = BB_HEAD (bb);
1716 end = BB_END (bb);
1717
1718 while (insn != end)
1719 {
1720 if (control_flow_insn_p (insn))
1721 {
1722 /* Split the block after insn. There will be a fallthru
1723 edge, which is OK so we keep it. We have to create the
1724 exception edges ourselves. */
1725 fallthru = split_block (bb, insn);
1726 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1727 bb = fallthru->dest;
1728 insn = BB_HEAD (bb);
1729 }
1730 else
1731 insn = NEXT_INSN (insn);
1732 }
1733 }
1734 }
1735
1736 for (bitmap b : reg_copy_graph)
1737 if (b)
1738 BITMAP_FREE (b);
1739
1740 reg_copy_graph.release ();
1741
1742 BITMAP_FREE (decomposable_context);
1743 BITMAP_FREE (non_decomposable_context);
1744 BITMAP_FREE (subreg_context);
1745 }
1746 \f
1747 /* Implement first lower subreg pass. */
1748
1749 namespace {
1750
1751 const pass_data pass_data_lower_subreg =
1752 {
1753 RTL_PASS, /* type */
1754 "subreg1", /* name */
1755 OPTGROUP_NONE, /* optinfo_flags */
1756 TV_LOWER_SUBREG, /* tv_id */
1757 0, /* properties_required */
1758 0, /* properties_provided */
1759 0, /* properties_destroyed */
1760 0, /* todo_flags_start */
1761 0, /* todo_flags_finish */
1762 };
1763
1764 class pass_lower_subreg : public rtl_opt_pass
1765 {
1766 public:
1767 pass_lower_subreg (gcc::context *ctxt)
1768 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1769 {}
1770
1771 /* opt_pass methods: */
1772 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1773 virtual unsigned int execute (function *)
1774 {
1775 decompose_multiword_subregs (false);
1776 return 0;
1777 }
1778
1779 }; // class pass_lower_subreg
1780
1781 } // anon namespace
1782
1783 rtl_opt_pass *
1784 make_pass_lower_subreg (gcc::context *ctxt)
1785 {
1786 return new pass_lower_subreg (ctxt);
1787 }
1788
1789 /* Implement second lower subreg pass. */
1790
1791 namespace {
1792
1793 const pass_data pass_data_lower_subreg2 =
1794 {
1795 RTL_PASS, /* type */
1796 "subreg2", /* name */
1797 OPTGROUP_NONE, /* optinfo_flags */
1798 TV_LOWER_SUBREG, /* tv_id */
1799 0, /* properties_required */
1800 0, /* properties_provided */
1801 0, /* properties_destroyed */
1802 0, /* todo_flags_start */
1803 TODO_df_finish, /* todo_flags_finish */
1804 };
1805
1806 class pass_lower_subreg2 : public rtl_opt_pass
1807 {
1808 public:
1809 pass_lower_subreg2 (gcc::context *ctxt)
1810 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1811 {}
1812
1813 /* opt_pass methods: */
1814 virtual bool gate (function *) { return flag_split_wide_types
1815 && flag_split_wide_types_early; }
1816 virtual unsigned int execute (function *)
1817 {
1818 decompose_multiword_subregs (true);
1819 return 0;
1820 }
1821
1822 }; // class pass_lower_subreg2
1823
1824 } // anon namespace
1825
1826 rtl_opt_pass *
1827 make_pass_lower_subreg2 (gcc::context *ctxt)
1828 {
1829 return new pass_lower_subreg2 (ctxt);
1830 }
1831
1832 /* Implement third lower subreg pass. */
1833
1834 namespace {
1835
1836 const pass_data pass_data_lower_subreg3 =
1837 {
1838 RTL_PASS, /* type */
1839 "subreg3", /* name */
1840 OPTGROUP_NONE, /* optinfo_flags */
1841 TV_LOWER_SUBREG, /* tv_id */
1842 0, /* properties_required */
1843 0, /* properties_provided */
1844 0, /* properties_destroyed */
1845 0, /* todo_flags_start */
1846 TODO_df_finish, /* todo_flags_finish */
1847 };
1848
1849 class pass_lower_subreg3 : public rtl_opt_pass
1850 {
1851 public:
1852 pass_lower_subreg3 (gcc::context *ctxt)
1853 : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
1854 {}
1855
1856 /* opt_pass methods: */
1857 virtual bool gate (function *) { return flag_split_wide_types; }
1858 virtual unsigned int execute (function *)
1859 {
1860 decompose_multiword_subregs (true);
1861 return 0;
1862 }
1863
1864 }; // class pass_lower_subreg3
1865
1866 } // anon namespace
1867
1868 rtl_opt_pass *
1869 make_pass_lower_subreg3 (gcc::context *ctxt)
1870 {
1871 return new pass_lower_subreg3 (ctxt);
1872 }