]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
genattrtab.c (write_header): Include hash-set.h...
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
/* Decompose multiword subregs.
   Copyright (C) 2007-2015 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
e53a16e7
ILT
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "machmode.h"
26#include "tm.h"
40e23961
MC
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
4d648807 36#include "tree.h"
e53a16e7
ILT
37#include "rtl.h"
38#include "tm_p.h"
e53a16e7
ILT
39#include "flags.h"
40#include "insn-config.h"
41#include "obstack.h"
60393bbc 42#include "predict.h"
60393bbc
AM
43#include "hard-reg-set.h"
44#include "input.h"
45#include "function.h"
46#include "dominance.h"
47#include "cfg.h"
48#include "cfgrtl.h"
49#include "cfgbuild.h"
e53a16e7
ILT
50#include "basic-block.h"
51#include "recog.h"
52#include "bitmap.h"
8d074192 53#include "dce.h"
e53a16e7 54#include "expr.h"
7984c787 55#include "except.h"
e53a16e7
ILT
56#include "regs.h"
57#include "tree-pass.h"
6fb5fa3c 58#include "df.h"
af4ba423 59#include "lower-subreg.h"
cf55cb6a 60#include "rtl-iter.h"
e53a16e7
ILT
61
/* Normalize STACK_GROWS_DOWNWARD so that it is always defined: 1 if it
   was defined before this point, 0 otherwise.  This lets the code below
   test it in an ordinary expression instead of with #ifdef.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif
68
e53a16e7
ILT
69
/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */
97
/* Set to 1 to print the cost estimates computed by this file to stderr.  */
#define LOG_COSTS 0

/* Set to 1 to make every cost comparison in this file choose splitting.  */
#define FORCE_LOWERING 0
e53a16e7
ILT
100
101/* Bit N in this bitmap is set if regno N is used in a context in
102 which we can decompose it. */
103static bitmap decomposable_context;
104
105/* Bit N in this bitmap is set if regno N is used in a context in
106 which it can not be decomposed. */
107static bitmap non_decomposable_context;
108
402464a0
BS
109/* Bit N in this bitmap is set if regno N is used in a subreg
110 which changes the mode but not the size. This typically happens
111 when the register accessed as a floating-point value; we want to
112 avoid generating accesses to its subwords in integer modes. */
113static bitmap subreg_context;
114
e53a16e7
ILT
115/* Bit N in the bitmap in element M of this array is set if there is a
116 copy from reg M to reg N. */
9771b263 117static vec<bitmap> reg_copy_graph;
e53a16e7 118
af4ba423
KZ
119struct target_lower_subreg default_target_lower_subreg;
120#if SWITCHABLE_TARGET
121struct target_lower_subreg *this_target_lower_subreg
122 = &default_target_lower_subreg;
123#endif
124
125#define twice_word_mode \
126 this_target_lower_subreg->x_twice_word_mode
127#define choices \
128 this_target_lower_subreg->x_choices
129
130/* RTXes used while computing costs. */
131struct cost_rtxes {
132 /* Source and target registers. */
133 rtx source;
134 rtx target;
135
136 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
137 rtx zext;
138
139 /* A shift of SOURCE. */
140 rtx shift;
141
142 /* A SET of TARGET. */
143 rtx set;
144};
145
146/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
147 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
148
149static int
150shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
ef4bddc2 151 machine_mode mode, int op1)
af4ba423 152{
af4ba423
KZ
153 PUT_CODE (rtxes->shift, code);
154 PUT_MODE (rtxes->shift, mode);
155 PUT_MODE (rtxes->source, mode);
156 XEXP (rtxes->shift, 1) = GEN_INT (op1);
69523a7c 157 return set_src_cost (rtxes->shift, speed_p);
af4ba423
KZ
158}
159
160/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
161 to true if it is profitable to split a double-word CODE shift
162 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
163 for speed or size profitability.
164
165 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
166 the cost of moving zero into a word-mode register. WORD_MOVE_COST
167 is the cost of moving between word registers. */
168
169static void
170compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
171 bool *splitting, enum rtx_code code,
172 int word_move_zero_cost, int word_move_cost)
173{
d7fde18c 174 int wide_cost, narrow_cost, upper_cost, i;
af4ba423
KZ
175
176 for (i = 0; i < BITS_PER_WORD; i++)
177 {
178 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
179 i + BITS_PER_WORD);
180 if (i == 0)
181 narrow_cost = word_move_cost;
182 else
183 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
184
d7fde18c
JJ
185 if (code != ASHIFTRT)
186 upper_cost = word_move_zero_cost;
187 else if (i == BITS_PER_WORD - 1)
188 upper_cost = word_move_cost;
189 else
190 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
191 BITS_PER_WORD - 1);
192
af4ba423
KZ
193 if (LOG_COSTS)
194 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
195 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
d7fde18c 196 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
af4ba423 197
d7fde18c 198 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
af4ba423
KZ
199 splitting[i] = true;
200 }
201}
202
203/* Compute what we should do when optimizing for speed or size; SPEED_P
204 selects which. Use RTXES for computing costs. */
205
206static void
207compute_costs (bool speed_p, struct cost_rtxes *rtxes)
208{
209 unsigned int i;
210 int word_move_zero_cost, word_move_cost;
211
69523a7c 212 PUT_MODE (rtxes->target, word_mode);
af4ba423 213 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
69523a7c 214 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
215
216 SET_SRC (rtxes->set) = rtxes->source;
69523a7c 217 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
218
219 if (LOG_COSTS)
220 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
221 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
222
223 for (i = 0; i < MAX_MACHINE_MODE; i++)
224 {
ef4bddc2 225 machine_mode mode = (machine_mode) i;
af4ba423
KZ
226 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
227 if (factor > 1)
228 {
229 int mode_move_cost;
230
231 PUT_MODE (rtxes->target, mode);
232 PUT_MODE (rtxes->source, mode);
69523a7c 233 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
234
235 if (LOG_COSTS)
236 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
237 GET_MODE_NAME (mode), mode_move_cost,
238 word_move_cost, factor);
239
240 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
241 {
242 choices[speed_p].move_modes_to_split[i] = true;
243 choices[speed_p].something_to_do = true;
244 }
245 }
246 }
247
248 /* For the moves and shifts, the only case that is checked is one
249 where the mode of the target is an integer mode twice the width
250 of the word_mode.
251
252 If it is not profitable to split a double word move then do not
253 even consider the shifts or the zero extension. */
254 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
255 {
256 int zext_cost;
257
258 /* The only case here to check to see if moving the upper part with a
259 zero is cheaper than doing the zext itself. */
af4ba423 260 PUT_MODE (rtxes->source, word_mode);
69523a7c 261 zext_cost = set_src_cost (rtxes->zext, speed_p);
af4ba423
KZ
262
263 if (LOG_COSTS)
264 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
265 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
266 zext_cost, word_move_cost, word_move_zero_cost);
267
268 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
269 choices[speed_p].splitting_zext = true;
270
271 compute_splitting_shift (speed_p, rtxes,
272 choices[speed_p].splitting_ashift, ASHIFT,
273 word_move_zero_cost, word_move_cost);
274 compute_splitting_shift (speed_p, rtxes,
275 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
276 word_move_zero_cost, word_move_cost);
d7fde18c
JJ
277 compute_splitting_shift (speed_p, rtxes,
278 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
279 word_move_zero_cost, word_move_cost);
af4ba423
KZ
280 }
281}
282
283/* Do one-per-target initialisation. This involves determining
284 which operations on the machine are profitable. If none are found,
285 then the pass just returns when called. */
286
287void
288init_lower_subreg (void)
289{
290 struct cost_rtxes rtxes;
291
292 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
293
294 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
295
296 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
297 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
298 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
299 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
300 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
301
302 if (LOG_COSTS)
303 fprintf (stderr, "\nSize costs\n==========\n\n");
304 compute_costs (false, &rtxes);
305
306 if (LOG_COSTS)
307 fprintf (stderr, "\nSpeed costs\n===========\n\n");
308 compute_costs (true, &rtxes);
309}
2b54c30f
ILT
310
311static bool
312simple_move_operand (rtx x)
313{
314 if (GET_CODE (x) == SUBREG)
315 x = SUBREG_REG (x);
316
317 if (!OBJECT_P (x))
318 return false;
319
320 if (GET_CODE (x) == LABEL_REF
321 || GET_CODE (x) == SYMBOL_REF
7e0c3f57
ILT
322 || GET_CODE (x) == HIGH
323 || GET_CODE (x) == CONST)
2b54c30f
ILT
324 return false;
325
326 if (MEM_P (x)
327 && (MEM_VOLATILE_P (x)
5bfed9a9 328 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
2b54c30f
ILT
329 return false;
330
331 return true;
332}
333
af4ba423
KZ
334/* If INSN is a single set between two objects that we want to split,
335 return the single set. SPEED_P says whether we are optimizing
336 INSN for speed or size.
337
338 INSN should have been passed to recog and extract_insn before this
339 is called. */
e53a16e7
ILT
340
341static rtx
e967cc2f 342simple_move (rtx_insn *insn, bool speed_p)
e53a16e7
ILT
343{
344 rtx x;
345 rtx set;
ef4bddc2 346 machine_mode mode;
e53a16e7
ILT
347
348 if (recog_data.n_operands != 2)
349 return NULL_RTX;
350
351 set = single_set (insn);
352 if (!set)
353 return NULL_RTX;
354
355 x = SET_DEST (set);
356 if (x != recog_data.operand[0] && x != recog_data.operand[1])
357 return NULL_RTX;
2b54c30f 358 if (!simple_move_operand (x))
e53a16e7
ILT
359 return NULL_RTX;
360
361 x = SET_SRC (set);
362 if (x != recog_data.operand[0] && x != recog_data.operand[1])
363 return NULL_RTX;
2b54c30f
ILT
364 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
365 things like x86 rdtsc which returns a DImode value. */
366 if (GET_CODE (x) != ASM_OPERANDS
367 && !simple_move_operand (x))
e53a16e7
ILT
368 return NULL_RTX;
369
370 /* We try to decompose in integer modes, to avoid generating
371 inefficient code copying between integer and floating point
372 registers. That means that we can't decompose if this is a
373 non-integer mode for which there is no integer mode of the same
374 size. */
576fe41a 375 mode = GET_MODE (SET_DEST (set));
e53a16e7
ILT
376 if (!SCALAR_INT_MODE_P (mode)
377 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
378 == BLKmode))
379 return NULL_RTX;
380
1f64a081
ILT
381 /* Reject PARTIAL_INT modes. They are used for processor specific
382 purposes and it's probably best not to tamper with them. */
383 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
384 return NULL_RTX;
385
af4ba423
KZ
386 if (!choices[speed_p].move_modes_to_split[(int) mode])
387 return NULL_RTX;
388
e53a16e7
ILT
389 return set;
390}
391
392/* If SET is a copy from one multi-word pseudo-register to another,
393 record that in reg_copy_graph. Return whether it is such a
394 copy. */
395
396static bool
397find_pseudo_copy (rtx set)
398{
399 rtx dest = SET_DEST (set);
400 rtx src = SET_SRC (set);
401 unsigned int rd, rs;
402 bitmap b;
403
404 if (!REG_P (dest) || !REG_P (src))
405 return false;
406
407 rd = REGNO (dest);
408 rs = REGNO (src);
409 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
410 return false;
411
9771b263 412 b = reg_copy_graph[rs];
e53a16e7
ILT
413 if (b == NULL)
414 {
415 b = BITMAP_ALLOC (NULL);
9771b263 416 reg_copy_graph[rs] = b;
e53a16e7
ILT
417 }
418
419 bitmap_set_bit (b, rd);
420
421 return true;
422}
423
424/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
425 where they are copied to another register, add the register to
426 which they are copied to DECOMPOSABLE_CONTEXT. Use
427 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
428 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
429
430static void
431propagate_pseudo_copies (void)
432{
433 bitmap queue, propagate;
434
435 queue = BITMAP_ALLOC (NULL);
436 propagate = BITMAP_ALLOC (NULL);
437
438 bitmap_copy (queue, decomposable_context);
439 do
440 {
441 bitmap_iterator iter;
442 unsigned int i;
443
444 bitmap_clear (propagate);
445
446 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
447 {
9771b263 448 bitmap b = reg_copy_graph[i];
e53a16e7
ILT
449 if (b)
450 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
451 }
452
453 bitmap_and_compl (queue, propagate, decomposable_context);
454 bitmap_ior_into (decomposable_context, propagate);
455 }
456 while (!bitmap_empty_p (queue));
457
458 BITMAP_FREE (queue);
459 BITMAP_FREE (propagate);
460}
461
/* Classification of the insn being scanned.  A pointer to one of these
   values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is some other simple move.  */
  SIMPLE_MOVE
};
474
a36a1928
RS
475/* If we find a SUBREG in *LOC which we could use to decompose a
476 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
477 unadorned register which is not a simple pseudo-register copy,
478 DATA will point at the type of move, and we set a bit in
479 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
e53a16e7 480
a36a1928
RS
481static void
482find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
e53a16e7 483{
a36a1928
RS
484 subrtx_var_iterator::array_type array;
485 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
e53a16e7 486 {
a36a1928
RS
487 rtx x = *iter;
488 if (GET_CODE (x) == SUBREG)
489 {
490 rtx inner = SUBREG_REG (x);
491 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
e53a16e7 492
a36a1928
RS
493 if (!REG_P (inner))
494 continue;
e53a16e7 495
a36a1928
RS
496 regno = REGNO (inner);
497 if (HARD_REGISTER_NUM_P (regno))
498 {
499 iter.skip_subrtxes ();
500 continue;
501 }
e53a16e7 502
a36a1928
RS
503 outer_size = GET_MODE_SIZE (GET_MODE (x));
504 inner_size = GET_MODE_SIZE (GET_MODE (inner));
505 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
506 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
e53a16e7 507
a36a1928
RS
508 /* We only try to decompose single word subregs of multi-word
509 registers. When we find one, we return -1 to avoid iterating
510 over the inner register.
e53a16e7 511
a36a1928
RS
512 ??? This doesn't allow, e.g., DImode subregs of TImode values
513 on 32-bit targets. We would need to record the way the
514 pseudo-register was used, and only decompose if all the uses
515 were the same number and size of pieces. Hopefully this
516 doesn't happen much. */
e53a16e7 517
a36a1928
RS
518 if (outer_words == 1 && inner_words > 1)
519 {
520 bitmap_set_bit (decomposable_context, regno);
521 iter.skip_subrtxes ();
522 continue;
523 }
03743286 524
a36a1928
RS
525 /* If this is a cast from one mode to another, where the modes
526 have the same size, and they are not tieable, then mark this
527 register as non-decomposable. If we decompose it we are
528 likely to mess up whatever the backend is trying to do. */
529 if (outer_words > 1
530 && outer_size == inner_size
531 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
532 {
533 bitmap_set_bit (non_decomposable_context, regno);
534 bitmap_set_bit (subreg_context, regno);
535 iter.skip_subrtxes ();
536 continue;
537 }
03743286 538 }
a36a1928 539 else if (REG_P (x))
e53a16e7 540 {
a36a1928
RS
541 unsigned int regno;
542
543 /* We will see an outer SUBREG before we see the inner REG, so
544 when we see a plain REG here it means a direct reference to
545 the register.
546
547 If this is not a simple copy from one location to another,
548 then we can not decompose this register. If this is a simple
549 copy we want to decompose, and the mode is right,
550 then we mark the register as decomposable.
551 Otherwise we don't say anything about this register --
552 it could be decomposed, but whether that would be
553 profitable depends upon how it is used elsewhere.
554
555 We only set bits in the bitmap for multi-word
556 pseudo-registers, since those are the only ones we care about
557 and it keeps the size of the bitmaps down. */
558
559 regno = REGNO (x);
560 if (!HARD_REGISTER_NUM_P (regno)
561 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
e53a16e7 562 {
a36a1928
RS
563 switch (*pcmi)
564 {
565 case NOT_SIMPLE_MOVE:
566 bitmap_set_bit (non_decomposable_context, regno);
567 break;
568 case DECOMPOSABLE_SIMPLE_MOVE:
569 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
570 bitmap_set_bit (decomposable_context, regno);
571 break;
572 case SIMPLE_MOVE:
573 break;
574 default:
575 gcc_unreachable ();
576 }
e53a16e7
ILT
577 }
578 }
a36a1928
RS
579 else if (MEM_P (x))
580 {
581 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
2b54c30f 582
a36a1928
RS
583 /* Any registers used in a MEM do not participate in a
584 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
585 here, and return -1 to block the parent's recursion. */
586 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
587 iter.skip_subrtxes ();
588 }
2b54c30f 589 }
e53a16e7
ILT
590}
591
592/* Decompose REGNO into word-sized components. We smash the REG node
593 in place. This ensures that (1) something goes wrong quickly if we
594 fail to make some replacement, and (2) the debug information inside
595 the symbol table is automatically kept up to date. */
596
597static void
598decompose_register (unsigned int regno)
599{
600 rtx reg;
601 unsigned int words, i;
602 rtvec v;
603
604 reg = regno_reg_rtx[regno];
605
606 regno_reg_rtx[regno] = NULL_RTX;
e53a16e7
ILT
607
608 words = GET_MODE_SIZE (GET_MODE (reg));
609 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
610
611 v = rtvec_alloc (words);
612 for (i = 0; i < words; ++i)
613 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
614
615 PUT_CODE (reg, CONCATN);
616 XVEC (reg, 0) = v;
617
618 if (dump_file)
619 {
620 fprintf (dump_file, "; Splitting reg %u ->", regno);
621 for (i = 0; i < words; ++i)
622 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
623 fputc ('\n', dump_file);
624 }
625}
626
627/* Get a SUBREG of a CONCATN. */
628
629static rtx
ef4bddc2 630simplify_subreg_concatn (machine_mode outermode, rtx op,
e53a16e7
ILT
631 unsigned int byte)
632{
633 unsigned int inner_size;
ef4bddc2 634 machine_mode innermode, partmode;
e53a16e7
ILT
635 rtx part;
636 unsigned int final_offset;
637
638 gcc_assert (GET_CODE (op) == CONCATN);
639 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
640
641 innermode = GET_MODE (op);
642 gcc_assert (byte < GET_MODE_SIZE (innermode));
643 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
644
645 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
646 part = XVECEXP (op, 0, byte / inner_size);
695ae295
UB
647 partmode = GET_MODE (part);
648
822a55a0
UB
649 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
650 regular CONST_VECTORs. They have vector or integer modes, depending
651 on the capabilities of the target. Cope with them. */
652 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
653 partmode = GET_MODE_INNER (innermode);
654 else if (partmode == VOIDmode)
695ae295 655 {
822a55a0
UB
656 enum mode_class mclass = GET_MODE_CLASS (innermode);
657 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
695ae295
UB
658 }
659
e53a16e7
ILT
660 final_offset = byte % inner_size;
661 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
662 return NULL_RTX;
663
695ae295 664 return simplify_gen_subreg (outermode, part, partmode, final_offset);
e53a16e7
ILT
665}
666
667/* Wrapper around simplify_gen_subreg which handles CONCATN. */
668
669static rtx
ef4bddc2
RS
670simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
671 machine_mode innermode, unsigned int byte)
e53a16e7 672{
0e6c5b58
ILT
673 rtx ret;
674
e53a16e7
ILT
675 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
676 If OP is a SUBREG of a CONCATN, then it must be a simple mode
677 change with the same size and offset 0, or it must extract a
678 part. We shouldn't see anything else here. */
679 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
680 {
681 rtx op2;
682
683 if ((GET_MODE_SIZE (GET_MODE (op))
684 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
685 && SUBREG_BYTE (op) == 0)
686 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
687 GET_MODE (SUBREG_REG (op)), byte);
688
689 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
690 SUBREG_BYTE (op));
691 if (op2 == NULL_RTX)
692 {
693 /* We don't handle paradoxical subregs here. */
694 gcc_assert (GET_MODE_SIZE (outermode)
695 <= GET_MODE_SIZE (GET_MODE (op)));
696 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
697 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
698 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
699 byte + SUBREG_BYTE (op));
700 gcc_assert (op2 != NULL_RTX);
701 return op2;
702 }
703
704 op = op2;
705 gcc_assert (op != NULL_RTX);
706 gcc_assert (innermode == GET_MODE (op));
707 }
0e6c5b58 708
e53a16e7
ILT
709 if (GET_CODE (op) == CONCATN)
710 return simplify_subreg_concatn (outermode, op, byte);
0e6c5b58
ILT
711
712 ret = simplify_gen_subreg (outermode, op, innermode, byte);
713
714 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
715 resolve_simple_move will ask for the high part of the paradoxical
716 subreg, which does not have a value. Just return a zero. */
717 if (ret == NULL_RTX
718 && GET_CODE (op) == SUBREG
719 && SUBREG_BYTE (op) == 0
720 && (GET_MODE_SIZE (innermode)
721 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
722 return CONST0_RTX (outermode);
723
724 gcc_assert (ret != NULL_RTX);
725 return ret;
e53a16e7
ILT
726}
727
728/* Return whether we should resolve X into the registers into which it
729 was decomposed. */
730
731static bool
732resolve_reg_p (rtx x)
733{
734 return GET_CODE (x) == CONCATN;
735}
736
737/* Return whether X is a SUBREG of a register which we need to
738 resolve. */
739
740static bool
741resolve_subreg_p (rtx x)
742{
743 if (GET_CODE (x) != SUBREG)
744 return false;
745 return resolve_reg_p (SUBREG_REG (x));
746}
747
cf55cb6a 748/* Look for SUBREGs in *LOC which need to be decomposed. */
e53a16e7 749
cf55cb6a
RS
750static bool
751resolve_subreg_use (rtx *loc, rtx insn)
e53a16e7 752{
cf55cb6a
RS
753 subrtx_ptr_iterator::array_type array;
754 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
e53a16e7 755 {
cf55cb6a
RS
756 rtx *loc = *iter;
757 rtx x = *loc;
758 if (resolve_subreg_p (x))
e53a16e7 759 {
cf55cb6a
RS
760 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
761 SUBREG_BYTE (x));
e53a16e7 762
cf55cb6a
RS
763 /* It is possible for a note to contain a reference which we can
764 decompose. In this case, return 1 to the caller to indicate
765 that the note must be removed. */
766 if (!x)
767 {
768 gcc_assert (!insn);
769 return true;
770 }
e53a16e7 771
cf55cb6a
RS
772 validate_change (insn, loc, x, 1);
773 iter.skip_subrtxes ();
774 }
775 else if (resolve_reg_p (x))
776 /* Return 1 to the caller to indicate that we found a direct
777 reference to a register which is being decomposed. This can
778 happen inside notes, multiword shift or zero-extend
779 instructions. */
780 return true;
e53a16e7
ILT
781 }
782
cf55cb6a 783 return false;
e53a16e7
ILT
784}
785
e53a16e7
ILT
786/* Resolve any decomposed registers which appear in register notes on
787 INSN. */
788
789static void
e967cc2f 790resolve_reg_notes (rtx_insn *insn)
e53a16e7
ILT
791{
792 rtx *pnote, note;
793
794 note = find_reg_equal_equiv_note (insn);
795 if (note)
796 {
6fb5fa3c 797 int old_count = num_validated_changes ();
cf55cb6a 798 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
4a8cae83 799 remove_note (insn, note);
6fb5fa3c
DB
800 else
801 if (old_count != num_validated_changes ())
802 df_notes_rescan (insn);
e53a16e7
ILT
803 }
804
805 pnote = &REG_NOTES (insn);
806 while (*pnote != NULL_RTX)
807 {
60564289 808 bool del = false;
e53a16e7
ILT
809
810 note = *pnote;
811 switch (REG_NOTE_KIND (note))
812 {
6fb5fa3c
DB
813 case REG_DEAD:
814 case REG_UNUSED:
e53a16e7 815 if (resolve_reg_p (XEXP (note, 0)))
60564289 816 del = true;
e53a16e7
ILT
817 break;
818
819 default:
820 break;
821 }
822
60564289 823 if (del)
e53a16e7
ILT
824 *pnote = XEXP (note, 1);
825 else
826 pnote = &XEXP (note, 1);
827 }
828}
829
2b54c30f 830/* Return whether X can be decomposed into subwords. */
e53a16e7
ILT
831
832static bool
2b54c30f 833can_decompose_p (rtx x)
e53a16e7
ILT
834{
835 if (REG_P (x))
836 {
837 unsigned int regno = REGNO (x);
838
839 if (HARD_REGISTER_NUM_P (regno))
488c8379
RS
840 {
841 unsigned int byte, num_bytes;
842
843 num_bytes = GET_MODE_SIZE (GET_MODE (x));
844 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
845 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
846 return false;
847 return true;
848 }
e53a16e7 849 else
402464a0 850 return !bitmap_bit_p (subreg_context, regno);
e53a16e7
ILT
851 }
852
2b54c30f 853 return true;
e53a16e7
ILT
854}
855
856/* Decompose the registers used in a simple move SET within INSN. If
857 we don't change anything, return INSN, otherwise return the start
858 of the sequence of moves. */
859
e967cc2f
DM
860static rtx_insn *
861resolve_simple_move (rtx set, rtx_insn *insn)
e53a16e7 862{
e967cc2f
DM
863 rtx src, dest, real_dest;
864 rtx_insn *insns;
ef4bddc2 865 machine_mode orig_mode, dest_mode;
e53a16e7
ILT
866 unsigned int words;
867 bool pushing;
868
869 src = SET_SRC (set);
870 dest = SET_DEST (set);
871 orig_mode = GET_MODE (dest);
872
873 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
af4ba423 874 gcc_assert (words > 1);
e53a16e7
ILT
875
876 start_sequence ();
877
878 /* We have to handle copying from a SUBREG of a decomposed reg where
879 the SUBREG is larger than word size. Rather than assume that we
880 can take a word_mode SUBREG of the destination, we copy to a new
881 register and then copy that to the destination. */
882
883 real_dest = NULL_RTX;
884
885 if (GET_CODE (src) == SUBREG
886 && resolve_reg_p (SUBREG_REG (src))
887 && (SUBREG_BYTE (src) != 0
888 || (GET_MODE_SIZE (orig_mode)
889 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
890 {
891 real_dest = dest;
892 dest = gen_reg_rtx (orig_mode);
893 if (REG_P (real_dest))
894 REG_ATTRS (dest) = REG_ATTRS (real_dest);
895 }
896
897 /* Similarly if we are copying to a SUBREG of a decomposed reg where
898 the SUBREG is larger than word size. */
899
900 if (GET_CODE (dest) == SUBREG
901 && resolve_reg_p (SUBREG_REG (dest))
902 && (SUBREG_BYTE (dest) != 0
903 || (GET_MODE_SIZE (orig_mode)
904 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
905 {
e967cc2f
DM
906 rtx reg, smove;
907 rtx_insn *minsn;
e53a16e7
ILT
908
909 reg = gen_reg_rtx (orig_mode);
910 minsn = emit_move_insn (reg, src);
911 smove = single_set (minsn);
912 gcc_assert (smove != NULL_RTX);
913 resolve_simple_move (smove, minsn);
914 src = reg;
915 }
916
917 /* If we didn't have any big SUBREGS of decomposed registers, and
918 neither side of the move is a register we are decomposing, then
919 we don't have to do anything here. */
920
921 if (src == SET_SRC (set)
922 && dest == SET_DEST (set)
923 && !resolve_reg_p (src)
924 && !resolve_subreg_p (src)
925 && !resolve_reg_p (dest)
926 && !resolve_subreg_p (dest))
927 {
928 end_sequence ();
929 return insn;
930 }
931
30d18db4
ILT
932 /* It's possible for the code to use a subreg of a decomposed
933 register while forming an address. We need to handle that before
934 passing the address to emit_move_insn. We pass NULL_RTX as the
935 insn parameter to resolve_subreg_use because we can not validate
936 the insn yet. */
937 if (MEM_P (src) || MEM_P (dest))
938 {
939 int acg;
940
941 if (MEM_P (src))
cf55cb6a 942 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
30d18db4 943 if (MEM_P (dest))
cf55cb6a 944 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
30d18db4
ILT
945 acg = apply_change_group ();
946 gcc_assert (acg);
947 }
948
e53a16e7
ILT
949 /* If SRC is a register which we can't decompose, or has side
950 effects, we need to move via a temporary register. */
951
2b54c30f 952 if (!can_decompose_p (src)
e53a16e7
ILT
953 || side_effects_p (src)
954 || GET_CODE (src) == ASM_OPERANDS)
955 {
956 rtx reg;
957
958 reg = gen_reg_rtx (orig_mode);
ce5d49a8
ZC
959
960#ifdef AUTO_INC_DEC
961 {
962 rtx move = emit_move_insn (reg, src);
963 if (MEM_P (src))
964 {
965 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
966 if (note)
967 add_reg_note (move, REG_INC, XEXP (note, 0));
968 }
969 }
970#else
e53a16e7 971 emit_move_insn (reg, src);
ce5d49a8 972#endif
e53a16e7
ILT
973 src = reg;
974 }
975
976 /* If DEST is a register which we can't decompose, or has side
977 effects, we need to first move to a temporary register. We
978 handle the common case of pushing an operand directly. We also
979 go through a temporary register if it holds a floating point
980 value. This gives us better code on systems which can't move
981 data easily between integer and floating point registers. */
982
983 dest_mode = orig_mode;
984 pushing = push_operand (dest, dest_mode);
2b54c30f 985 if (!can_decompose_p (dest)
e53a16e7
ILT
986 || (side_effects_p (dest) && !pushing)
987 || (!SCALAR_INT_MODE_P (dest_mode)
988 && !resolve_reg_p (dest)
989 && !resolve_subreg_p (dest)))
990 {
991 if (real_dest == NULL_RTX)
992 real_dest = dest;
993 if (!SCALAR_INT_MODE_P (dest_mode))
994 {
995 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
996 MODE_INT, 0);
997 gcc_assert (dest_mode != BLKmode);
998 }
999 dest = gen_reg_rtx (dest_mode);
1000 if (REG_P (real_dest))
1001 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1002 }
1003
1004 if (pushing)
1005 {
1006 unsigned int i, j, jinc;
1007
1008 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1009 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1010 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1011
1012 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1013 {
1014 j = 0;
1015 jinc = 1;
1016 }
1017 else
1018 {
1019 j = words - 1;
1020 jinc = -1;
1021 }
1022
1023 for (i = 0; i < words; ++i, j += jinc)
1024 {
1025 rtx temp;
1026
1027 temp = copy_rtx (XEXP (dest, 0));
1028 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1029 j * UNITS_PER_WORD);
1030 emit_move_insn (temp,
1031 simplify_gen_subreg_concatn (word_mode, src,
1032 orig_mode,
1033 j * UNITS_PER_WORD));
1034 }
1035 }
1036 else
1037 {
1038 unsigned int i;
1039
1040 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
c41c1387 1041 emit_clobber (dest);
e53a16e7
ILT
1042
1043 for (i = 0; i < words; ++i)
1044 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1045 dest_mode,
1046 i * UNITS_PER_WORD),
1047 simplify_gen_subreg_concatn (word_mode, src,
1048 orig_mode,
1049 i * UNITS_PER_WORD));
1050 }
1051
1052 if (real_dest != NULL_RTX)
1053 {
e967cc2f
DM
1054 rtx mdest, smove;
1055 rtx_insn *minsn;
e53a16e7
ILT
1056
1057 if (dest_mode == orig_mode)
1058 mdest = dest;
1059 else
1060 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1061 minsn = emit_move_insn (real_dest, mdest);
1062
ce5d49a8
ZC
1063#ifdef AUTO_INC_DEC
1064 if (MEM_P (real_dest)
1065 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1066 {
1067 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1068 if (note)
1069 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1070 }
1071#endif
1072
e53a16e7
ILT
1073 smove = single_set (minsn);
1074 gcc_assert (smove != NULL_RTX);
1075
1076 resolve_simple_move (smove, minsn);
1077 }
1078
1079 insns = get_insns ();
1080 end_sequence ();
1081
1d65f45c 1082 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
73663bb7 1083
e53a16e7
ILT
1084 emit_insn_before (insns, insn);
1085
82981227 1086 /* If we get here via self-recursion, then INSN is not yet in the insns
6873ecab
SB
1087 chain and delete_insn will fail. We only want to remove INSN from the
1088 current sequence. See PR56738. */
1089 if (in_sequence_p ())
1090 remove_insn (insn);
1091 else
1092 delete_insn (insn);
e53a16e7
ILT
1093
1094 return insns;
1095}
1096
1097/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1098 component registers. Return whether we changed something. */
1099
1100static bool
e967cc2f 1101resolve_clobber (rtx pat, rtx_insn *insn)
e53a16e7 1102{
d70dcf29 1103 rtx reg;
ef4bddc2 1104 machine_mode orig_mode;
e53a16e7 1105 unsigned int words, i;
7e0c3f57 1106 int ret;
e53a16e7
ILT
1107
1108 reg = XEXP (pat, 0);
9a5a8e58 1109 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
e53a16e7
ILT
1110 return false;
1111
1112 orig_mode = GET_MODE (reg);
1113 words = GET_MODE_SIZE (orig_mode);
1114 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1115
7e0c3f57
ILT
1116 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1117 simplify_gen_subreg_concatn (word_mode, reg,
1118 orig_mode, 0),
1119 0);
6fb5fa3c 1120 df_insn_rescan (insn);
7e0c3f57
ILT
1121 gcc_assert (ret != 0);
1122
e53a16e7
ILT
1123 for (i = words - 1; i > 0; --i)
1124 {
1125 rtx x;
1126
9a5a8e58
ILT
1127 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1128 i * UNITS_PER_WORD);
e53a16e7
ILT
1129 x = gen_rtx_CLOBBER (VOIDmode, x);
1130 emit_insn_after (x, insn);
1131 }
1132
d4fd3465
ILT
1133 resolve_reg_notes (insn);
1134
e53a16e7
ILT
1135 return true;
1136}
1137
1138/* A USE of a decomposed register is no longer meaningful. Return
1139 whether we changed something. */
1140
1141static bool
e967cc2f 1142resolve_use (rtx pat, rtx_insn *insn)
e53a16e7
ILT
1143{
1144 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1145 {
1146 delete_insn (insn);
1147 return true;
1148 }
d4fd3465
ILT
1149
1150 resolve_reg_notes (insn);
1151
e53a16e7
ILT
1152 return false;
1153}
1154
b5b8b0ac
AO
1155/* A VAR_LOCATION can be simplified. */
1156
1157static void
e967cc2f 1158resolve_debug (rtx_insn *insn)
b5b8b0ac 1159{
f2d3f347
RS
1160 subrtx_ptr_iterator::array_type array;
1161 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1162 {
1163 rtx *loc = *iter;
1164 rtx x = *loc;
1165 if (resolve_subreg_p (x))
1166 {
1167 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1168 SUBREG_BYTE (x));
1169
1170 if (x)
1171 *loc = x;
1172 else
1173 x = copy_rtx (*loc);
1174 }
1175 if (resolve_reg_p (x))
1176 *loc = copy_rtx (x);
1177 }
b5b8b0ac
AO
1178
1179 df_insn_rescan (insn);
1180
1181 resolve_reg_notes (insn);
1182}
1183
af4ba423
KZ
1184/* Check if INSN is a decomposable multiword-shift or zero-extend and
1185 set the decomposable_context bitmap accordingly. SPEED_P is true
1186 if we are optimizing INSN for speed rather than size. Return true
1187 if INSN is decomposable. */
e0892570 1188
af4ba423 1189static bool
e967cc2f 1190find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
e0892570
AK
1191{
1192 rtx set;
1193 rtx op;
1194 rtx op_operand;
1195
1196 set = single_set (insn);
1197 if (!set)
af4ba423 1198 return false;
e0892570
AK
1199
1200 op = SET_SRC (set);
1201 if (GET_CODE (op) != ASHIFT
1202 && GET_CODE (op) != LSHIFTRT
d7fde18c 1203 && GET_CODE (op) != ASHIFTRT
e0892570 1204 && GET_CODE (op) != ZERO_EXTEND)
af4ba423 1205 return false;
e0892570
AK
1206
1207 op_operand = XEXP (op, 0);
1208 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1209 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1210 || HARD_REGISTER_NUM_P (REGNO (op_operand))
af4ba423
KZ
1211 || GET_MODE (op) != twice_word_mode)
1212 return false;
e0892570
AK
1213
1214 if (GET_CODE (op) == ZERO_EXTEND)
1215 {
1216 if (GET_MODE (op_operand) != word_mode
af4ba423
KZ
1217 || !choices[speed_p].splitting_zext)
1218 return false;
e0892570
AK
1219 }
1220 else /* left or right shift */
1221 {
af4ba423
KZ
1222 bool *splitting = (GET_CODE (op) == ASHIFT
1223 ? choices[speed_p].splitting_ashift
d7fde18c
JJ
1224 : GET_CODE (op) == ASHIFTRT
1225 ? choices[speed_p].splitting_ashiftrt
af4ba423 1226 : choices[speed_p].splitting_lshiftrt);
481683e1 1227 if (!CONST_INT_P (XEXP (op, 1))
af4ba423
KZ
1228 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1229 2 * BITS_PER_WORD - 1)
1230 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1231 return false;
1232
1233 bitmap_set_bit (decomposable_context, REGNO (op_operand));
e0892570
AK
1234 }
1235
1236 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1237
af4ba423 1238 return true;
e0892570
AK
1239}
1240
1241/* Decompose a more than word wide shift (in INSN) of a multiword
1242 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1243 and 'set to zero' insn. Return a pointer to the new insn when a
1244 replacement was done. */
1245
e967cc2f
DM
1246static rtx_insn *
1247resolve_shift_zext (rtx_insn *insn)
e0892570
AK
1248{
1249 rtx set;
1250 rtx op;
1251 rtx op_operand;
e967cc2f 1252 rtx_insn *insns;
d7fde18c 1253 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
e0892570
AK
1254 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1255
1256 set = single_set (insn);
1257 if (!set)
e967cc2f 1258 return NULL;
e0892570
AK
1259
1260 op = SET_SRC (set);
1261 if (GET_CODE (op) != ASHIFT
1262 && GET_CODE (op) != LSHIFTRT
d7fde18c 1263 && GET_CODE (op) != ASHIFTRT
e0892570 1264 && GET_CODE (op) != ZERO_EXTEND)
e967cc2f 1265 return NULL;
e0892570
AK
1266
1267 op_operand = XEXP (op, 0);
1268
af4ba423
KZ
1269 /* We can tear this operation apart only if the regs were already
1270 torn apart. */
e0892570 1271 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
e967cc2f 1272 return NULL;
e0892570
AK
1273
1274 /* src_reg_num is the number of the word mode register which we
1275 are operating on. For a left shift and a zero_extend on little
1276 endian machines this is register 0. */
d7fde18c
JJ
1277 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1278 ? 1 : 0;
e0892570 1279
acbe5496
AK
1280 if (WORDS_BIG_ENDIAN
1281 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
e0892570
AK
1282 src_reg_num = 1 - src_reg_num;
1283
1284 if (GET_CODE (op) == ZERO_EXTEND)
acbe5496 1285 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
e0892570
AK
1286 else
1287 dest_reg_num = 1 - src_reg_num;
1288
1289 offset1 = UNITS_PER_WORD * dest_reg_num;
1290 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1291 src_offset = UNITS_PER_WORD * src_reg_num;
1292
e0892570
AK
1293 start_sequence ();
1294
1295 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1296 GET_MODE (SET_DEST (set)),
1297 offset1);
d7fde18c
JJ
1298 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1299 GET_MODE (SET_DEST (set)),
1300 offset2);
e0892570
AK
1301 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1302 GET_MODE (op_operand),
1303 src_offset);
d7fde18c
JJ
1304 if (GET_CODE (op) == ASHIFTRT
1305 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1306 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1307 BITS_PER_WORD - 1, NULL_RTX, 0);
1308
e0892570
AK
1309 if (GET_CODE (op) != ZERO_EXTEND)
1310 {
1311 int shift_count = INTVAL (XEXP (op, 1));
1312 if (shift_count > BITS_PER_WORD)
1313 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1314 LSHIFT_EXPR : RSHIFT_EXPR,
1315 word_mode, src_reg,
eb6c3df1 1316 shift_count - BITS_PER_WORD,
d7fde18c 1317 dest_reg, GET_CODE (op) != ASHIFTRT);
e0892570
AK
1318 }
1319
1320 if (dest_reg != src_reg)
1321 emit_move_insn (dest_reg, src_reg);
d7fde18c
JJ
1322 if (GET_CODE (op) != ASHIFTRT)
1323 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1324 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1325 emit_move_insn (dest_upper, copy_rtx (src_reg));
1326 else
1327 emit_move_insn (dest_upper, upper_src);
e0892570
AK
1328 insns = get_insns ();
1329
1330 end_sequence ();
1331
1332 emit_insn_before (insns, insn);
1333
1334 if (dump_file)
1335 {
e967cc2f 1336 rtx_insn *in;
e0892570
AK
1337 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1338 for (in = insns; in != insn; in = NEXT_INSN (in))
1339 fprintf (dump_file, "%d ", INSN_UID (in));
1340 fprintf (dump_file, "\n");
1341 }
1342
1343 delete_insn (insn);
1344 return insns;
1345}
1346
af4ba423
KZ
1347/* Print to dump_file a description of what we're doing with shift code CODE.
1348 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1349
1350static void
1351dump_shift_choices (enum rtx_code code, bool *splitting)
1352{
1353 int i;
1354 const char *sep;
1355
1356 fprintf (dump_file,
1357 " Splitting mode %s for %s lowering with shift amounts = ",
1358 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1359 sep = "";
1360 for (i = 0; i < BITS_PER_WORD; i++)
1361 if (splitting[i])
1362 {
1363 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1364 sep = ",";
1365 }
1366 fprintf (dump_file, "\n");
1367}
1368
1369/* Print to dump_file a description of what we're doing when optimizing
1370 for speed or size; SPEED_P says which. DESCRIPTION is a description
1371 of the SPEED_P choice. */
1372
1373static void
1374dump_choices (bool speed_p, const char *description)
1375{
1376 unsigned int i;
1377
1378 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1379
1380 for (i = 0; i < MAX_MACHINE_MODE; i++)
ef4bddc2 1381 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
af4ba423
KZ
1382 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1383 choices[speed_p].move_modes_to_split[i]
1384 ? "Splitting"
1385 : "Skipping",
ef4bddc2 1386 GET_MODE_NAME ((machine_mode) i));
af4ba423
KZ
1387
1388 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1389 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1390 GET_MODE_NAME (twice_word_mode));
1391
1392 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
d7fde18c
JJ
1393 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1394 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
af4ba423
KZ
1395 fprintf (dump_file, "\n");
1396}
1397
e53a16e7 1398/* Look for registers which are always accessed via word-sized SUBREGs
c2c47e8f
UW
1399 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1400 registers into several word-sized pseudo-registers. */
e53a16e7
ILT
1401
1402static void
c2c47e8f 1403decompose_multiword_subregs (bool decompose_copies)
e53a16e7
ILT
1404{
1405 unsigned int max;
1406 basic_block bb;
af4ba423 1407 bool speed_p;
e53a16e7 1408
af4ba423
KZ
1409 if (dump_file)
1410 {
1411 dump_choices (false, "size");
1412 dump_choices (true, "speed");
1413 }
1414
1415 /* Check if this target even has any modes to consider lowering. */
1416 if (!choices[false].something_to_do && !choices[true].something_to_do)
1417 {
1418 if (dump_file)
1419 fprintf (dump_file, "Nothing to do!\n");
1420 return;
1421 }
6fb5fa3c 1422
e53a16e7
ILT
1423 max = max_reg_num ();
1424
1425 /* First see if there are any multi-word pseudo-registers. If there
1426 aren't, there is nothing we can do. This should speed up this
1427 pass in the normal case, since it should be faster than scanning
1428 all the insns. */
1429 {
1430 unsigned int i;
af4ba423 1431 bool useful_modes_seen = false;
e53a16e7
ILT
1432
1433 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
af4ba423
KZ
1434 if (regno_reg_rtx[i] != NULL)
1435 {
ef4bddc2 1436 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
af4ba423
KZ
1437 if (choices[false].move_modes_to_split[(int) mode]
1438 || choices[true].move_modes_to_split[(int) mode])
1439 {
1440 useful_modes_seen = true;
1441 break;
1442 }
1443 }
1444
1445 if (!useful_modes_seen)
e53a16e7 1446 {
af4ba423
KZ
1447 if (dump_file)
1448 fprintf (dump_file, "Nothing to lower in this function.\n");
1449 return;
e53a16e7 1450 }
e53a16e7
ILT
1451 }
1452
8d074192 1453 if (df)
af4ba423
KZ
1454 {
1455 df_set_flags (DF_DEFER_INSN_RESCAN);
1456 run_word_dce ();
1457 }
8d074192 1458
af4ba423
KZ
1459 /* FIXME: It may be possible to change this code to look for each
1460 multi-word pseudo-register and to find each insn which sets or
1461 uses that register. That should be faster than scanning all the
1462 insns. */
e53a16e7
ILT
1463
1464 decomposable_context = BITMAP_ALLOC (NULL);
1465 non_decomposable_context = BITMAP_ALLOC (NULL);
402464a0 1466 subreg_context = BITMAP_ALLOC (NULL);
e53a16e7 1467
9771b263
DN
1468 reg_copy_graph.create (max);
1469 reg_copy_graph.safe_grow_cleared (max);
1470 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
e53a16e7 1471
af4ba423 1472 speed_p = optimize_function_for_speed_p (cfun);
11cd3bed 1473 FOR_EACH_BB_FN (bb, cfun)
e53a16e7 1474 {
e967cc2f 1475 rtx_insn *insn;
e53a16e7
ILT
1476
1477 FOR_BB_INSNS (bb, insn)
1478 {
1479 rtx set;
1480 enum classify_move_insn cmi;
1481 int i, n;
1482
1483 if (!INSN_P (insn)
1484 || GET_CODE (PATTERN (insn)) == CLOBBER
1485 || GET_CODE (PATTERN (insn)) == USE)
1486 continue;
1487
d5785e76
JJ
1488 recog_memoized (insn);
1489
af4ba423 1490 if (find_decomposable_shift_zext (insn, speed_p))
e0892570
AK
1491 continue;
1492
e53a16e7
ILT
1493 extract_insn (insn);
1494
af4ba423 1495 set = simple_move (insn, speed_p);
e53a16e7
ILT
1496
1497 if (!set)
1498 cmi = NOT_SIMPLE_MOVE;
1499 else
1500 {
c2c47e8f
UW
1501 /* We mark pseudo-to-pseudo copies as decomposable during the
1502 second pass only. The first pass is so early that there is
1503 good chance such moves will be optimized away completely by
1504 subsequent optimizations anyway.
1505
1506 However, we call find_pseudo_copy even during the first pass
1507 so as to properly set up the reg_copy_graph. */
4a8cae83 1508 if (find_pseudo_copy (set))
c2c47e8f 1509 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
e53a16e7
ILT
1510 else
1511 cmi = SIMPLE_MOVE;
1512 }
1513
1514 n = recog_data.n_operands;
1515 for (i = 0; i < n; ++i)
1516 {
a36a1928 1517 find_decomposable_subregs (&recog_data.operand[i], &cmi);
e53a16e7
ILT
1518
1519 /* We handle ASM_OPERANDS as a special case to support
1520 things like x86 rdtsc which returns a DImode value.
1521 We can decompose the output, which will certainly be
1522 operand 0, but not the inputs. */
1523
1524 if (cmi == SIMPLE_MOVE
1525 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1526 {
1527 gcc_assert (i == 0);
1528 cmi = NOT_SIMPLE_MOVE;
1529 }
1530 }
1531 }
1532 }
1533
1534 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1535 if (!bitmap_empty_p (decomposable_context))
1536 {
73663bb7 1537 sbitmap sub_blocks;
7984c787
SB
1538 unsigned int i;
1539 sbitmap_iterator sbi;
e53a16e7
ILT
1540 bitmap_iterator iter;
1541 unsigned int regno;
1542
1543 propagate_pseudo_copies ();
1544
8b1c6fd7 1545 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
f61e445a 1546 bitmap_clear (sub_blocks);
e53a16e7
ILT
1547
1548 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1549 decompose_register (regno);
1550
11cd3bed 1551 FOR_EACH_BB_FN (bb, cfun)
e53a16e7 1552 {
e967cc2f 1553 rtx_insn *insn;
e53a16e7 1554
ba4807a0 1555 FOR_BB_INSNS (bb, insn)
e53a16e7 1556 {
11895e28 1557 rtx pat;
e53a16e7
ILT
1558
1559 if (!INSN_P (insn))
1560 continue;
1561
e53a16e7
ILT
1562 pat = PATTERN (insn);
1563 if (GET_CODE (pat) == CLOBBER)
d4fd3465 1564 resolve_clobber (pat, insn);
e53a16e7 1565 else if (GET_CODE (pat) == USE)
d4fd3465 1566 resolve_use (pat, insn);
b5b8b0ac
AO
1567 else if (DEBUG_INSN_P (insn))
1568 resolve_debug (insn);
e53a16e7
ILT
1569 else
1570 {
1571 rtx set;
1572 int i;
1573
1574 recog_memoized (insn);
1575 extract_insn (insn);
1576
af4ba423 1577 set = simple_move (insn, speed_p);
e53a16e7
ILT
1578 if (set)
1579 {
e967cc2f 1580 rtx_insn *orig_insn = insn;
73663bb7 1581 bool cfi = control_flow_insn_p (insn);
e53a16e7 1582
7984c787
SB
1583 /* We can end up splitting loads to multi-word pseudos
1584 into separate loads to machine word size pseudos.
1585 When this happens, we first had one load that can
1586 throw, and after resolve_simple_move we'll have a
1587 bunch of loads (at least two). All those loads may
1588 trap if we can have non-call exceptions, so they
1589 all will end the current basic block. We split the
1590 block after the outer loop over all insns, but we
1591 make sure here that we will be able to split the
1592 basic block and still produce the correct control
1593 flow graph for it. */
1594 gcc_assert (!cfi
8f4f502f 1595 || (cfun->can_throw_non_call_exceptions
7984c787
SB
1596 && can_throw_internal (insn)));
1597
e53a16e7
ILT
1598 insn = resolve_simple_move (set, insn);
1599 if (insn != orig_insn)
1600 {
e53a16e7
ILT
1601 recog_memoized (insn);
1602 extract_insn (insn);
73663bb7
ILT
1603
1604 if (cfi)
d7c028c0 1605 bitmap_set_bit (sub_blocks, bb->index);
e53a16e7
ILT
1606 }
1607 }
e0892570
AK
1608 else
1609 {
e967cc2f 1610 rtx_insn *decomposed_shift;
e0892570
AK
1611
1612 decomposed_shift = resolve_shift_zext (insn);
1613 if (decomposed_shift != NULL_RTX)
1614 {
e0892570
AK
1615 insn = decomposed_shift;
1616 recog_memoized (insn);
1617 extract_insn (insn);
1618 }
1619 }
e53a16e7
ILT
1620
1621 for (i = recog_data.n_operands - 1; i >= 0; --i)
cf55cb6a 1622 resolve_subreg_use (recog_data.operand_loc[i], insn);
e53a16e7
ILT
1623
1624 resolve_reg_notes (insn);
1625
1626 if (num_validated_changes () > 0)
1627 {
1628 for (i = recog_data.n_dups - 1; i >= 0; --i)
1629 {
1630 rtx *pl = recog_data.dup_loc[i];
1631 int dup_num = recog_data.dup_num[i];
1632 rtx *px = recog_data.operand_loc[dup_num];
1633
1a309dfb 1634 validate_unshare_change (insn, pl, *px, 1);
e53a16e7
ILT
1635 }
1636
1637 i = apply_change_group ();
1638 gcc_assert (i);
e53a16e7
ILT
1639 }
1640 }
e53a16e7
ILT
1641 }
1642 }
1643
7984c787
SB
1644 /* If we had insns to split that caused control flow insns in the middle
1645 of a basic block, split those blocks now. Note that we only handle
1646 the case where splitting a load has caused multiple possibly trapping
1647 loads to appear. */
d4ac4ce2 1648 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
7984c787 1649 {
e967cc2f 1650 rtx_insn *insn, *end;
7984c787
SB
1651 edge fallthru;
1652
06e28de2 1653 bb = BASIC_BLOCK_FOR_FN (cfun, i);
7984c787
SB
1654 insn = BB_HEAD (bb);
1655 end = BB_END (bb);
1656
1657 while (insn != end)
1658 {
1659 if (control_flow_insn_p (insn))
1660 {
1661 /* Split the block after insn. There will be a fallthru
1662 edge, which is OK so we keep it. We have to create the
1663 exception edges ourselves. */
1664 fallthru = split_block (bb, insn);
1665 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1666 bb = fallthru->dest;
1667 insn = BB_HEAD (bb);
1668 }
1669 else
1670 insn = NEXT_INSN (insn);
1671 }
1672 }
73663bb7 1673
73663bb7 1674 sbitmap_free (sub_blocks);
e53a16e7
ILT
1675 }
1676
1677 {
1678 unsigned int i;
1679 bitmap b;
1680
9771b263 1681 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
e53a16e7
ILT
1682 if (b)
1683 BITMAP_FREE (b);
1684 }
1685
9771b263 1686 reg_copy_graph.release ();
e53a16e7
ILT
1687
1688 BITMAP_FREE (decomposable_context);
1689 BITMAP_FREE (non_decomposable_context);
402464a0 1690 BITMAP_FREE (subreg_context);
e53a16e7
ILT
1691}
1692\f
e53a16e7
ILT
1693/* Implement first lower subreg pass. */
1694
27a4cd48
DM
1695namespace {
1696
1697const pass_data pass_data_lower_subreg =
e53a16e7 1698{
27a4cd48
DM
1699 RTL_PASS, /* type */
1700 "subreg1", /* name */
1701 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1702 TV_LOWER_SUBREG, /* tv_id */
1703 0, /* properties_required */
1704 0, /* properties_provided */
1705 0, /* properties_destroyed */
1706 0, /* todo_flags_start */
3bea341f 1707 0, /* todo_flags_finish */
e53a16e7
ILT
1708};
1709
27a4cd48
DM
1710class pass_lower_subreg : public rtl_opt_pass
1711{
1712public:
c3284718
RS
1713 pass_lower_subreg (gcc::context *ctxt)
1714 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
27a4cd48
DM
1715 {}
1716
1717 /* opt_pass methods: */
1a3d085c 1718 virtual bool gate (function *) { return flag_split_wide_types != 0; }
be55bfe6
TS
1719 virtual unsigned int execute (function *)
1720 {
1721 decompose_multiword_subregs (false);
1722 return 0;
1723 }
27a4cd48
DM
1724
1725}; // class pass_lower_subreg
1726
1727} // anon namespace
1728
1729rtl_opt_pass *
1730make_pass_lower_subreg (gcc::context *ctxt)
1731{
1732 return new pass_lower_subreg (ctxt);
1733}
1734
be55bfe6
TS
1735/* Implement second lower subreg pass. */
1736
27a4cd48
DM
1737namespace {
1738
1739const pass_data pass_data_lower_subreg2 =
e53a16e7 1740{
27a4cd48
DM
1741 RTL_PASS, /* type */
1742 "subreg2", /* name */
1743 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1744 TV_LOWER_SUBREG, /* tv_id */
1745 0, /* properties_required */
1746 0, /* properties_provided */
1747 0, /* properties_destroyed */
1748 0, /* todo_flags_start */
3bea341f 1749 TODO_df_finish, /* todo_flags_finish */
e53a16e7 1750};
27a4cd48
DM
1751
1752class pass_lower_subreg2 : public rtl_opt_pass
1753{
1754public:
c3284718
RS
1755 pass_lower_subreg2 (gcc::context *ctxt)
1756 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
27a4cd48
DM
1757 {}
1758
1759 /* opt_pass methods: */
1a3d085c 1760 virtual bool gate (function *) { return flag_split_wide_types != 0; }
be55bfe6
TS
1761 virtual unsigned int execute (function *)
1762 {
1763 decompose_multiword_subregs (true);
1764 return 0;
1765 }
27a4cd48
DM
1766
1767}; // class pass_lower_subreg2
1768
1769} // anon namespace
1770
1771rtl_opt_pass *
1772make_pass_lower_subreg2 (gcc::context *ctxt)
1773{
1774 return new pass_lower_subreg2 (ctxt);
1775}