]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
[PR testsuite/64177] Audit Cilk Plus tests for CILK_NWORKERS=1
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
e53a16e7 1/* Decompose multiword subregs.
818ab71a 2 Copyright (C) 2007-2016 Free Software Foundation, Inc.
e53a16e7
ILT
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
e53a16e7
ILT
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
e53a16e7
ILT
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
e53a16e7 26#include "rtl.h"
957060b5
AM
27#include "tree.h"
28#include "cfghooks.h"
c7131fb2 29#include "df.h"
e53a16e7 30#include "tm_p.h"
957060b5 31#include "expmed.h"
e53a16e7 32#include "insn-config.h"
957060b5
AM
33#include "emit-rtl.h"
34#include "recog.h"
60393bbc
AM
35#include "cfgrtl.h"
36#include "cfgbuild.h"
8d074192 37#include "dce.h"
e53a16e7 38#include "expr.h"
e53a16e7 39#include "tree-pass.h"
af4ba423 40#include "lower-subreg.h"
cf55cb6a 41#include "rtl-iter.h"
e53a16e7 42
e53a16e7
ILT
43
/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when that is both possible and profitable.  It is
   possible when every use of a multi-word register is either via
   SUBREG or is a copy of the register to another location.  Breaking
   the register apart permits more CSE and better register allocation.
   It is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode, and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do so, but the trend in
   architectures is to not need this.

   Two preprocessor defines are provided for maintainers:

     #define LOG_COSTS 1

   prints the actual cost estimates that are being used for each mode
   wider than word mode, together with the cost estimates for zero
   extension and the shifts.  This can be useful when port maintainers
   are tuning insn rtx costs.

     #define FORCE_LOWERING 1

   tests the pass with all the transformations forced on.  This can
   be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
e53a16e7
ILT
74
75/* Bit N in this bitmap is set if regno N is used in a context in
76 which we can decompose it. */
77static bitmap decomposable_context;
78
79/* Bit N in this bitmap is set if regno N is used in a context in
80 which it can not be decomposed. */
81static bitmap non_decomposable_context;
82
402464a0
BS
83/* Bit N in this bitmap is set if regno N is used in a subreg
84 which changes the mode but not the size. This typically happens
85 when the register accessed as a floating-point value; we want to
86 avoid generating accesses to its subwords in integer modes. */
87static bitmap subreg_context;
88
e53a16e7
ILT
89/* Bit N in the bitmap in element M of this array is set if there is a
90 copy from reg M to reg N. */
9771b263 91static vec<bitmap> reg_copy_graph;
e53a16e7 92
af4ba423
KZ
93struct target_lower_subreg default_target_lower_subreg;
94#if SWITCHABLE_TARGET
95struct target_lower_subreg *this_target_lower_subreg
96 = &default_target_lower_subreg;
97#endif
98
99#define twice_word_mode \
100 this_target_lower_subreg->x_twice_word_mode
101#define choices \
102 this_target_lower_subreg->x_choices
103
104/* RTXes used while computing costs. */
105struct cost_rtxes {
106 /* Source and target registers. */
107 rtx source;
108 rtx target;
109
110 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
111 rtx zext;
112
113 /* A shift of SOURCE. */
114 rtx shift;
115
116 /* A SET of TARGET. */
117 rtx set;
118};
119
120/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
121 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
122
123static int
124shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
ef4bddc2 125 machine_mode mode, int op1)
af4ba423 126{
af4ba423
KZ
127 PUT_CODE (rtxes->shift, code);
128 PUT_MODE (rtxes->shift, mode);
129 PUT_MODE (rtxes->source, mode);
130 XEXP (rtxes->shift, 1) = GEN_INT (op1);
e548c9df 131 return set_src_cost (rtxes->shift, mode, speed_p);
af4ba423
KZ
132}
133
134/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
135 to true if it is profitable to split a double-word CODE shift
136 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
137 for speed or size profitability.
138
139 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
140 the cost of moving zero into a word-mode register. WORD_MOVE_COST
141 is the cost of moving between word registers. */
142
143static void
144compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
145 bool *splitting, enum rtx_code code,
146 int word_move_zero_cost, int word_move_cost)
147{
d7fde18c 148 int wide_cost, narrow_cost, upper_cost, i;
af4ba423
KZ
149
150 for (i = 0; i < BITS_PER_WORD; i++)
151 {
152 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
153 i + BITS_PER_WORD);
154 if (i == 0)
155 narrow_cost = word_move_cost;
156 else
157 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
158
d7fde18c
JJ
159 if (code != ASHIFTRT)
160 upper_cost = word_move_zero_cost;
161 else if (i == BITS_PER_WORD - 1)
162 upper_cost = word_move_cost;
163 else
164 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
165 BITS_PER_WORD - 1);
166
af4ba423
KZ
167 if (LOG_COSTS)
168 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
169 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
d7fde18c 170 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
af4ba423 171
d7fde18c 172 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
af4ba423
KZ
173 splitting[i] = true;
174 }
175}
176
177/* Compute what we should do when optimizing for speed or size; SPEED_P
178 selects which. Use RTXES for computing costs. */
179
180static void
181compute_costs (bool speed_p, struct cost_rtxes *rtxes)
182{
183 unsigned int i;
184 int word_move_zero_cost, word_move_cost;
185
69523a7c 186 PUT_MODE (rtxes->target, word_mode);
af4ba423 187 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
69523a7c 188 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
189
190 SET_SRC (rtxes->set) = rtxes->source;
69523a7c 191 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
192
193 if (LOG_COSTS)
194 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
195 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
196
197 for (i = 0; i < MAX_MACHINE_MODE; i++)
198 {
ef4bddc2 199 machine_mode mode = (machine_mode) i;
af4ba423
KZ
200 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
201 if (factor > 1)
202 {
203 int mode_move_cost;
204
205 PUT_MODE (rtxes->target, mode);
206 PUT_MODE (rtxes->source, mode);
69523a7c 207 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
af4ba423
KZ
208
209 if (LOG_COSTS)
210 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
211 GET_MODE_NAME (mode), mode_move_cost,
212 word_move_cost, factor);
213
214 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
215 {
216 choices[speed_p].move_modes_to_split[i] = true;
217 choices[speed_p].something_to_do = true;
218 }
219 }
220 }
221
222 /* For the moves and shifts, the only case that is checked is one
223 where the mode of the target is an integer mode twice the width
224 of the word_mode.
225
226 If it is not profitable to split a double word move then do not
227 even consider the shifts or the zero extension. */
228 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
229 {
230 int zext_cost;
231
232 /* The only case here to check to see if moving the upper part with a
233 zero is cheaper than doing the zext itself. */
af4ba423 234 PUT_MODE (rtxes->source, word_mode);
e548c9df 235 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
af4ba423
KZ
236
237 if (LOG_COSTS)
238 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
239 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
240 zext_cost, word_move_cost, word_move_zero_cost);
241
242 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
243 choices[speed_p].splitting_zext = true;
244
245 compute_splitting_shift (speed_p, rtxes,
246 choices[speed_p].splitting_ashift, ASHIFT,
247 word_move_zero_cost, word_move_cost);
248 compute_splitting_shift (speed_p, rtxes,
249 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
250 word_move_zero_cost, word_move_cost);
d7fde18c
JJ
251 compute_splitting_shift (speed_p, rtxes,
252 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
253 word_move_zero_cost, word_move_cost);
af4ba423
KZ
254 }
255}
256
257/* Do one-per-target initialisation. This involves determining
258 which operations on the machine are profitable. If none are found,
259 then the pass just returns when called. */
260
261void
262init_lower_subreg (void)
263{
264 struct cost_rtxes rtxes;
265
266 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
267
268 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
269
c3dc5e66
RS
270 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
271 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
f7df4a84 272 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
af4ba423
KZ
273 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
274 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
275
276 if (LOG_COSTS)
277 fprintf (stderr, "\nSize costs\n==========\n\n");
278 compute_costs (false, &rtxes);
279
280 if (LOG_COSTS)
281 fprintf (stderr, "\nSpeed costs\n===========\n\n");
282 compute_costs (true, &rtxes);
283}
2b54c30f
ILT
284
285static bool
286simple_move_operand (rtx x)
287{
288 if (GET_CODE (x) == SUBREG)
289 x = SUBREG_REG (x);
290
291 if (!OBJECT_P (x))
292 return false;
293
294 if (GET_CODE (x) == LABEL_REF
295 || GET_CODE (x) == SYMBOL_REF
7e0c3f57
ILT
296 || GET_CODE (x) == HIGH
297 || GET_CODE (x) == CONST)
2b54c30f
ILT
298 return false;
299
300 if (MEM_P (x)
301 && (MEM_VOLATILE_P (x)
5bfed9a9 302 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
2b54c30f
ILT
303 return false;
304
305 return true;
306}
307
af4ba423
KZ
308/* If INSN is a single set between two objects that we want to split,
309 return the single set. SPEED_P says whether we are optimizing
310 INSN for speed or size.
311
312 INSN should have been passed to recog and extract_insn before this
313 is called. */
e53a16e7
ILT
314
315static rtx
e967cc2f 316simple_move (rtx_insn *insn, bool speed_p)
e53a16e7
ILT
317{
318 rtx x;
319 rtx set;
ef4bddc2 320 machine_mode mode;
e53a16e7
ILT
321
322 if (recog_data.n_operands != 2)
323 return NULL_RTX;
324
325 set = single_set (insn);
326 if (!set)
327 return NULL_RTX;
328
329 x = SET_DEST (set);
330 if (x != recog_data.operand[0] && x != recog_data.operand[1])
331 return NULL_RTX;
2b54c30f 332 if (!simple_move_operand (x))
e53a16e7
ILT
333 return NULL_RTX;
334
335 x = SET_SRC (set);
336 if (x != recog_data.operand[0] && x != recog_data.operand[1])
337 return NULL_RTX;
2b54c30f
ILT
338 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
339 things like x86 rdtsc which returns a DImode value. */
340 if (GET_CODE (x) != ASM_OPERANDS
341 && !simple_move_operand (x))
e53a16e7
ILT
342 return NULL_RTX;
343
344 /* We try to decompose in integer modes, to avoid generating
345 inefficient code copying between integer and floating point
346 registers. That means that we can't decompose if this is a
347 non-integer mode for which there is no integer mode of the same
348 size. */
576fe41a 349 mode = GET_MODE (SET_DEST (set));
e53a16e7
ILT
350 if (!SCALAR_INT_MODE_P (mode)
351 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
352 == BLKmode))
353 return NULL_RTX;
354
1f64a081
ILT
355 /* Reject PARTIAL_INT modes. They are used for processor specific
356 purposes and it's probably best not to tamper with them. */
357 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
358 return NULL_RTX;
359
af4ba423
KZ
360 if (!choices[speed_p].move_modes_to_split[(int) mode])
361 return NULL_RTX;
362
e53a16e7
ILT
363 return set;
364}
365
366/* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
369
370static bool
371find_pseudo_copy (rtx set)
372{
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
377
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
380
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
385
9771b263 386 b = reg_copy_graph[rs];
e53a16e7
ILT
387 if (b == NULL)
388 {
389 b = BITMAP_ALLOC (NULL);
9771b263 390 reg_copy_graph[rs] = b;
e53a16e7
ILT
391 }
392
393 bitmap_set_bit (b, rd);
394
395 return true;
396}
397
398/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
399 where they are copied to another register, add the register to
400 which they are copied to DECOMPOSABLE_CONTEXT. Use
401 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
402 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
403
404static void
405propagate_pseudo_copies (void)
406{
407 bitmap queue, propagate;
408
409 queue = BITMAP_ALLOC (NULL);
410 propagate = BITMAP_ALLOC (NULL);
411
412 bitmap_copy (queue, decomposable_context);
413 do
414 {
415 bitmap_iterator iter;
416 unsigned int i;
417
418 bitmap_clear (propagate);
419
420 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
421 {
9771b263 422 bitmap b = reg_copy_graph[i];
e53a16e7
ILT
423 if (b)
424 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
425 }
426
427 bitmap_and_compl (queue, propagate, decomposable_context);
428 bitmap_ior_into (decomposable_context, propagate);
429 }
430 while (!bitmap_empty_p (queue));
431
432 BITMAP_FREE (queue);
433 BITMAP_FREE (propagate);
434}
435
/* Classification of the insn being scanned.  A pointer to one of
   these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is any other simple move.  */
  SIMPLE_MOVE
};
448
a36a1928
RS
449/* If we find a SUBREG in *LOC which we could use to decompose a
450 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
451 unadorned register which is not a simple pseudo-register copy,
452 DATA will point at the type of move, and we set a bit in
453 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
e53a16e7 454
a36a1928
RS
455static void
456find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
e53a16e7 457{
a36a1928
RS
458 subrtx_var_iterator::array_type array;
459 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
e53a16e7 460 {
a36a1928
RS
461 rtx x = *iter;
462 if (GET_CODE (x) == SUBREG)
463 {
464 rtx inner = SUBREG_REG (x);
465 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
e53a16e7 466
a36a1928
RS
467 if (!REG_P (inner))
468 continue;
e53a16e7 469
a36a1928
RS
470 regno = REGNO (inner);
471 if (HARD_REGISTER_NUM_P (regno))
472 {
473 iter.skip_subrtxes ();
474 continue;
475 }
e53a16e7 476
a36a1928
RS
477 outer_size = GET_MODE_SIZE (GET_MODE (x));
478 inner_size = GET_MODE_SIZE (GET_MODE (inner));
479 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
480 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
e53a16e7 481
a36a1928
RS
482 /* We only try to decompose single word subregs of multi-word
483 registers. When we find one, we return -1 to avoid iterating
484 over the inner register.
e53a16e7 485
a36a1928
RS
486 ??? This doesn't allow, e.g., DImode subregs of TImode values
487 on 32-bit targets. We would need to record the way the
488 pseudo-register was used, and only decompose if all the uses
489 were the same number and size of pieces. Hopefully this
490 doesn't happen much. */
e53a16e7 491
a36a1928
RS
492 if (outer_words == 1 && inner_words > 1)
493 {
494 bitmap_set_bit (decomposable_context, regno);
495 iter.skip_subrtxes ();
496 continue;
497 }
03743286 498
a36a1928
RS
499 /* If this is a cast from one mode to another, where the modes
500 have the same size, and they are not tieable, then mark this
501 register as non-decomposable. If we decompose it we are
502 likely to mess up whatever the backend is trying to do. */
503 if (outer_words > 1
504 && outer_size == inner_size
505 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
506 {
507 bitmap_set_bit (non_decomposable_context, regno);
508 bitmap_set_bit (subreg_context, regno);
509 iter.skip_subrtxes ();
510 continue;
511 }
03743286 512 }
a36a1928 513 else if (REG_P (x))
e53a16e7 514 {
a36a1928
RS
515 unsigned int regno;
516
517 /* We will see an outer SUBREG before we see the inner REG, so
518 when we see a plain REG here it means a direct reference to
519 the register.
520
521 If this is not a simple copy from one location to another,
522 then we can not decompose this register. If this is a simple
523 copy we want to decompose, and the mode is right,
524 then we mark the register as decomposable.
525 Otherwise we don't say anything about this register --
526 it could be decomposed, but whether that would be
527 profitable depends upon how it is used elsewhere.
528
529 We only set bits in the bitmap for multi-word
530 pseudo-registers, since those are the only ones we care about
531 and it keeps the size of the bitmaps down. */
532
533 regno = REGNO (x);
534 if (!HARD_REGISTER_NUM_P (regno)
535 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
e53a16e7 536 {
a36a1928
RS
537 switch (*pcmi)
538 {
539 case NOT_SIMPLE_MOVE:
540 bitmap_set_bit (non_decomposable_context, regno);
541 break;
542 case DECOMPOSABLE_SIMPLE_MOVE:
543 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
544 bitmap_set_bit (decomposable_context, regno);
545 break;
546 case SIMPLE_MOVE:
547 break;
548 default:
549 gcc_unreachable ();
550 }
e53a16e7
ILT
551 }
552 }
a36a1928
RS
553 else if (MEM_P (x))
554 {
555 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
2b54c30f 556
a36a1928
RS
557 /* Any registers used in a MEM do not participate in a
558 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
559 here, and return -1 to block the parent's recursion. */
560 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
561 iter.skip_subrtxes ();
562 }
2b54c30f 563 }
e53a16e7
ILT
564}
565
566/* Decompose REGNO into word-sized components. We smash the REG node
567 in place. This ensures that (1) something goes wrong quickly if we
568 fail to make some replacement, and (2) the debug information inside
569 the symbol table is automatically kept up to date. */
570
571static void
572decompose_register (unsigned int regno)
573{
574 rtx reg;
575 unsigned int words, i;
576 rtvec v;
577
578 reg = regno_reg_rtx[regno];
579
580 regno_reg_rtx[regno] = NULL_RTX;
e53a16e7
ILT
581
582 words = GET_MODE_SIZE (GET_MODE (reg));
583 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
584
585 v = rtvec_alloc (words);
586 for (i = 0; i < words; ++i)
587 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
588
589 PUT_CODE (reg, CONCATN);
590 XVEC (reg, 0) = v;
591
592 if (dump_file)
593 {
594 fprintf (dump_file, "; Splitting reg %u ->", regno);
595 for (i = 0; i < words; ++i)
596 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
597 fputc ('\n', dump_file);
598 }
599}
600
601/* Get a SUBREG of a CONCATN. */
602
603static rtx
ef4bddc2 604simplify_subreg_concatn (machine_mode outermode, rtx op,
e53a16e7
ILT
605 unsigned int byte)
606{
607 unsigned int inner_size;
ef4bddc2 608 machine_mode innermode, partmode;
e53a16e7
ILT
609 rtx part;
610 unsigned int final_offset;
611
612 gcc_assert (GET_CODE (op) == CONCATN);
613 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
614
615 innermode = GET_MODE (op);
616 gcc_assert (byte < GET_MODE_SIZE (innermode));
617 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
618
619 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
620 part = XVECEXP (op, 0, byte / inner_size);
695ae295
UB
621 partmode = GET_MODE (part);
622
822a55a0
UB
623 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
624 regular CONST_VECTORs. They have vector or integer modes, depending
625 on the capabilities of the target. Cope with them. */
626 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
627 partmode = GET_MODE_INNER (innermode);
628 else if (partmode == VOIDmode)
695ae295 629 {
822a55a0
UB
630 enum mode_class mclass = GET_MODE_CLASS (innermode);
631 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
695ae295
UB
632 }
633
e53a16e7
ILT
634 final_offset = byte % inner_size;
635 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
636 return NULL_RTX;
637
695ae295 638 return simplify_gen_subreg (outermode, part, partmode, final_offset);
e53a16e7
ILT
639}
640
641/* Wrapper around simplify_gen_subreg which handles CONCATN. */
642
643static rtx
ef4bddc2
RS
644simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
645 machine_mode innermode, unsigned int byte)
e53a16e7 646{
0e6c5b58
ILT
647 rtx ret;
648
e53a16e7
ILT
649 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
650 If OP is a SUBREG of a CONCATN, then it must be a simple mode
651 change with the same size and offset 0, or it must extract a
652 part. We shouldn't see anything else here. */
653 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
654 {
655 rtx op2;
656
657 if ((GET_MODE_SIZE (GET_MODE (op))
658 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
659 && SUBREG_BYTE (op) == 0)
660 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
661 GET_MODE (SUBREG_REG (op)), byte);
662
663 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
664 SUBREG_BYTE (op));
665 if (op2 == NULL_RTX)
666 {
667 /* We don't handle paradoxical subregs here. */
668 gcc_assert (GET_MODE_SIZE (outermode)
669 <= GET_MODE_SIZE (GET_MODE (op)));
670 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
671 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
672 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
673 byte + SUBREG_BYTE (op));
674 gcc_assert (op2 != NULL_RTX);
675 return op2;
676 }
677
678 op = op2;
679 gcc_assert (op != NULL_RTX);
680 gcc_assert (innermode == GET_MODE (op));
681 }
0e6c5b58 682
e53a16e7
ILT
683 if (GET_CODE (op) == CONCATN)
684 return simplify_subreg_concatn (outermode, op, byte);
0e6c5b58
ILT
685
686 ret = simplify_gen_subreg (outermode, op, innermode, byte);
687
688 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
689 resolve_simple_move will ask for the high part of the paradoxical
690 subreg, which does not have a value. Just return a zero. */
691 if (ret == NULL_RTX
692 && GET_CODE (op) == SUBREG
693 && SUBREG_BYTE (op) == 0
694 && (GET_MODE_SIZE (innermode)
695 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
696 return CONST0_RTX (outermode);
697
698 gcc_assert (ret != NULL_RTX);
699 return ret;
e53a16e7
ILT
700}
701
702/* Return whether we should resolve X into the registers into which it
703 was decomposed. */
704
705static bool
706resolve_reg_p (rtx x)
707{
708 return GET_CODE (x) == CONCATN;
709}
710
711/* Return whether X is a SUBREG of a register which we need to
712 resolve. */
713
714static bool
715resolve_subreg_p (rtx x)
716{
717 if (GET_CODE (x) != SUBREG)
718 return false;
719 return resolve_reg_p (SUBREG_REG (x));
720}
721
cf55cb6a 722/* Look for SUBREGs in *LOC which need to be decomposed. */
e53a16e7 723
cf55cb6a
RS
724static bool
725resolve_subreg_use (rtx *loc, rtx insn)
e53a16e7 726{
cf55cb6a
RS
727 subrtx_ptr_iterator::array_type array;
728 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
e53a16e7 729 {
cf55cb6a
RS
730 rtx *loc = *iter;
731 rtx x = *loc;
732 if (resolve_subreg_p (x))
e53a16e7 733 {
cf55cb6a
RS
734 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
735 SUBREG_BYTE (x));
e53a16e7 736
cf55cb6a
RS
737 /* It is possible for a note to contain a reference which we can
738 decompose. In this case, return 1 to the caller to indicate
739 that the note must be removed. */
740 if (!x)
741 {
742 gcc_assert (!insn);
743 return true;
744 }
e53a16e7 745
cf55cb6a
RS
746 validate_change (insn, loc, x, 1);
747 iter.skip_subrtxes ();
748 }
749 else if (resolve_reg_p (x))
750 /* Return 1 to the caller to indicate that we found a direct
751 reference to a register which is being decomposed. This can
752 happen inside notes, multiword shift or zero-extend
753 instructions. */
754 return true;
e53a16e7
ILT
755 }
756
cf55cb6a 757 return false;
e53a16e7
ILT
758}
759
e53a16e7
ILT
760/* Resolve any decomposed registers which appear in register notes on
761 INSN. */
762
763static void
e967cc2f 764resolve_reg_notes (rtx_insn *insn)
e53a16e7
ILT
765{
766 rtx *pnote, note;
767
768 note = find_reg_equal_equiv_note (insn);
769 if (note)
770 {
6fb5fa3c 771 int old_count = num_validated_changes ();
cf55cb6a 772 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
4a8cae83 773 remove_note (insn, note);
6fb5fa3c
DB
774 else
775 if (old_count != num_validated_changes ())
776 df_notes_rescan (insn);
e53a16e7
ILT
777 }
778
779 pnote = &REG_NOTES (insn);
780 while (*pnote != NULL_RTX)
781 {
60564289 782 bool del = false;
e53a16e7
ILT
783
784 note = *pnote;
785 switch (REG_NOTE_KIND (note))
786 {
6fb5fa3c
DB
787 case REG_DEAD:
788 case REG_UNUSED:
e53a16e7 789 if (resolve_reg_p (XEXP (note, 0)))
60564289 790 del = true;
e53a16e7
ILT
791 break;
792
793 default:
794 break;
795 }
796
60564289 797 if (del)
e53a16e7
ILT
798 *pnote = XEXP (note, 1);
799 else
800 pnote = &XEXP (note, 1);
801 }
802}
803
2b54c30f 804/* Return whether X can be decomposed into subwords. */
e53a16e7
ILT
805
806static bool
2b54c30f 807can_decompose_p (rtx x)
e53a16e7
ILT
808{
809 if (REG_P (x))
810 {
811 unsigned int regno = REGNO (x);
812
813 if (HARD_REGISTER_NUM_P (regno))
488c8379
RS
814 {
815 unsigned int byte, num_bytes;
816
817 num_bytes = GET_MODE_SIZE (GET_MODE (x));
818 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
819 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
820 return false;
821 return true;
822 }
e53a16e7 823 else
402464a0 824 return !bitmap_bit_p (subreg_context, regno);
e53a16e7
ILT
825 }
826
2b54c30f 827 return true;
e53a16e7
ILT
828}
829
830/* Decompose the registers used in a simple move SET within INSN. If
831 we don't change anything, return INSN, otherwise return the start
832 of the sequence of moves. */
833
e967cc2f
DM
834static rtx_insn *
835resolve_simple_move (rtx set, rtx_insn *insn)
e53a16e7 836{
e967cc2f
DM
837 rtx src, dest, real_dest;
838 rtx_insn *insns;
ef4bddc2 839 machine_mode orig_mode, dest_mode;
e53a16e7
ILT
840 unsigned int words;
841 bool pushing;
842
843 src = SET_SRC (set);
844 dest = SET_DEST (set);
845 orig_mode = GET_MODE (dest);
846
847 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
af4ba423 848 gcc_assert (words > 1);
e53a16e7
ILT
849
850 start_sequence ();
851
852 /* We have to handle copying from a SUBREG of a decomposed reg where
853 the SUBREG is larger than word size. Rather than assume that we
854 can take a word_mode SUBREG of the destination, we copy to a new
855 register and then copy that to the destination. */
856
857 real_dest = NULL_RTX;
858
859 if (GET_CODE (src) == SUBREG
860 && resolve_reg_p (SUBREG_REG (src))
861 && (SUBREG_BYTE (src) != 0
862 || (GET_MODE_SIZE (orig_mode)
863 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
864 {
865 real_dest = dest;
866 dest = gen_reg_rtx (orig_mode);
867 if (REG_P (real_dest))
868 REG_ATTRS (dest) = REG_ATTRS (real_dest);
869 }
870
871 /* Similarly if we are copying to a SUBREG of a decomposed reg where
872 the SUBREG is larger than word size. */
873
874 if (GET_CODE (dest) == SUBREG
875 && resolve_reg_p (SUBREG_REG (dest))
876 && (SUBREG_BYTE (dest) != 0
877 || (GET_MODE_SIZE (orig_mode)
878 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
879 {
e967cc2f
DM
880 rtx reg, smove;
881 rtx_insn *minsn;
e53a16e7
ILT
882
883 reg = gen_reg_rtx (orig_mode);
884 minsn = emit_move_insn (reg, src);
885 smove = single_set (minsn);
886 gcc_assert (smove != NULL_RTX);
887 resolve_simple_move (smove, minsn);
888 src = reg;
889 }
890
891 /* If we didn't have any big SUBREGS of decomposed registers, and
892 neither side of the move is a register we are decomposing, then
893 we don't have to do anything here. */
894
895 if (src == SET_SRC (set)
896 && dest == SET_DEST (set)
897 && !resolve_reg_p (src)
898 && !resolve_subreg_p (src)
899 && !resolve_reg_p (dest)
900 && !resolve_subreg_p (dest))
901 {
902 end_sequence ();
903 return insn;
904 }
905
30d18db4
ILT
906 /* It's possible for the code to use a subreg of a decomposed
907 register while forming an address. We need to handle that before
908 passing the address to emit_move_insn. We pass NULL_RTX as the
909 insn parameter to resolve_subreg_use because we can not validate
910 the insn yet. */
911 if (MEM_P (src) || MEM_P (dest))
912 {
913 int acg;
914
915 if (MEM_P (src))
cf55cb6a 916 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
30d18db4 917 if (MEM_P (dest))
cf55cb6a 918 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
30d18db4
ILT
919 acg = apply_change_group ();
920 gcc_assert (acg);
921 }
922
e53a16e7
ILT
923 /* If SRC is a register which we can't decompose, or has side
924 effects, we need to move via a temporary register. */
925
2b54c30f 926 if (!can_decompose_p (src)
e53a16e7
ILT
927 || side_effects_p (src)
928 || GET_CODE (src) == ASM_OPERANDS)
929 {
930 rtx reg;
931
932 reg = gen_reg_rtx (orig_mode);
ce5d49a8 933
760edf20
TS
934 if (AUTO_INC_DEC)
935 {
936 rtx move = emit_move_insn (reg, src);
937 if (MEM_P (src))
938 {
939 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
940 if (note)
941 add_reg_note (move, REG_INC, XEXP (note, 0));
942 }
943 }
944 else
945 emit_move_insn (reg, src);
946
e53a16e7
ILT
947 src = reg;
948 }
949
950 /* If DEST is a register which we can't decompose, or has side
951 effects, we need to first move to a temporary register. We
952 handle the common case of pushing an operand directly. We also
953 go through a temporary register if it holds a floating point
954 value. This gives us better code on systems which can't move
955 data easily between integer and floating point registers. */
956
957 dest_mode = orig_mode;
958 pushing = push_operand (dest, dest_mode);
2b54c30f 959 if (!can_decompose_p (dest)
e53a16e7
ILT
960 || (side_effects_p (dest) && !pushing)
961 || (!SCALAR_INT_MODE_P (dest_mode)
962 && !resolve_reg_p (dest)
963 && !resolve_subreg_p (dest)))
964 {
965 if (real_dest == NULL_RTX)
966 real_dest = dest;
967 if (!SCALAR_INT_MODE_P (dest_mode))
968 {
969 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
970 MODE_INT, 0);
971 gcc_assert (dest_mode != BLKmode);
972 }
973 dest = gen_reg_rtx (dest_mode);
974 if (REG_P (real_dest))
975 REG_ATTRS (dest) = REG_ATTRS (real_dest);
976 }
977
978 if (pushing)
979 {
980 unsigned int i, j, jinc;
981
982 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
983 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
984 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
985
986 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
987 {
988 j = 0;
989 jinc = 1;
990 }
991 else
992 {
993 j = words - 1;
994 jinc = -1;
995 }
996
997 for (i = 0; i < words; ++i, j += jinc)
998 {
999 rtx temp;
1000
1001 temp = copy_rtx (XEXP (dest, 0));
1002 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1003 j * UNITS_PER_WORD);
1004 emit_move_insn (temp,
1005 simplify_gen_subreg_concatn (word_mode, src,
1006 orig_mode,
1007 j * UNITS_PER_WORD));
1008 }
1009 }
1010 else
1011 {
1012 unsigned int i;
1013
1014 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
c41c1387 1015 emit_clobber (dest);
e53a16e7
ILT
1016
1017 for (i = 0; i < words; ++i)
1018 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1019 dest_mode,
1020 i * UNITS_PER_WORD),
1021 simplify_gen_subreg_concatn (word_mode, src,
1022 orig_mode,
1023 i * UNITS_PER_WORD));
1024 }
1025
1026 if (real_dest != NULL_RTX)
1027 {
e967cc2f
DM
1028 rtx mdest, smove;
1029 rtx_insn *minsn;
e53a16e7
ILT
1030
1031 if (dest_mode == orig_mode)
1032 mdest = dest;
1033 else
1034 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1035 minsn = emit_move_insn (real_dest, mdest);
1036
760edf20 1037 if (AUTO_INC_DEC && MEM_P (real_dest)
ce5d49a8
ZC
1038 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1039 {
1040 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1041 if (note)
1042 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1043 }
ce5d49a8 1044
e53a16e7
ILT
1045 smove = single_set (minsn);
1046 gcc_assert (smove != NULL_RTX);
1047
1048 resolve_simple_move (smove, minsn);
1049 }
1050
1051 insns = get_insns ();
1052 end_sequence ();
1053
1d65f45c 1054 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
73663bb7 1055
e53a16e7
ILT
1056 emit_insn_before (insns, insn);
1057
82981227 1058 /* If we get here via self-recursion, then INSN is not yet in the insns
6873ecab
SB
1059 chain and delete_insn will fail. We only want to remove INSN from the
1060 current sequence. See PR56738. */
1061 if (in_sequence_p ())
1062 remove_insn (insn);
1063 else
1064 delete_insn (insn);
e53a16e7
ILT
1065
1066 return insns;
1067}
1068
1069/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1070 component registers. Return whether we changed something. */
1071
1072static bool
e967cc2f 1073resolve_clobber (rtx pat, rtx_insn *insn)
e53a16e7 1074{
d70dcf29 1075 rtx reg;
ef4bddc2 1076 machine_mode orig_mode;
e53a16e7 1077 unsigned int words, i;
7e0c3f57 1078 int ret;
e53a16e7
ILT
1079
1080 reg = XEXP (pat, 0);
9a5a8e58 1081 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
e53a16e7
ILT
1082 return false;
1083
1084 orig_mode = GET_MODE (reg);
1085 words = GET_MODE_SIZE (orig_mode);
1086 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1087
7e0c3f57
ILT
1088 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1089 simplify_gen_subreg_concatn (word_mode, reg,
1090 orig_mode, 0),
1091 0);
6fb5fa3c 1092 df_insn_rescan (insn);
7e0c3f57
ILT
1093 gcc_assert (ret != 0);
1094
e53a16e7
ILT
1095 for (i = words - 1; i > 0; --i)
1096 {
1097 rtx x;
1098
9a5a8e58
ILT
1099 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1100 i * UNITS_PER_WORD);
e53a16e7
ILT
1101 x = gen_rtx_CLOBBER (VOIDmode, x);
1102 emit_insn_after (x, insn);
1103 }
1104
d4fd3465
ILT
1105 resolve_reg_notes (insn);
1106
e53a16e7
ILT
1107 return true;
1108}
1109
1110/* A USE of a decomposed register is no longer meaningful. Return
1111 whether we changed something. */
1112
1113static bool
e967cc2f 1114resolve_use (rtx pat, rtx_insn *insn)
e53a16e7
ILT
1115{
1116 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1117 {
1118 delete_insn (insn);
1119 return true;
1120 }
d4fd3465
ILT
1121
1122 resolve_reg_notes (insn);
1123
e53a16e7
ILT
1124 return false;
1125}
1126
b5b8b0ac
AO
1127/* A VAR_LOCATION can be simplified. */
1128
1129static void
e967cc2f 1130resolve_debug (rtx_insn *insn)
b5b8b0ac 1131{
f2d3f347
RS
1132 subrtx_ptr_iterator::array_type array;
1133 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1134 {
1135 rtx *loc = *iter;
1136 rtx x = *loc;
1137 if (resolve_subreg_p (x))
1138 {
1139 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1140 SUBREG_BYTE (x));
1141
1142 if (x)
1143 *loc = x;
1144 else
1145 x = copy_rtx (*loc);
1146 }
1147 if (resolve_reg_p (x))
1148 *loc = copy_rtx (x);
1149 }
b5b8b0ac
AO
1150
1151 df_insn_rescan (insn);
1152
1153 resolve_reg_notes (insn);
1154}
1155
af4ba423
KZ
1156/* Check if INSN is a decomposable multiword-shift or zero-extend and
1157 set the decomposable_context bitmap accordingly. SPEED_P is true
1158 if we are optimizing INSN for speed rather than size. Return true
1159 if INSN is decomposable. */
e0892570 1160
af4ba423 1161static bool
e967cc2f 1162find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
e0892570
AK
1163{
1164 rtx set;
1165 rtx op;
1166 rtx op_operand;
1167
1168 set = single_set (insn);
1169 if (!set)
af4ba423 1170 return false;
e0892570
AK
1171
1172 op = SET_SRC (set);
1173 if (GET_CODE (op) != ASHIFT
1174 && GET_CODE (op) != LSHIFTRT
d7fde18c 1175 && GET_CODE (op) != ASHIFTRT
e0892570 1176 && GET_CODE (op) != ZERO_EXTEND)
af4ba423 1177 return false;
e0892570
AK
1178
1179 op_operand = XEXP (op, 0);
1180 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1181 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1182 || HARD_REGISTER_NUM_P (REGNO (op_operand))
af4ba423
KZ
1183 || GET_MODE (op) != twice_word_mode)
1184 return false;
e0892570
AK
1185
1186 if (GET_CODE (op) == ZERO_EXTEND)
1187 {
1188 if (GET_MODE (op_operand) != word_mode
af4ba423
KZ
1189 || !choices[speed_p].splitting_zext)
1190 return false;
e0892570
AK
1191 }
1192 else /* left or right shift */
1193 {
af4ba423
KZ
1194 bool *splitting = (GET_CODE (op) == ASHIFT
1195 ? choices[speed_p].splitting_ashift
d7fde18c
JJ
1196 : GET_CODE (op) == ASHIFTRT
1197 ? choices[speed_p].splitting_ashiftrt
af4ba423 1198 : choices[speed_p].splitting_lshiftrt);
481683e1 1199 if (!CONST_INT_P (XEXP (op, 1))
af4ba423
KZ
1200 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1201 2 * BITS_PER_WORD - 1)
1202 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1203 return false;
1204
1205 bitmap_set_bit (decomposable_context, REGNO (op_operand));
e0892570
AK
1206 }
1207
1208 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1209
af4ba423 1210 return true;
e0892570
AK
1211}
1212
1213/* Decompose a more than word wide shift (in INSN) of a multiword
1214 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1215 and 'set to zero' insn. Return a pointer to the new insn when a
1216 replacement was done. */
1217
e967cc2f
DM
1218static rtx_insn *
1219resolve_shift_zext (rtx_insn *insn)
e0892570
AK
1220{
1221 rtx set;
1222 rtx op;
1223 rtx op_operand;
e967cc2f 1224 rtx_insn *insns;
d7fde18c 1225 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
e0892570
AK
1226 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1227
1228 set = single_set (insn);
1229 if (!set)
e967cc2f 1230 return NULL;
e0892570
AK
1231
1232 op = SET_SRC (set);
1233 if (GET_CODE (op) != ASHIFT
1234 && GET_CODE (op) != LSHIFTRT
d7fde18c 1235 && GET_CODE (op) != ASHIFTRT
e0892570 1236 && GET_CODE (op) != ZERO_EXTEND)
e967cc2f 1237 return NULL;
e0892570
AK
1238
1239 op_operand = XEXP (op, 0);
1240
af4ba423
KZ
1241 /* We can tear this operation apart only if the regs were already
1242 torn apart. */
e0892570 1243 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
e967cc2f 1244 return NULL;
e0892570
AK
1245
1246 /* src_reg_num is the number of the word mode register which we
1247 are operating on. For a left shift and a zero_extend on little
1248 endian machines this is register 0. */
d7fde18c
JJ
1249 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1250 ? 1 : 0;
e0892570 1251
acbe5496
AK
1252 if (WORDS_BIG_ENDIAN
1253 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
e0892570
AK
1254 src_reg_num = 1 - src_reg_num;
1255
1256 if (GET_CODE (op) == ZERO_EXTEND)
acbe5496 1257 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
e0892570
AK
1258 else
1259 dest_reg_num = 1 - src_reg_num;
1260
1261 offset1 = UNITS_PER_WORD * dest_reg_num;
1262 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1263 src_offset = UNITS_PER_WORD * src_reg_num;
1264
e0892570
AK
1265 start_sequence ();
1266
1267 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1268 GET_MODE (SET_DEST (set)),
1269 offset1);
d7fde18c
JJ
1270 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1271 GET_MODE (SET_DEST (set)),
1272 offset2);
e0892570
AK
1273 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1274 GET_MODE (op_operand),
1275 src_offset);
d7fde18c
JJ
1276 if (GET_CODE (op) == ASHIFTRT
1277 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1278 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1279 BITS_PER_WORD - 1, NULL_RTX, 0);
1280
e0892570
AK
1281 if (GET_CODE (op) != ZERO_EXTEND)
1282 {
1283 int shift_count = INTVAL (XEXP (op, 1));
1284 if (shift_count > BITS_PER_WORD)
1285 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1286 LSHIFT_EXPR : RSHIFT_EXPR,
1287 word_mode, src_reg,
eb6c3df1 1288 shift_count - BITS_PER_WORD,
d7fde18c 1289 dest_reg, GET_CODE (op) != ASHIFTRT);
e0892570
AK
1290 }
1291
1292 if (dest_reg != src_reg)
1293 emit_move_insn (dest_reg, src_reg);
d7fde18c
JJ
1294 if (GET_CODE (op) != ASHIFTRT)
1295 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1296 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1297 emit_move_insn (dest_upper, copy_rtx (src_reg));
1298 else
1299 emit_move_insn (dest_upper, upper_src);
e0892570
AK
1300 insns = get_insns ();
1301
1302 end_sequence ();
1303
1304 emit_insn_before (insns, insn);
1305
1306 if (dump_file)
1307 {
e967cc2f 1308 rtx_insn *in;
e0892570
AK
1309 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1310 for (in = insns; in != insn; in = NEXT_INSN (in))
1311 fprintf (dump_file, "%d ", INSN_UID (in));
1312 fprintf (dump_file, "\n");
1313 }
1314
1315 delete_insn (insn);
1316 return insns;
1317}
1318
af4ba423
KZ
1319/* Print to dump_file a description of what we're doing with shift code CODE.
1320 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1321
1322static void
1323dump_shift_choices (enum rtx_code code, bool *splitting)
1324{
1325 int i;
1326 const char *sep;
1327
1328 fprintf (dump_file,
1329 " Splitting mode %s for %s lowering with shift amounts = ",
1330 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1331 sep = "";
1332 for (i = 0; i < BITS_PER_WORD; i++)
1333 if (splitting[i])
1334 {
1335 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1336 sep = ",";
1337 }
1338 fprintf (dump_file, "\n");
1339}
1340
1341/* Print to dump_file a description of what we're doing when optimizing
1342 for speed or size; SPEED_P says which. DESCRIPTION is a description
1343 of the SPEED_P choice. */
1344
1345static void
1346dump_choices (bool speed_p, const char *description)
1347{
1348 unsigned int i;
1349
1350 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1351
1352 for (i = 0; i < MAX_MACHINE_MODE; i++)
ef4bddc2 1353 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
af4ba423
KZ
1354 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1355 choices[speed_p].move_modes_to_split[i]
1356 ? "Splitting"
1357 : "Skipping",
ef4bddc2 1358 GET_MODE_NAME ((machine_mode) i));
af4ba423
KZ
1359
1360 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1361 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1362 GET_MODE_NAME (twice_word_mode));
1363
1364 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
d7fde18c
JJ
1365 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1366 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
af4ba423
KZ
1367 fprintf (dump_file, "\n");
1368}
1369
e53a16e7 1370/* Look for registers which are always accessed via word-sized SUBREGs
c2c47e8f
UW
1371 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1372 registers into several word-sized pseudo-registers. */
e53a16e7
ILT
1373
1374static void
c2c47e8f 1375decompose_multiword_subregs (bool decompose_copies)
e53a16e7
ILT
1376{
1377 unsigned int max;
1378 basic_block bb;
af4ba423 1379 bool speed_p;
e53a16e7 1380
af4ba423
KZ
1381 if (dump_file)
1382 {
1383 dump_choices (false, "size");
1384 dump_choices (true, "speed");
1385 }
1386
1387 /* Check if this target even has any modes to consider lowering. */
1388 if (!choices[false].something_to_do && !choices[true].something_to_do)
1389 {
1390 if (dump_file)
1391 fprintf (dump_file, "Nothing to do!\n");
1392 return;
1393 }
6fb5fa3c 1394
e53a16e7
ILT
1395 max = max_reg_num ();
1396
1397 /* First see if there are any multi-word pseudo-registers. If there
1398 aren't, there is nothing we can do. This should speed up this
1399 pass in the normal case, since it should be faster than scanning
1400 all the insns. */
1401 {
1402 unsigned int i;
af4ba423 1403 bool useful_modes_seen = false;
e53a16e7
ILT
1404
1405 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
af4ba423
KZ
1406 if (regno_reg_rtx[i] != NULL)
1407 {
ef4bddc2 1408 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
af4ba423
KZ
1409 if (choices[false].move_modes_to_split[(int) mode]
1410 || choices[true].move_modes_to_split[(int) mode])
1411 {
1412 useful_modes_seen = true;
1413 break;
1414 }
1415 }
1416
1417 if (!useful_modes_seen)
e53a16e7 1418 {
af4ba423
KZ
1419 if (dump_file)
1420 fprintf (dump_file, "Nothing to lower in this function.\n");
1421 return;
e53a16e7 1422 }
e53a16e7
ILT
1423 }
1424
8d074192 1425 if (df)
af4ba423
KZ
1426 {
1427 df_set_flags (DF_DEFER_INSN_RESCAN);
1428 run_word_dce ();
1429 }
8d074192 1430
af4ba423
KZ
1431 /* FIXME: It may be possible to change this code to look for each
1432 multi-word pseudo-register and to find each insn which sets or
1433 uses that register. That should be faster than scanning all the
1434 insns. */
e53a16e7
ILT
1435
1436 decomposable_context = BITMAP_ALLOC (NULL);
1437 non_decomposable_context = BITMAP_ALLOC (NULL);
402464a0 1438 subreg_context = BITMAP_ALLOC (NULL);
e53a16e7 1439
9771b263
DN
1440 reg_copy_graph.create (max);
1441 reg_copy_graph.safe_grow_cleared (max);
1442 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
e53a16e7 1443
af4ba423 1444 speed_p = optimize_function_for_speed_p (cfun);
11cd3bed 1445 FOR_EACH_BB_FN (bb, cfun)
e53a16e7 1446 {
e967cc2f 1447 rtx_insn *insn;
e53a16e7
ILT
1448
1449 FOR_BB_INSNS (bb, insn)
1450 {
1451 rtx set;
1452 enum classify_move_insn cmi;
1453 int i, n;
1454
1455 if (!INSN_P (insn)
1456 || GET_CODE (PATTERN (insn)) == CLOBBER
1457 || GET_CODE (PATTERN (insn)) == USE)
1458 continue;
1459
d5785e76
JJ
1460 recog_memoized (insn);
1461
af4ba423 1462 if (find_decomposable_shift_zext (insn, speed_p))
e0892570
AK
1463 continue;
1464
e53a16e7
ILT
1465 extract_insn (insn);
1466
af4ba423 1467 set = simple_move (insn, speed_p);
e53a16e7
ILT
1468
1469 if (!set)
1470 cmi = NOT_SIMPLE_MOVE;
1471 else
1472 {
c2c47e8f
UW
1473 /* We mark pseudo-to-pseudo copies as decomposable during the
1474 second pass only. The first pass is so early that there is
1475 good chance such moves will be optimized away completely by
1476 subsequent optimizations anyway.
1477
1478 However, we call find_pseudo_copy even during the first pass
1479 so as to properly set up the reg_copy_graph. */
4a8cae83 1480 if (find_pseudo_copy (set))
c2c47e8f 1481 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
e53a16e7
ILT
1482 else
1483 cmi = SIMPLE_MOVE;
1484 }
1485
1486 n = recog_data.n_operands;
1487 for (i = 0; i < n; ++i)
1488 {
a36a1928 1489 find_decomposable_subregs (&recog_data.operand[i], &cmi);
e53a16e7
ILT
1490
1491 /* We handle ASM_OPERANDS as a special case to support
1492 things like x86 rdtsc which returns a DImode value.
1493 We can decompose the output, which will certainly be
1494 operand 0, but not the inputs. */
1495
1496 if (cmi == SIMPLE_MOVE
1497 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1498 {
1499 gcc_assert (i == 0);
1500 cmi = NOT_SIMPLE_MOVE;
1501 }
1502 }
1503 }
1504 }
1505
1506 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1507 if (!bitmap_empty_p (decomposable_context))
1508 {
73663bb7 1509 sbitmap sub_blocks;
7984c787
SB
1510 unsigned int i;
1511 sbitmap_iterator sbi;
e53a16e7
ILT
1512 bitmap_iterator iter;
1513 unsigned int regno;
1514
1515 propagate_pseudo_copies ();
1516
8b1c6fd7 1517 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
f61e445a 1518 bitmap_clear (sub_blocks);
e53a16e7
ILT
1519
1520 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1521 decompose_register (regno);
1522
11cd3bed 1523 FOR_EACH_BB_FN (bb, cfun)
e53a16e7 1524 {
e967cc2f 1525 rtx_insn *insn;
e53a16e7 1526
ba4807a0 1527 FOR_BB_INSNS (bb, insn)
e53a16e7 1528 {
11895e28 1529 rtx pat;
e53a16e7
ILT
1530
1531 if (!INSN_P (insn))
1532 continue;
1533
e53a16e7
ILT
1534 pat = PATTERN (insn);
1535 if (GET_CODE (pat) == CLOBBER)
d4fd3465 1536 resolve_clobber (pat, insn);
e53a16e7 1537 else if (GET_CODE (pat) == USE)
d4fd3465 1538 resolve_use (pat, insn);
b5b8b0ac
AO
1539 else if (DEBUG_INSN_P (insn))
1540 resolve_debug (insn);
e53a16e7
ILT
1541 else
1542 {
1543 rtx set;
1544 int i;
1545
1546 recog_memoized (insn);
1547 extract_insn (insn);
1548
af4ba423 1549 set = simple_move (insn, speed_p);
e53a16e7
ILT
1550 if (set)
1551 {
e967cc2f 1552 rtx_insn *orig_insn = insn;
73663bb7 1553 bool cfi = control_flow_insn_p (insn);
e53a16e7 1554
7984c787
SB
1555 /* We can end up splitting loads to multi-word pseudos
1556 into separate loads to machine word size pseudos.
1557 When this happens, we first had one load that can
1558 throw, and after resolve_simple_move we'll have a
1559 bunch of loads (at least two). All those loads may
1560 trap if we can have non-call exceptions, so they
1561 all will end the current basic block. We split the
1562 block after the outer loop over all insns, but we
1563 make sure here that we will be able to split the
1564 basic block and still produce the correct control
1565 flow graph for it. */
1566 gcc_assert (!cfi
8f4f502f 1567 || (cfun->can_throw_non_call_exceptions
7984c787
SB
1568 && can_throw_internal (insn)));
1569
e53a16e7
ILT
1570 insn = resolve_simple_move (set, insn);
1571 if (insn != orig_insn)
1572 {
e53a16e7
ILT
1573 recog_memoized (insn);
1574 extract_insn (insn);
73663bb7
ILT
1575
1576 if (cfi)
d7c028c0 1577 bitmap_set_bit (sub_blocks, bb->index);
e53a16e7
ILT
1578 }
1579 }
e0892570
AK
1580 else
1581 {
e967cc2f 1582 rtx_insn *decomposed_shift;
e0892570
AK
1583
1584 decomposed_shift = resolve_shift_zext (insn);
1585 if (decomposed_shift != NULL_RTX)
1586 {
e0892570
AK
1587 insn = decomposed_shift;
1588 recog_memoized (insn);
1589 extract_insn (insn);
1590 }
1591 }
e53a16e7
ILT
1592
1593 for (i = recog_data.n_operands - 1; i >= 0; --i)
cf55cb6a 1594 resolve_subreg_use (recog_data.operand_loc[i], insn);
e53a16e7
ILT
1595
1596 resolve_reg_notes (insn);
1597
1598 if (num_validated_changes () > 0)
1599 {
1600 for (i = recog_data.n_dups - 1; i >= 0; --i)
1601 {
1602 rtx *pl = recog_data.dup_loc[i];
1603 int dup_num = recog_data.dup_num[i];
1604 rtx *px = recog_data.operand_loc[dup_num];
1605
1a309dfb 1606 validate_unshare_change (insn, pl, *px, 1);
e53a16e7
ILT
1607 }
1608
1609 i = apply_change_group ();
1610 gcc_assert (i);
e53a16e7
ILT
1611 }
1612 }
e53a16e7
ILT
1613 }
1614 }
1615
7984c787
SB
1616 /* If we had insns to split that caused control flow insns in the middle
1617 of a basic block, split those blocks now. Note that we only handle
1618 the case where splitting a load has caused multiple possibly trapping
1619 loads to appear. */
d4ac4ce2 1620 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
7984c787 1621 {
e967cc2f 1622 rtx_insn *insn, *end;
7984c787
SB
1623 edge fallthru;
1624
06e28de2 1625 bb = BASIC_BLOCK_FOR_FN (cfun, i);
7984c787
SB
1626 insn = BB_HEAD (bb);
1627 end = BB_END (bb);
1628
1629 while (insn != end)
1630 {
1631 if (control_flow_insn_p (insn))
1632 {
1633 /* Split the block after insn. There will be a fallthru
1634 edge, which is OK so we keep it. We have to create the
1635 exception edges ourselves. */
1636 fallthru = split_block (bb, insn);
1637 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1638 bb = fallthru->dest;
1639 insn = BB_HEAD (bb);
1640 }
1641 else
1642 insn = NEXT_INSN (insn);
1643 }
1644 }
73663bb7 1645
73663bb7 1646 sbitmap_free (sub_blocks);
e53a16e7
ILT
1647 }
1648
1649 {
1650 unsigned int i;
1651 bitmap b;
1652
9771b263 1653 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
e53a16e7
ILT
1654 if (b)
1655 BITMAP_FREE (b);
1656 }
1657
9771b263 1658 reg_copy_graph.release ();
e53a16e7
ILT
1659
1660 BITMAP_FREE (decomposable_context);
1661 BITMAP_FREE (non_decomposable_context);
402464a0 1662 BITMAP_FREE (subreg_context);
e53a16e7
ILT
1663}
1664\f
e53a16e7
ILT
1665/* Implement first lower subreg pass. */
1666
27a4cd48
DM
1667namespace {
1668
1669const pass_data pass_data_lower_subreg =
e53a16e7 1670{
27a4cd48
DM
1671 RTL_PASS, /* type */
1672 "subreg1", /* name */
1673 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1674 TV_LOWER_SUBREG, /* tv_id */
1675 0, /* properties_required */
1676 0, /* properties_provided */
1677 0, /* properties_destroyed */
1678 0, /* todo_flags_start */
3bea341f 1679 0, /* todo_flags_finish */
e53a16e7
ILT
1680};
1681
27a4cd48
DM
1682class pass_lower_subreg : public rtl_opt_pass
1683{
1684public:
c3284718
RS
1685 pass_lower_subreg (gcc::context *ctxt)
1686 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
27a4cd48
DM
1687 {}
1688
1689 /* opt_pass methods: */
1a3d085c 1690 virtual bool gate (function *) { return flag_split_wide_types != 0; }
be55bfe6
TS
1691 virtual unsigned int execute (function *)
1692 {
1693 decompose_multiword_subregs (false);
1694 return 0;
1695 }
27a4cd48
DM
1696
1697}; // class pass_lower_subreg
1698
1699} // anon namespace
1700
1701rtl_opt_pass *
1702make_pass_lower_subreg (gcc::context *ctxt)
1703{
1704 return new pass_lower_subreg (ctxt);
1705}
1706
be55bfe6
TS
1707/* Implement second lower subreg pass. */
1708
27a4cd48
DM
1709namespace {
1710
1711const pass_data pass_data_lower_subreg2 =
e53a16e7 1712{
27a4cd48
DM
1713 RTL_PASS, /* type */
1714 "subreg2", /* name */
1715 OPTGROUP_NONE, /* optinfo_flags */
27a4cd48
DM
1716 TV_LOWER_SUBREG, /* tv_id */
1717 0, /* properties_required */
1718 0, /* properties_provided */
1719 0, /* properties_destroyed */
1720 0, /* todo_flags_start */
3bea341f 1721 TODO_df_finish, /* todo_flags_finish */
e53a16e7 1722};
27a4cd48
DM
1723
1724class pass_lower_subreg2 : public rtl_opt_pass
1725{
1726public:
c3284718
RS
1727 pass_lower_subreg2 (gcc::context *ctxt)
1728 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
27a4cd48
DM
1729 {}
1730
1731 /* opt_pass methods: */
1a3d085c 1732 virtual bool gate (function *) { return flag_split_wide_types != 0; }
be55bfe6
TS
1733 virtual unsigned int execute (function *)
1734 {
1735 decompose_multiword_subregs (true);
1736 return 0;
1737 }
27a4cd48
DM
1738
1739}; // class pass_lower_subreg2
1740
1741} // anon namespace
1742
1743rtl_opt_pass *
1744make_pass_lower_subreg2 (gcc::context *ctxt)
1745{
1746 return new pass_lower_subreg2 (ctxt);
1747}