]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
poly_int: GET_MODE_SIZE
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
1a6a0f2a 1/* Decompose multiword subregs.
8e8f6434 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
1a6a0f2a 3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
8c4c00c1 10Software Foundation; either version 3, or (at your option) any later
1a6a0f2a 11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
8c4c00c1 19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
1a6a0f2a 21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
1a6a0f2a 26#include "rtl.h"
7c29e30e 27#include "tree.h"
28#include "cfghooks.h"
9ef16211 29#include "df.h"
ad7b10a2 30#include "memmodel.h"
1a6a0f2a 31#include "tm_p.h"
7c29e30e 32#include "expmed.h"
1a6a0f2a 33#include "insn-config.h"
7c29e30e 34#include "emit-rtl.h"
35#include "recog.h"
94ea8568 36#include "cfgrtl.h"
37#include "cfgbuild.h"
0e8e9be3 38#include "dce.h"
1a6a0f2a 39#include "expr.h"
1a6a0f2a 40#include "tree-pass.h"
c7944dce 41#include "lower-subreg.h"
2e3cae91 42#include "rtl-iter.h"
5f6dcf1a 43#include "target.h"
1a6a0f2a 44
1a6a0f2a 45
46/* Decompose multi-word pseudo-registers into individual
c7944dce 47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
4d5cf08a 55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
c7944dce 58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
74#define LOG_COSTS 0
75#define FORCE_LOWERING 0
1a6a0f2a 76
77/* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79static bitmap decomposable_context;
80
81/* Bit N in this bitmap is set if regno N is used in a context in
82 which it can not be decomposed. */
83static bitmap non_decomposable_context;
84
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
89static bitmap subreg_context;
90
1a6a0f2a 91/* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
f1f41a6c 93static vec<bitmap> reg_copy_graph;
1a6a0f2a 94
c7944dce 95struct target_lower_subreg default_target_lower_subreg;
96#if SWITCHABLE_TARGET
97struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99#endif
100
101#define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103#define choices \
104 this_target_lower_subreg->x_choices
105
50e9e5b3 106/* Return true if MODE is a mode we know how to lower. When returning true,
107 store its byte size in *BYTES and its word size in *WORDS. */
108
109static inline bool
110interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112{
52acb7ae 113 if (!GET_MODE_SIZE (mode).is_constant (bytes))
114 return false;
50e9e5b3 115 *words = CEIL (*bytes, UNITS_PER_WORD);
116 return true;
117}
118
c7944dce 119/* RTXes used while computing costs. */
120struct cost_rtxes {
121 /* Source and target registers. */
122 rtx source;
123 rtx target;
124
125 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
126 rtx zext;
127
128 /* A shift of SOURCE. */
129 rtx shift;
130
131 /* A SET of TARGET. */
132 rtx set;
133};
134
135/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
137
138static int
139shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
3754d046 140 machine_mode mode, int op1)
c7944dce 141{
c7944dce 142 PUT_CODE (rtxes->shift, code);
143 PUT_MODE (rtxes->shift, mode);
144 PUT_MODE (rtxes->source, mode);
bd39703a 145 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
5ae4887d 146 return set_src_cost (rtxes->shift, mode, speed_p);
c7944dce 147}
148
149/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150 to true if it is profitable to split a double-word CODE shift
151 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
152 for speed or size profitability.
153
154 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
155 the cost of moving zero into a word-mode register. WORD_MOVE_COST
156 is the cost of moving between word registers. */
157
158static void
159compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 bool *splitting, enum rtx_code code,
161 int word_move_zero_cost, int word_move_cost)
162{
4d5cf08a 163 int wide_cost, narrow_cost, upper_cost, i;
c7944dce 164
165 for (i = 0; i < BITS_PER_WORD; i++)
166 {
167 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 i + BITS_PER_WORD);
169 if (i == 0)
170 narrow_cost = word_move_cost;
171 else
172 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173
4d5cf08a 174 if (code != ASHIFTRT)
175 upper_cost = word_move_zero_cost;
176 else if (i == BITS_PER_WORD - 1)
177 upper_cost = word_move_cost;
178 else
179 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 BITS_PER_WORD - 1);
181
c7944dce 182 if (LOG_COSTS)
183 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
4d5cf08a 185 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
c7944dce 186
4d5cf08a 187 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
c7944dce 188 splitting[i] = true;
189 }
190}
191
192/* Compute what we should do when optimizing for speed or size; SPEED_P
193 selects which. Use RTXES for computing costs. */
194
195static void
196compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197{
198 unsigned int i;
199 int word_move_zero_cost, word_move_cost;
200
a6d935b7 201 PUT_MODE (rtxes->target, word_mode);
c7944dce 202 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
a6d935b7 203 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 204
205 SET_SRC (rtxes->set) = rtxes->source;
a6d935b7 206 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 207
208 if (LOG_COSTS)
209 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211
212 for (i = 0; i < MAX_MACHINE_MODE; i++)
213 {
3754d046 214 machine_mode mode = (machine_mode) i;
50e9e5b3 215 unsigned int size, factor;
216 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
c7944dce 217 {
50e9e5b3 218 unsigned int mode_move_cost;
c7944dce 219
220 PUT_MODE (rtxes->target, mode);
221 PUT_MODE (rtxes->source, mode);
a6d935b7 222 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 223
224 if (LOG_COSTS)
225 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226 GET_MODE_NAME (mode), mode_move_cost,
227 word_move_cost, factor);
228
229 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230 {
231 choices[speed_p].move_modes_to_split[i] = true;
232 choices[speed_p].something_to_do = true;
233 }
234 }
235 }
236
237 /* For the moves and shifts, the only case that is checked is one
238 where the mode of the target is an integer mode twice the width
239 of the word_mode.
240
241 If it is not profitable to split a double word move then do not
242 even consider the shifts or the zero extension. */
243 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244 {
245 int zext_cost;
246
247 /* The only case here to check to see if moving the upper part with a
248 zero is cheaper than doing the zext itself. */
c7944dce 249 PUT_MODE (rtxes->source, word_mode);
5ae4887d 250 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
c7944dce 251
252 if (LOG_COSTS)
253 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255 zext_cost, word_move_cost, word_move_zero_cost);
256
257 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258 choices[speed_p].splitting_zext = true;
259
260 compute_splitting_shift (speed_p, rtxes,
261 choices[speed_p].splitting_ashift, ASHIFT,
262 word_move_zero_cost, word_move_cost);
263 compute_splitting_shift (speed_p, rtxes,
264 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265 word_move_zero_cost, word_move_cost);
4d5cf08a 266 compute_splitting_shift (speed_p, rtxes,
267 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268 word_move_zero_cost, word_move_cost);
c7944dce 269 }
270}
271
272/* Do one-per-target initialisation. This involves determining
273 which operations on the machine are profitable. If none are found,
274 then the pass just returns when called. */
275
276void
277init_lower_subreg (void)
278{
279 struct cost_rtxes rtxes;
280
281 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282
28ebc73c 283 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
c7944dce 284
dcd6d0f4 285 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
d1f9b275 287 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
c7944dce 288 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290
291 if (LOG_COSTS)
292 fprintf (stderr, "\nSize costs\n==========\n\n");
293 compute_costs (false, &rtxes);
294
295 if (LOG_COSTS)
296 fprintf (stderr, "\nSpeed costs\n===========\n\n");
297 compute_costs (true, &rtxes);
298}
67c3f580 299
300static bool
301simple_move_operand (rtx x)
302{
303 if (GET_CODE (x) == SUBREG)
304 x = SUBREG_REG (x);
305
306 if (!OBJECT_P (x))
307 return false;
308
309 if (GET_CODE (x) == LABEL_REF
310 || GET_CODE (x) == SYMBOL_REF
ab9eaa97 311 || GET_CODE (x) == HIGH
312 || GET_CODE (x) == CONST)
67c3f580 313 return false;
314
315 if (MEM_P (x)
316 && (MEM_VOLATILE_P (x)
4e27ffd0 317 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
67c3f580 318 return false;
319
320 return true;
321}
322
c7944dce 323/* If INSN is a single set between two objects that we want to split,
324 return the single set. SPEED_P says whether we are optimizing
325 INSN for speed or size.
326
327 INSN should have been passed to recog and extract_insn before this
328 is called. */
1a6a0f2a 329
330static rtx
a5942062 331simple_move (rtx_insn *insn, bool speed_p)
1a6a0f2a 332{
333 rtx x;
334 rtx set;
3754d046 335 machine_mode mode;
1a6a0f2a 336
337 if (recog_data.n_operands != 2)
338 return NULL_RTX;
339
340 set = single_set (insn);
341 if (!set)
342 return NULL_RTX;
343
344 x = SET_DEST (set);
345 if (x != recog_data.operand[0] && x != recog_data.operand[1])
346 return NULL_RTX;
67c3f580 347 if (!simple_move_operand (x))
1a6a0f2a 348 return NULL_RTX;
349
350 x = SET_SRC (set);
351 if (x != recog_data.operand[0] && x != recog_data.operand[1])
352 return NULL_RTX;
67c3f580 353 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
354 things like x86 rdtsc which returns a DImode value. */
355 if (GET_CODE (x) != ASM_OPERANDS
356 && !simple_move_operand (x))
1a6a0f2a 357 return NULL_RTX;
358
359 /* We try to decompose in integer modes, to avoid generating
360 inefficient code copying between integer and floating point
361 registers. That means that we can't decompose if this is a
362 non-integer mode for which there is no integer mode of the same
363 size. */
36c98bd9 364 mode = GET_MODE (SET_DEST (set));
1a6a0f2a 365 if (!SCALAR_INT_MODE_P (mode)
44504d18 366 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
1a6a0f2a 367 return NULL_RTX;
368
5e016dfc 369 /* Reject PARTIAL_INT modes. They are used for processor specific
370 purposes and it's probably best not to tamper with them. */
371 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
372 return NULL_RTX;
373
c7944dce 374 if (!choices[speed_p].move_modes_to_split[(int) mode])
375 return NULL_RTX;
376
1a6a0f2a 377 return set;
378}
379
380/* If SET is a copy from one multi-word pseudo-register to another,
381 record that in reg_copy_graph. Return whether it is such a
382 copy. */
383
384static bool
385find_pseudo_copy (rtx set)
386{
387 rtx dest = SET_DEST (set);
388 rtx src = SET_SRC (set);
389 unsigned int rd, rs;
390 bitmap b;
391
392 if (!REG_P (dest) || !REG_P (src))
393 return false;
394
395 rd = REGNO (dest);
396 rs = REGNO (src);
397 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
398 return false;
399
f1f41a6c 400 b = reg_copy_graph[rs];
1a6a0f2a 401 if (b == NULL)
402 {
403 b = BITMAP_ALLOC (NULL);
f1f41a6c 404 reg_copy_graph[rs] = b;
1a6a0f2a 405 }
406
407 bitmap_set_bit (b, rd);
408
409 return true;
410}
411
412/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
413 where they are copied to another register, add the register to
414 which they are copied to DECOMPOSABLE_CONTEXT. Use
415 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
416 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
417
418static void
419propagate_pseudo_copies (void)
420{
035def86 421 auto_bitmap queue, propagate;
1a6a0f2a 422
423 bitmap_copy (queue, decomposable_context);
424 do
425 {
426 bitmap_iterator iter;
427 unsigned int i;
428
429 bitmap_clear (propagate);
430
431 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
432 {
f1f41a6c 433 bitmap b = reg_copy_graph[i];
1a6a0f2a 434 if (b)
435 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
436 }
437
438 bitmap_and_compl (queue, propagate, decomposable_context);
439 bitmap_ior_into (decomposable_context, propagate);
440 }
441 while (!bitmap_empty_p (queue));
1a6a0f2a 442}
443
/* Classification of the insn being scanned.  A pointer to one of these
   values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,

  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,

  /* The insn is any other kind of simple move.  */
  SIMPLE_MOVE
};
456
665db605 457/* If we find a SUBREG in *LOC which we could use to decompose a
458 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
459 unadorned register which is not a simple pseudo-register copy,
460 DATA will point at the type of move, and we set a bit in
461 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
1a6a0f2a 462
665db605 463static void
464find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
1a6a0f2a 465{
665db605 466 subrtx_var_iterator::array_type array;
467 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
1a6a0f2a 468 {
665db605 469 rtx x = *iter;
470 if (GET_CODE (x) == SUBREG)
471 {
472 rtx inner = SUBREG_REG (x);
473 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
1a6a0f2a 474
665db605 475 if (!REG_P (inner))
476 continue;
1a6a0f2a 477
665db605 478 regno = REGNO (inner);
479 if (HARD_REGISTER_NUM_P (regno))
480 {
481 iter.skip_subrtxes ();
482 continue;
483 }
1a6a0f2a 484
50e9e5b3 485 if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
486 || !interesting_mode_p (GET_MODE (inner), &inner_size,
487 &inner_words))
488 continue;
1a6a0f2a 489
665db605 490 /* We only try to decompose single word subregs of multi-word
491 registers. When we find one, we return -1 to avoid iterating
492 over the inner register.
1a6a0f2a 493
665db605 494 ??? This doesn't allow, e.g., DImode subregs of TImode values
495 on 32-bit targets. We would need to record the way the
496 pseudo-register was used, and only decompose if all the uses
497 were the same number and size of pieces. Hopefully this
498 doesn't happen much. */
1a6a0f2a 499
665db605 500 if (outer_words == 1 && inner_words > 1)
501 {
502 bitmap_set_bit (decomposable_context, regno);
503 iter.skip_subrtxes ();
504 continue;
505 }
4e7a1eb8 506
665db605 507 /* If this is a cast from one mode to another, where the modes
508 have the same size, and they are not tieable, then mark this
509 register as non-decomposable. If we decompose it we are
510 likely to mess up whatever the backend is trying to do. */
511 if (outer_words > 1
512 && outer_size == inner_size
5f6dcf1a 513 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
665db605 514 {
515 bitmap_set_bit (non_decomposable_context, regno);
516 bitmap_set_bit (subreg_context, regno);
517 iter.skip_subrtxes ();
518 continue;
519 }
4e7a1eb8 520 }
665db605 521 else if (REG_P (x))
1a6a0f2a 522 {
50e9e5b3 523 unsigned int regno, size, words;
665db605 524
525 /* We will see an outer SUBREG before we see the inner REG, so
526 when we see a plain REG here it means a direct reference to
527 the register.
528
529 If this is not a simple copy from one location to another,
530 then we can not decompose this register. If this is a simple
531 copy we want to decompose, and the mode is right,
532 then we mark the register as decomposable.
533 Otherwise we don't say anything about this register --
534 it could be decomposed, but whether that would be
535 profitable depends upon how it is used elsewhere.
536
537 We only set bits in the bitmap for multi-word
538 pseudo-registers, since those are the only ones we care about
539 and it keeps the size of the bitmaps down. */
540
541 regno = REGNO (x);
542 if (!HARD_REGISTER_NUM_P (regno)
50e9e5b3 543 && interesting_mode_p (GET_MODE (x), &size, &words)
544 && words > 1)
1a6a0f2a 545 {
665db605 546 switch (*pcmi)
547 {
548 case NOT_SIMPLE_MOVE:
549 bitmap_set_bit (non_decomposable_context, regno);
550 break;
551 case DECOMPOSABLE_SIMPLE_MOVE:
5f6dcf1a 552 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
665db605 553 bitmap_set_bit (decomposable_context, regno);
554 break;
555 case SIMPLE_MOVE:
556 break;
557 default:
558 gcc_unreachable ();
559 }
1a6a0f2a 560 }
561 }
665db605 562 else if (MEM_P (x))
563 {
564 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
67c3f580 565
665db605 566 /* Any registers used in a MEM do not participate in a
567 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
568 here, and return -1 to block the parent's recursion. */
569 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
570 iter.skip_subrtxes ();
571 }
67c3f580 572 }
1a6a0f2a 573}
574
575/* Decompose REGNO into word-sized components. We smash the REG node
576 in place. This ensures that (1) something goes wrong quickly if we
577 fail to make some replacement, and (2) the debug information inside
578 the symbol table is automatically kept up to date. */
579
580static void
581decompose_register (unsigned int regno)
582{
583 rtx reg;
50e9e5b3 584 unsigned int size, words, i;
1a6a0f2a 585 rtvec v;
586
587 reg = regno_reg_rtx[regno];
588
589 regno_reg_rtx[regno] = NULL_RTX;
1a6a0f2a 590
50e9e5b3 591 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
592 gcc_unreachable ();
1a6a0f2a 593
594 v = rtvec_alloc (words);
595 for (i = 0; i < words; ++i)
596 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
597
598 PUT_CODE (reg, CONCATN);
599 XVEC (reg, 0) = v;
600
601 if (dump_file)
602 {
603 fprintf (dump_file, "; Splitting reg %u ->", regno);
604 for (i = 0; i < words; ++i)
605 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
606 fputc ('\n', dump_file);
607 }
608}
609
610/* Get a SUBREG of a CONCATN. */
611
612static rtx
9edf7ea8 613simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
1a6a0f2a 614{
50e9e5b3 615 unsigned int outer_size, outer_words, inner_size, inner_words;
3754d046 616 machine_mode innermode, partmode;
1a6a0f2a 617 rtx part;
618 unsigned int final_offset;
9edf7ea8 619 unsigned int byte;
1a6a0f2a 620
50e9e5b3 621 innermode = GET_MODE (op);
622 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
623 || !interesting_mode_p (innermode, &inner_size, &inner_words))
624 gcc_unreachable ();
625
9edf7ea8 626 /* Must be constant if interesting_mode_p passes. */
627 byte = orig_byte.to_constant ();
1a6a0f2a 628 gcc_assert (GET_CODE (op) == CONCATN);
50e9e5b3 629 gcc_assert (byte % outer_size == 0);
1a6a0f2a 630
50e9e5b3 631 gcc_assert (byte < inner_size);
632 if (outer_size > inner_size)
ae1f04be 633 return NULL_RTX;
1a6a0f2a 634
50e9e5b3 635 inner_size /= XVECLEN (op, 0);
1a6a0f2a 636 part = XVECEXP (op, 0, byte / inner_size);
50bdfec8 637 partmode = GET_MODE (part);
638
ef379746 639 final_offset = byte % inner_size;
50e9e5b3 640 if (final_offset + outer_size > inner_size)
ef379746 641 return NULL_RTX;
642
598ffe59 643 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
644 regular CONST_VECTORs. They have vector or integer modes, depending
645 on the capabilities of the target. Cope with them. */
646 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
647 partmode = GET_MODE_INNER (innermode);
648 else if (partmode == VOIDmode)
ef379746 649 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
650 GET_MODE_CLASS (innermode), 0).require ();
1a6a0f2a 651
50bdfec8 652 return simplify_gen_subreg (outermode, part, partmode, final_offset);
1a6a0f2a 653}
654
655/* Wrapper around simplify_gen_subreg which handles CONCATN. */
656
657static rtx
3754d046 658simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
659 machine_mode innermode, unsigned int byte)
1a6a0f2a 660{
3fa57b79 661 rtx ret;
662
1a6a0f2a 663 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
664 If OP is a SUBREG of a CONCATN, then it must be a simple mode
665 change with the same size and offset 0, or it must extract a
666 part. We shouldn't see anything else here. */
667 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
668 {
669 rtx op2;
670
52acb7ae 671 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
672 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
9edf7ea8 673 && known_eq (SUBREG_BYTE (op), 0))
1a6a0f2a 674 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
675 GET_MODE (SUBREG_REG (op)), byte);
676
677 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
678 SUBREG_BYTE (op));
679 if (op2 == NULL_RTX)
680 {
681 /* We don't handle paradoxical subregs here. */
d0257d43 682 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
683 gcc_assert (!paradoxical_subreg_p (op));
1a6a0f2a 684 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
685 byte + SUBREG_BYTE (op));
686 gcc_assert (op2 != NULL_RTX);
687 return op2;
688 }
689
690 op = op2;
691 gcc_assert (op != NULL_RTX);
692 gcc_assert (innermode == GET_MODE (op));
693 }
3fa57b79 694
1a6a0f2a 695 if (GET_CODE (op) == CONCATN)
696 return simplify_subreg_concatn (outermode, op, byte);
3fa57b79 697
698 ret = simplify_gen_subreg (outermode, op, innermode, byte);
699
700 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
701 resolve_simple_move will ask for the high part of the paradoxical
702 subreg, which does not have a value. Just return a zero. */
703 if (ret == NULL_RTX
d0257d43 704 && paradoxical_subreg_p (op))
3fa57b79 705 return CONST0_RTX (outermode);
706
707 gcc_assert (ret != NULL_RTX);
708 return ret;
1a6a0f2a 709}
710
711/* Return whether we should resolve X into the registers into which it
712 was decomposed. */
713
714static bool
715resolve_reg_p (rtx x)
716{
717 return GET_CODE (x) == CONCATN;
718}
719
720/* Return whether X is a SUBREG of a register which we need to
721 resolve. */
722
723static bool
724resolve_subreg_p (rtx x)
725{
726 if (GET_CODE (x) != SUBREG)
727 return false;
728 return resolve_reg_p (SUBREG_REG (x));
729}
730
2e3cae91 731/* Look for SUBREGs in *LOC which need to be decomposed. */
1a6a0f2a 732
2e3cae91 733static bool
734resolve_subreg_use (rtx *loc, rtx insn)
1a6a0f2a 735{
2e3cae91 736 subrtx_ptr_iterator::array_type array;
737 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
1a6a0f2a 738 {
2e3cae91 739 rtx *loc = *iter;
740 rtx x = *loc;
741 if (resolve_subreg_p (x))
1a6a0f2a 742 {
2e3cae91 743 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
744 SUBREG_BYTE (x));
1a6a0f2a 745
2e3cae91 746 /* It is possible for a note to contain a reference which we can
747 decompose. In this case, return 1 to the caller to indicate
748 that the note must be removed. */
749 if (!x)
750 {
751 gcc_assert (!insn);
752 return true;
753 }
1a6a0f2a 754
2e3cae91 755 validate_change (insn, loc, x, 1);
756 iter.skip_subrtxes ();
757 }
758 else if (resolve_reg_p (x))
759 /* Return 1 to the caller to indicate that we found a direct
760 reference to a register which is being decomposed. This can
761 happen inside notes, multiword shift or zero-extend
762 instructions. */
763 return true;
1a6a0f2a 764 }
765
2e3cae91 766 return false;
1a6a0f2a 767}
768
1a6a0f2a 769/* Resolve any decomposed registers which appear in register notes on
770 INSN. */
771
772static void
a5942062 773resolve_reg_notes (rtx_insn *insn)
1a6a0f2a 774{
775 rtx *pnote, note;
776
777 note = find_reg_equal_equiv_note (insn);
778 if (note)
779 {
3072d30e 780 int old_count = num_validated_changes ();
2e3cae91 781 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
1e5b92fa 782 remove_note (insn, note);
3072d30e 783 else
784 if (old_count != num_validated_changes ())
785 df_notes_rescan (insn);
1a6a0f2a 786 }
787
788 pnote = &REG_NOTES (insn);
789 while (*pnote != NULL_RTX)
790 {
9ce37fa7 791 bool del = false;
1a6a0f2a 792
793 note = *pnote;
794 switch (REG_NOTE_KIND (note))
795 {
3072d30e 796 case REG_DEAD:
797 case REG_UNUSED:
1a6a0f2a 798 if (resolve_reg_p (XEXP (note, 0)))
9ce37fa7 799 del = true;
1a6a0f2a 800 break;
801
802 default:
803 break;
804 }
805
9ce37fa7 806 if (del)
1a6a0f2a 807 *pnote = XEXP (note, 1);
808 else
809 pnote = &XEXP (note, 1);
810 }
811}
812
67c3f580 813/* Return whether X can be decomposed into subwords. */
1a6a0f2a 814
815static bool
67c3f580 816can_decompose_p (rtx x)
1a6a0f2a 817{
818 if (REG_P (x))
819 {
820 unsigned int regno = REGNO (x);
821
822 if (HARD_REGISTER_NUM_P (regno))
5f961ca4 823 {
50e9e5b3 824 unsigned int byte, num_bytes, num_words;
5f961ca4 825
50e9e5b3 826 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
827 return false;
5f961ca4 828 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
829 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
830 return false;
831 return true;
832 }
1a6a0f2a 833 else
5277d36e 834 return !bitmap_bit_p (subreg_context, regno);
1a6a0f2a 835 }
836
67c3f580 837 return true;
1a6a0f2a 838}
839
840/* Decompose the registers used in a simple move SET within INSN. If
841 we don't change anything, return INSN, otherwise return the start
842 of the sequence of moves. */
843
a5942062 844static rtx_insn *
845resolve_simple_move (rtx set, rtx_insn *insn)
1a6a0f2a 846{
a5942062 847 rtx src, dest, real_dest;
848 rtx_insn *insns;
3754d046 849 machine_mode orig_mode, dest_mode;
50e9e5b3 850 unsigned int orig_size, words;
1a6a0f2a 851 bool pushing;
852
853 src = SET_SRC (set);
854 dest = SET_DEST (set);
855 orig_mode = GET_MODE (dest);
856
50e9e5b3 857 if (!interesting_mode_p (orig_mode, &orig_size, &words))
858 gcc_unreachable ();
c7944dce 859 gcc_assert (words > 1);
1a6a0f2a 860
861 start_sequence ();
862
863 /* We have to handle copying from a SUBREG of a decomposed reg where
864 the SUBREG is larger than word size. Rather than assume that we
865 can take a word_mode SUBREG of the destination, we copy to a new
866 register and then copy that to the destination. */
867
868 real_dest = NULL_RTX;
869
870 if (GET_CODE (src) == SUBREG
871 && resolve_reg_p (SUBREG_REG (src))
9edf7ea8 872 && (maybe_ne (SUBREG_BYTE (src), 0)
52acb7ae 873 || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
1a6a0f2a 874 {
875 real_dest = dest;
876 dest = gen_reg_rtx (orig_mode);
877 if (REG_P (real_dest))
878 REG_ATTRS (dest) = REG_ATTRS (real_dest);
879 }
880
881 /* Similarly if we are copying to a SUBREG of a decomposed reg where
882 the SUBREG is larger than word size. */
883
884 if (GET_CODE (dest) == SUBREG
885 && resolve_reg_p (SUBREG_REG (dest))
9edf7ea8 886 && (maybe_ne (SUBREG_BYTE (dest), 0)
52acb7ae 887 || maybe_ne (orig_size,
888 GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
1a6a0f2a 889 {
a5942062 890 rtx reg, smove;
891 rtx_insn *minsn;
1a6a0f2a 892
893 reg = gen_reg_rtx (orig_mode);
894 minsn = emit_move_insn (reg, src);
895 smove = single_set (minsn);
896 gcc_assert (smove != NULL_RTX);
897 resolve_simple_move (smove, minsn);
898 src = reg;
899 }
900
901 /* If we didn't have any big SUBREGS of decomposed registers, and
902 neither side of the move is a register we are decomposing, then
903 we don't have to do anything here. */
904
905 if (src == SET_SRC (set)
906 && dest == SET_DEST (set)
907 && !resolve_reg_p (src)
908 && !resolve_subreg_p (src)
909 && !resolve_reg_p (dest)
910 && !resolve_subreg_p (dest))
911 {
912 end_sequence ();
913 return insn;
914 }
915
ccd1ec59 916 /* It's possible for the code to use a subreg of a decomposed
917 register while forming an address. We need to handle that before
918 passing the address to emit_move_insn. We pass NULL_RTX as the
919 insn parameter to resolve_subreg_use because we can not validate
920 the insn yet. */
921 if (MEM_P (src) || MEM_P (dest))
922 {
923 int acg;
924
925 if (MEM_P (src))
2e3cae91 926 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
ccd1ec59 927 if (MEM_P (dest))
2e3cae91 928 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
ccd1ec59 929 acg = apply_change_group ();
930 gcc_assert (acg);
931 }
932
1a6a0f2a 933 /* If SRC is a register which we can't decompose, or has side
934 effects, we need to move via a temporary register. */
935
67c3f580 936 if (!can_decompose_p (src)
1a6a0f2a 937 || side_effects_p (src)
938 || GET_CODE (src) == ASM_OPERANDS)
939 {
940 rtx reg;
941
942 reg = gen_reg_rtx (orig_mode);
68a8f1b3 943
32aa77d9 944 if (AUTO_INC_DEC)
945 {
26cd1198 946 rtx_insn *move = emit_move_insn (reg, src);
32aa77d9 947 if (MEM_P (src))
948 {
949 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
950 if (note)
951 add_reg_note (move, REG_INC, XEXP (note, 0));
952 }
953 }
954 else
955 emit_move_insn (reg, src);
956
1a6a0f2a 957 src = reg;
958 }
959
960 /* If DEST is a register which we can't decompose, or has side
961 effects, we need to first move to a temporary register. We
962 handle the common case of pushing an operand directly. We also
963 go through a temporary register if it holds a floating point
964 value. This gives us better code on systems which can't move
965 data easily between integer and floating point registers. */
966
967 dest_mode = orig_mode;
968 pushing = push_operand (dest, dest_mode);
67c3f580 969 if (!can_decompose_p (dest)
1a6a0f2a 970 || (side_effects_p (dest) && !pushing)
971 || (!SCALAR_INT_MODE_P (dest_mode)
972 && !resolve_reg_p (dest)
973 && !resolve_subreg_p (dest)))
974 {
975 if (real_dest == NULL_RTX)
976 real_dest = dest;
977 if (!SCALAR_INT_MODE_P (dest_mode))
e2cd4ccd 978 dest_mode = int_mode_for_mode (dest_mode).require ();
1a6a0f2a 979 dest = gen_reg_rtx (dest_mode);
980 if (REG_P (real_dest))
981 REG_ATTRS (dest) = REG_ATTRS (real_dest);
982 }
983
984 if (pushing)
985 {
986 unsigned int i, j, jinc;
987
50e9e5b3 988 gcc_assert (orig_size % UNITS_PER_WORD == 0);
1a6a0f2a 989 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
990 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
991
992 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
993 {
994 j = 0;
995 jinc = 1;
996 }
997 else
998 {
999 j = words - 1;
1000 jinc = -1;
1001 }
1002
1003 for (i = 0; i < words; ++i, j += jinc)
1004 {
1005 rtx temp;
1006
1007 temp = copy_rtx (XEXP (dest, 0));
1008 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1009 j * UNITS_PER_WORD);
1010 emit_move_insn (temp,
1011 simplify_gen_subreg_concatn (word_mode, src,
1012 orig_mode,
1013 j * UNITS_PER_WORD));
1014 }
1015 }
1016 else
1017 {
1018 unsigned int i;
1019
1020 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
18b42941 1021 emit_clobber (dest);
1a6a0f2a 1022
1023 for (i = 0; i < words; ++i)
1024 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1025 dest_mode,
1026 i * UNITS_PER_WORD),
1027 simplify_gen_subreg_concatn (word_mode, src,
1028 orig_mode,
1029 i * UNITS_PER_WORD));
1030 }
1031
1032 if (real_dest != NULL_RTX)
1033 {
a5942062 1034 rtx mdest, smove;
1035 rtx_insn *minsn;
1a6a0f2a 1036
1037 if (dest_mode == orig_mode)
1038 mdest = dest;
1039 else
1040 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1041 minsn = emit_move_insn (real_dest, mdest);
1042
32aa77d9 1043 if (AUTO_INC_DEC && MEM_P (real_dest)
68a8f1b3 1044 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1045 {
1046 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1047 if (note)
1048 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1049 }
68a8f1b3 1050
1a6a0f2a 1051 smove = single_set (minsn);
1052 gcc_assert (smove != NULL_RTX);
1053
1054 resolve_simple_move (smove, minsn);
1055 }
1056
1057 insns = get_insns ();
1058 end_sequence ();
1059
e38def9c 1060 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
97bb2849 1061
1a6a0f2a 1062 emit_insn_before (insns, insn);
1063
87c46d87 1064 /* If we get here via self-recursion, then INSN is not yet in the insns
c7ee1482 1065 chain and delete_insn will fail. We only want to remove INSN from the
1066 current sequence. See PR56738. */
1067 if (in_sequence_p ())
1068 remove_insn (insn);
1069 else
1070 delete_insn (insn);
1a6a0f2a 1071
1072 return insns;
1073}
1074
1075/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1076 component registers. Return whether we changed something. */
1077
1078static bool
a5942062 1079resolve_clobber (rtx pat, rtx_insn *insn)
1a6a0f2a 1080{
e29831db 1081 rtx reg;
3754d046 1082 machine_mode orig_mode;
50e9e5b3 1083 unsigned int orig_size, words, i;
ab9eaa97 1084 int ret;
1a6a0f2a 1085
1086 reg = XEXP (pat, 0);
2289a5f2 1087 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1a6a0f2a 1088 return false;
1089
1090 orig_mode = GET_MODE (reg);
50e9e5b3 1091 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1092 gcc_unreachable ();
1a6a0f2a 1093
ab9eaa97 1094 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1095 simplify_gen_subreg_concatn (word_mode, reg,
1096 orig_mode, 0),
1097 0);
3072d30e 1098 df_insn_rescan (insn);
ab9eaa97 1099 gcc_assert (ret != 0);
1100
1a6a0f2a 1101 for (i = words - 1; i > 0; --i)
1102 {
1103 rtx x;
1104
2289a5f2 1105 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1106 i * UNITS_PER_WORD);
1a6a0f2a 1107 x = gen_rtx_CLOBBER (VOIDmode, x);
1108 emit_insn_after (x, insn);
1109 }
1110
db2200eb 1111 resolve_reg_notes (insn);
1112
1a6a0f2a 1113 return true;
1114}
1115
1116/* A USE of a decomposed register is no longer meaningful. Return
1117 whether we changed something. */
1118
1119static bool
a5942062 1120resolve_use (rtx pat, rtx_insn *insn)
1a6a0f2a 1121{
1122 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1123 {
1124 delete_insn (insn);
1125 return true;
1126 }
db2200eb 1127
1128 resolve_reg_notes (insn);
1129
1a6a0f2a 1130 return false;
1131}
1132
9845d120 1133/* A VAR_LOCATION can be simplified. */
1134
1135static void
a5942062 1136resolve_debug (rtx_insn *insn)
9845d120 1137{
d1f3d29f 1138 subrtx_ptr_iterator::array_type array;
1139 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1140 {
1141 rtx *loc = *iter;
1142 rtx x = *loc;
1143 if (resolve_subreg_p (x))
1144 {
1145 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1146 SUBREG_BYTE (x));
1147
1148 if (x)
1149 *loc = x;
1150 else
1151 x = copy_rtx (*loc);
1152 }
1153 if (resolve_reg_p (x))
1154 *loc = copy_rtx (x);
1155 }
9845d120 1156
1157 df_insn_rescan (insn);
1158
1159 resolve_reg_notes (insn);
1160}
1161
c7944dce 1162/* Check if INSN is a decomposable multiword-shift or zero-extend and
1163 set the decomposable_context bitmap accordingly. SPEED_P is true
1164 if we are optimizing INSN for speed rather than size. Return true
1165 if INSN is decomposable. */
9cf5d19e 1166
c7944dce 1167static bool
a5942062 1168find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
9cf5d19e 1169{
1170 rtx set;
1171 rtx op;
1172 rtx op_operand;
1173
1174 set = single_set (insn);
1175 if (!set)
c7944dce 1176 return false;
9cf5d19e 1177
1178 op = SET_SRC (set);
1179 if (GET_CODE (op) != ASHIFT
1180 && GET_CODE (op) != LSHIFTRT
4d5cf08a 1181 && GET_CODE (op) != ASHIFTRT
9cf5d19e 1182 && GET_CODE (op) != ZERO_EXTEND)
c7944dce 1183 return false;
9cf5d19e 1184
1185 op_operand = XEXP (op, 0);
1186 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1187 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1188 || HARD_REGISTER_NUM_P (REGNO (op_operand))
c7944dce 1189 || GET_MODE (op) != twice_word_mode)
1190 return false;
9cf5d19e 1191
1192 if (GET_CODE (op) == ZERO_EXTEND)
1193 {
1194 if (GET_MODE (op_operand) != word_mode
c7944dce 1195 || !choices[speed_p].splitting_zext)
1196 return false;
9cf5d19e 1197 }
1198 else /* left or right shift */
1199 {
c7944dce 1200 bool *splitting = (GET_CODE (op) == ASHIFT
1201 ? choices[speed_p].splitting_ashift
4d5cf08a 1202 : GET_CODE (op) == ASHIFTRT
1203 ? choices[speed_p].splitting_ashiftrt
c7944dce 1204 : choices[speed_p].splitting_lshiftrt);
971ba038 1205 if (!CONST_INT_P (XEXP (op, 1))
c7944dce 1206 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1207 2 * BITS_PER_WORD - 1)
1208 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1209 return false;
1210
1211 bitmap_set_bit (decomposable_context, REGNO (op_operand));
9cf5d19e 1212 }
1213
1214 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1215
c7944dce 1216 return true;
9cf5d19e 1217}
1218
1219/* Decompose a more than word wide shift (in INSN) of a multiword
1220 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1221 and 'set to zero' insn. Return a pointer to the new insn when a
1222 replacement was done. */
1223
a5942062 1224static rtx_insn *
1225resolve_shift_zext (rtx_insn *insn)
9cf5d19e 1226{
1227 rtx set;
1228 rtx op;
1229 rtx op_operand;
a5942062 1230 rtx_insn *insns;
4d5cf08a 1231 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
9cf5d19e 1232 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
086e7f88 1233 scalar_int_mode inner_mode;
9cf5d19e 1234
1235 set = single_set (insn);
1236 if (!set)
a5942062 1237 return NULL;
9cf5d19e 1238
1239 op = SET_SRC (set);
1240 if (GET_CODE (op) != ASHIFT
1241 && GET_CODE (op) != LSHIFTRT
4d5cf08a 1242 && GET_CODE (op) != ASHIFTRT
9cf5d19e 1243 && GET_CODE (op) != ZERO_EXTEND)
a5942062 1244 return NULL;
9cf5d19e 1245
1246 op_operand = XEXP (op, 0);
086e7f88 1247 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1248 return NULL;
9cf5d19e 1249
c7944dce 1250 /* We can tear this operation apart only if the regs were already
1251 torn apart. */
9cf5d19e 1252 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
a5942062 1253 return NULL;
9cf5d19e 1254
1255 /* src_reg_num is the number of the word mode register which we
1256 are operating on. For a left shift and a zero_extend on little
1257 endian machines this is register 0. */
4d5cf08a 1258 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1259 ? 1 : 0;
9cf5d19e 1260
086e7f88 1261 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
9cf5d19e 1262 src_reg_num = 1 - src_reg_num;
1263
1264 if (GET_CODE (op) == ZERO_EXTEND)
4d8ad352 1265 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
9cf5d19e 1266 else
1267 dest_reg_num = 1 - src_reg_num;
1268
1269 offset1 = UNITS_PER_WORD * dest_reg_num;
1270 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1271 src_offset = UNITS_PER_WORD * src_reg_num;
1272
9cf5d19e 1273 start_sequence ();
1274
1275 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1276 GET_MODE (SET_DEST (set)),
1277 offset1);
4d5cf08a 1278 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1279 GET_MODE (SET_DEST (set)),
1280 offset2);
9cf5d19e 1281 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1282 GET_MODE (op_operand),
1283 src_offset);
4d5cf08a 1284 if (GET_CODE (op) == ASHIFTRT
1285 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1286 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1287 BITS_PER_WORD - 1, NULL_RTX, 0);
1288
9cf5d19e 1289 if (GET_CODE (op) != ZERO_EXTEND)
1290 {
1291 int shift_count = INTVAL (XEXP (op, 1));
1292 if (shift_count > BITS_PER_WORD)
1293 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1294 LSHIFT_EXPR : RSHIFT_EXPR,
1295 word_mode, src_reg,
f5ff0b21 1296 shift_count - BITS_PER_WORD,
4d5cf08a 1297 dest_reg, GET_CODE (op) != ASHIFTRT);
9cf5d19e 1298 }
1299
1300 if (dest_reg != src_reg)
1301 emit_move_insn (dest_reg, src_reg);
4d5cf08a 1302 if (GET_CODE (op) != ASHIFTRT)
1303 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1304 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1305 emit_move_insn (dest_upper, copy_rtx (src_reg));
1306 else
1307 emit_move_insn (dest_upper, upper_src);
9cf5d19e 1308 insns = get_insns ();
1309
1310 end_sequence ();
1311
1312 emit_insn_before (insns, insn);
1313
1314 if (dump_file)
1315 {
a5942062 1316 rtx_insn *in;
9cf5d19e 1317 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1318 for (in = insns; in != insn; in = NEXT_INSN (in))
1319 fprintf (dump_file, "%d ", INSN_UID (in));
1320 fprintf (dump_file, "\n");
1321 }
1322
1323 delete_insn (insn);
1324 return insns;
1325}
1326
c7944dce 1327/* Print to dump_file a description of what we're doing with shift code CODE.
1328 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1329
1330static void
1331dump_shift_choices (enum rtx_code code, bool *splitting)
1332{
1333 int i;
1334 const char *sep;
1335
1336 fprintf (dump_file,
1337 " Splitting mode %s for %s lowering with shift amounts = ",
1338 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1339 sep = "";
1340 for (i = 0; i < BITS_PER_WORD; i++)
1341 if (splitting[i])
1342 {
1343 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1344 sep = ",";
1345 }
1346 fprintf (dump_file, "\n");
1347}
1348
1349/* Print to dump_file a description of what we're doing when optimizing
1350 for speed or size; SPEED_P says which. DESCRIPTION is a description
1351 of the SPEED_P choice. */
1352
1353static void
1354dump_choices (bool speed_p, const char *description)
1355{
50e9e5b3 1356 unsigned int size, factor, i;
c7944dce 1357
1358 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1359
1360 for (i = 0; i < MAX_MACHINE_MODE; i++)
50e9e5b3 1361 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1362 && factor > 1)
c7944dce 1363 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1364 choices[speed_p].move_modes_to_split[i]
1365 ? "Splitting"
1366 : "Skipping",
3754d046 1367 GET_MODE_NAME ((machine_mode) i));
c7944dce 1368
1369 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1370 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1371 GET_MODE_NAME (twice_word_mode));
1372
1373 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
4d5cf08a 1374 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1375 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
c7944dce 1376 fprintf (dump_file, "\n");
1377}
1378
1a6a0f2a 1379/* Look for registers which are always accessed via word-sized SUBREGs
b5ca6624 1380 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1381 registers into several word-sized pseudo-registers. */
1a6a0f2a 1382
1383static void
b5ca6624 1384decompose_multiword_subregs (bool decompose_copies)
1a6a0f2a 1385{
1386 unsigned int max;
1387 basic_block bb;
c7944dce 1388 bool speed_p;
1a6a0f2a 1389
c7944dce 1390 if (dump_file)
1391 {
1392 dump_choices (false, "size");
1393 dump_choices (true, "speed");
1394 }
1395
1396 /* Check if this target even has any modes to consider lowering. */
1397 if (!choices[false].something_to_do && !choices[true].something_to_do)
1398 {
1399 if (dump_file)
1400 fprintf (dump_file, "Nothing to do!\n");
1401 return;
1402 }
3072d30e 1403
1a6a0f2a 1404 max = max_reg_num ();
1405
1406 /* First see if there are any multi-word pseudo-registers. If there
1407 aren't, there is nothing we can do. This should speed up this
1408 pass in the normal case, since it should be faster than scanning
1409 all the insns. */
1410 {
1411 unsigned int i;
c7944dce 1412 bool useful_modes_seen = false;
1a6a0f2a 1413
1414 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
c7944dce 1415 if (regno_reg_rtx[i] != NULL)
1416 {
3754d046 1417 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
c7944dce 1418 if (choices[false].move_modes_to_split[(int) mode]
1419 || choices[true].move_modes_to_split[(int) mode])
1420 {
1421 useful_modes_seen = true;
1422 break;
1423 }
1424 }
1425
1426 if (!useful_modes_seen)
1a6a0f2a 1427 {
c7944dce 1428 if (dump_file)
1429 fprintf (dump_file, "Nothing to lower in this function.\n");
1430 return;
1a6a0f2a 1431 }
1a6a0f2a 1432 }
1433
0e8e9be3 1434 if (df)
c7944dce 1435 {
1436 df_set_flags (DF_DEFER_INSN_RESCAN);
1437 run_word_dce ();
1438 }
0e8e9be3 1439
c7944dce 1440 /* FIXME: It may be possible to change this code to look for each
1441 multi-word pseudo-register and to find each insn which sets or
1442 uses that register. That should be faster than scanning all the
1443 insns. */
1a6a0f2a 1444
1445 decomposable_context = BITMAP_ALLOC (NULL);
1446 non_decomposable_context = BITMAP_ALLOC (NULL);
5277d36e 1447 subreg_context = BITMAP_ALLOC (NULL);
1a6a0f2a 1448
f1f41a6c 1449 reg_copy_graph.create (max);
1450 reg_copy_graph.safe_grow_cleared (max);
1451 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1a6a0f2a 1452
c7944dce 1453 speed_p = optimize_function_for_speed_p (cfun);
fc00614f 1454 FOR_EACH_BB_FN (bb, cfun)
1a6a0f2a 1455 {
a5942062 1456 rtx_insn *insn;
1a6a0f2a 1457
1458 FOR_BB_INSNS (bb, insn)
1459 {
1460 rtx set;
1461 enum classify_move_insn cmi;
1462 int i, n;
1463
1464 if (!INSN_P (insn)
1465 || GET_CODE (PATTERN (insn)) == CLOBBER
1466 || GET_CODE (PATTERN (insn)) == USE)
1467 continue;
1468
08b31038 1469 recog_memoized (insn);
1470
c7944dce 1471 if (find_decomposable_shift_zext (insn, speed_p))
9cf5d19e 1472 continue;
1473
1a6a0f2a 1474 extract_insn (insn);
1475
c7944dce 1476 set = simple_move (insn, speed_p);
1a6a0f2a 1477
1478 if (!set)
1479 cmi = NOT_SIMPLE_MOVE;
1480 else
1481 {
b5ca6624 1482 /* We mark pseudo-to-pseudo copies as decomposable during the
1483 second pass only. The first pass is so early that there is
1484 good chance such moves will be optimized away completely by
1485 subsequent optimizations anyway.
1486
1487 However, we call find_pseudo_copy even during the first pass
1488 so as to properly set up the reg_copy_graph. */
1e5b92fa 1489 if (find_pseudo_copy (set))
b5ca6624 1490 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1a6a0f2a 1491 else
1492 cmi = SIMPLE_MOVE;
1493 }
1494
1495 n = recog_data.n_operands;
1496 for (i = 0; i < n; ++i)
1497 {
665db605 1498 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1a6a0f2a 1499
1500 /* We handle ASM_OPERANDS as a special case to support
1501 things like x86 rdtsc which returns a DImode value.
1502 We can decompose the output, which will certainly be
1503 operand 0, but not the inputs. */
1504
1505 if (cmi == SIMPLE_MOVE
1506 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1507 {
1508 gcc_assert (i == 0);
1509 cmi = NOT_SIMPLE_MOVE;
1510 }
1511 }
1512 }
1513 }
1514
1515 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1516 if (!bitmap_empty_p (decomposable_context))
1517 {
db1c50be 1518 unsigned int i;
1519 sbitmap_iterator sbi;
1a6a0f2a 1520 bitmap_iterator iter;
1521 unsigned int regno;
1522
1523 propagate_pseudo_copies ();
1524
3c6549f8 1525 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
53c5d9d4 1526 bitmap_clear (sub_blocks);
1a6a0f2a 1527
1528 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1529 decompose_register (regno);
1530
fc00614f 1531 FOR_EACH_BB_FN (bb, cfun)
1a6a0f2a 1532 {
a5942062 1533 rtx_insn *insn;
1a6a0f2a 1534
201f6961 1535 FOR_BB_INSNS (bb, insn)
1a6a0f2a 1536 {
da7a04f1 1537 rtx pat;
1a6a0f2a 1538
1539 if (!INSN_P (insn))
1540 continue;
1541
1a6a0f2a 1542 pat = PATTERN (insn);
1543 if (GET_CODE (pat) == CLOBBER)
db2200eb 1544 resolve_clobber (pat, insn);
1a6a0f2a 1545 else if (GET_CODE (pat) == USE)
db2200eb 1546 resolve_use (pat, insn);
9845d120 1547 else if (DEBUG_INSN_P (insn))
1548 resolve_debug (insn);
1a6a0f2a 1549 else
1550 {
1551 rtx set;
1552 int i;
1553
1554 recog_memoized (insn);
1555 extract_insn (insn);
1556
c7944dce 1557 set = simple_move (insn, speed_p);
1a6a0f2a 1558 if (set)
1559 {
a5942062 1560 rtx_insn *orig_insn = insn;
97bb2849 1561 bool cfi = control_flow_insn_p (insn);
1a6a0f2a 1562
db1c50be 1563 /* We can end up splitting loads to multi-word pseudos
1564 into separate loads to machine word size pseudos.
1565 When this happens, we first had one load that can
1566 throw, and after resolve_simple_move we'll have a
1567 bunch of loads (at least two). All those loads may
1568 trap if we can have non-call exceptions, so they
1569 all will end the current basic block. We split the
1570 block after the outer loop over all insns, but we
1571 make sure here that we will be able to split the
1572 basic block and still produce the correct control
1573 flow graph for it. */
1574 gcc_assert (!cfi
cbeb677e 1575 || (cfun->can_throw_non_call_exceptions
db1c50be 1576 && can_throw_internal (insn)));
1577
1a6a0f2a 1578 insn = resolve_simple_move (set, insn);
1579 if (insn != orig_insn)
1580 {
1a6a0f2a 1581 recog_memoized (insn);
1582 extract_insn (insn);
97bb2849 1583
1584 if (cfi)
08b7917c 1585 bitmap_set_bit (sub_blocks, bb->index);
1a6a0f2a 1586 }
1587 }
9cf5d19e 1588 else
1589 {
a5942062 1590 rtx_insn *decomposed_shift;
9cf5d19e 1591
1592 decomposed_shift = resolve_shift_zext (insn);
1593 if (decomposed_shift != NULL_RTX)
1594 {
9cf5d19e 1595 insn = decomposed_shift;
1596 recog_memoized (insn);
1597 extract_insn (insn);
1598 }
1599 }
1a6a0f2a 1600
1601 for (i = recog_data.n_operands - 1; i >= 0; --i)
2e3cae91 1602 resolve_subreg_use (recog_data.operand_loc[i], insn);
1a6a0f2a 1603
1604 resolve_reg_notes (insn);
1605
1606 if (num_validated_changes () > 0)
1607 {
1608 for (i = recog_data.n_dups - 1; i >= 0; --i)
1609 {
1610 rtx *pl = recog_data.dup_loc[i];
1611 int dup_num = recog_data.dup_num[i];
1612 rtx *px = recog_data.operand_loc[dup_num];
1613
c47adb48 1614 validate_unshare_change (insn, pl, *px, 1);
1a6a0f2a 1615 }
1616
1617 i = apply_change_group ();
1618 gcc_assert (i);
1a6a0f2a 1619 }
1620 }
1a6a0f2a 1621 }
1622 }
1623
db1c50be 1624 /* If we had insns to split that caused control flow insns in the middle
1625 of a basic block, split those blocks now. Note that we only handle
1626 the case where splitting a load has caused multiple possibly trapping
1627 loads to appear. */
0d211963 1628 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
db1c50be 1629 {
a5942062 1630 rtx_insn *insn, *end;
db1c50be 1631 edge fallthru;
1632
f5a6b05f 1633 bb = BASIC_BLOCK_FOR_FN (cfun, i);
db1c50be 1634 insn = BB_HEAD (bb);
1635 end = BB_END (bb);
1636
1637 while (insn != end)
1638 {
1639 if (control_flow_insn_p (insn))
1640 {
1641 /* Split the block after insn. There will be a fallthru
1642 edge, which is OK so we keep it. We have to create the
1643 exception edges ourselves. */
1644 fallthru = split_block (bb, insn);
1645 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1646 bb = fallthru->dest;
1647 insn = BB_HEAD (bb);
1648 }
1649 else
1650 insn = NEXT_INSN (insn);
1651 }
1652 }
1a6a0f2a 1653 }
1654
1655 {
1656 unsigned int i;
1657 bitmap b;
1658
f1f41a6c 1659 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1a6a0f2a 1660 if (b)
1661 BITMAP_FREE (b);
1662 }
1663
f1f41a6c 1664 reg_copy_graph.release ();
1a6a0f2a 1665
1666 BITMAP_FREE (decomposable_context);
1667 BITMAP_FREE (non_decomposable_context);
5277d36e 1668 BITMAP_FREE (subreg_context);
1a6a0f2a 1669}
1670\f
1a6a0f2a 1671/* Implement first lower subreg pass. */
1672
cbe8bda8 1673namespace {
1674
1675const pass_data pass_data_lower_subreg =
1a6a0f2a 1676{
cbe8bda8 1677 RTL_PASS, /* type */
1678 "subreg1", /* name */
1679 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 1680 TV_LOWER_SUBREG, /* tv_id */
1681 0, /* properties_required */
1682 0, /* properties_provided */
1683 0, /* properties_destroyed */
1684 0, /* todo_flags_start */
8b88439e 1685 0, /* todo_flags_finish */
1a6a0f2a 1686};
1687
cbe8bda8 1688class pass_lower_subreg : public rtl_opt_pass
1689{
1690public:
9af5ce0c 1691 pass_lower_subreg (gcc::context *ctxt)
1692 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
cbe8bda8 1693 {}
1694
1695 /* opt_pass methods: */
31315c24 1696 virtual bool gate (function *) { return flag_split_wide_types != 0; }
65b0537f 1697 virtual unsigned int execute (function *)
1698 {
1699 decompose_multiword_subregs (false);
1700 return 0;
1701 }
cbe8bda8 1702
1703}; // class pass_lower_subreg
1704
1705} // anon namespace
1706
1707rtl_opt_pass *
1708make_pass_lower_subreg (gcc::context *ctxt)
1709{
1710 return new pass_lower_subreg (ctxt);
1711}
1712
65b0537f 1713/* Implement second lower subreg pass. */
1714
cbe8bda8 1715namespace {
1716
1717const pass_data pass_data_lower_subreg2 =
1a6a0f2a 1718{
cbe8bda8 1719 RTL_PASS, /* type */
1720 "subreg2", /* name */
1721 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 1722 TV_LOWER_SUBREG, /* tv_id */
1723 0, /* properties_required */
1724 0, /* properties_provided */
1725 0, /* properties_destroyed */
1726 0, /* todo_flags_start */
8b88439e 1727 TODO_df_finish, /* todo_flags_finish */
1a6a0f2a 1728};
cbe8bda8 1729
1730class pass_lower_subreg2 : public rtl_opt_pass
1731{
1732public:
9af5ce0c 1733 pass_lower_subreg2 (gcc::context *ctxt)
1734 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
cbe8bda8 1735 {}
1736
1737 /* opt_pass methods: */
31315c24 1738 virtual bool gate (function *) { return flag_split_wide_types != 0; }
65b0537f 1739 virtual unsigned int execute (function *)
1740 {
1741 decompose_multiword_subregs (true);
1742 return 0;
1743 }
cbe8bda8 1744
1745}; // class pass_lower_subreg2
1746
1747} // anon namespace
1748
1749rtl_opt_pass *
1750make_pass_lower_subreg2 (gcc::context *ctxt)
1751{
1752 return new pass_lower_subreg2 (ctxt);
1753}