]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/lower-subreg.c
Move MEMMODEL_* from coretypes.h to memmodel.h
[thirdparty/gcc.git] / gcc / lower-subreg.c
CommitLineData
1a6a0f2a 1/* Decompose multiword subregs.
f1717362 2 Copyright (C) 2007-2016 Free Software Foundation, Inc.
1a6a0f2a 3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
8c4c00c1 10Software Foundation; either version 3, or (at your option) any later
1a6a0f2a 11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
8c4c00c1 19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
1a6a0f2a 21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
1a6a0f2a 26#include "rtl.h"
7c29e30e 27#include "tree.h"
28#include "cfghooks.h"
9ef16211 29#include "df.h"
ad7b10a2 30#include "memmodel.h"
1a6a0f2a 31#include "tm_p.h"
7c29e30e 32#include "expmed.h"
1a6a0f2a 33#include "insn-config.h"
7c29e30e 34#include "emit-rtl.h"
35#include "recog.h"
94ea8568 36#include "cfgrtl.h"
37#include "cfgbuild.h"
0e8e9be3 38#include "dce.h"
1a6a0f2a 39#include "expr.h"
1a6a0f2a 40#include "tree-pass.h"
c7944dce 41#include "lower-subreg.h"
2e3cae91 42#include "rtl-iter.h"
1a6a0f2a 43
1a6a0f2a 44
45/* Decompose multi-word pseudo-registers into individual
c7944dce 46 pseudo-registers when possible and profitable. This is possible
47 when all the uses of a multi-word register are via SUBREG, or are
48 copies of the register to another location. Breaking apart the
49 register permits more CSE and permits better register allocation.
50 This is profitable if the machine does not have move instructions
51 to do this.
52
53 This pass only splits moves with modes that are wider than
4d5cf08a 54 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
55 integer modes that are twice the width of word_mode. The latter
56 could be generalized if there was a need to do this, but the trend in
c7944dce 57 architectures is to not need this.
58
59 There are two useful preprocessor defines for use by maintainers:
60
61 #define LOG_COSTS 1
62
63 if you wish to see the actual cost estimates that are being used
64 for each mode wider than word mode and the cost estimates for zero
65 extension and the shifts. This can be useful when port maintainers
66 are tuning insn rtx costs.
67
68 #define FORCE_LOWERING 1
69
70 if you wish to test the pass with all the transformation forced on.
71 This can be useful for finding bugs in the transformations. */
72
73#define LOG_COSTS 0
74#define FORCE_LOWERING 0
1a6a0f2a 75
76/* Bit N in this bitmap is set if regno N is used in a context in
77 which we can decompose it. */
78static bitmap decomposable_context;
79
80/* Bit N in this bitmap is set if regno N is used in a context in
81 which it can not be decomposed. */
82static bitmap non_decomposable_context;
83
5277d36e 84/* Bit N in this bitmap is set if regno N is used in a subreg
85 which changes the mode but not the size. This typically happens
86 when the register accessed as a floating-point value; we want to
87 avoid generating accesses to its subwords in integer modes. */
88static bitmap subreg_context;
89
1a6a0f2a 90/* Bit N in the bitmap in element M of this array is set if there is a
91 copy from reg M to reg N. */
f1f41a6c 92static vec<bitmap> reg_copy_graph;
1a6a0f2a 93
c7944dce 94struct target_lower_subreg default_target_lower_subreg;
95#if SWITCHABLE_TARGET
96struct target_lower_subreg *this_target_lower_subreg
97 = &default_target_lower_subreg;
98#endif
99
100#define twice_word_mode \
101 this_target_lower_subreg->x_twice_word_mode
102#define choices \
103 this_target_lower_subreg->x_choices
104
105/* RTXes used while computing costs. */
106struct cost_rtxes {
107 /* Source and target registers. */
108 rtx source;
109 rtx target;
110
111 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
112 rtx zext;
113
114 /* A shift of SOURCE. */
115 rtx shift;
116
117 /* A SET of TARGET. */
118 rtx set;
119};
120
121/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
122 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
123
124static int
125shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
3754d046 126 machine_mode mode, int op1)
c7944dce 127{
c7944dce 128 PUT_CODE (rtxes->shift, code);
129 PUT_MODE (rtxes->shift, mode);
130 PUT_MODE (rtxes->source, mode);
131 XEXP (rtxes->shift, 1) = GEN_INT (op1);
5ae4887d 132 return set_src_cost (rtxes->shift, mode, speed_p);
c7944dce 133}
134
135/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
136 to true if it is profitable to split a double-word CODE shift
137 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
138 for speed or size profitability.
139
140 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
141 the cost of moving zero into a word-mode register. WORD_MOVE_COST
142 is the cost of moving between word registers. */
143
144static void
145compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
146 bool *splitting, enum rtx_code code,
147 int word_move_zero_cost, int word_move_cost)
148{
4d5cf08a 149 int wide_cost, narrow_cost, upper_cost, i;
c7944dce 150
151 for (i = 0; i < BITS_PER_WORD; i++)
152 {
153 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
154 i + BITS_PER_WORD);
155 if (i == 0)
156 narrow_cost = word_move_cost;
157 else
158 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
159
4d5cf08a 160 if (code != ASHIFTRT)
161 upper_cost = word_move_zero_cost;
162 else if (i == BITS_PER_WORD - 1)
163 upper_cost = word_move_cost;
164 else
165 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
166 BITS_PER_WORD - 1);
167
c7944dce 168 if (LOG_COSTS)
169 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
170 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
4d5cf08a 171 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
c7944dce 172
4d5cf08a 173 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
c7944dce 174 splitting[i] = true;
175 }
176}
177
178/* Compute what we should do when optimizing for speed or size; SPEED_P
179 selects which. Use RTXES for computing costs. */
180
181static void
182compute_costs (bool speed_p, struct cost_rtxes *rtxes)
183{
184 unsigned int i;
185 int word_move_zero_cost, word_move_cost;
186
a6d935b7 187 PUT_MODE (rtxes->target, word_mode);
c7944dce 188 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
a6d935b7 189 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 190
191 SET_SRC (rtxes->set) = rtxes->source;
a6d935b7 192 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 193
194 if (LOG_COSTS)
195 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
196 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
197
198 for (i = 0; i < MAX_MACHINE_MODE; i++)
199 {
3754d046 200 machine_mode mode = (machine_mode) i;
c7944dce 201 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
202 if (factor > 1)
203 {
204 int mode_move_cost;
205
206 PUT_MODE (rtxes->target, mode);
207 PUT_MODE (rtxes->source, mode);
a6d935b7 208 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
c7944dce 209
210 if (LOG_COSTS)
211 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
212 GET_MODE_NAME (mode), mode_move_cost,
213 word_move_cost, factor);
214
215 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
216 {
217 choices[speed_p].move_modes_to_split[i] = true;
218 choices[speed_p].something_to_do = true;
219 }
220 }
221 }
222
223 /* For the moves and shifts, the only case that is checked is one
224 where the mode of the target is an integer mode twice the width
225 of the word_mode.
226
227 If it is not profitable to split a double word move then do not
228 even consider the shifts or the zero extension. */
229 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
230 {
231 int zext_cost;
232
233 /* The only case here to check to see if moving the upper part with a
234 zero is cheaper than doing the zext itself. */
c7944dce 235 PUT_MODE (rtxes->source, word_mode);
5ae4887d 236 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
c7944dce 237
238 if (LOG_COSTS)
239 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
240 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
241 zext_cost, word_move_cost, word_move_zero_cost);
242
243 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
244 choices[speed_p].splitting_zext = true;
245
246 compute_splitting_shift (speed_p, rtxes,
247 choices[speed_p].splitting_ashift, ASHIFT,
248 word_move_zero_cost, word_move_cost);
249 compute_splitting_shift (speed_p, rtxes,
250 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
251 word_move_zero_cost, word_move_cost);
4d5cf08a 252 compute_splitting_shift (speed_p, rtxes,
253 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
254 word_move_zero_cost, word_move_cost);
c7944dce 255 }
256}
257
258/* Do one-per-target initialisation. This involves determining
259 which operations on the machine are profitable. If none are found,
260 then the pass just returns when called. */
261
262void
263init_lower_subreg (void)
264{
265 struct cost_rtxes rtxes;
266
267 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
268
269 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
270
dcd6d0f4 271 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
272 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
d1f9b275 273 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
c7944dce 274 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
275 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
276
277 if (LOG_COSTS)
278 fprintf (stderr, "\nSize costs\n==========\n\n");
279 compute_costs (false, &rtxes);
280
281 if (LOG_COSTS)
282 fprintf (stderr, "\nSpeed costs\n===========\n\n");
283 compute_costs (true, &rtxes);
284}
67c3f580 285
286static bool
287simple_move_operand (rtx x)
288{
289 if (GET_CODE (x) == SUBREG)
290 x = SUBREG_REG (x);
291
292 if (!OBJECT_P (x))
293 return false;
294
295 if (GET_CODE (x) == LABEL_REF
296 || GET_CODE (x) == SYMBOL_REF
ab9eaa97 297 || GET_CODE (x) == HIGH
298 || GET_CODE (x) == CONST)
67c3f580 299 return false;
300
301 if (MEM_P (x)
302 && (MEM_VOLATILE_P (x)
4e27ffd0 303 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
67c3f580 304 return false;
305
306 return true;
307}
308
c7944dce 309/* If INSN is a single set between two objects that we want to split,
310 return the single set. SPEED_P says whether we are optimizing
311 INSN for speed or size.
312
313 INSN should have been passed to recog and extract_insn before this
314 is called. */
1a6a0f2a 315
316static rtx
a5942062 317simple_move (rtx_insn *insn, bool speed_p)
1a6a0f2a 318{
319 rtx x;
320 rtx set;
3754d046 321 machine_mode mode;
1a6a0f2a 322
323 if (recog_data.n_operands != 2)
324 return NULL_RTX;
325
326 set = single_set (insn);
327 if (!set)
328 return NULL_RTX;
329
330 x = SET_DEST (set);
331 if (x != recog_data.operand[0] && x != recog_data.operand[1])
332 return NULL_RTX;
67c3f580 333 if (!simple_move_operand (x))
1a6a0f2a 334 return NULL_RTX;
335
336 x = SET_SRC (set);
337 if (x != recog_data.operand[0] && x != recog_data.operand[1])
338 return NULL_RTX;
67c3f580 339 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
340 things like x86 rdtsc which returns a DImode value. */
341 if (GET_CODE (x) != ASM_OPERANDS
342 && !simple_move_operand (x))
1a6a0f2a 343 return NULL_RTX;
344
345 /* We try to decompose in integer modes, to avoid generating
346 inefficient code copying between integer and floating point
347 registers. That means that we can't decompose if this is a
348 non-integer mode for which there is no integer mode of the same
349 size. */
36c98bd9 350 mode = GET_MODE (SET_DEST (set));
1a6a0f2a 351 if (!SCALAR_INT_MODE_P (mode)
352 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
353 == BLKmode))
354 return NULL_RTX;
355
5e016dfc 356 /* Reject PARTIAL_INT modes. They are used for processor specific
357 purposes and it's probably best not to tamper with them. */
358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
359 return NULL_RTX;
360
c7944dce 361 if (!choices[speed_p].move_modes_to_split[(int) mode])
362 return NULL_RTX;
363
1a6a0f2a 364 return set;
365}
366
367/* If SET is a copy from one multi-word pseudo-register to another,
368 record that in reg_copy_graph. Return whether it is such a
369 copy. */
370
371static bool
372find_pseudo_copy (rtx set)
373{
374 rtx dest = SET_DEST (set);
375 rtx src = SET_SRC (set);
376 unsigned int rd, rs;
377 bitmap b;
378
379 if (!REG_P (dest) || !REG_P (src))
380 return false;
381
382 rd = REGNO (dest);
383 rs = REGNO (src);
384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
385 return false;
386
f1f41a6c 387 b = reg_copy_graph[rs];
1a6a0f2a 388 if (b == NULL)
389 {
390 b = BITMAP_ALLOC (NULL);
f1f41a6c 391 reg_copy_graph[rs] = b;
1a6a0f2a 392 }
393
394 bitmap_set_bit (b, rd);
395
396 return true;
397}
398
399/* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
400 where they are copied to another register, add the register to
401 which they are copied to DECOMPOSABLE_CONTEXT. Use
402 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
404
405static void
406propagate_pseudo_copies (void)
407{
408 bitmap queue, propagate;
409
410 queue = BITMAP_ALLOC (NULL);
411 propagate = BITMAP_ALLOC (NULL);
412
413 bitmap_copy (queue, decomposable_context);
414 do
415 {
416 bitmap_iterator iter;
417 unsigned int i;
418
419 bitmap_clear (propagate);
420
421 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
422 {
f1f41a6c 423 bitmap b = reg_copy_graph[i];
1a6a0f2a 424 if (b)
425 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
426 }
427
428 bitmap_and_compl (queue, propagate, decomposable_context);
429 bitmap_ior_into (decomposable_context, propagate);
430 }
431 while (!bitmap_empty_p (queue));
432
433 BITMAP_FREE (queue);
434 BITMAP_FREE (propagate);
435}
436
/* Classification of the insn being scanned.  A pointer to one of these
   values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is any other simple move.  */
  SIMPLE_MOVE
};
449
665db605 450/* If we find a SUBREG in *LOC which we could use to decompose a
451 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
452 unadorned register which is not a simple pseudo-register copy,
453 DATA will point at the type of move, and we set a bit in
454 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
1a6a0f2a 455
665db605 456static void
457find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
1a6a0f2a 458{
665db605 459 subrtx_var_iterator::array_type array;
460 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
1a6a0f2a 461 {
665db605 462 rtx x = *iter;
463 if (GET_CODE (x) == SUBREG)
464 {
465 rtx inner = SUBREG_REG (x);
466 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
1a6a0f2a 467
665db605 468 if (!REG_P (inner))
469 continue;
1a6a0f2a 470
665db605 471 regno = REGNO (inner);
472 if (HARD_REGISTER_NUM_P (regno))
473 {
474 iter.skip_subrtxes ();
475 continue;
476 }
1a6a0f2a 477
665db605 478 outer_size = GET_MODE_SIZE (GET_MODE (x));
479 inner_size = GET_MODE_SIZE (GET_MODE (inner));
480 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
481 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1a6a0f2a 482
665db605 483 /* We only try to decompose single word subregs of multi-word
484 registers. When we find one, we return -1 to avoid iterating
485 over the inner register.
1a6a0f2a 486
665db605 487 ??? This doesn't allow, e.g., DImode subregs of TImode values
488 on 32-bit targets. We would need to record the way the
489 pseudo-register was used, and only decompose if all the uses
490 were the same number and size of pieces. Hopefully this
491 doesn't happen much. */
1a6a0f2a 492
665db605 493 if (outer_words == 1 && inner_words > 1)
494 {
495 bitmap_set_bit (decomposable_context, regno);
496 iter.skip_subrtxes ();
497 continue;
498 }
4e7a1eb8 499
665db605 500 /* If this is a cast from one mode to another, where the modes
501 have the same size, and they are not tieable, then mark this
502 register as non-decomposable. If we decompose it we are
503 likely to mess up whatever the backend is trying to do. */
504 if (outer_words > 1
505 && outer_size == inner_size
506 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
507 {
508 bitmap_set_bit (non_decomposable_context, regno);
509 bitmap_set_bit (subreg_context, regno);
510 iter.skip_subrtxes ();
511 continue;
512 }
4e7a1eb8 513 }
665db605 514 else if (REG_P (x))
1a6a0f2a 515 {
665db605 516 unsigned int regno;
517
518 /* We will see an outer SUBREG before we see the inner REG, so
519 when we see a plain REG here it means a direct reference to
520 the register.
521
522 If this is not a simple copy from one location to another,
523 then we can not decompose this register. If this is a simple
524 copy we want to decompose, and the mode is right,
525 then we mark the register as decomposable.
526 Otherwise we don't say anything about this register --
527 it could be decomposed, but whether that would be
528 profitable depends upon how it is used elsewhere.
529
530 We only set bits in the bitmap for multi-word
531 pseudo-registers, since those are the only ones we care about
532 and it keeps the size of the bitmaps down. */
533
534 regno = REGNO (x);
535 if (!HARD_REGISTER_NUM_P (regno)
536 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1a6a0f2a 537 {
665db605 538 switch (*pcmi)
539 {
540 case NOT_SIMPLE_MOVE:
541 bitmap_set_bit (non_decomposable_context, regno);
542 break;
543 case DECOMPOSABLE_SIMPLE_MOVE:
544 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
545 bitmap_set_bit (decomposable_context, regno);
546 break;
547 case SIMPLE_MOVE:
548 break;
549 default:
550 gcc_unreachable ();
551 }
1a6a0f2a 552 }
553 }
665db605 554 else if (MEM_P (x))
555 {
556 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
67c3f580 557
665db605 558 /* Any registers used in a MEM do not participate in a
559 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
560 here, and return -1 to block the parent's recursion. */
561 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
562 iter.skip_subrtxes ();
563 }
67c3f580 564 }
1a6a0f2a 565}
566
567/* Decompose REGNO into word-sized components. We smash the REG node
568 in place. This ensures that (1) something goes wrong quickly if we
569 fail to make some replacement, and (2) the debug information inside
570 the symbol table is automatically kept up to date. */
571
572static void
573decompose_register (unsigned int regno)
574{
575 rtx reg;
576 unsigned int words, i;
577 rtvec v;
578
579 reg = regno_reg_rtx[regno];
580
581 regno_reg_rtx[regno] = NULL_RTX;
1a6a0f2a 582
583 words = GET_MODE_SIZE (GET_MODE (reg));
584 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
585
586 v = rtvec_alloc (words);
587 for (i = 0; i < words; ++i)
588 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
589
590 PUT_CODE (reg, CONCATN);
591 XVEC (reg, 0) = v;
592
593 if (dump_file)
594 {
595 fprintf (dump_file, "; Splitting reg %u ->", regno);
596 for (i = 0; i < words; ++i)
597 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
598 fputc ('\n', dump_file);
599 }
600}
601
602/* Get a SUBREG of a CONCATN. */
603
604static rtx
3754d046 605simplify_subreg_concatn (machine_mode outermode, rtx op,
1a6a0f2a 606 unsigned int byte)
607{
608 unsigned int inner_size;
3754d046 609 machine_mode innermode, partmode;
1a6a0f2a 610 rtx part;
611 unsigned int final_offset;
612
613 gcc_assert (GET_CODE (op) == CONCATN);
614 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
615
616 innermode = GET_MODE (op);
617 gcc_assert (byte < GET_MODE_SIZE (innermode));
ae1f04be 618 if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
619 return NULL_RTX;
1a6a0f2a 620
621 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
622 part = XVECEXP (op, 0, byte / inner_size);
50bdfec8 623 partmode = GET_MODE (part);
624
598ffe59 625 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
626 regular CONST_VECTORs. They have vector or integer modes, depending
627 on the capabilities of the target. Cope with them. */
628 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
629 partmode = GET_MODE_INNER (innermode);
630 else if (partmode == VOIDmode)
50bdfec8 631 {
598ffe59 632 enum mode_class mclass = GET_MODE_CLASS (innermode);
633 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
50bdfec8 634 }
635
1a6a0f2a 636 final_offset = byte % inner_size;
637 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
638 return NULL_RTX;
639
50bdfec8 640 return simplify_gen_subreg (outermode, part, partmode, final_offset);
1a6a0f2a 641}
642
643/* Wrapper around simplify_gen_subreg which handles CONCATN. */
644
645static rtx
3754d046 646simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
647 machine_mode innermode, unsigned int byte)
1a6a0f2a 648{
3fa57b79 649 rtx ret;
650
1a6a0f2a 651 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
652 If OP is a SUBREG of a CONCATN, then it must be a simple mode
653 change with the same size and offset 0, or it must extract a
654 part. We shouldn't see anything else here. */
655 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
656 {
657 rtx op2;
658
659 if ((GET_MODE_SIZE (GET_MODE (op))
660 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
661 && SUBREG_BYTE (op) == 0)
662 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
663 GET_MODE (SUBREG_REG (op)), byte);
664
665 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
666 SUBREG_BYTE (op));
667 if (op2 == NULL_RTX)
668 {
669 /* We don't handle paradoxical subregs here. */
670 gcc_assert (GET_MODE_SIZE (outermode)
671 <= GET_MODE_SIZE (GET_MODE (op)));
672 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
673 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
674 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
675 byte + SUBREG_BYTE (op));
676 gcc_assert (op2 != NULL_RTX);
677 return op2;
678 }
679
680 op = op2;
681 gcc_assert (op != NULL_RTX);
682 gcc_assert (innermode == GET_MODE (op));
683 }
3fa57b79 684
1a6a0f2a 685 if (GET_CODE (op) == CONCATN)
686 return simplify_subreg_concatn (outermode, op, byte);
3fa57b79 687
688 ret = simplify_gen_subreg (outermode, op, innermode, byte);
689
690 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
691 resolve_simple_move will ask for the high part of the paradoxical
692 subreg, which does not have a value. Just return a zero. */
693 if (ret == NULL_RTX
694 && GET_CODE (op) == SUBREG
695 && SUBREG_BYTE (op) == 0
696 && (GET_MODE_SIZE (innermode)
697 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
698 return CONST0_RTX (outermode);
699
700 gcc_assert (ret != NULL_RTX);
701 return ret;
1a6a0f2a 702}
703
704/* Return whether we should resolve X into the registers into which it
705 was decomposed. */
706
707static bool
708resolve_reg_p (rtx x)
709{
710 return GET_CODE (x) == CONCATN;
711}
712
713/* Return whether X is a SUBREG of a register which we need to
714 resolve. */
715
716static bool
717resolve_subreg_p (rtx x)
718{
719 if (GET_CODE (x) != SUBREG)
720 return false;
721 return resolve_reg_p (SUBREG_REG (x));
722}
723
2e3cae91 724/* Look for SUBREGs in *LOC which need to be decomposed. */
1a6a0f2a 725
2e3cae91 726static bool
727resolve_subreg_use (rtx *loc, rtx insn)
1a6a0f2a 728{
2e3cae91 729 subrtx_ptr_iterator::array_type array;
730 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
1a6a0f2a 731 {
2e3cae91 732 rtx *loc = *iter;
733 rtx x = *loc;
734 if (resolve_subreg_p (x))
1a6a0f2a 735 {
2e3cae91 736 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
737 SUBREG_BYTE (x));
1a6a0f2a 738
2e3cae91 739 /* It is possible for a note to contain a reference which we can
740 decompose. In this case, return 1 to the caller to indicate
741 that the note must be removed. */
742 if (!x)
743 {
744 gcc_assert (!insn);
745 return true;
746 }
1a6a0f2a 747
2e3cae91 748 validate_change (insn, loc, x, 1);
749 iter.skip_subrtxes ();
750 }
751 else if (resolve_reg_p (x))
752 /* Return 1 to the caller to indicate that we found a direct
753 reference to a register which is being decomposed. This can
754 happen inside notes, multiword shift or zero-extend
755 instructions. */
756 return true;
1a6a0f2a 757 }
758
2e3cae91 759 return false;
1a6a0f2a 760}
761
1a6a0f2a 762/* Resolve any decomposed registers which appear in register notes on
763 INSN. */
764
765static void
a5942062 766resolve_reg_notes (rtx_insn *insn)
1a6a0f2a 767{
768 rtx *pnote, note;
769
770 note = find_reg_equal_equiv_note (insn);
771 if (note)
772 {
3072d30e 773 int old_count = num_validated_changes ();
2e3cae91 774 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
1e5b92fa 775 remove_note (insn, note);
3072d30e 776 else
777 if (old_count != num_validated_changes ())
778 df_notes_rescan (insn);
1a6a0f2a 779 }
780
781 pnote = &REG_NOTES (insn);
782 while (*pnote != NULL_RTX)
783 {
9ce37fa7 784 bool del = false;
1a6a0f2a 785
786 note = *pnote;
787 switch (REG_NOTE_KIND (note))
788 {
3072d30e 789 case REG_DEAD:
790 case REG_UNUSED:
1a6a0f2a 791 if (resolve_reg_p (XEXP (note, 0)))
9ce37fa7 792 del = true;
1a6a0f2a 793 break;
794
795 default:
796 break;
797 }
798
9ce37fa7 799 if (del)
1a6a0f2a 800 *pnote = XEXP (note, 1);
801 else
802 pnote = &XEXP (note, 1);
803 }
804}
805
67c3f580 806/* Return whether X can be decomposed into subwords. */
1a6a0f2a 807
808static bool
67c3f580 809can_decompose_p (rtx x)
1a6a0f2a 810{
811 if (REG_P (x))
812 {
813 unsigned int regno = REGNO (x);
814
815 if (HARD_REGISTER_NUM_P (regno))
5f961ca4 816 {
817 unsigned int byte, num_bytes;
818
819 num_bytes = GET_MODE_SIZE (GET_MODE (x));
820 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
821 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
822 return false;
823 return true;
824 }
1a6a0f2a 825 else
5277d36e 826 return !bitmap_bit_p (subreg_context, regno);
1a6a0f2a 827 }
828
67c3f580 829 return true;
1a6a0f2a 830}
831
832/* Decompose the registers used in a simple move SET within INSN. If
833 we don't change anything, return INSN, otherwise return the start
834 of the sequence of moves. */
835
a5942062 836static rtx_insn *
837resolve_simple_move (rtx set, rtx_insn *insn)
1a6a0f2a 838{
a5942062 839 rtx src, dest, real_dest;
840 rtx_insn *insns;
3754d046 841 machine_mode orig_mode, dest_mode;
1a6a0f2a 842 unsigned int words;
843 bool pushing;
844
845 src = SET_SRC (set);
846 dest = SET_DEST (set);
847 orig_mode = GET_MODE (dest);
848
849 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
c7944dce 850 gcc_assert (words > 1);
1a6a0f2a 851
852 start_sequence ();
853
854 /* We have to handle copying from a SUBREG of a decomposed reg where
855 the SUBREG is larger than word size. Rather than assume that we
856 can take a word_mode SUBREG of the destination, we copy to a new
857 register and then copy that to the destination. */
858
859 real_dest = NULL_RTX;
860
861 if (GET_CODE (src) == SUBREG
862 && resolve_reg_p (SUBREG_REG (src))
863 && (SUBREG_BYTE (src) != 0
864 || (GET_MODE_SIZE (orig_mode)
865 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
866 {
867 real_dest = dest;
868 dest = gen_reg_rtx (orig_mode);
869 if (REG_P (real_dest))
870 REG_ATTRS (dest) = REG_ATTRS (real_dest);
871 }
872
873 /* Similarly if we are copying to a SUBREG of a decomposed reg where
874 the SUBREG is larger than word size. */
875
876 if (GET_CODE (dest) == SUBREG
877 && resolve_reg_p (SUBREG_REG (dest))
878 && (SUBREG_BYTE (dest) != 0
879 || (GET_MODE_SIZE (orig_mode)
880 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
881 {
a5942062 882 rtx reg, smove;
883 rtx_insn *minsn;
1a6a0f2a 884
885 reg = gen_reg_rtx (orig_mode);
886 minsn = emit_move_insn (reg, src);
887 smove = single_set (minsn);
888 gcc_assert (smove != NULL_RTX);
889 resolve_simple_move (smove, minsn);
890 src = reg;
891 }
892
893 /* If we didn't have any big SUBREGS of decomposed registers, and
894 neither side of the move is a register we are decomposing, then
895 we don't have to do anything here. */
896
897 if (src == SET_SRC (set)
898 && dest == SET_DEST (set)
899 && !resolve_reg_p (src)
900 && !resolve_subreg_p (src)
901 && !resolve_reg_p (dest)
902 && !resolve_subreg_p (dest))
903 {
904 end_sequence ();
905 return insn;
906 }
907
ccd1ec59 908 /* It's possible for the code to use a subreg of a decomposed
909 register while forming an address. We need to handle that before
910 passing the address to emit_move_insn. We pass NULL_RTX as the
911 insn parameter to resolve_subreg_use because we can not validate
912 the insn yet. */
913 if (MEM_P (src) || MEM_P (dest))
914 {
915 int acg;
916
917 if (MEM_P (src))
2e3cae91 918 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
ccd1ec59 919 if (MEM_P (dest))
2e3cae91 920 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
ccd1ec59 921 acg = apply_change_group ();
922 gcc_assert (acg);
923 }
924
1a6a0f2a 925 /* If SRC is a register which we can't decompose, or has side
926 effects, we need to move via a temporary register. */
927
67c3f580 928 if (!can_decompose_p (src)
1a6a0f2a 929 || side_effects_p (src)
930 || GET_CODE (src) == ASM_OPERANDS)
931 {
932 rtx reg;
933
934 reg = gen_reg_rtx (orig_mode);
68a8f1b3 935
32aa77d9 936 if (AUTO_INC_DEC)
937 {
938 rtx move = emit_move_insn (reg, src);
939 if (MEM_P (src))
940 {
941 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
942 if (note)
943 add_reg_note (move, REG_INC, XEXP (note, 0));
944 }
945 }
946 else
947 emit_move_insn (reg, src);
948
1a6a0f2a 949 src = reg;
950 }
951
952 /* If DEST is a register which we can't decompose, or has side
953 effects, we need to first move to a temporary register. We
954 handle the common case of pushing an operand directly. We also
955 go through a temporary register if it holds a floating point
956 value. This gives us better code on systems which can't move
957 data easily between integer and floating point registers. */
958
959 dest_mode = orig_mode;
960 pushing = push_operand (dest, dest_mode);
67c3f580 961 if (!can_decompose_p (dest)
1a6a0f2a 962 || (side_effects_p (dest) && !pushing)
963 || (!SCALAR_INT_MODE_P (dest_mode)
964 && !resolve_reg_p (dest)
965 && !resolve_subreg_p (dest)))
966 {
967 if (real_dest == NULL_RTX)
968 real_dest = dest;
969 if (!SCALAR_INT_MODE_P (dest_mode))
970 {
971 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
972 MODE_INT, 0);
973 gcc_assert (dest_mode != BLKmode);
974 }
975 dest = gen_reg_rtx (dest_mode);
976 if (REG_P (real_dest))
977 REG_ATTRS (dest) = REG_ATTRS (real_dest);
978 }
979
980 if (pushing)
981 {
982 unsigned int i, j, jinc;
983
984 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
985 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
986 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
987
988 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
989 {
990 j = 0;
991 jinc = 1;
992 }
993 else
994 {
995 j = words - 1;
996 jinc = -1;
997 }
998
999 for (i = 0; i < words; ++i, j += jinc)
1000 {
1001 rtx temp;
1002
1003 temp = copy_rtx (XEXP (dest, 0));
1004 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1005 j * UNITS_PER_WORD);
1006 emit_move_insn (temp,
1007 simplify_gen_subreg_concatn (word_mode, src,
1008 orig_mode,
1009 j * UNITS_PER_WORD));
1010 }
1011 }
1012 else
1013 {
1014 unsigned int i;
1015
1016 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
18b42941 1017 emit_clobber (dest);
1a6a0f2a 1018
1019 for (i = 0; i < words; ++i)
1020 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1021 dest_mode,
1022 i * UNITS_PER_WORD),
1023 simplify_gen_subreg_concatn (word_mode, src,
1024 orig_mode,
1025 i * UNITS_PER_WORD));
1026 }
1027
1028 if (real_dest != NULL_RTX)
1029 {
a5942062 1030 rtx mdest, smove;
1031 rtx_insn *minsn;
1a6a0f2a 1032
1033 if (dest_mode == orig_mode)
1034 mdest = dest;
1035 else
1036 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1037 minsn = emit_move_insn (real_dest, mdest);
1038
32aa77d9 1039 if (AUTO_INC_DEC && MEM_P (real_dest)
68a8f1b3 1040 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1041 {
1042 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1043 if (note)
1044 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1045 }
68a8f1b3 1046
1a6a0f2a 1047 smove = single_set (minsn);
1048 gcc_assert (smove != NULL_RTX);
1049
1050 resolve_simple_move (smove, minsn);
1051 }
1052
1053 insns = get_insns ();
1054 end_sequence ();
1055
e38def9c 1056 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
97bb2849 1057
1a6a0f2a 1058 emit_insn_before (insns, insn);
1059
87c46d87 1060 /* If we get here via self-recursion, then INSN is not yet in the insns
c7ee1482 1061 chain and delete_insn will fail. We only want to remove INSN from the
1062 current sequence. See PR56738. */
1063 if (in_sequence_p ())
1064 remove_insn (insn);
1065 else
1066 delete_insn (insn);
1a6a0f2a 1067
1068 return insns;
1069}
1070
1071/* Change a CLOBBER of a decomposed register into a CLOBBER of the
1072 component registers. Return whether we changed something. */
1073
1074static bool
a5942062 1075resolve_clobber (rtx pat, rtx_insn *insn)
1a6a0f2a 1076{
e29831db 1077 rtx reg;
3754d046 1078 machine_mode orig_mode;
1a6a0f2a 1079 unsigned int words, i;
ab9eaa97 1080 int ret;
1a6a0f2a 1081
1082 reg = XEXP (pat, 0);
2289a5f2 1083 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1a6a0f2a 1084 return false;
1085
1086 orig_mode = GET_MODE (reg);
1087 words = GET_MODE_SIZE (orig_mode);
1088 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1089
ab9eaa97 1090 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1091 simplify_gen_subreg_concatn (word_mode, reg,
1092 orig_mode, 0),
1093 0);
3072d30e 1094 df_insn_rescan (insn);
ab9eaa97 1095 gcc_assert (ret != 0);
1096
1a6a0f2a 1097 for (i = words - 1; i > 0; --i)
1098 {
1099 rtx x;
1100
2289a5f2 1101 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1102 i * UNITS_PER_WORD);
1a6a0f2a 1103 x = gen_rtx_CLOBBER (VOIDmode, x);
1104 emit_insn_after (x, insn);
1105 }
1106
db2200eb 1107 resolve_reg_notes (insn);
1108
1a6a0f2a 1109 return true;
1110}
1111
1112/* A USE of a decomposed register is no longer meaningful. Return
1113 whether we changed something. */
1114
1115static bool
a5942062 1116resolve_use (rtx pat, rtx_insn *insn)
1a6a0f2a 1117{
1118 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1119 {
1120 delete_insn (insn);
1121 return true;
1122 }
db2200eb 1123
1124 resolve_reg_notes (insn);
1125
1a6a0f2a 1126 return false;
1127}
1128
9845d120 1129/* A VAR_LOCATION can be simplified. */
1130
1131static void
a5942062 1132resolve_debug (rtx_insn *insn)
9845d120 1133{
d1f3d29f 1134 subrtx_ptr_iterator::array_type array;
1135 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1136 {
1137 rtx *loc = *iter;
1138 rtx x = *loc;
1139 if (resolve_subreg_p (x))
1140 {
1141 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1142 SUBREG_BYTE (x));
1143
1144 if (x)
1145 *loc = x;
1146 else
1147 x = copy_rtx (*loc);
1148 }
1149 if (resolve_reg_p (x))
1150 *loc = copy_rtx (x);
1151 }
9845d120 1152
1153 df_insn_rescan (insn);
1154
1155 resolve_reg_notes (insn);
1156}
1157
c7944dce 1158/* Check if INSN is a decomposable multiword-shift or zero-extend and
1159 set the decomposable_context bitmap accordingly. SPEED_P is true
1160 if we are optimizing INSN for speed rather than size. Return true
1161 if INSN is decomposable. */
9cf5d19e 1162
c7944dce 1163static bool
a5942062 1164find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
9cf5d19e 1165{
1166 rtx set;
1167 rtx op;
1168 rtx op_operand;
1169
1170 set = single_set (insn);
1171 if (!set)
c7944dce 1172 return false;
9cf5d19e 1173
1174 op = SET_SRC (set);
1175 if (GET_CODE (op) != ASHIFT
1176 && GET_CODE (op) != LSHIFTRT
4d5cf08a 1177 && GET_CODE (op) != ASHIFTRT
9cf5d19e 1178 && GET_CODE (op) != ZERO_EXTEND)
c7944dce 1179 return false;
9cf5d19e 1180
1181 op_operand = XEXP (op, 0);
1182 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1183 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1184 || HARD_REGISTER_NUM_P (REGNO (op_operand))
c7944dce 1185 || GET_MODE (op) != twice_word_mode)
1186 return false;
9cf5d19e 1187
1188 if (GET_CODE (op) == ZERO_EXTEND)
1189 {
1190 if (GET_MODE (op_operand) != word_mode
c7944dce 1191 || !choices[speed_p].splitting_zext)
1192 return false;
9cf5d19e 1193 }
1194 else /* left or right shift */
1195 {
c7944dce 1196 bool *splitting = (GET_CODE (op) == ASHIFT
1197 ? choices[speed_p].splitting_ashift
4d5cf08a 1198 : GET_CODE (op) == ASHIFTRT
1199 ? choices[speed_p].splitting_ashiftrt
c7944dce 1200 : choices[speed_p].splitting_lshiftrt);
971ba038 1201 if (!CONST_INT_P (XEXP (op, 1))
c7944dce 1202 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1203 2 * BITS_PER_WORD - 1)
1204 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1205 return false;
1206
1207 bitmap_set_bit (decomposable_context, REGNO (op_operand));
9cf5d19e 1208 }
1209
1210 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1211
c7944dce 1212 return true;
9cf5d19e 1213}
1214
1215/* Decompose a more than word wide shift (in INSN) of a multiword
1216 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1217 and 'set to zero' insn. Return a pointer to the new insn when a
1218 replacement was done. */
1219
a5942062 1220static rtx_insn *
1221resolve_shift_zext (rtx_insn *insn)
9cf5d19e 1222{
1223 rtx set;
1224 rtx op;
1225 rtx op_operand;
a5942062 1226 rtx_insn *insns;
4d5cf08a 1227 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
9cf5d19e 1228 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1229
1230 set = single_set (insn);
1231 if (!set)
a5942062 1232 return NULL;
9cf5d19e 1233
1234 op = SET_SRC (set);
1235 if (GET_CODE (op) != ASHIFT
1236 && GET_CODE (op) != LSHIFTRT
4d5cf08a 1237 && GET_CODE (op) != ASHIFTRT
9cf5d19e 1238 && GET_CODE (op) != ZERO_EXTEND)
a5942062 1239 return NULL;
9cf5d19e 1240
1241 op_operand = XEXP (op, 0);
1242
c7944dce 1243 /* We can tear this operation apart only if the regs were already
1244 torn apart. */
9cf5d19e 1245 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
a5942062 1246 return NULL;
9cf5d19e 1247
1248 /* src_reg_num is the number of the word mode register which we
1249 are operating on. For a left shift and a zero_extend on little
1250 endian machines this is register 0. */
4d5cf08a 1251 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1252 ? 1 : 0;
9cf5d19e 1253
4d8ad352 1254 if (WORDS_BIG_ENDIAN
1255 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
9cf5d19e 1256 src_reg_num = 1 - src_reg_num;
1257
1258 if (GET_CODE (op) == ZERO_EXTEND)
4d8ad352 1259 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
9cf5d19e 1260 else
1261 dest_reg_num = 1 - src_reg_num;
1262
1263 offset1 = UNITS_PER_WORD * dest_reg_num;
1264 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1265 src_offset = UNITS_PER_WORD * src_reg_num;
1266
9cf5d19e 1267 start_sequence ();
1268
1269 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1270 GET_MODE (SET_DEST (set)),
1271 offset1);
4d5cf08a 1272 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1273 GET_MODE (SET_DEST (set)),
1274 offset2);
9cf5d19e 1275 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1276 GET_MODE (op_operand),
1277 src_offset);
4d5cf08a 1278 if (GET_CODE (op) == ASHIFTRT
1279 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1280 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1281 BITS_PER_WORD - 1, NULL_RTX, 0);
1282
9cf5d19e 1283 if (GET_CODE (op) != ZERO_EXTEND)
1284 {
1285 int shift_count = INTVAL (XEXP (op, 1));
1286 if (shift_count > BITS_PER_WORD)
1287 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1288 LSHIFT_EXPR : RSHIFT_EXPR,
1289 word_mode, src_reg,
f5ff0b21 1290 shift_count - BITS_PER_WORD,
4d5cf08a 1291 dest_reg, GET_CODE (op) != ASHIFTRT);
9cf5d19e 1292 }
1293
1294 if (dest_reg != src_reg)
1295 emit_move_insn (dest_reg, src_reg);
4d5cf08a 1296 if (GET_CODE (op) != ASHIFTRT)
1297 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1298 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1299 emit_move_insn (dest_upper, copy_rtx (src_reg));
1300 else
1301 emit_move_insn (dest_upper, upper_src);
9cf5d19e 1302 insns = get_insns ();
1303
1304 end_sequence ();
1305
1306 emit_insn_before (insns, insn);
1307
1308 if (dump_file)
1309 {
a5942062 1310 rtx_insn *in;
9cf5d19e 1311 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1312 for (in = insns; in != insn; in = NEXT_INSN (in))
1313 fprintf (dump_file, "%d ", INSN_UID (in));
1314 fprintf (dump_file, "\n");
1315 }
1316
1317 delete_insn (insn);
1318 return insns;
1319}
1320
c7944dce 1321/* Print to dump_file a description of what we're doing with shift code CODE.
1322 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1323
1324static void
1325dump_shift_choices (enum rtx_code code, bool *splitting)
1326{
1327 int i;
1328 const char *sep;
1329
1330 fprintf (dump_file,
1331 " Splitting mode %s for %s lowering with shift amounts = ",
1332 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1333 sep = "";
1334 for (i = 0; i < BITS_PER_WORD; i++)
1335 if (splitting[i])
1336 {
1337 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1338 sep = ",";
1339 }
1340 fprintf (dump_file, "\n");
1341}
1342
1343/* Print to dump_file a description of what we're doing when optimizing
1344 for speed or size; SPEED_P says which. DESCRIPTION is a description
1345 of the SPEED_P choice. */
1346
1347static void
1348dump_choices (bool speed_p, const char *description)
1349{
1350 unsigned int i;
1351
1352 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1353
1354 for (i = 0; i < MAX_MACHINE_MODE; i++)
3754d046 1355 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
c7944dce 1356 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1357 choices[speed_p].move_modes_to_split[i]
1358 ? "Splitting"
1359 : "Skipping",
3754d046 1360 GET_MODE_NAME ((machine_mode) i));
c7944dce 1361
1362 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1363 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1364 GET_MODE_NAME (twice_word_mode));
1365
1366 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
4d5cf08a 1367 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1368 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
c7944dce 1369 fprintf (dump_file, "\n");
1370}
1371
1a6a0f2a 1372/* Look for registers which are always accessed via word-sized SUBREGs
b5ca6624 1373 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1374 registers into several word-sized pseudo-registers. */
1a6a0f2a 1375
1376static void
b5ca6624 1377decompose_multiword_subregs (bool decompose_copies)
1a6a0f2a 1378{
1379 unsigned int max;
1380 basic_block bb;
c7944dce 1381 bool speed_p;
1a6a0f2a 1382
c7944dce 1383 if (dump_file)
1384 {
1385 dump_choices (false, "size");
1386 dump_choices (true, "speed");
1387 }
1388
1389 /* Check if this target even has any modes to consider lowering. */
1390 if (!choices[false].something_to_do && !choices[true].something_to_do)
1391 {
1392 if (dump_file)
1393 fprintf (dump_file, "Nothing to do!\n");
1394 return;
1395 }
3072d30e 1396
1a6a0f2a 1397 max = max_reg_num ();
1398
1399 /* First see if there are any multi-word pseudo-registers. If there
1400 aren't, there is nothing we can do. This should speed up this
1401 pass in the normal case, since it should be faster than scanning
1402 all the insns. */
1403 {
1404 unsigned int i;
c7944dce 1405 bool useful_modes_seen = false;
1a6a0f2a 1406
1407 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
c7944dce 1408 if (regno_reg_rtx[i] != NULL)
1409 {
3754d046 1410 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
c7944dce 1411 if (choices[false].move_modes_to_split[(int) mode]
1412 || choices[true].move_modes_to_split[(int) mode])
1413 {
1414 useful_modes_seen = true;
1415 break;
1416 }
1417 }
1418
1419 if (!useful_modes_seen)
1a6a0f2a 1420 {
c7944dce 1421 if (dump_file)
1422 fprintf (dump_file, "Nothing to lower in this function.\n");
1423 return;
1a6a0f2a 1424 }
1a6a0f2a 1425 }
1426
0e8e9be3 1427 if (df)
c7944dce 1428 {
1429 df_set_flags (DF_DEFER_INSN_RESCAN);
1430 run_word_dce ();
1431 }
0e8e9be3 1432
c7944dce 1433 /* FIXME: It may be possible to change this code to look for each
1434 multi-word pseudo-register and to find each insn which sets or
1435 uses that register. That should be faster than scanning all the
1436 insns. */
1a6a0f2a 1437
1438 decomposable_context = BITMAP_ALLOC (NULL);
1439 non_decomposable_context = BITMAP_ALLOC (NULL);
5277d36e 1440 subreg_context = BITMAP_ALLOC (NULL);
1a6a0f2a 1441
f1f41a6c 1442 reg_copy_graph.create (max);
1443 reg_copy_graph.safe_grow_cleared (max);
1444 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1a6a0f2a 1445
c7944dce 1446 speed_p = optimize_function_for_speed_p (cfun);
fc00614f 1447 FOR_EACH_BB_FN (bb, cfun)
1a6a0f2a 1448 {
a5942062 1449 rtx_insn *insn;
1a6a0f2a 1450
1451 FOR_BB_INSNS (bb, insn)
1452 {
1453 rtx set;
1454 enum classify_move_insn cmi;
1455 int i, n;
1456
1457 if (!INSN_P (insn)
1458 || GET_CODE (PATTERN (insn)) == CLOBBER
1459 || GET_CODE (PATTERN (insn)) == USE)
1460 continue;
1461
08b31038 1462 recog_memoized (insn);
1463
c7944dce 1464 if (find_decomposable_shift_zext (insn, speed_p))
9cf5d19e 1465 continue;
1466
1a6a0f2a 1467 extract_insn (insn);
1468
c7944dce 1469 set = simple_move (insn, speed_p);
1a6a0f2a 1470
1471 if (!set)
1472 cmi = NOT_SIMPLE_MOVE;
1473 else
1474 {
b5ca6624 1475 /* We mark pseudo-to-pseudo copies as decomposable during the
1476 second pass only. The first pass is so early that there is
1477 good chance such moves will be optimized away completely by
1478 subsequent optimizations anyway.
1479
1480 However, we call find_pseudo_copy even during the first pass
1481 so as to properly set up the reg_copy_graph. */
1e5b92fa 1482 if (find_pseudo_copy (set))
b5ca6624 1483 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1a6a0f2a 1484 else
1485 cmi = SIMPLE_MOVE;
1486 }
1487
1488 n = recog_data.n_operands;
1489 for (i = 0; i < n; ++i)
1490 {
665db605 1491 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1a6a0f2a 1492
1493 /* We handle ASM_OPERANDS as a special case to support
1494 things like x86 rdtsc which returns a DImode value.
1495 We can decompose the output, which will certainly be
1496 operand 0, but not the inputs. */
1497
1498 if (cmi == SIMPLE_MOVE
1499 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1500 {
1501 gcc_assert (i == 0);
1502 cmi = NOT_SIMPLE_MOVE;
1503 }
1504 }
1505 }
1506 }
1507
1508 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1509 if (!bitmap_empty_p (decomposable_context))
1510 {
db1c50be 1511 unsigned int i;
1512 sbitmap_iterator sbi;
1a6a0f2a 1513 bitmap_iterator iter;
1514 unsigned int regno;
1515
1516 propagate_pseudo_copies ();
1517
3c6549f8 1518 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
53c5d9d4 1519 bitmap_clear (sub_blocks);
1a6a0f2a 1520
1521 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1522 decompose_register (regno);
1523
fc00614f 1524 FOR_EACH_BB_FN (bb, cfun)
1a6a0f2a 1525 {
a5942062 1526 rtx_insn *insn;
1a6a0f2a 1527
201f6961 1528 FOR_BB_INSNS (bb, insn)
1a6a0f2a 1529 {
da7a04f1 1530 rtx pat;
1a6a0f2a 1531
1532 if (!INSN_P (insn))
1533 continue;
1534
1a6a0f2a 1535 pat = PATTERN (insn);
1536 if (GET_CODE (pat) == CLOBBER)
db2200eb 1537 resolve_clobber (pat, insn);
1a6a0f2a 1538 else if (GET_CODE (pat) == USE)
db2200eb 1539 resolve_use (pat, insn);
9845d120 1540 else if (DEBUG_INSN_P (insn))
1541 resolve_debug (insn);
1a6a0f2a 1542 else
1543 {
1544 rtx set;
1545 int i;
1546
1547 recog_memoized (insn);
1548 extract_insn (insn);
1549
c7944dce 1550 set = simple_move (insn, speed_p);
1a6a0f2a 1551 if (set)
1552 {
a5942062 1553 rtx_insn *orig_insn = insn;
97bb2849 1554 bool cfi = control_flow_insn_p (insn);
1a6a0f2a 1555
db1c50be 1556 /* We can end up splitting loads to multi-word pseudos
1557 into separate loads to machine word size pseudos.
1558 When this happens, we first had one load that can
1559 throw, and after resolve_simple_move we'll have a
1560 bunch of loads (at least two). All those loads may
1561 trap if we can have non-call exceptions, so they
1562 all will end the current basic block. We split the
1563 block after the outer loop over all insns, but we
1564 make sure here that we will be able to split the
1565 basic block and still produce the correct control
1566 flow graph for it. */
1567 gcc_assert (!cfi
cbeb677e 1568 || (cfun->can_throw_non_call_exceptions
db1c50be 1569 && can_throw_internal (insn)));
1570
1a6a0f2a 1571 insn = resolve_simple_move (set, insn);
1572 if (insn != orig_insn)
1573 {
1a6a0f2a 1574 recog_memoized (insn);
1575 extract_insn (insn);
97bb2849 1576
1577 if (cfi)
08b7917c 1578 bitmap_set_bit (sub_blocks, bb->index);
1a6a0f2a 1579 }
1580 }
9cf5d19e 1581 else
1582 {
a5942062 1583 rtx_insn *decomposed_shift;
9cf5d19e 1584
1585 decomposed_shift = resolve_shift_zext (insn);
1586 if (decomposed_shift != NULL_RTX)
1587 {
9cf5d19e 1588 insn = decomposed_shift;
1589 recog_memoized (insn);
1590 extract_insn (insn);
1591 }
1592 }
1a6a0f2a 1593
1594 for (i = recog_data.n_operands - 1; i >= 0; --i)
2e3cae91 1595 resolve_subreg_use (recog_data.operand_loc[i], insn);
1a6a0f2a 1596
1597 resolve_reg_notes (insn);
1598
1599 if (num_validated_changes () > 0)
1600 {
1601 for (i = recog_data.n_dups - 1; i >= 0; --i)
1602 {
1603 rtx *pl = recog_data.dup_loc[i];
1604 int dup_num = recog_data.dup_num[i];
1605 rtx *px = recog_data.operand_loc[dup_num];
1606
c47adb48 1607 validate_unshare_change (insn, pl, *px, 1);
1a6a0f2a 1608 }
1609
1610 i = apply_change_group ();
1611 gcc_assert (i);
1a6a0f2a 1612 }
1613 }
1a6a0f2a 1614 }
1615 }
1616
db1c50be 1617 /* If we had insns to split that caused control flow insns in the middle
1618 of a basic block, split those blocks now. Note that we only handle
1619 the case where splitting a load has caused multiple possibly trapping
1620 loads to appear. */
0d211963 1621 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
db1c50be 1622 {
a5942062 1623 rtx_insn *insn, *end;
db1c50be 1624 edge fallthru;
1625
f5a6b05f 1626 bb = BASIC_BLOCK_FOR_FN (cfun, i);
db1c50be 1627 insn = BB_HEAD (bb);
1628 end = BB_END (bb);
1629
1630 while (insn != end)
1631 {
1632 if (control_flow_insn_p (insn))
1633 {
1634 /* Split the block after insn. There will be a fallthru
1635 edge, which is OK so we keep it. We have to create the
1636 exception edges ourselves. */
1637 fallthru = split_block (bb, insn);
1638 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1639 bb = fallthru->dest;
1640 insn = BB_HEAD (bb);
1641 }
1642 else
1643 insn = NEXT_INSN (insn);
1644 }
1645 }
1a6a0f2a 1646 }
1647
1648 {
1649 unsigned int i;
1650 bitmap b;
1651
f1f41a6c 1652 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1a6a0f2a 1653 if (b)
1654 BITMAP_FREE (b);
1655 }
1656
f1f41a6c 1657 reg_copy_graph.release ();
1a6a0f2a 1658
1659 BITMAP_FREE (decomposable_context);
1660 BITMAP_FREE (non_decomposable_context);
5277d36e 1661 BITMAP_FREE (subreg_context);
1a6a0f2a 1662}
1663\f
1a6a0f2a 1664/* Implement first lower subreg pass. */
1665
cbe8bda8 1666namespace {
1667
1668const pass_data pass_data_lower_subreg =
1a6a0f2a 1669{
cbe8bda8 1670 RTL_PASS, /* type */
1671 "subreg1", /* name */
1672 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 1673 TV_LOWER_SUBREG, /* tv_id */
1674 0, /* properties_required */
1675 0, /* properties_provided */
1676 0, /* properties_destroyed */
1677 0, /* todo_flags_start */
8b88439e 1678 0, /* todo_flags_finish */
1a6a0f2a 1679};
1680
cbe8bda8 1681class pass_lower_subreg : public rtl_opt_pass
1682{
1683public:
9af5ce0c 1684 pass_lower_subreg (gcc::context *ctxt)
1685 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
cbe8bda8 1686 {}
1687
1688 /* opt_pass methods: */
31315c24 1689 virtual bool gate (function *) { return flag_split_wide_types != 0; }
65b0537f 1690 virtual unsigned int execute (function *)
1691 {
1692 decompose_multiword_subregs (false);
1693 return 0;
1694 }
cbe8bda8 1695
1696}; // class pass_lower_subreg
1697
1698} // anon namespace
1699
1700rtl_opt_pass *
1701make_pass_lower_subreg (gcc::context *ctxt)
1702{
1703 return new pass_lower_subreg (ctxt);
1704}
1705
65b0537f 1706/* Implement second lower subreg pass. */
1707
cbe8bda8 1708namespace {
1709
1710const pass_data pass_data_lower_subreg2 =
1a6a0f2a 1711{
cbe8bda8 1712 RTL_PASS, /* type */
1713 "subreg2", /* name */
1714 OPTGROUP_NONE, /* optinfo_flags */
cbe8bda8 1715 TV_LOWER_SUBREG, /* tv_id */
1716 0, /* properties_required */
1717 0, /* properties_provided */
1718 0, /* properties_destroyed */
1719 0, /* todo_flags_start */
8b88439e 1720 TODO_df_finish, /* todo_flags_finish */
1a6a0f2a 1721};
cbe8bda8 1722
1723class pass_lower_subreg2 : public rtl_opt_pass
1724{
1725public:
9af5ce0c 1726 pass_lower_subreg2 (gcc::context *ctxt)
1727 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
cbe8bda8 1728 {}
1729
1730 /* opt_pass methods: */
31315c24 1731 virtual bool gate (function *) { return flag_split_wide_types != 0; }
65b0537f 1732 virtual unsigned int execute (function *)
1733 {
1734 decompose_multiword_subregs (true);
1735 return 0;
1736 }
cbe8bda8 1737
1738}; // class pass_lower_subreg2
1739
1740} // anon namespace
1741
1742rtl_opt_pass *
1743make_pass_lower_subreg2 (gcc::context *ctxt)
1744{
1745 return new pass_lower_subreg2 (ctxt);
1746}