]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
Make more use of int_mode_for_mode
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44
45
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it cannot be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthand accessors for the per-target state set up by
   init_lower_subreg.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
105
/* RTXes used while computing costs.  The cost routines overwrite the
   modes (and the shift code) of these nodes in place, so the same
   scratch RTL is reused for every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
121
122 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
123 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
124
125 static int
126 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
127 machine_mode mode, int op1)
128 {
129 PUT_CODE (rtxes->shift, code);
130 PUT_MODE (rtxes->shift, mode);
131 PUT_MODE (rtxes->source, mode);
132 XEXP (rtxes->shift, 1) = GEN_INT (op1);
133 return set_src_cost (rtxes->shift, mode, speed_p);
134 }
135
136 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
137 to true if it is profitable to split a double-word CODE shift
138 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
139 for speed or size profitability.
140
141 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
142 the cost of moving zero into a word-mode register. WORD_MOVE_COST
143 is the cost of moving between word registers. */
144
145 static void
146 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
147 bool *splitting, enum rtx_code code,
148 int word_move_zero_cost, int word_move_cost)
149 {
150 int wide_cost, narrow_cost, upper_cost, i;
151
152 for (i = 0; i < BITS_PER_WORD; i++)
153 {
154 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
155 i + BITS_PER_WORD);
156 if (i == 0)
157 narrow_cost = word_move_cost;
158 else
159 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
160
161 if (code != ASHIFTRT)
162 upper_cost = word_move_zero_cost;
163 else if (i == BITS_PER_WORD - 1)
164 upper_cost = word_move_cost;
165 else
166 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
167 BITS_PER_WORD - 1);
168
169 if (LOG_COSTS)
170 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
171 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
172 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
173
174 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
175 splitting[i] = true;
176 }
177 }
178
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of setting a word_mode register to zero.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word_mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every mode wider than a word, decide whether a move in that
     mode is better done as FACTOR separate word_mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count profitability tables for each of
	 the three lowerable shift codes.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
258
259 /* Do one-per-target initialisation. This involves determining
260 which operations on the machine are profitable. If none are found,
261 then the pass just returns when called. */
262
263 void
264 init_lower_subreg (void)
265 {
266 struct cost_rtxes rtxes;
267
268 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
269
270 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
271
272 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
273 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
274 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
275 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
276 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
277
278 if (LOG_COSTS)
279 fprintf (stderr, "\nSize costs\n==========\n\n");
280 compute_costs (false, &rtxes);
281
282 if (LOG_COSTS)
283 fprintf (stderr, "\nSpeed costs\n===========\n\n");
284 compute_costs (true, &rtxes);
285 }
286
287 static bool
288 simple_move_operand (rtx x)
289 {
290 if (GET_CODE (x) == SUBREG)
291 x = SUBREG_REG (x);
292
293 if (!OBJECT_P (x))
294 return false;
295
296 if (GET_CODE (x) == LABEL_REF
297 || GET_CODE (x) == SYMBOL_REF
298 || GET_CODE (x) == HIGH
299 || GET_CODE (x) == CONST)
300 return false;
301
302 if (MEM_P (x)
303 && (MEM_VOLATILE_P (x)
304 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
305 return false;
306
307 return true;
308 }
309
310 /* If INSN is a single set between two objects that we want to split,
311 return the single set. SPEED_P says whether we are optimizing
312 INSN for speed or size.
313
314 INSN should have been passed to recog and extract_insn before this
315 is called. */
316
317 static rtx
318 simple_move (rtx_insn *insn, bool speed_p)
319 {
320 rtx x;
321 rtx set;
322 machine_mode mode;
323
324 if (recog_data.n_operands != 2)
325 return NULL_RTX;
326
327 set = single_set (insn);
328 if (!set)
329 return NULL_RTX;
330
331 x = SET_DEST (set);
332 if (x != recog_data.operand[0] && x != recog_data.operand[1])
333 return NULL_RTX;
334 if (!simple_move_operand (x))
335 return NULL_RTX;
336
337 x = SET_SRC (set);
338 if (x != recog_data.operand[0] && x != recog_data.operand[1])
339 return NULL_RTX;
340 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
341 things like x86 rdtsc which returns a DImode value. */
342 if (GET_CODE (x) != ASM_OPERANDS
343 && !simple_move_operand (x))
344 return NULL_RTX;
345
346 /* We try to decompose in integer modes, to avoid generating
347 inefficient code copying between integer and floating point
348 registers. That means that we can't decompose if this is a
349 non-integer mode for which there is no integer mode of the same
350 size. */
351 mode = GET_MODE (SET_DEST (set));
352 if (!SCALAR_INT_MODE_P (mode)
353 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
354 return NULL_RTX;
355
356 /* Reject PARTIAL_INT modes. They are used for processor specific
357 purposes and it's probably best not to tamper with them. */
358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
359 return NULL_RTX;
360
361 if (!choices[speed_p].move_modes_to_split[(int) mode])
362 return NULL_RTX;
363
364 return set;
365 }
366
367 /* If SET is a copy from one multi-word pseudo-register to another,
368 record that in reg_copy_graph. Return whether it is such a
369 copy. */
370
371 static bool
372 find_pseudo_copy (rtx set)
373 {
374 rtx dest = SET_DEST (set);
375 rtx src = SET_SRC (set);
376 unsigned int rd, rs;
377 bitmap b;
378
379 if (!REG_P (dest) || !REG_P (src))
380 return false;
381
382 rd = REGNO (dest);
383 rs = REGNO (src);
384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
385 return false;
386
387 b = reg_copy_graph[rs];
388 if (b == NULL)
389 {
390 b = BITMAP_ALLOC (NULL);
391 reg_copy_graph[rs] = b;
392 }
393
394 bitmap_set_bit (b, rd);
395
396 return true;
397 }
398
399 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
400 where they are copied to another register, add the register to
401 which they are copied to DECOMPOSABLE_CONTEXT. Use
402 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
404
405 static void
406 propagate_pseudo_copies (void)
407 {
408 auto_bitmap queue, propagate;
409
410 bitmap_copy (queue, decomposable_context);
411 do
412 {
413 bitmap_iterator iter;
414 unsigned int i;
415
416 bitmap_clear (propagate);
417
418 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
419 {
420 bitmap b = reg_copy_graph[i];
421 if (b)
422 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
423 }
424
425 bitmap_and_compl (queue, propagate, decomposable_context);
426 bitmap_ior_into (decomposable_context, propagate);
427 }
428 while (!bitmap_empty_p (queue));
429 }
430
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  It classifies the insn the registers
   are being scanned in.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
443
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  /* Hard registers are never decomposed; don't look inside.  */
	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
560
561 /* Decompose REGNO into word-sized components. We smash the REG node
562 in place. This ensures that (1) something goes wrong quickly if we
563 fail to make some replacement, and (2) the debug information inside
564 the symbol table is automatically kept up to date. */
565
566 static void
567 decompose_register (unsigned int regno)
568 {
569 rtx reg;
570 unsigned int words, i;
571 rtvec v;
572
573 reg = regno_reg_rtx[regno];
574
575 regno_reg_rtx[regno] = NULL_RTX;
576
577 words = GET_MODE_SIZE (GET_MODE (reg));
578 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
579
580 v = rtvec_alloc (words);
581 for (i = 0; i < words; ++i)
582 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
583
584 PUT_CODE (reg, CONCATN);
585 XVEC (reg, 0) = v;
586
587 if (dump_file)
588 {
589 fprintf (dump_file, "; Splitting reg %u ->", regno);
590 for (i = 0; i < words; ++i)
591 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
592 fputc ('\n', dump_file);
593 }
594 }
595
/* Get a SUBREG of a CONCATN.  OUTERMODE is the mode of the piece
   wanted and BYTE its offset into OP.  Return NULL_RTX if the piece
   cannot be extracted (it is wider than OP, or it straddles two
   CONCATN elements).  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  /* Reject requests wider than the whole CONCATN.  */
  if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
    return NULL_RTX;

  /* Each element of the CONCATN covers INNER_SIZE bytes; pick the
     element containing BYTE.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece would straddle two elements.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
636
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size mode change at offset 0: strip the SUBREG and
	 recurse on the CONCATN itself.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* Fold the two offsets together and extract directly from
	     the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
692
/* Return whether we should resolve X into the registers into which it
   was decomposed.  decompose_register smashes a decomposed REG into a
   CONCATN node, so testing the code is sufficient.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
701
702 /* Return whether X is a SUBREG of a register which we need to
703 resolve. */
704
705 static bool
706 resolve_subreg_p (rtx x)
707 {
708 if (GET_CODE (x) != SUBREG)
709 return false;
710 return resolve_reg_p (SUBREG_REG (x));
711 }
712
/* Look for SUBREGs in *LOC which need to be decomposed.  INSN is the
   insn being changed (passed through to validate_change), or NULL_RTX
   when *LOC is a note or an address that cannot be validated yet.
   Return true if *LOC contains a reference that cannot be fixed up in
   place (the caller must then discard the containing note).  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller applies the change group.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
750
751 /* Resolve any decomposed registers which appear in register notes on
752 INSN. */
753
754 static void
755 resolve_reg_notes (rtx_insn *insn)
756 {
757 rtx *pnote, note;
758
759 note = find_reg_equal_equiv_note (insn);
760 if (note)
761 {
762 int old_count = num_validated_changes ();
763 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
764 remove_note (insn, note);
765 else
766 if (old_count != num_validated_changes ())
767 df_notes_rescan (insn);
768 }
769
770 pnote = &REG_NOTES (insn);
771 while (*pnote != NULL_RTX)
772 {
773 bool del = false;
774
775 note = *pnote;
776 switch (REG_NOTE_KIND (note))
777 {
778 case REG_DEAD:
779 case REG_UNUSED:
780 if (resolve_reg_p (XEXP (note, 0)))
781 del = true;
782 break;
783
784 default:
785 break;
786 }
787
788 if (del)
789 *pnote = XEXP (note, 1);
790 else
791 pnote = &XEXP (note, 1);
792 }
793 }
794
795 /* Return whether X can be decomposed into subwords. */
796
797 static bool
798 can_decompose_p (rtx x)
799 {
800 if (REG_P (x))
801 {
802 unsigned int regno = REGNO (x);
803
804 if (HARD_REGISTER_NUM_P (regno))
805 {
806 unsigned int byte, num_bytes;
807
808 num_bytes = GET_MODE_SIZE (GET_MODE (x));
809 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
810 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
811 return false;
812 return true;
813 }
814 else
815 return !bitmap_bit_p (subreg_context, regno);
816 }
817
818 return true;
819 }
820
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh register, recursively resolving that
	 move, and use the fresh register as the source below.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any REG_INC note over to the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      /* The temporary is created in the equivalent integer mode.  */
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the word order for the pushes.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber DEST first: the loop below only writes it one word at
	 a time.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we moved via a temporary, copy it to the real destination and
     recursively resolve that final move as well.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1055
1056 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1057 component registers. Return whether we changed something. */
1058
1059 static bool
1060 resolve_clobber (rtx pat, rtx_insn *insn)
1061 {
1062 rtx reg;
1063 machine_mode orig_mode;
1064 unsigned int words, i;
1065 int ret;
1066
1067 reg = XEXP (pat, 0);
1068 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1069 return false;
1070
1071 orig_mode = GET_MODE (reg);
1072 words = GET_MODE_SIZE (orig_mode);
1073 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1074
1075 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1076 simplify_gen_subreg_concatn (word_mode, reg,
1077 orig_mode, 0),
1078 0);
1079 df_insn_rescan (insn);
1080 gcc_assert (ret != 0);
1081
1082 for (i = words - 1; i > 0; --i)
1083 {
1084 rtx x;
1085
1086 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1087 i * UNITS_PER_WORD);
1088 x = gen_rtx_CLOBBER (VOIDmode, x);
1089 emit_insn_after (x, insn);
1090 }
1091
1092 resolve_reg_notes (insn);
1093
1094 return true;
1095 }
1096
1097 /* A USE of a decomposed register is no longer meaningful. Return
1098 whether we changed something. */
1099
1100 static bool
1101 resolve_use (rtx pat, rtx_insn *insn)
1102 {
1103 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1104 {
1105 delete_insn (insn);
1106 return true;
1107 }
1108
1109 resolve_reg_notes (insn);
1110
1111 return false;
1112 }
1113
/* A VAR_LOCATION can be simplified.  Walk the pattern of debug insn
   INSN and replace references to decomposed registers and subregs of
   them with the corresponding concatn pieces (copied, so the debug
   insn does not share rtl with the real insn stream).  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold the subreg of a decomposed register down to
	     the piece it refers to.  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_BYTE (x) == SUBREG_BYTE (x) ? SUBREG_REG (x) : SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    /* Simplification failed; fall through with a copy so the
	       resolve_reg_p check below still sees an rtx to test.  */
	    x = copy_rtx (*loc);
	}
      /* Note: X may be the result of the simplification above, which can
	 itself be a decomposed register; copy it in that case too.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1142
1143 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1144 set the decomposable_context bitmap accordingly. SPEED_P is true
1145 if we are optimizing INSN for speed rather than size. Return true
1146 if INSN is decomposable. */
1147
1148 static bool
1149 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1150 {
1151 rtx set;
1152 rtx op;
1153 rtx op_operand;
1154
1155 set = single_set (insn);
1156 if (!set)
1157 return false;
1158
1159 op = SET_SRC (set);
1160 if (GET_CODE (op) != ASHIFT
1161 && GET_CODE (op) != LSHIFTRT
1162 && GET_CODE (op) != ASHIFTRT
1163 && GET_CODE (op) != ZERO_EXTEND)
1164 return false;
1165
1166 op_operand = XEXP (op, 0);
1167 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1168 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1169 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1170 || GET_MODE (op) != twice_word_mode)
1171 return false;
1172
1173 if (GET_CODE (op) == ZERO_EXTEND)
1174 {
1175 if (GET_MODE (op_operand) != word_mode
1176 || !choices[speed_p].splitting_zext)
1177 return false;
1178 }
1179 else /* left or right shift */
1180 {
1181 bool *splitting = (GET_CODE (op) == ASHIFT
1182 ? choices[speed_p].splitting_ashift
1183 : GET_CODE (op) == ASHIFTRT
1184 ? choices[speed_p].splitting_ashiftrt
1185 : choices[speed_p].splitting_lshiftrt);
1186 if (!CONST_INT_P (XEXP (op, 1))
1187 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1188 2 * BITS_PER_WORD - 1)
1189 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1190 return false;
1191
1192 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1193 }
1194
1195 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1196
1197 return true;
1198 }
1199
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword value
     is reversed, so flip the source word when the input is multiword.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word that receives the shifted
     source; offset2 addresses the other destination word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                            GET_MODE (SET_DEST (set)),
                                            offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift by less than 2*BITS_PER_WORD - 1,
     the upper result word is the source word shifted right by
     BITS_PER_WORD - 1, i.e. filled with copies of the sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
                              BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A shift by exactly BITS_PER_WORD is a pure word move; anything
	 larger needs a residual word-mode shift as well.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting by all bits but one leaves only sign-bit copies, which
       equal the already-shifted low result word.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1307
1308 /* Print to dump_file a description of what we're doing with shift code CODE.
1309 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1310
1311 static void
1312 dump_shift_choices (enum rtx_code code, bool *splitting)
1313 {
1314 int i;
1315 const char *sep;
1316
1317 fprintf (dump_file,
1318 " Splitting mode %s for %s lowering with shift amounts = ",
1319 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1320 sep = "";
1321 for (i = 0; i < BITS_PER_WORD; i++)
1322 if (splitting[i])
1323 {
1324 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1325 sep = ",";
1326 }
1327 fprintf (dump_file, "\n");
1328 }
1329
1330 /* Print to dump_file a description of what we're doing when optimizing
1331 for speed or size; SPEED_P says which. DESCRIPTION is a description
1332 of the SPEED_P choice. */
1333
1334 static void
1335 dump_choices (bool speed_p, const char *description)
1336 {
1337 unsigned int i;
1338
1339 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1340
1341 for (i = 0; i < MAX_MACHINE_MODE; i++)
1342 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1343 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1344 choices[speed_p].move_modes_to_split[i]
1345 ? "Splitting"
1346 : "Skipping",
1347 GET_MODE_NAME ((machine_mode) i));
1348
1349 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1350 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1351 GET_MODE_NAME (twice_word_mode));
1352
1353 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1354 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1355 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1356 fprintf (dump_file, "\n");
1357 }
1358
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.

   The pass works in two phases: a scan that classifies every insn and
   fills the decomposable/non-decomposable context bitmaps, then a
   rewrite that replaces each use of a decomposed register.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* If dataflow is available, run word-level dead code elimination
     first so we do not decompose registers that are about to die.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  /* Phase 1: scan every insn, classify it, and record which pseudos
     can and cannot be decomposed.  */
  speed_p = optimize_function_for_speed_p (cfun);
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register is only decomposed if no context forbids it.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  /* Keep any matched duplicate operands in sync with the
		     operands we just rewrote, then commit all pending
		     changes at once.  */
		  if (num_validated_changes () > 0)
		    {
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-register copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1650 \f
/* Implement first lower subreg pass.  The gate only enables it when
   -fsplit-wide-types is in effect; copies are not decomposed on this
   early run (decompose_multiword_subregs is called with false).  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

/* Factory used by the pass manager to instantiate the first lower
   subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
1692
/* Implement second lower subreg pass.  Unlike the first pass it also
   decomposes pseudo-to-pseudo copies (decompose_multiword_subregs is
   called with true) and requests dataflow finalization afterwards.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

/* Factory used by the pass manager to instantiate the second lower
   subreg pass.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}