]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
PR target/85945
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44
45
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
76
77 /* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79 static bitmap decomposable_context;
80
81 /* Bit N in this bitmap is set if regno N is used in a context in
82 which it can not be decomposed. */
83 static bitmap non_decomposable_context;
84
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86 which changes the mode but not the size. This typically happens
87 when the register accessed as a floating-point value; we want to
88 avoid generating accesses to its subwords in integer modes. */
89 static bitmap subreg_context;
90
91 /* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
93 static vec<bitmap> reg_copy_graph;
94
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99 #endif
100
101 #define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104 this_target_lower_subreg->x_choices
105
106 /* Return true if MODE is a mode we know how to lower. When returning true,
107 store its byte size in *BYTES and its word size in *WORDS. */
108
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112 {
113 if (!GET_MODE_SIZE (mode).is_constant (bytes))
114 return false;
115 *words = CEIL (*bytes, UNITS_PER_WORD);
116 return true;
117 }
118
119 /* RTXes used while computing costs. */
120 struct cost_rtxes {
121 /* Source and target registers. */
122 rtx source;
123 rtx target;
124
125 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
126 rtx zext;
127
128 /* A shift of SOURCE. */
129 rtx shift;
130
131 /* A SET of TARGET. */
132 rtx set;
133 };
134
135 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
136 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
137
138 static int
139 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
140 machine_mode mode, int op1)
141 {
142 PUT_CODE (rtxes->shift, code);
143 PUT_MODE (rtxes->shift, mode);
144 PUT_MODE (rtxes->source, mode);
145 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
146 return set_src_cost (rtxes->shift, mode, speed_p);
147 }
148
149 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
150 to true if it is profitable to split a double-word CODE shift
151 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
152 for speed or size profitability.
153
154 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
155 the cost of moving zero into a word-mode register. WORD_MOVE_COST
156 is the cost of moving between word registers. */
157
158 static void
159 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
160 bool *splitting, enum rtx_code code,
161 int word_move_zero_cost, int word_move_cost)
162 {
163 int wide_cost, narrow_cost, upper_cost, i;
164
165 for (i = 0; i < BITS_PER_WORD; i++)
166 {
167 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
168 i + BITS_PER_WORD);
169 if (i == 0)
170 narrow_cost = word_move_cost;
171 else
172 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
173
174 if (code != ASHIFTRT)
175 upper_cost = word_move_zero_cost;
176 else if (i == BITS_PER_WORD - 1)
177 upper_cost = word_move_cost;
178 else
179 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
180 BITS_PER_WORD - 1);
181
182 if (LOG_COSTS)
183 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
184 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
185 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
186
187 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
188 splitting[i] = true;
189 }
190 }
191
192 /* Compute what we should do when optimizing for speed or size; SPEED_P
193 selects which. Use RTXES for computing costs. */
194
195 static void
196 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
197 {
198 unsigned int i;
199 int word_move_zero_cost, word_move_cost;
200
201 PUT_MODE (rtxes->target, word_mode);
202 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
203 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
204
205 SET_SRC (rtxes->set) = rtxes->source;
206 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
207
208 if (LOG_COSTS)
209 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
210 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
211
212 for (i = 0; i < MAX_MACHINE_MODE; i++)
213 {
214 machine_mode mode = (machine_mode) i;
215 unsigned int size, factor;
216 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
217 {
218 unsigned int mode_move_cost;
219
220 PUT_MODE (rtxes->target, mode);
221 PUT_MODE (rtxes->source, mode);
222 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
223
224 if (LOG_COSTS)
225 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
226 GET_MODE_NAME (mode), mode_move_cost,
227 word_move_cost, factor);
228
229 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
230 {
231 choices[speed_p].move_modes_to_split[i] = true;
232 choices[speed_p].something_to_do = true;
233 }
234 }
235 }
236
237 /* For the moves and shifts, the only case that is checked is one
238 where the mode of the target is an integer mode twice the width
239 of the word_mode.
240
241 If it is not profitable to split a double word move then do not
242 even consider the shifts or the zero extension. */
243 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
244 {
245 int zext_cost;
246
247 /* The only case here to check to see if moving the upper part with a
248 zero is cheaper than doing the zext itself. */
249 PUT_MODE (rtxes->source, word_mode);
250 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
251
252 if (LOG_COSTS)
253 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
254 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
255 zext_cost, word_move_cost, word_move_zero_cost);
256
257 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
258 choices[speed_p].splitting_zext = true;
259
260 compute_splitting_shift (speed_p, rtxes,
261 choices[speed_p].splitting_ashift, ASHIFT,
262 word_move_zero_cost, word_move_cost);
263 compute_splitting_shift (speed_p, rtxes,
264 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
265 word_move_zero_cost, word_move_cost);
266 compute_splitting_shift (speed_p, rtxes,
267 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
268 word_move_zero_cost, word_move_cost);
269 }
270 }
271
272 /* Do one-per-target initialisation. This involves determining
273 which operations on the machine are profitable. If none are found,
274 then the pass just returns when called. */
275
276 void
277 init_lower_subreg (void)
278 {
279 struct cost_rtxes rtxes;
280
281 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
282
283 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
284
285 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
286 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
287 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
288 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
289 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
290
291 if (LOG_COSTS)
292 fprintf (stderr, "\nSize costs\n==========\n\n");
293 compute_costs (false, &rtxes);
294
295 if (LOG_COSTS)
296 fprintf (stderr, "\nSpeed costs\n===========\n\n");
297 compute_costs (true, &rtxes);
298 }
299
300 static bool
301 simple_move_operand (rtx x)
302 {
303 if (GET_CODE (x) == SUBREG)
304 x = SUBREG_REG (x);
305
306 if (!OBJECT_P (x))
307 return false;
308
309 if (GET_CODE (x) == LABEL_REF
310 || GET_CODE (x) == SYMBOL_REF
311 || GET_CODE (x) == HIGH
312 || GET_CODE (x) == CONST)
313 return false;
314
315 if (MEM_P (x)
316 && (MEM_VOLATILE_P (x)
317 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
318 return false;
319
320 return true;
321 }
322
323 /* If INSN is a single set between two objects that we want to split,
324 return the single set. SPEED_P says whether we are optimizing
325 INSN for speed or size.
326
327 INSN should have been passed to recog and extract_insn before this
328 is called. */
329
330 static rtx
331 simple_move (rtx_insn *insn, bool speed_p)
332 {
333 rtx x;
334 rtx set;
335 machine_mode mode;
336
337 if (recog_data.n_operands != 2)
338 return NULL_RTX;
339
340 set = single_set (insn);
341 if (!set)
342 return NULL_RTX;
343
344 x = SET_DEST (set);
345 if (x != recog_data.operand[0] && x != recog_data.operand[1])
346 return NULL_RTX;
347 if (!simple_move_operand (x))
348 return NULL_RTX;
349
350 x = SET_SRC (set);
351 if (x != recog_data.operand[0] && x != recog_data.operand[1])
352 return NULL_RTX;
353 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
354 things like x86 rdtsc which returns a DImode value. */
355 if (GET_CODE (x) != ASM_OPERANDS
356 && !simple_move_operand (x))
357 return NULL_RTX;
358
359 /* We try to decompose in integer modes, to avoid generating
360 inefficient code copying between integer and floating point
361 registers. That means that we can't decompose if this is a
362 non-integer mode for which there is no integer mode of the same
363 size. */
364 mode = GET_MODE (SET_DEST (set));
365 if (!SCALAR_INT_MODE_P (mode)
366 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
367 return NULL_RTX;
368
369 /* Reject PARTIAL_INT modes. They are used for processor specific
370 purposes and it's probably best not to tamper with them. */
371 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
372 return NULL_RTX;
373
374 if (!choices[speed_p].move_modes_to_split[(int) mode])
375 return NULL_RTX;
376
377 return set;
378 }
379
380 /* If SET is a copy from one multi-word pseudo-register to another,
381 record that in reg_copy_graph. Return whether it is such a
382 copy. */
383
384 static bool
385 find_pseudo_copy (rtx set)
386 {
387 rtx dest = SET_DEST (set);
388 rtx src = SET_SRC (set);
389 unsigned int rd, rs;
390 bitmap b;
391
392 if (!REG_P (dest) || !REG_P (src))
393 return false;
394
395 rd = REGNO (dest);
396 rs = REGNO (src);
397 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
398 return false;
399
400 b = reg_copy_graph[rs];
401 if (b == NULL)
402 {
403 b = BITMAP_ALLOC (NULL);
404 reg_copy_graph[rs] = b;
405 }
406
407 bitmap_set_bit (b, rd);
408
409 return true;
410 }
411
412 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
413 where they are copied to another register, add the register to
414 which they are copied to DECOMPOSABLE_CONTEXT. Use
415 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
416 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
417
418 static void
419 propagate_pseudo_copies (void)
420 {
421 auto_bitmap queue, propagate;
422
423 bitmap_copy (queue, decomposable_context);
424 do
425 {
426 bitmap_iterator iter;
427 unsigned int i;
428
429 bitmap_clear (propagate);
430
431 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
432 {
433 bitmap b = reg_copy_graph[i];
434 if (b)
435 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
436 }
437
438 bitmap_and_compl (queue, propagate, decomposable_context);
439 bitmap_ior_into (decomposable_context, propagate);
440 }
441 while (!bitmap_empty_p (queue));
442 }
443
/* Classification of the insn being scanned; find_decomposable_subregs
   receives a pointer to one of these values.  */

enum classify_move_insn
{
  /* The insn is not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* The insn is a simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* The insn is some other simple move.  */
  SIMPLE_MOVE
};
456
457 /* If we find a SUBREG in *LOC which we could use to decompose a
458 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
459 unadorned register which is not a simple pseudo-register copy,
460 DATA will point at the type of move, and we set a bit in
461 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
462
463 static void
464 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
465 {
466 subrtx_var_iterator::array_type array;
467 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
468 {
469 rtx x = *iter;
470 if (GET_CODE (x) == SUBREG)
471 {
472 rtx inner = SUBREG_REG (x);
473 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
474
475 if (!REG_P (inner))
476 continue;
477
478 regno = REGNO (inner);
479 if (HARD_REGISTER_NUM_P (regno))
480 {
481 iter.skip_subrtxes ();
482 continue;
483 }
484
485 if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
486 || !interesting_mode_p (GET_MODE (inner), &inner_size,
487 &inner_words))
488 continue;
489
490 /* We only try to decompose single word subregs of multi-word
491 registers. When we find one, we return -1 to avoid iterating
492 over the inner register.
493
494 ??? This doesn't allow, e.g., DImode subregs of TImode values
495 on 32-bit targets. We would need to record the way the
496 pseudo-register was used, and only decompose if all the uses
497 were the same number and size of pieces. Hopefully this
498 doesn't happen much. */
499
500 if (outer_words == 1
501 && inner_words > 1
502 /* Don't allow to decompose floating point subregs of
503 multi-word pseudos if the floating point mode does
504 not have word size, because otherwise we'd generate
505 a subreg with that floating mode from a different
506 sized integral pseudo which is not allowed by
507 validate_subreg. */
508 && (!FLOAT_MODE_P (GET_MODE (x))
509 || outer_size == UNITS_PER_WORD))
510 {
511 bitmap_set_bit (decomposable_context, regno);
512 iter.skip_subrtxes ();
513 continue;
514 }
515
516 /* If this is a cast from one mode to another, where the modes
517 have the same size, and they are not tieable, then mark this
518 register as non-decomposable. If we decompose it we are
519 likely to mess up whatever the backend is trying to do. */
520 if (outer_words > 1
521 && outer_size == inner_size
522 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
523 {
524 bitmap_set_bit (non_decomposable_context, regno);
525 bitmap_set_bit (subreg_context, regno);
526 iter.skip_subrtxes ();
527 continue;
528 }
529 }
530 else if (REG_P (x))
531 {
532 unsigned int regno, size, words;
533
534 /* We will see an outer SUBREG before we see the inner REG, so
535 when we see a plain REG here it means a direct reference to
536 the register.
537
538 If this is not a simple copy from one location to another,
539 then we can not decompose this register. If this is a simple
540 copy we want to decompose, and the mode is right,
541 then we mark the register as decomposable.
542 Otherwise we don't say anything about this register --
543 it could be decomposed, but whether that would be
544 profitable depends upon how it is used elsewhere.
545
546 We only set bits in the bitmap for multi-word
547 pseudo-registers, since those are the only ones we care about
548 and it keeps the size of the bitmaps down. */
549
550 regno = REGNO (x);
551 if (!HARD_REGISTER_NUM_P (regno)
552 && interesting_mode_p (GET_MODE (x), &size, &words)
553 && words > 1)
554 {
555 switch (*pcmi)
556 {
557 case NOT_SIMPLE_MOVE:
558 bitmap_set_bit (non_decomposable_context, regno);
559 break;
560 case DECOMPOSABLE_SIMPLE_MOVE:
561 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
562 bitmap_set_bit (decomposable_context, regno);
563 break;
564 case SIMPLE_MOVE:
565 break;
566 default:
567 gcc_unreachable ();
568 }
569 }
570 }
571 else if (MEM_P (x))
572 {
573 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
574
575 /* Any registers used in a MEM do not participate in a
576 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
577 here, and return -1 to block the parent's recursion. */
578 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
579 iter.skip_subrtxes ();
580 }
581 }
582 }
583
584 /* Decompose REGNO into word-sized components. We smash the REG node
585 in place. This ensures that (1) something goes wrong quickly if we
586 fail to make some replacement, and (2) the debug information inside
587 the symbol table is automatically kept up to date. */
588
589 static void
590 decompose_register (unsigned int regno)
591 {
592 rtx reg;
593 unsigned int size, words, i;
594 rtvec v;
595
596 reg = regno_reg_rtx[regno];
597
598 regno_reg_rtx[regno] = NULL_RTX;
599
600 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
601 gcc_unreachable ();
602
603 v = rtvec_alloc (words);
604 for (i = 0; i < words; ++i)
605 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
606
607 PUT_CODE (reg, CONCATN);
608 XVEC (reg, 0) = v;
609
610 if (dump_file)
611 {
612 fprintf (dump_file, "; Splitting reg %u ->", regno);
613 for (i = 0; i < words; ++i)
614 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
615 fputc ('\n', dump_file);
616 }
617 }
618
619 /* Get a SUBREG of a CONCATN. */
620
621 static rtx
622 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
623 {
624 unsigned int outer_size, outer_words, inner_size, inner_words;
625 machine_mode innermode, partmode;
626 rtx part;
627 unsigned int final_offset;
628 unsigned int byte;
629
630 innermode = GET_MODE (op);
631 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
632 || !interesting_mode_p (innermode, &inner_size, &inner_words))
633 gcc_unreachable ();
634
635 /* Must be constant if interesting_mode_p passes. */
636 byte = orig_byte.to_constant ();
637 gcc_assert (GET_CODE (op) == CONCATN);
638 gcc_assert (byte % outer_size == 0);
639
640 gcc_assert (byte < inner_size);
641 if (outer_size > inner_size)
642 return NULL_RTX;
643
644 inner_size /= XVECLEN (op, 0);
645 part = XVECEXP (op, 0, byte / inner_size);
646 partmode = GET_MODE (part);
647
648 final_offset = byte % inner_size;
649 if (final_offset + outer_size > inner_size)
650 return NULL_RTX;
651
652 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
653 regular CONST_VECTORs. They have vector or integer modes, depending
654 on the capabilities of the target. Cope with them. */
655 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
656 partmode = GET_MODE_INNER (innermode);
657 else if (partmode == VOIDmode)
658 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
659 GET_MODE_CLASS (innermode), 0).require ();
660
661 return simplify_gen_subreg (outermode, part, partmode, final_offset);
662 }
663
664 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
665
666 static rtx
667 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
668 machine_mode innermode, unsigned int byte)
669 {
670 rtx ret;
671
672 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
673 If OP is a SUBREG of a CONCATN, then it must be a simple mode
674 change with the same size and offset 0, or it must extract a
675 part. We shouldn't see anything else here. */
676 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
677 {
678 rtx op2;
679
680 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
681 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
682 && known_eq (SUBREG_BYTE (op), 0))
683 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
684 GET_MODE (SUBREG_REG (op)), byte);
685
686 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
687 SUBREG_BYTE (op));
688 if (op2 == NULL_RTX)
689 {
690 /* We don't handle paradoxical subregs here. */
691 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
692 gcc_assert (!paradoxical_subreg_p (op));
693 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
694 byte + SUBREG_BYTE (op));
695 gcc_assert (op2 != NULL_RTX);
696 return op2;
697 }
698
699 op = op2;
700 gcc_assert (op != NULL_RTX);
701 gcc_assert (innermode == GET_MODE (op));
702 }
703
704 if (GET_CODE (op) == CONCATN)
705 return simplify_subreg_concatn (outermode, op, byte);
706
707 ret = simplify_gen_subreg (outermode, op, innermode, byte);
708
709 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
710 resolve_simple_move will ask for the high part of the paradoxical
711 subreg, which does not have a value. Just return a zero. */
712 if (ret == NULL_RTX
713 && paradoxical_subreg_p (op))
714 return CONST0_RTX (outermode);
715
716 gcc_assert (ret != NULL_RTX);
717 return ret;
718 }
719
720 /* Return whether we should resolve X into the registers into which it
721 was decomposed. */
722
723 static bool
724 resolve_reg_p (rtx x)
725 {
726 return GET_CODE (x) == CONCATN;
727 }
728
729 /* Return whether X is a SUBREG of a register which we need to
730 resolve. */
731
732 static bool
733 resolve_subreg_p (rtx x)
734 {
735 if (GET_CODE (x) != SUBREG)
736 return false;
737 return resolve_reg_p (SUBREG_REG (x));
738 }
739
740 /* Look for SUBREGs in *LOC which need to be decomposed. */
741
742 static bool
743 resolve_subreg_use (rtx *loc, rtx insn)
744 {
745 subrtx_ptr_iterator::array_type array;
746 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
747 {
748 rtx *loc = *iter;
749 rtx x = *loc;
750 if (resolve_subreg_p (x))
751 {
752 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
753 SUBREG_BYTE (x));
754
755 /* It is possible for a note to contain a reference which we can
756 decompose. In this case, return 1 to the caller to indicate
757 that the note must be removed. */
758 if (!x)
759 {
760 gcc_assert (!insn);
761 return true;
762 }
763
764 validate_change (insn, loc, x, 1);
765 iter.skip_subrtxes ();
766 }
767 else if (resolve_reg_p (x))
768 /* Return 1 to the caller to indicate that we found a direct
769 reference to a register which is being decomposed. This can
770 happen inside notes, multiword shift or zero-extend
771 instructions. */
772 return true;
773 }
774
775 return false;
776 }
777
778 /* Resolve any decomposed registers which appear in register notes on
779 INSN. */
780
781 static void
782 resolve_reg_notes (rtx_insn *insn)
783 {
784 rtx *pnote, note;
785
786 note = find_reg_equal_equiv_note (insn);
787 if (note)
788 {
789 int old_count = num_validated_changes ();
790 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
791 remove_note (insn, note);
792 else
793 if (old_count != num_validated_changes ())
794 df_notes_rescan (insn);
795 }
796
797 pnote = &REG_NOTES (insn);
798 while (*pnote != NULL_RTX)
799 {
800 bool del = false;
801
802 note = *pnote;
803 switch (REG_NOTE_KIND (note))
804 {
805 case REG_DEAD:
806 case REG_UNUSED:
807 if (resolve_reg_p (XEXP (note, 0)))
808 del = true;
809 break;
810
811 default:
812 break;
813 }
814
815 if (del)
816 *pnote = XEXP (note, 1);
817 else
818 pnote = &XEXP (note, 1);
819 }
820 }
821
822 /* Return whether X can be decomposed into subwords. */
823
824 static bool
825 can_decompose_p (rtx x)
826 {
827 if (REG_P (x))
828 {
829 unsigned int regno = REGNO (x);
830
831 if (HARD_REGISTER_NUM_P (regno))
832 {
833 unsigned int byte, num_bytes, num_words;
834
835 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
836 return false;
837 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
838 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
839 return false;
840 return true;
841 }
842 else
843 return !bitmap_bit_p (subreg_context, regno);
844 }
845
846 return true;
847 }
848
849 /* Decompose the registers used in a simple move SET within INSN. If
850 we don't change anything, return INSN, otherwise return the start
851 of the sequence of moves. */
852
853 static rtx_insn *
854 resolve_simple_move (rtx set, rtx_insn *insn)
855 {
856 rtx src, dest, real_dest;
857 rtx_insn *insns;
858 machine_mode orig_mode, dest_mode;
859 unsigned int orig_size, words;
860 bool pushing;
861
862 src = SET_SRC (set);
863 dest = SET_DEST (set);
864 orig_mode = GET_MODE (dest);
865
866 if (!interesting_mode_p (orig_mode, &orig_size, &words))
867 gcc_unreachable ();
868 gcc_assert (words > 1);
869
870 start_sequence ();
871
872 /* We have to handle copying from a SUBREG of a decomposed reg where
873 the SUBREG is larger than word size. Rather than assume that we
874 can take a word_mode SUBREG of the destination, we copy to a new
875 register and then copy that to the destination. */
876
877 real_dest = NULL_RTX;
878
879 if (GET_CODE (src) == SUBREG
880 && resolve_reg_p (SUBREG_REG (src))
881 && (maybe_ne (SUBREG_BYTE (src), 0)
882 || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
883 {
884 real_dest = dest;
885 dest = gen_reg_rtx (orig_mode);
886 if (REG_P (real_dest))
887 REG_ATTRS (dest) = REG_ATTRS (real_dest);
888 }
889
890 /* Similarly if we are copying to a SUBREG of a decomposed reg where
891 the SUBREG is larger than word size. */
892
893 if (GET_CODE (dest) == SUBREG
894 && resolve_reg_p (SUBREG_REG (dest))
895 && (maybe_ne (SUBREG_BYTE (dest), 0)
896 || maybe_ne (orig_size,
897 GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
898 {
899 rtx reg, smove;
900 rtx_insn *minsn;
901
902 reg = gen_reg_rtx (orig_mode);
903 minsn = emit_move_insn (reg, src);
904 smove = single_set (minsn);
905 gcc_assert (smove != NULL_RTX);
906 resolve_simple_move (smove, minsn);
907 src = reg;
908 }
909
910 /* If we didn't have any big SUBREGS of decomposed registers, and
911 neither side of the move is a register we are decomposing, then
912 we don't have to do anything here. */
913
914 if (src == SET_SRC (set)
915 && dest == SET_DEST (set)
916 && !resolve_reg_p (src)
917 && !resolve_subreg_p (src)
918 && !resolve_reg_p (dest)
919 && !resolve_subreg_p (dest))
920 {
921 end_sequence ();
922 return insn;
923 }
924
925 /* It's possible for the code to use a subreg of a decomposed
926 register while forming an address. We need to handle that before
927 passing the address to emit_move_insn. We pass NULL_RTX as the
928 insn parameter to resolve_subreg_use because we can not validate
929 the insn yet. */
930 if (MEM_P (src) || MEM_P (dest))
931 {
932 int acg;
933
934 if (MEM_P (src))
935 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
936 if (MEM_P (dest))
937 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
938 acg = apply_change_group ();
939 gcc_assert (acg);
940 }
941
942 /* If SRC is a register which we can't decompose, or has side
943 effects, we need to move via a temporary register. */
944
945 if (!can_decompose_p (src)
946 || side_effects_p (src)
947 || GET_CODE (src) == ASM_OPERANDS)
948 {
949 rtx reg;
950
951 reg = gen_reg_rtx (orig_mode);
952
953 if (AUTO_INC_DEC)
954 {
955 rtx_insn *move = emit_move_insn (reg, src);
956 if (MEM_P (src))
957 {
958 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
959 if (note)
960 add_reg_note (move, REG_INC, XEXP (note, 0));
961 }
962 }
963 else
964 emit_move_insn (reg, src);
965
966 src = reg;
967 }
968
969 /* If DEST is a register which we can't decompose, or has side
970 effects, we need to first move to a temporary register. We
971 handle the common case of pushing an operand directly. We also
972 go through a temporary register if it holds a floating point
973 value. This gives us better code on systems which can't move
974 data easily between integer and floating point registers. */
975
976 dest_mode = orig_mode;
977 pushing = push_operand (dest, dest_mode);
978 if (!can_decompose_p (dest)
979 || (side_effects_p (dest) && !pushing)
980 || (!SCALAR_INT_MODE_P (dest_mode)
981 && !resolve_reg_p (dest)
982 && !resolve_subreg_p (dest)))
983 {
984 if (real_dest == NULL_RTX)
985 real_dest = dest;
986 if (!SCALAR_INT_MODE_P (dest_mode))
987 dest_mode = int_mode_for_mode (dest_mode).require ();
988 dest = gen_reg_rtx (dest_mode);
989 if (REG_P (real_dest))
990 REG_ATTRS (dest) = REG_ATTRS (real_dest);
991 }
992
993 if (pushing)
994 {
995 unsigned int i, j, jinc;
996
997 gcc_assert (orig_size % UNITS_PER_WORD == 0);
998 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
999 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1000
1001 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1002 {
1003 j = 0;
1004 jinc = 1;
1005 }
1006 else
1007 {
1008 j = words - 1;
1009 jinc = -1;
1010 }
1011
1012 for (i = 0; i < words; ++i, j += jinc)
1013 {
1014 rtx temp;
1015
1016 temp = copy_rtx (XEXP (dest, 0));
1017 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1018 j * UNITS_PER_WORD);
1019 emit_move_insn (temp,
1020 simplify_gen_subreg_concatn (word_mode, src,
1021 orig_mode,
1022 j * UNITS_PER_WORD));
1023 }
1024 }
1025 else
1026 {
1027 unsigned int i;
1028
1029 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1030 emit_clobber (dest);
1031
1032 for (i = 0; i < words; ++i)
1033 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1034 dest_mode,
1035 i * UNITS_PER_WORD),
1036 simplify_gen_subreg_concatn (word_mode, src,
1037 orig_mode,
1038 i * UNITS_PER_WORD));
1039 }
1040
1041 if (real_dest != NULL_RTX)
1042 {
1043 rtx mdest, smove;
1044 rtx_insn *minsn;
1045
1046 if (dest_mode == orig_mode)
1047 mdest = dest;
1048 else
1049 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1050 minsn = emit_move_insn (real_dest, mdest);
1051
1052 if (AUTO_INC_DEC && MEM_P (real_dest)
1053 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1054 {
1055 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1056 if (note)
1057 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1058 }
1059
1060 smove = single_set (minsn);
1061 gcc_assert (smove != NULL_RTX);
1062
1063 resolve_simple_move (smove, minsn);
1064 }
1065
1066 insns = get_insns ();
1067 end_sequence ();
1068
1069 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1070
1071 emit_insn_before (insns, insn);
1072
1073 /* If we get here via self-recursion, then INSN is not yet in the insns
1074 chain and delete_insn will fail. We only want to remove INSN from the
1075 current sequence. See PR56738. */
1076 if (in_sequence_p ())
1077 remove_insn (insn);
1078 else
1079 delete_insn (insn);
1080
1081 return insns;
1082 }
1083
1084 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1085 component registers. Return whether we changed something. */
1086
1087 static bool
1088 resolve_clobber (rtx pat, rtx_insn *insn)
1089 {
1090 rtx reg;
1091 machine_mode orig_mode;
1092 unsigned int orig_size, words, i;
1093 int ret;
1094
1095 reg = XEXP (pat, 0);
1096 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1097 return false;
1098
1099 orig_mode = GET_MODE (reg);
1100 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1101 gcc_unreachable ();
1102
1103 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1104 simplify_gen_subreg_concatn (word_mode, reg,
1105 orig_mode, 0),
1106 0);
1107 df_insn_rescan (insn);
1108 gcc_assert (ret != 0);
1109
1110 for (i = words - 1; i > 0; --i)
1111 {
1112 rtx x;
1113
1114 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1115 i * UNITS_PER_WORD);
1116 x = gen_rtx_CLOBBER (VOIDmode, x);
1117 emit_insn_after (x, insn);
1118 }
1119
1120 resolve_reg_notes (insn);
1121
1122 return true;
1123 }
1124
1125 /* A USE of a decomposed register is no longer meaningful. Return
1126 whether we changed something. */
1127
1128 static bool
1129 resolve_use (rtx pat, rtx_insn *insn)
1130 {
1131 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1132 {
1133 delete_insn (insn);
1134 return true;
1135 }
1136
1137 resolve_reg_notes (insn);
1138
1139 return false;
1140 }
1141
1142 /* A VAR_LOCATION can be simplified. */
1143
1144 static void
1145 resolve_debug (rtx_insn *insn)
1146 {
1147 subrtx_ptr_iterator::array_type array;
1148 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1149 {
1150 rtx *loc = *iter;
1151 rtx x = *loc;
1152 if (resolve_subreg_p (x))
1153 {
1154 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1155 SUBREG_BYTE (x));
1156
1157 if (x)
1158 *loc = x;
1159 else
1160 x = copy_rtx (*loc);
1161 }
1162 if (resolve_reg_p (x))
1163 *loc = copy_rtx (x);
1164 }
1165
1166 df_insn_rescan (insn);
1167
1168 resolve_reg_notes (insn);
1169 }
1170
1171 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1172 set the decomposable_context bitmap accordingly. SPEED_P is true
1173 if we are optimizing INSN for speed rather than size. Return true
1174 if INSN is decomposable. */
1175
1176 static bool
1177 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1178 {
1179 rtx set;
1180 rtx op;
1181 rtx op_operand;
1182
1183 set = single_set (insn);
1184 if (!set)
1185 return false;
1186
1187 op = SET_SRC (set);
1188 if (GET_CODE (op) != ASHIFT
1189 && GET_CODE (op) != LSHIFTRT
1190 && GET_CODE (op) != ASHIFTRT
1191 && GET_CODE (op) != ZERO_EXTEND)
1192 return false;
1193
1194 op_operand = XEXP (op, 0);
1195 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1196 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1197 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1198 || GET_MODE (op) != twice_word_mode)
1199 return false;
1200
1201 if (GET_CODE (op) == ZERO_EXTEND)
1202 {
1203 if (GET_MODE (op_operand) != word_mode
1204 || !choices[speed_p].splitting_zext)
1205 return false;
1206 }
1207 else /* left or right shift */
1208 {
1209 bool *splitting = (GET_CODE (op) == ASHIFT
1210 ? choices[speed_p].splitting_ashift
1211 : GET_CODE (op) == ASHIFTRT
1212 ? choices[speed_p].splitting_ashiftrt
1213 : choices[speed_p].splitting_lshiftrt);
1214 if (!CONST_INT_P (XEXP (op, 1))
1215 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1216 2 * BITS_PER_WORD - 1)
1217 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1218 return false;
1219
1220 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1221 }
1222
1223 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1224
1225 return true;
1226 }
1227
1228 /* Decompose a more than word wide shift (in INSN) of a multiword
1229 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1230 and 'set to zero' insn. Return a pointer to the new insn when a
1231 replacement was done. */
1232
1233 static rtx_insn *
1234 resolve_shift_zext (rtx_insn *insn)
1235 {
1236 rtx set;
1237 rtx op;
1238 rtx op_operand;
1239 rtx_insn *insns;
1240 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1241 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1242 scalar_int_mode inner_mode;
1243
1244 set = single_set (insn);
1245 if (!set)
1246 return NULL;
1247
1248 op = SET_SRC (set);
1249 if (GET_CODE (op) != ASHIFT
1250 && GET_CODE (op) != LSHIFTRT
1251 && GET_CODE (op) != ASHIFTRT
1252 && GET_CODE (op) != ZERO_EXTEND)
1253 return NULL;
1254
1255 op_operand = XEXP (op, 0);
1256 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1257 return NULL;
1258
1259 /* We can tear this operation apart only if the regs were already
1260 torn apart. */
1261 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1262 return NULL;
1263
1264 /* src_reg_num is the number of the word mode register which we
1265 are operating on. For a left shift and a zero_extend on little
1266 endian machines this is register 0. */
1267 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1268 ? 1 : 0;
1269
1270 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1271 src_reg_num = 1 - src_reg_num;
1272
1273 if (GET_CODE (op) == ZERO_EXTEND)
1274 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1275 else
1276 dest_reg_num = 1 - src_reg_num;
1277
1278 offset1 = UNITS_PER_WORD * dest_reg_num;
1279 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1280 src_offset = UNITS_PER_WORD * src_reg_num;
1281
1282 start_sequence ();
1283
1284 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1285 GET_MODE (SET_DEST (set)),
1286 offset1);
1287 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1288 GET_MODE (SET_DEST (set)),
1289 offset2);
1290 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1291 GET_MODE (op_operand),
1292 src_offset);
1293 if (GET_CODE (op) == ASHIFTRT
1294 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1295 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1296 BITS_PER_WORD - 1, NULL_RTX, 0);
1297
1298 if (GET_CODE (op) != ZERO_EXTEND)
1299 {
1300 int shift_count = INTVAL (XEXP (op, 1));
1301 if (shift_count > BITS_PER_WORD)
1302 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1303 LSHIFT_EXPR : RSHIFT_EXPR,
1304 word_mode, src_reg,
1305 shift_count - BITS_PER_WORD,
1306 dest_reg, GET_CODE (op) != ASHIFTRT);
1307 }
1308
1309 if (dest_reg != src_reg)
1310 emit_move_insn (dest_reg, src_reg);
1311 if (GET_CODE (op) != ASHIFTRT)
1312 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1313 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1314 emit_move_insn (dest_upper, copy_rtx (src_reg));
1315 else
1316 emit_move_insn (dest_upper, upper_src);
1317 insns = get_insns ();
1318
1319 end_sequence ();
1320
1321 emit_insn_before (insns, insn);
1322
1323 if (dump_file)
1324 {
1325 rtx_insn *in;
1326 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1327 for (in = insns; in != insn; in = NEXT_INSN (in))
1328 fprintf (dump_file, "%d ", INSN_UID (in));
1329 fprintf (dump_file, "\n");
1330 }
1331
1332 delete_insn (insn);
1333 return insns;
1334 }
1335
1336 /* Print to dump_file a description of what we're doing with shift code CODE.
1337 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1338
1339 static void
1340 dump_shift_choices (enum rtx_code code, bool *splitting)
1341 {
1342 int i;
1343 const char *sep;
1344
1345 fprintf (dump_file,
1346 " Splitting mode %s for %s lowering with shift amounts = ",
1347 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1348 sep = "";
1349 for (i = 0; i < BITS_PER_WORD; i++)
1350 if (splitting[i])
1351 {
1352 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1353 sep = ",";
1354 }
1355 fprintf (dump_file, "\n");
1356 }
1357
1358 /* Print to dump_file a description of what we're doing when optimizing
1359 for speed or size; SPEED_P says which. DESCRIPTION is a description
1360 of the SPEED_P choice. */
1361
1362 static void
1363 dump_choices (bool speed_p, const char *description)
1364 {
1365 unsigned int size, factor, i;
1366
1367 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1368
1369 for (i = 0; i < MAX_MACHINE_MODE; i++)
1370 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1371 && factor > 1)
1372 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1373 choices[speed_p].move_modes_to_split[i]
1374 ? "Splitting"
1375 : "Skipping",
1376 GET_MODE_NAME ((machine_mode) i));
1377
1378 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1379 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1380 GET_MODE_NAME (twice_word_mode));
1381
1382 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1383 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1384 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1385 fprintf (dump_file, "\n");
1386 }
1387
1388 /* Look for registers which are always accessed via word-sized SUBREGs
   or, if DECOMPOSE_COPIES is true, via copies.  Decompose these
   registers into several word-sized pseudo-registers.

   The function works in two walks over all insns of the current
   function: the first classifies insns and collects candidate
   registers in the context bitmaps, the second rewrites the insns
   that mention a decomposed register.  Basic blocks in which a
   control flow insn was split are then split themselves.  */
1391
1392 static void
1393 decompose_multiword_subregs (bool decompose_copies)
1394 {
1395 unsigned int max;
1396 basic_block bb;
1397 bool speed_p;
1398
1399 if (dump_file)
1400 {
1401 dump_choices (false, "size");
1402 dump_choices (true, "speed");
1403 }
1404
1405 /* Check if this target even has any modes to consider lowering. */
1406 if (!choices[false].something_to_do && !choices[true].something_to_do)
1407 {
1408 if (dump_file)
1409 fprintf (dump_file, "Nothing to do!\n");
1410 return;
1411 }
1412
1413 max = max_reg_num ();
1414
1415 /* First see if there are any multi-word pseudo-registers. If there
1416 aren't, there is nothing we can do. This should speed up this
1417 pass in the normal case, since it should be faster than scanning
1418 all the insns. */
1419 {
1420 unsigned int i;
1421 bool useful_modes_seen = false;
1422
1423 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1424 if (regno_reg_rtx[i] != NULL)
1425 {
1426 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
/* A mode counts if either the size or the speed choices split it.  */
1427 if (choices[false].move_modes_to_split[(int) mode]
1428 || choices[true].move_modes_to_split[(int) mode])
1429 {
1430 useful_modes_seen = true;
1431 break;
1432 }
1433 }
1434
1435 if (!useful_modes_seen)
1436 {
1437 if (dump_file)
1438 fprintf (dump_file, "Nothing to lower in this function.\n");
1439 return;
1440 }
1441 }
1442
/* When dataflow information exists, request deferred insn rescans
   for the rewriting below and run run_word_dce first (presumably a
   word-level dead code elimination -- see dce.h).  */
1443 if (df)
1444 {
1445 df_set_flags (DF_DEFER_INSN_RESCAN);
1446 run_word_dce ();
1447 }
1448
1449 /* FIXME: It may be possible to change this code to look for each
1450 multi-word pseudo-register and to find each insn which sets or
1451 uses that register. That should be faster than scanning all the
1452 insns. */
1453
1454 decomposable_context = BITMAP_ALLOC (NULL);
1455 non_decomposable_context = BITMAP_ALLOC (NULL);
1456 subreg_context = BITMAP_ALLOC (NULL);
1457
/* One bitmap slot per register number, all starting out null.  */
1458 reg_copy_graph.create (max);
1459 reg_copy_graph.safe_grow_cleared (max);
1460 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1461
/* The speed/size decision is taken once for the whole function.  */
1462 speed_p = optimize_function_for_speed_p (cfun);
/* First walk: classify every insn.  The helpers called here record
   registers in the context bitmaps and in reg_copy_graph; no insn is
   rewritten by this loop itself.  */
1463 FOR_EACH_BB_FN (bb, cfun)
1464 {
1465 rtx_insn *insn;
1466
1467 FOR_BB_INSNS (bb, insn)
1468 {
1469 rtx set;
1470 enum classify_move_insn cmi;
1471 int i, n;
1472
/* Non-insns and bare CLOBBER or USE patterns are not classified.  */
1473 if (!INSN_P (insn)
1474 || GET_CODE (PATTERN (insn)) == CLOBBER
1475 || GET_CODE (PATTERN (insn)) == USE)
1476 continue;
1477
1478 recog_memoized (insn);
1479
/* Shifts and zero-extends accepted here have already had their
   registers recorded, so nothing more is needed for this insn.  */
1480 if (find_decomposable_shift_zext (insn, speed_p))
1481 continue;
1482
1483 extract_insn (insn);
1484
1485 set = simple_move (insn, speed_p);
1486
1487 if (!set)
1488 cmi = NOT_SIMPLE_MOVE;
1489 else
1490 {
1491 /* We mark pseudo-to-pseudo copies as decomposable during the
1492 second pass only. The first pass is so early that there is
1493 good chance such moves will be optimized away completely by
1494 subsequent optimizations anyway.
1495
1496 However, we call find_pseudo_copy even during the first pass
1497 so as to properly set up the reg_copy_graph. */
1498 if (find_pseudo_copy (set))
1499 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1500 else
1501 cmi = SIMPLE_MOVE;
1502 }
1503
/* Examine each operand extracted into recog_data; CMI is passed by
   address to find_decomposable_subregs.  */
1504 n = recog_data.n_operands;
1505 for (i = 0; i < n; ++i)
1506 {
1507 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1508
1509 /* We handle ASM_OPERANDS as a special case to support
1510 things like x86 rdtsc which returns a DImode value.
1511 We can decompose the output, which will certainly be
1512 operand 0, but not the inputs. */
1513
1514 if (cmi == SIMPLE_MOVE
1515 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1516 {
1517 gcc_assert (i == 0);
1518 cmi = NOT_SIMPLE_MOVE;
1519 }
1520 }
1521 }
1522 }
1523
/* A register that also appears in non_decomposable_context is dropped
   from the set of registers to decompose.  */
1524 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1525 if (!bitmap_empty_p (decomposable_context))
1526 {
1527 unsigned int i;
1528 sbitmap_iterator sbi;
1529 bitmap_iterator iter;
1530 unsigned int regno;
1531
1532 propagate_pseudo_copies ();
1533
/* SUB_BLOCKS collects the indices of basic blocks in which a control
   flow insn was replaced by several insns.  */
1534 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1535 bitmap_clear (sub_blocks);
1536
1537 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1538 decompose_register (regno);
1539
/* Second walk: rewrite every insn according to its kind.  */
1540 FOR_EACH_BB_FN (bb, cfun)
1541 {
1542 rtx_insn *insn;
1543
1544 FOR_BB_INSNS (bb, insn)
1545 {
1546 rtx pat;
1547
1548 if (!INSN_P (insn))
1549 continue;
1550
1551 pat = PATTERN (insn);
1552 if (GET_CODE (pat) == CLOBBER)
1553 resolve_clobber (pat, insn);
1554 else if (GET_CODE (pat) == USE)
1555 resolve_use (pat, insn);
1556 else if (DEBUG_INSN_P (insn))
1557 resolve_debug (insn);
1558 else
1559 {
1560 rtx set;
1561 int i;
1562
1563 recog_memoized (insn);
1564 extract_insn (insn);
1565
1566 set = simple_move (insn, speed_p);
1567 if (set)
1568 {
1569 rtx_insn *orig_insn = insn;
1570 bool cfi = control_flow_insn_p (insn);
1571
1572 /* We can end up splitting loads to multi-word pseudos
1573 into separate loads to machine word size pseudos.
1574 When this happens, we first had one load that can
1575 throw, and after resolve_simple_move we'll have a
1576 bunch of loads (at least two). All those loads may
1577 trap if we can have non-call exceptions, so they
1578 all will end the current basic block. We split the
1579 block after the outer loop over all insns, but we
1580 make sure here that we will be able to split the
1581 basic block and still produce the correct control
1582 flow graph for it. */
1583 gcc_assert (!cfi
1584 || (cfun->can_throw_non_call_exceptions
1585 && can_throw_internal (insn)));
1586
/* A different insn coming back means the move was replaced;
   recognize and extract the returned insn so that recog_data
   describes it for the operand processing below.  */
1587 insn = resolve_simple_move (set, insn);
1588 if (insn != orig_insn)
1589 {
1590 recog_memoized (insn);
1591 extract_insn (insn);
1592
1593 if (cfi)
1594 bitmap_set_bit (sub_blocks, bb->index);
1595 }
1596 }
1597 else
1598 {
1599 rtx_insn *decomposed_shift;
1600
1601 decomposed_shift = resolve_shift_zext (insn);
1602 if (decomposed_shift != NULL_RTX)
1603 {
1604 insn = decomposed_shift;
1605 recog_memoized (insn);
1606 extract_insn (insn);
1607 }
1608 }
1609
/* Hand every operand location of the (possibly replaced) insn to
   resolve_subreg_use, last operand first.  */
1610 for (i = recog_data.n_operands - 1; i >= 0; --i)
1611 resolve_subreg_use (recog_data.operand_loc[i], insn);
1612
1613 resolve_reg_notes (insn);
1614
/* If changes are pending, also queue a change that copies each
   operand into the locations that duplicate it, then apply the whole
   group; the group is required to apply successfully.  */
1615 if (num_validated_changes () > 0)
1616 {
1617 for (i = recog_data.n_dups - 1; i >= 0; --i)
1618 {
1619 rtx *pl = recog_data.dup_loc[i];
1620 int dup_num = recog_data.dup_num[i];
1621 rtx *px = recog_data.operand_loc[dup_num];
1622
1623 validate_unshare_change (insn, pl, *px, 1);
1624 }
1625
1626 i = apply_change_group ();
1627 gcc_assert (i);
1628 }
1629 }
1630 }
1631 }
1632
1633 /* If we had insns to split that caused control flow insns in the middle
1634 of a basic block, split those blocks now. Note that we only handle
1635 the case where splitting a load has caused multiple possibly trapping
1636 loads to appear. */
1637 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1638 {
1639 rtx_insn *insn, *end;
1640 edge fallthru;
1641
1642 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1643 insn = BB_HEAD (bb);
1644 end = BB_END (bb);
1645
/* END is the last insn of the original block and is not itself
   tested; scanning continues in each new fallthru block until END is
   reached.  */
1646 while (insn != end)
1647 {
1648 if (control_flow_insn_p (insn))
1649 {
1650 /* Split the block after insn. There will be a fallthru
1651 edge, which is OK so we keep it. We have to create the
1652 exception edges ourselves. */
1653 fallthru = split_block (bb, insn);
1654 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1655 bb = fallthru->dest;
1656 insn = BB_HEAD (bb);
1657 }
1658 else
1659 insn = NEXT_INSN (insn);
1660 }
1661 }
1662 }
1663
/* Release the per-register bitmaps of the copy graph, then the graph
   itself and the three context bitmaps.  */
1664 {
1665 unsigned int i;
1666 bitmap b;
1667
1668 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1669 if (b)
1670 BITMAP_FREE (b);
1671 }
1672
1673 reg_copy_graph.release ();
1674
1675 BITMAP_FREE (decomposable_context);
1676 BITMAP_FREE (non_decomposable_context);
1677 BITMAP_FREE (subreg_context);
1678 }
1679 \f
1680 /* Implement first lower subreg pass. */
1681
1682 namespace {
1683
1684 const pass_data pass_data_lower_subreg =
1685 {
1686 RTL_PASS, /* type */
1687 "subreg1", /* name */
1688 OPTGROUP_NONE, /* optinfo_flags */
1689 TV_LOWER_SUBREG, /* tv_id */
1690 0, /* properties_required */
1691 0, /* properties_provided */
1692 0, /* properties_destroyed */
1693 0, /* todo_flags_start */
1694 0, /* todo_flags_finish */
1695 };
1696
1697 class pass_lower_subreg : public rtl_opt_pass
1698 {
1699 public:
1700 pass_lower_subreg (gcc::context *ctxt)
1701 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1702 {}
1703
1704 /* opt_pass methods: */
1705 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1706 virtual unsigned int execute (function *)
1707 {
1708 decompose_multiword_subregs (false);
1709 return 0;
1710 }
1711
1712 }; // class pass_lower_subreg
1713
1714 } // anon namespace
1715
1716 rtl_opt_pass *
1717 make_pass_lower_subreg (gcc::context *ctxt)
1718 {
1719 return new pass_lower_subreg (ctxt);
1720 }
1721
1722 /* Implement second lower subreg pass. */
1723
1724 namespace {
1725
1726 const pass_data pass_data_lower_subreg2 =
1727 {
1728 RTL_PASS, /* type */
1729 "subreg2", /* name */
1730 OPTGROUP_NONE, /* optinfo_flags */
1731 TV_LOWER_SUBREG, /* tv_id */
1732 0, /* properties_required */
1733 0, /* properties_provided */
1734 0, /* properties_destroyed */
1735 0, /* todo_flags_start */
1736 TODO_df_finish, /* todo_flags_finish */
1737 };
1738
1739 class pass_lower_subreg2 : public rtl_opt_pass
1740 {
1741 public:
1742 pass_lower_subreg2 (gcc::context *ctxt)
1743 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1744 {}
1745
1746 /* opt_pass methods: */
1747 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1748 virtual unsigned int execute (function *)
1749 {
1750 decompose_multiword_subregs (true);
1751 return 0;
1752 }
1753
1754 }; // class pass_lower_subreg2
1755
1756 } // anon namespace
1757
1758 rtl_opt_pass *
1759 make_pass_lower_subreg2 (gcc::context *ctxt)
1760 {
1761 return new pass_lower_subreg2 (ctxt);
1762 }