]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/lower-subreg.c
Update copyright years.
[thirdparty/gcc.git] / gcc / lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
44
45
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 to do this.
53
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there was a need to do this, but the trend in
58 architectures is to not need this.
59
60 There are two useful preprocessor defines for use by maintainers:
61
62 #define LOG_COSTS 1
63
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
68
69 #define FORCE_LOWERING 1
70
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
73
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
static vec<bitmap> reg_copy_graph;

/* Per-target state (the cost-driven lowering decisions); see
   lower-subreg.h for the structure contents.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthands for the fields of the current target's state.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
105
106 /* Return true if MODE is a mode we know how to lower. When returning true,
107 store its byte size in *BYTES and its word size in *WORDS. */
108
109 static inline bool
110 interesting_mode_p (machine_mode mode, unsigned int *bytes,
111 unsigned int *words)
112 {
113 *bytes = GET_MODE_SIZE (mode);
114 *words = CEIL (*bytes, UNITS_PER_WORD);
115 return true;
116 }
117
/* RTXes used while computing costs.  These are scratch expressions
   whose codes and modes are mutated in place while querying the
   target's cost hooks.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
133
134 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
135 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
136
137 static int
138 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
139 machine_mode mode, int op1)
140 {
141 PUT_CODE (rtxes->shift, code);
142 PUT_MODE (rtxes->shift, mode);
143 PUT_MODE (rtxes->source, mode);
144 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
145 return set_src_cost (rtxes->shift, mode, speed_p);
146 }
147
148 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
149 to true if it is profitable to split a double-word CODE shift
150 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
151 for speed or size profitability.
152
153 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
154 the cost of moving zero into a word-mode register. WORD_MOVE_COST
155 is the cost of moving between word registers. */
156
157 static void
158 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
159 bool *splitting, enum rtx_code code,
160 int word_move_zero_cost, int word_move_cost)
161 {
162 int wide_cost, narrow_cost, upper_cost, i;
163
164 for (i = 0; i < BITS_PER_WORD; i++)
165 {
166 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
167 i + BITS_PER_WORD);
168 if (i == 0)
169 narrow_cost = word_move_cost;
170 else
171 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
172
173 if (code != ASHIFTRT)
174 upper_cost = word_move_zero_cost;
175 else if (i == BITS_PER_WORD - 1)
176 upper_cost = word_move_cost;
177 else
178 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
179 BITS_PER_WORD - 1);
180
181 if (LOG_COSTS)
182 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
183 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
184 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
185
186 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
187 splitting[i] = true;
188 }
189 }
190
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  Results are stored
   in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading constant zero into a word_mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word_mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a single move in that
     mode costs at least as much as one word move per word; if so,
     mark the mode as worth splitting.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-bit-count decisions for each variety of
	 double-word shift.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
270
271 /* Do one-per-target initialisation. This involves determining
272 which operations on the machine are profitable. If none are found,
273 then the pass just returns when called. */
274
275 void
276 init_lower_subreg (void)
277 {
278 struct cost_rtxes rtxes;
279
280 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
281
282 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
283
284 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
285 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
286 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
287 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
288 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
289
290 if (LOG_COSTS)
291 fprintf (stderr, "\nSize costs\n==========\n\n");
292 compute_costs (false, &rtxes);
293
294 if (LOG_COSTS)
295 fprintf (stderr, "\nSpeed costs\n===========\n\n");
296 compute_costs (true, &rtxes);
297 }
298
299 static bool
300 simple_move_operand (rtx x)
301 {
302 if (GET_CODE (x) == SUBREG)
303 x = SUBREG_REG (x);
304
305 if (!OBJECT_P (x))
306 return false;
307
308 if (GET_CODE (x) == LABEL_REF
309 || GET_CODE (x) == SYMBOL_REF
310 || GET_CODE (x) == HIGH
311 || GET_CODE (x) == CONST)
312 return false;
313
314 if (MEM_P (x)
315 && (MEM_VOLATILE_P (x)
316 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
317 return false;
318
319 return true;
320 }
321
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the recog operands and must be an
     operand we know how to split.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the cost analysis must have decided that this mode is
     worth splitting at all.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
378
379 /* If SET is a copy from one multi-word pseudo-register to another,
380 record that in reg_copy_graph. Return whether it is such a
381 copy. */
382
383 static bool
384 find_pseudo_copy (rtx set)
385 {
386 rtx dest = SET_DEST (set);
387 rtx src = SET_SRC (set);
388 unsigned int rd, rs;
389 bitmap b;
390
391 if (!REG_P (dest) || !REG_P (src))
392 return false;
393
394 rd = REGNO (dest);
395 rs = REGNO (src);
396 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
397 return false;
398
399 b = reg_copy_graph[rs];
400 if (b == NULL)
401 {
402 b = BITMAP_ALLOC (NULL);
403 reg_copy_graph[rs] = b;
404 }
405
406 bitmap_set_bit (b, rd);
407
408 return true;
409 }
410
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* Standard worklist iteration: QUEUE holds the registers whose copy
     targets have not yet been processed.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      /* Collect every copy target of a queued register, excluding
	 targets already known to be non-decomposable.  */
      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Only registers discovered this round go back on the queue;
	 everything found becomes decomposable.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));
}
442
/* A pointer to one of these values is passed to
   find_decomposable_subregs to describe the insn currently being
   scanned.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
455
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  /* Hard registers are never decomposed; don't look inside.  */
	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  /* Only worthwhile when the subwords can live in
		     word_mode registers without penalty.  */
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
573
574 /* Decompose REGNO into word-sized components. We smash the REG node
575 in place. This ensures that (1) something goes wrong quickly if we
576 fail to make some replacement, and (2) the debug information inside
577 the symbol table is automatically kept up to date. */
578
579 static void
580 decompose_register (unsigned int regno)
581 {
582 rtx reg;
583 unsigned int size, words, i;
584 rtvec v;
585
586 reg = regno_reg_rtx[regno];
587
588 regno_reg_rtx[regno] = NULL_RTX;
589
590 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
591 gcc_unreachable ();
592
593 v = rtvec_alloc (words);
594 for (i = 0; i < words; ++i)
595 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
596
597 PUT_CODE (reg, CONCATN);
598 XVEC (reg, 0) = v;
599
600 if (dump_file)
601 {
602 fprintf (dump_file, "; Splitting reg %u ->", regno);
603 for (i = 0; i < words; ++i)
604 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
605 fputc ('\n', dump_file);
606 }
607 }
608
/* Get a SUBREG of a CONCATN: return an OUTERMODE rtx for the piece of
   OP at byte offset ORIG_BYTE, or NULL_RTX if the requested piece
   straddles two elements of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();
  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* After this division INNER_SIZE is the size of one CONCATN
     element; pick out the element containing BYTE.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* Fail if the requested piece crosses an element boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
653
/* Wrapper around simplify_gen_subreg which handles CONCATN.  Return
   an OUTERMODE rtx for the piece of OP (which has mode INNERMODE) at
   byte offset BYTE.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* A same-size, offset-0 SUBREG is a pure mode change; strip it
	 and recurse on the CONCATN directly.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to extract the part that the SUBREG refers to.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The SUBREG straddled CONCATN elements; fold both byte
	     offsets together and extract in a single step.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
709
710 /* Return whether we should resolve X into the registers into which it
711 was decomposed. */
712
713 static bool
714 resolve_reg_p (rtx x)
715 {
716 return GET_CODE (x) == CONCATN;
717 }
718
719 /* Return whether X is a SUBREG of a register which we need to
720 resolve. */
721
722 static bool
723 resolve_subreg_p (rtx x)
724 {
725 if (GET_CODE (x) != SUBREG)
726 return false;
727 return resolve_reg_p (SUBREG_REG (x));
728 }
729
730 /* Look for SUBREGs in *LOC which need to be decomposed. */
731
732 static bool
733 resolve_subreg_use (rtx *loc, rtx insn)
734 {
735 subrtx_ptr_iterator::array_type array;
736 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
737 {
738 rtx *loc = *iter;
739 rtx x = *loc;
740 if (resolve_subreg_p (x))
741 {
742 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
743 SUBREG_BYTE (x));
744
745 /* It is possible for a note to contain a reference which we can
746 decompose. In this case, return 1 to the caller to indicate
747 that the note must be removed. */
748 if (!x)
749 {
750 gcc_assert (!insn);
751 return true;
752 }
753
754 validate_change (insn, loc, x, 1);
755 iter.skip_subrtxes ();
756 }
757 else if (resolve_reg_p (x))
758 /* Return 1 to the caller to indicate that we found a direct
759 reference to a register which is being decomposed. This can
760 happen inside notes, multiword shift or zero-extend
761 instructions. */
762 return true;
763 }
764
765 return false;
766 }
767
/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* First handle any REG_EQUAL/REG_EQUIV note.  If it references a
     decomposed register directly, drop the note; if we only rewrote
     subregs inside it, have DF re-scan the notes.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Delete REG_DEAD/REG_UNUSED notes that refer to a decomposed
     register as a whole, unlinking them from the note chain.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
811
812 /* Return whether X can be decomposed into subwords. */
813
814 static bool
815 can_decompose_p (rtx x)
816 {
817 if (REG_P (x))
818 {
819 unsigned int regno = REGNO (x);
820
821 if (HARD_REGISTER_NUM_P (regno))
822 {
823 unsigned int byte, num_bytes, num_words;
824
825 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
826 return false;
827 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
828 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
829 return false;
830 return true;
831 }
832 else
833 return !bitmap_bit_p (subreg_context, regno);
834 }
835
836 return true;
837 }
838
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Move SRC into a fresh pseudo, recursively resolving that
	 simpler move; the fresh pseudo becomes our source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      /* Carry any auto-inc note over to the new move.  */
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which source words are pushed so that the
	 pieces end up in memory in the right word order; J indexes
	 the source word for each push.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  /* Push one word at a time through a copy of the
	     auto-modified stack address.  */
	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* The CLOBBER records that the entire multi-word pseudo is
	 being set, even though we then write it word by word.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      /* Copy the temporary into the real destination, resolving that
	 move recursively in case it still involves decomposed
	 registers.  */
      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

      if (AUTO_INC_DEC && MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  /* Preserve EH region information on the replacement insns.  */
  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
1074
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int orig_size, words, i;
  int ret;

  /* Only clobbers of registers we decided to decompose (or subregs
     of them) need rewriting.  */
  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();

  /* Rewrite the existing CLOBBER in place to clobber the first
     word-sized component; the remaining words get their own CLOBBER
     insns below.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* Emit a separate CLOBBER after INSN for each remaining word of
     the original register.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1115
1116 /* A USE of a decomposed register is no longer meaningful. Return
1117 whether we changed something. */
1118
1119 static bool
1120 resolve_use (rtx pat, rtx_insn *insn)
1121 {
1122 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1123 {
1124 delete_insn (insn);
1125 return true;
1126 }
1127
1128 resolve_reg_notes (insn);
1129
1130 return false;
1131 }
1132
/* A VAR_LOCATION can be simplified.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  /* Walk every sub-rtx of the debug insn's pattern and rewrite
     references to decomposed registers.  */
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  /* Try to fold a subreg of a decomposed register down to
	     the component word(s).  */
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* If the fold succeeded, install the result; otherwise
	     work on a copy so the resolve_reg_p check below cannot
	     introduce shared RTL.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      /* A direct reference to a decomposed register is replaced by a
	 copy of it (fresh RTL for this debug insn).  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1161
/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */

static bool
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
    return false;

  /* Only shifts and zero-extends are handled by this lowering.  */
  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return false;

  /* Source and destination must both be pseudos, and the result must
     be double-word so it can be split into word_mode pieces.  */
  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
      || GET_MODE (op) != twice_word_mode)
    return false;

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      /* Zero-extension is only split for word_mode inputs, and only
	 when the target costs say splitting is profitable.  */
      if (GET_MODE (op_operand) != word_mode
	  || !choices[speed_p].splitting_zext)
	return false;
    }
  else /* left or right shift */
    {
      /* Pick the profitability table for this shift code: entry I
	 says whether a shift by I + BITS_PER_WORD is worth splitting.
	 Only counts in [BITS_PER_WORD, 2*BITS_PER_WORD - 1] can be
	 lowered to a word move plus (at most) a word shift.  */
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
			 : choices[speed_p].splitting_lshiftrt);
      if (!CONST_INT_P (XEXP (op, 1))
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      /* For shifts the input register is decomposed as well.  */
      bitmap_set_bit (decomposable_context, REGNO (op_operand));
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

  return true;
}
1218
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword
     value is reversed.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 is the byte offset of the destination word that receives
     the (possibly shifted) source word; offset2 is the other
     destination word; src_offset is the surviving source word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                            GET_MODE (SET_DEST (set)),
                                            offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift the other destination word holds
     the sign bits: the source word shifted right by BITS_PER_WORD - 1.
     The count == 2*BITS_PER_WORD - 1 case is handled separately
     below (both words are then the sign word).  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift count above BITS_PER_WORD still requires a word-mode
     shift of the surviving word by the residual amount.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  /* Fill the remaining destination word: zero for logical shifts and
     zero_extend, the sign word for arithmetic right shifts.  */
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1326
1327 /* Print to dump_file a description of what we're doing with shift code CODE.
1328 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1329
1330 static void
1331 dump_shift_choices (enum rtx_code code, bool *splitting)
1332 {
1333 int i;
1334 const char *sep;
1335
1336 fprintf (dump_file,
1337 " Splitting mode %s for %s lowering with shift amounts = ",
1338 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1339 sep = "";
1340 for (i = 0; i < BITS_PER_WORD; i++)
1341 if (splitting[i])
1342 {
1343 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1344 sep = ",";
1345 }
1346 fprintf (dump_file, "\n");
1347 }
1348
1349 /* Print to dump_file a description of what we're doing when optimizing
1350 for speed or size; SPEED_P says which. DESCRIPTION is a description
1351 of the SPEED_P choice. */
1352
1353 static void
1354 dump_choices (bool speed_p, const char *description)
1355 {
1356 unsigned int size, factor, i;
1357
1358 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1359
1360 for (i = 0; i < MAX_MACHINE_MODE; i++)
1361 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1362 && factor > 1)
1363 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1364 choices[speed_p].move_modes_to_split[i]
1365 ? "Splitting"
1366 : "Skipping",
1367 GET_MODE_NAME ((machine_mode) i));
1368
1369 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1370 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1371 GET_MODE_NAME (twice_word_mode));
1372
1373 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1374 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1375 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1376 fprintf (dump_file, "\n");
1377 }
1378
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  /* reg_copy_graph maps each pseudo to a bitmap of the pseudos it is
     copied to/from; it is filled in by find_pseudo_copy below.  */
  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan all insns, classify them, and record in the
     context bitmaps which pseudos are candidates for decomposition.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A pseudo seen in a non-decomposable context must not be split.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      /* sub_blocks records basic blocks that end up with a
	 control-flow insn in their middle and must be split after
	 the rewriting loop below.  */
      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that refers to a decomposed
	 pseudo.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Rewrite any remaining subreg uses of decomposed
		     pseudos in the insn's operands.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with
			 the operands we just rewrote.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
1670 \f
1671 /* Implement first lower subreg pass. */
1672
1673 namespace {
1674
/* Metadata for the first lower-subreg pass ("subreg1").  */
const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1687
/* The first lower-subreg pass.  It runs decompose_multiword_subregs
   without treating pseudo-to-pseudo copies as decomposable.  */
class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* false: do not decompose plain pseudo-to-pseudo copies yet.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg
1704
1705 } // anon namespace
1706
1707 rtl_opt_pass *
1708 make_pass_lower_subreg (gcc::context *ctxt)
1709 {
1710 return new pass_lower_subreg (ctxt);
1711 }
1712
1713 /* Implement second lower subreg pass. */
1714
1715 namespace {
1716
/* Metadata for the second lower-subreg pass ("subreg2"); it requests
   a DF update when it finishes.  */
const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1729
/* The second lower-subreg pass.  Unlike the first, it also treats
   pseudo-to-pseudo copies as decomposable.  */
class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* true: pseudo-to-pseudo copies are decomposable this time.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2
1746
1747 } // anon namespace
1748
1749 rtl_opt_pass *
1750 make_pass_lower_subreg2 (gcc::context *ctxt)
1751 {
1752 return new pass_lower_subreg2 (ctxt);
1753 }