/* Copyright (C) 2006-2017 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "sched-int.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Builtin types, data and prototypes.  */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these.  */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type.  (Implemented with V16QI_type_node.)  */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node.  */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types.  */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node           (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node            (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node            (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node            (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node            (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node            (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node  (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node   (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node   (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node   (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);
/* Which instruction set architecture to use.  */
int spu_arch;

/* Which cpu are we tuning for.  */
int spu_tune;

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
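
/* Worked example (illustrative arithmetic only): with the default
   spu_max_nops of 2, spu_hint_dist is (8*4) - (2*4) = 24 bytes, i.e.
   the compiler must see 6 of the 8 required 4-byte insns before the
   branch and may supply the remaining 2 as nops.  */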
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op, machine_mode mode);
/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
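
/* Illustrative reading of the macro: in the 64-bit __ea model
   (spu_ea_model == 64) an __ea pointer is manipulated in DImode,
   while in the 32-bit model it stays SImode like ordinary
   local-store pointers.  */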
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Implement TARGET_HARD_REGNO_NREGS.  */
static unsigned int
spu_hard_regno_nregs (unsigned int, machine_mode mode)
{
  return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (scalar_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
static int
valid_subreg (rtx op)
{
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
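
/* Illustrative cases of the rule above: (subreg:SI (reg:HI)) is valid
   because both modes fit in one 32-bit slot, and (subreg:V4SI (reg:TI))
   is valid because both occupy a full 16-byte quadword; a paradoxical
   (subreg:DI (reg:SI)) is rejected, since the SI value would not sit in
   the DI value's low bytes on this target.  */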
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = int_mode_for_size (op_size, 0).require ();
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
	 if the bits being extracted do not cross the alignment boundary
	 as determined by the MEM and its address.  */
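      /* For example (illustrative numbers only): with MEM_ALIGN (src)
	 of 128 bits, align_mask below is -128, so start = 32,
	 width = 64 stays within one aligned quadword (one load), while
	 start = 96, width = 64 crosses into the next quadword and needs
	 two loads.  */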
      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load.  */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  start &= 7;
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}
    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start.  */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  unsigned HOST_WIDE_INT maskbits;
  machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.  */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case E_SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case E_DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case E_TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }

  switch (dst_size)
    {
    case 32:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
      if (start)
	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;

	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes - offset)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE; we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  machine_mode comp_mode;
  machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
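  /* E.g. (illustrative): (ge:SI (reg) (const_int 10)) is rewritten as
     (gt:SI (reg) (const_int 9)), so the cgt patterns in spu_comp_icode
     cover "greater or equal" without needing a separate insn.  */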
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  op_mode = GET_MODE (op0);

      if (HONOR_NANS (op_mode))

      if (HONOR_NANS (op_mode))

      comp_mode = V4SImode;

      comp_mode = V2DImode;

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode
	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
	 comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
	 QI compare result to a HI result.  */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      machine_mode mode = int_mode_for_size (target_size, 0).require ();
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
	 mask based on that result.  */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  if (GET_MODE (x) == SFmode)
    REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return const_double_from_real_value (rv, mode);
}
static void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
static void
print_operand (FILE * file, rtx x, int code)
{
  machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {

	      fprintf (file, "h");

	      fprintf (file, "b");

	    }
	}
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {

	      val = trunc_int_for_mode (val, HImode);

	      val = trunc_int_for_mode (val, QImode);

	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {

	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {

		fprintf (file, "a");

		fprintf (file, "h");

		fprintf (file, "hu");

	      }

	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");

	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");

	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {

	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {

		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);

	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);

	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      val = (val << 1) | (arr[i] & 1);
	    print_operand (file, GEN_INT (val), 0);

	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);

	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (GET_CODE (x) == HIGH)
	      fprintf (file, "@h");

	  }
      }
      return;

      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	}
      return;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  }
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls.  */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (GET_MODE (x), XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  if (!reload_completed && !reload_in_progress)
    abort ();

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
	cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
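/* E.g. (illustrative constant): a 32-bit value such as 0x12345678 has
   no single-insn form, so the IC_IL2 case below materializes it as an
   ilhu of the high halfwords followed by an iohl of the low halfwords,
   combined here with an IOR.  */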
int
spu_split_immediate (rtx * ops)
{
  machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant.  */
	scalar_int_mode imode = int_mode_for_mode (mode).require ();
	constant_to_array (mode, ops[1], arrhi);
	to = simplify_gen_subreg (imode, ops[0], mode, 0);
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	/* We need to do reals as ints because the constant used in the
	   AND might not be a legitimate real constant.  */
	scalar_int_mode imode = int_mode_for_mode (mode).require ();
	constant_to_array (mode, ops[1], arr_fsmbi);
	to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    default:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx_insn *
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx_insn *
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it.  */
static rtx_insn *
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx_insn *insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && crtl->is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  var args   |
         |  reg save   | crtl->args.pretend_args_size bytes
         +-------------+
         |    ...      |
         | saved regs  | spu_saved_regs_size() bytes
   FP -> +-------------+
         |    ...      |
         |    vars     | get_frame_size() bytes
  HFP -> +-------------+
         |    ...      |
         |  outgoing   |
         |    args     | crtl->outgoing_args_size bytes
         +-------------+
         | back chain  |
   SP -> +-------------+
*/
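
/* Illustrative sizing: a non-leaf function with 32 bytes of locals,
   two saved quadword registers (2 * 0x10 = 32 bytes) and no vararg or
   outgoing-arg area allocates 32 + 32 + STACK_POINTER_OFFSET bytes
   below the incoming $sp, per the total_size computation in
   spu_expand_prologue below.  */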
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx_insn *insn;
  rtx real;

  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
    cfun->machine->pic_reg = pic_offset_table_rtx;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register.  */
  if (!crtl->is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && cfun->machine->pic_reg)
    {
      rtx pic_reg = cfun->machine->pic_reg;
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1)  */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4sisi
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
	 the value of the previous $sp because we save it as the back
	 chain.  */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first.  */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!crtl->is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!crtl->is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      emit_jump_insn (gen__return ());
    }
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient.  */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits.  */
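/* For example (illustrative): spu_const (V4SImode, 1) yields the
   vector constant (const_vector:V4SI [1 1 1 1]), while
   spu_const (SFmode, 0x3f800000) yields the float 1.0f, since VAL is
   taken as the bit representation for float modes.  */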
rtx
spu_const (machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Create a MODE vector constant from 4 ints.  */
rtx
spu_const_from_ints (machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
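
/* E.g. (illustrative): spu_const_from_ints (V4SImode, 1, 2, 3, 4)
   builds the big-endian byte image {0,0,0,1, 0,0,0,2, 0,0,0,3,
   0,0,0,4}, i.e. the vector constant {1, 2, 3, 4}.  */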
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks.  */
struct spu_bb_info
{
  rtx_insn *prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block.  */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (CALL_P(INSN) \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary.  */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)

/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
static void
emit_nop_for_insn (rtx_insn *insn)
{
  int p;
  rtx_insn *new_insn;

  /* We need to handle JUMP_TABLE_DATA separately.  */
  if (JUMP_TABLE_DATA_P (insn))
    {
      new_insn = emit_insn_after (gen_lnop (), insn);
      recog_memoized (new_insn);
      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
      return;
    }

  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop.  */
static void
pad_bb (void)
{
  rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  || (a1 - a0 == 4))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
		  length += 4;
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned.  */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
/* Routines for branch hints.  */

static void
spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
		      int distance, sbitmap blocks)
{
  rtx_insn *hint;
  rtx_insn *insn;
  rtx_jump_table_data *table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too.  */
  if (distance > 600)
    return;

  /* If we have a basic block note, emit the hint after the note.  */
  if (NOTE_INSN_BASIC_BLOCK_P (before))
    before = NEXT_INSN (before);

  rtx_code_label *branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
  recog_memoized (hint);
  INSN_LOCATION (hint) = INSN_LOCATION (branch);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
      else
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
	 which could make it too far for the branch offset to fit.  */
      insn = emit_insn_before (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
	 insert nops.  */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
	{
	  insn =
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
	}

      /* Make sure any nops inserted aren't scheduled before the hint.  */
      insn = emit_insn_after (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);

      /* Make sure any nops inserted aren't scheduled after the call.  */
      if (CALL_P (branch) && distance < 8 * 4)
	{
	  insn = emit_insn_before (gen_blockage (), branch);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
	}
    }
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target.  */
static rtx
get_branch_target (rtx_insn *branch)
{
  if (JUMP_P (branch))
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* ASM GOTOs.  */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
	return NULL;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
		 try a branch hint.  */
	      int prob = profile_probability::from_reg_br_prob_note
			    (XINT (note, 0)).to_reg_br_prob_base ();
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (CALL_P (branch))
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
	 the first pattern in the PARALLEL.  */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static int
insn_clobbers_hbr (rtx_insn *insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address.
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.  */
static void
insert_hbrp_for_ilb_runout (rtx_insn *first)
{
  rtx_insn *insn, *before_4 = 0, *before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	  before_4 = insn;
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions.  */
	if (before_16 == 0 && addr + length >= 14 * 4)
	  before_16 = insn;

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	  {
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary.  */
	    if (before_4 == 0 && addr > 0)
	      {
		before_4 = insn;
		insert_lnop_after |= 1;
	      }
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
	      {
		before_16 = insn;
		insert_lnop_after |= 2;
	      }
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;
	  }

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	  {
	    if (addr < hbrp_addr0)
	      hbrp_addr0 = addr;
	    else if (addr < hbrp_addr1)
	      hbrp_addr1 = addr;
	  }

	if (CALL_P (insn) || JUMP_P (insn))
	  {
	    if (HINTED_P (insn))
	      return;

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	       insns.  */
	    if (addr > 10 * 4)
	      SCHED_ON_EVEN_P (insn) = 1;
	  }

	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
	  return;

	if (addr + length >= 32 * 4)
	  {
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		  {
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		  {
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
							(before_16)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    return;
	  }
      }
    else if (BARRIER_P (insn))
      return;
}
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target.  */
static void
insert_hbrp (void)
{
  rtx_insn *insn;
  if (TARGET_SAFE_HINTS)
    {
      shorten_branches (get_insns ());
      /* Insert hbrp at beginning of function */
      insn = next_active_insn (get_insns ());
      if (insn)
	insert_hbrp_for_ilb_runout (insn);
      /* Insert hbrp after hinted targets.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
    }
}

static int in_spu_reorg;

static void
spu_var_tracking (void)
{
  if (flag_var_tracking)
    {
      df_analyze ();
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
      df_finish_pass (false);
    }
}
/* Insert branch hints.  There are no branch optimizations after this
   pass, so it's safe to set our branch hints now.  */
static void
spu_machine_dependent_reorg (void)
{
  sbitmap blocks;
  basic_block bb;
  rtx_insn *branch, *insn;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, required_dist = 0;
  int i;
  unsigned int j;

  if (!TARGET_BRANCH_HINTS || optimize == 0)
    {
      /* We still do it for unoptimized code because an external
         function might have hinted a call or return.  */
      compute_bb_for_insn ();
      insert_hbrp ();
      pad_bb ();
      spu_var_tracking ();
      free_bb_for_insn ();
      return;
    }

  blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
  bitmap_clear (blocks);

  in_spu_reorg = 1;
  compute_bb_for_insn ();

  /* (Re-)discover loops so that bb->loop_father can be used
     in the analysis below.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  compact_blocks ();

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
                                    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      branch = 0;
      if (spu_bb_info[i].prop_jump)
        {
          branch = spu_bb_info[i].prop_jump;
          branch_target = get_branch_target (branch);
          branch_addr = INSN_ADDRESSES (INSN_UID (branch));
          required_dist = spu_hint_dist;
        }
      /* Search from end of a block to beginning.  In this loop, find
         jumps which need a branch hint and emit the hint only when:
         - it's an indirect branch and we're at the insn which sets
           the register
         - we're at an insn that will invalidate the hint.  e.g., a
           call, another hint insn, inline asm that clobbers $hbr, and
           some inlined operations (divmodsi4).  Don't consider jumps
           because they are only at the end of a block and are
           considered when we are deciding whether to propagate
         - we're getting too far away from the branch.  The hbr insns
           only have a signed 10-bit offset.
         We go back as far as possible so the branch will be considered
         for propagation when we get to the beginning of the block.  */
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
        {
          if (INSN_P (insn))
            {
              insn_addr = INSN_ADDRESSES (INSN_UID (insn));
              if (branch
                  && ((GET_CODE (branch_target) == REG
                       && set_of (branch_target, insn) != NULL_RTX)
                      || insn_clobbers_hbr (insn)
                      || branch_addr - insn_addr > 600))
                {
                  rtx_insn *next = NEXT_INSN (insn);
                  int next_addr = INSN_ADDRESSES (INSN_UID (next));
                  if (insn != BB_END (bb)
                      && branch_addr - next_addr >= required_dist)
                    {
                      if (dump_file)
                        fprintf (dump_file,
                                 "hint for %i in block %i before %i\n",
                                 INSN_UID (branch), bb->index,
                                 INSN_UID (next));
                      spu_emit_branch_hint (next, branch, branch_target,
                                            branch_addr - next_addr, blocks);
                    }
                  branch = 0;
                }

              /* JUMP_P will only be true at the end of a block.  When
                 branch is already set it means we've previously decided
                 to propagate a hint for that branch into this block.  */
              if (CALL_P (insn) || (JUMP_P (insn) && !branch))
                {
                  branch = 0;
                  if ((branch_target = get_branch_target (insn)))
                    {
                      branch = insn;
                      branch_addr = insn_addr;
                      required_dist = spu_hint_dist;
                    }
                }
            }
          if (insn == BB_HEAD (bb))
            break;
        }

      if (branch)
        {
          /* If we haven't emitted a hint for this branch yet, it might
             be profitable to emit it in one of the predecessor blocks,
             especially for loops.  */
          rtx_insn *bbend;
          basic_block prev = 0, prop = 0, prev2 = 0;
          int loop_exit = 0, simple_loop = 0;
          int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));

          for (j = 0; j < EDGE_COUNT (bb->preds); j++)
            if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
              prev = EDGE_PRED (bb, j)->src;
            else
              prev2 = EDGE_PRED (bb, j)->src;

          for (j = 0; j < EDGE_COUNT (bb->succs); j++)
            if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
              loop_exit = 1;
            else if (EDGE_SUCC (bb, j)->dest == bb)
              simple_loop = 1;

          /* If this branch is a loop exit then propagate to previous
             fallthru block.  This catches the cases when it is a simple
             loop or when there is an initial branch into the loop.  */
          if (prev && (loop_exit || simple_loop)
              && bb_loop_depth (prev) <= bb_loop_depth (bb))
            prop = prev;

          /* If there is only one adjacent predecessor, don't propagate
             outside this loop.  */
          else if (prev && single_pred_p (bb)
                   && prev->loop_father == bb->loop_father)
            prop = prev;

          /* If this is the JOIN block of a simple IF-THEN then
             propagate the hint to the HEADER block.  */
          else if (prev && prev2
                   && EDGE_COUNT (bb->preds) == 2
                   && EDGE_COUNT (prev->preds) == 1
                   && EDGE_PRED (prev, 0)->src == prev2
                   && prev2->loop_father == bb->loop_father
                   && GET_CODE (branch_target) != REG)
            prop = prev;

          /* Don't propagate when:
             - this is a simple loop and the hint would be too far
             - this is not a simple loop and there are 16 insns in
               this block already
             - the predecessor block ends in a branch that will be
               hinted
             - the predecessor block ends in an insn that invalidates
               the hint.  */
          if (prop
              && (bbend = BB_END (prop))
              && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
              (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
              && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
            {
              if (dump_file)
                fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
                         "for %i (loop_exit %i simple_loop %i dist %i)\n",
                         bb->index, prop->index, bb_loop_depth (bb),
                         INSN_UID (branch), loop_exit, simple_loop,
                         branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

              spu_bb_info[prop->index].prop_jump = branch;
              spu_bb_info[prop->index].bb_index = i;
            }
          else if (branch_addr - next_addr >= required_dist)
            {
              if (dump_file)
                fprintf (dump_file, "hint for %i in block %i before %i\n",
                         INSN_UID (branch), bb->index,
                         INSN_UID (NEXT_INSN (insn)));
              spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
                                    branch_addr - next_addr, blocks);
            }
          branch = 0;
        }
    }
  free (spu_bb_info);

  if (!bitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok.  */
  FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again.  */
  schedule_insns ();
  df_finish_pass (true);

  insert_hbrp ();

  pad_bb ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
      {
        /* Adjust the LABEL_REF in a hint when we have inserted a nop
           between its branch label and the branch.  We don't move the
           label because GCC expects it at the beginning of the block.  */
        rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
        rtx label_ref = XVECEXP (unspec, 0, 0);
        rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
        rtx_insn *branch;
        int offset = 0;
        for (branch = NEXT_INSN (label);
             !JUMP_P (branch) && !CALL_P (branch);
             branch = NEXT_INSN (branch))
          if (NONJUMP_INSN_P (branch))
            offset += get_attr_length (branch);
        if (offset > 0)
          XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
      }

  spu_var_tracking ();

  loop_optimizer_finalize ();

  free_bb_for_insn ();

  in_spu_reorg = 0;
}
/* Insn scheduling routines, primarily for dual issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
          || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}

static int
get_pipe (rtx_insn *insn)
{
  enum attr_type t;
  /* Handle inline asm.  */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
    case TYPE_IPREFETCH:
      return 1;
    default:
      abort ();
    }
}
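/* Illustrative note (not from the original source): on the SPU, the even
   pipeline (pipe 0) handles fixed-point and floating-point arithmetic,
   while the odd pipeline (pipe 1) handles loads, stores, shuffles,
   branches and hints.  So, for example, an `a' (add) paired with an
   adjacent `lqd' can dual issue, but two loads never can -- which is
   what the 0/1 classification above encodes.  */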
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}

static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                int max_ready ATTRIBUTE_UNUSED)
{
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
    {
      /* When any block might be at least 8-byte aligned, assume they
         will all be at least 8-byte aligned to make sure dual issue
         works out correctly.  */
      spu_sched_length = 0;
    }
  spu_ls_first = INT_MAX;
  clock_var = -1;
  prev_ls_clock = -1;
  pipe0_clock = -1;
  pipe1_clock = -1;
  prev_clock_var = -1;
  prev_priority = -1;
}
static int
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
                          int verbose ATTRIBUTE_UNUSED,
                          rtx_insn *insn, int more)
{
  int len;
  int p;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || (len = get_attr_length (insn)) == 0)
    return more;

  spu_sched_length += len;

  /* Reset on inline asm.  */
  if (INSN_CODE (insn) == -1)
    {
      spu_ls_first = INT_MAX;
      pipe0_clock = -1;
      pipe1_clock = -1;
      return 0;
    }
  p = get_pipe (insn);
  if (p == 0)
    pipe0_clock = clock_var;
  else
    pipe1_clock = clock_var;

  if (in_spu_reorg)
    {
      if (clock_var - prev_ls_clock > 1
          || INSN_CODE (insn) == CODE_FOR_iprefetch)
        spu_ls_first = INT_MAX;
      if (uses_ls_unit (insn))
        {
          if (spu_ls_first == INT_MAX)
            spu_ls_first = spu_sched_length;
          prev_ls_clock = clock_var;
        }

      /* The scheduler hasn't inserted the nop, but we will later on.
         Include those nops in spu_sched_length.  */
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
        spu_sched_length += 4;
      prev_clock_var = clock_var;

      /* more is -1 when called from spu_sched_reorder for new insns
         that don't have INSN_PRIORITY.  */
      if (more >= 0)
        prev_priority = INSN_PRIORITY (insn);
    }

  /* Always try issuing more insns.  spu_sched_reorder will decide
     when the cycle should be advanced.  */
  return 1;
}
/* This function is called for both TARGET_SCHED_REORDER and
   TARGET_SCHED_REORDER2.  */
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
                   rtx_insn **ready, int *nreadyp, int clock)
{
  int i, nready = *nreadyp;
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
  rtx_insn *insn;

  clock_var = clock;

  if (nready <= 0 || pipe1_clock >= clock)
    return 0;

  /* Find any rtl insns that don't generate assembly insns and schedule
     them first.  */
  for (i = nready - 1; i >= 0; i--)
    {
      insn = ready[i];
      if (INSN_CODE (insn) == -1
          || INSN_CODE (insn) == CODE_FOR_blockage
          || (INSN_P (insn) && get_attr_length (insn) == 0))
        {
          ready[i] = ready[nready - 1];
          ready[nready - 1] = insn;
          return 1;
        }
    }

  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
  for (i = 0; i < nready; i++)
    if (INSN_CODE (ready[i]) != -1)
      {
        insn = ready[i];
        switch (get_attr_type (insn))
          {
          default:
          case TYPE_MULTI0:
          case TYPE_CONVERT:
          case TYPE_FX2:
          case TYPE_FX3:
          case TYPE_SPR:
          case TYPE_NOP:
          case TYPE_FXB:
          case TYPE_FPD:
          case TYPE_FP6:
          case TYPE_FP7:
            pipe_0 = i;
            break;
          case TYPE_LOAD:
          case TYPE_STORE:
            pipe_ls = i;
            /* FALLTHRU */
          case TYPE_LNOP:
          case TYPE_SHUF:
          case TYPE_BR:
          case TYPE_MULTI1:
          case TYPE_HBR:
            pipe_1 = i;
            break;
          case TYPE_IPREFETCH:
            pipe_hbrp = i;
            break;
          }
      }

  /* In the first scheduling phase, schedule loads and stores together
     to increase the chance they will get merged during postreload CSE.  */
  if (!reload_completed && pipe_ls >= 0)
    {
      insn = ready[pipe_ls];
      ready[pipe_ls] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }

  /* If there is an hbrp ready, prefer it over other pipe 1 insns.  */
  if (pipe_hbrp >= 0)
    pipe_1 = pipe_hbrp;

  /* When we have loads/stores in every cycle of the last 15 insns and
     we are about to schedule another load/store, emit an hbrp insn
     instead.  */
  if (in_spu_reorg
      && spu_sched_length - spu_ls_first >= 4 * 15
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
    {
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
      recog_memoized (insn);
      if (pipe0_clock < clock)
        PUT_MODE (insn, TImode);
      spu_sched_variable_issue (file, verbose, insn, -1);
      return 0;
    }

  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to adjust it.  */
  if (in_spu_reorg && spu_dual_nops < 10)
    {
      /* When we are at an even address and we are not issuing nops to
         improve scheduling then we need to advance the cycle.  */
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
          && (spu_dual_nops == 0
              || (pipe_1 != -1
                  && prev_priority >
                  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
        return 0;

      /* When at an odd address, schedule the highest priority insn
         without considering pipeline.  */
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
          && (spu_dual_nops == 0
              || (prev_priority >
                  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
        return 1;
    }

  /* We haven't issued a pipe0 insn yet this cycle, if there is a
     pipe0 insn in the ready list, schedule it.  */
  if (pipe0_clock < clock && pipe_0 >= 0)
    schedule_i = pipe_0;

  /* Either we've scheduled a pipe0 insn already or there is no pipe0
     insn to schedule.  Put a pipe1 insn at the front of the ready list.  */
  else
    schedule_i = pipe_1;

  if (schedule_i > -1)
    {
      insn = ready[schedule_i];
      ready[schedule_i] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }
  return 0;
}
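/* Worked example (illustrative, not from the original source): SPU insns
   are 4 bytes, and dual issue happens only when the pipe0 insn sits at an
   8-byte aligned address with the pipe1 insn immediately after it.  So
   (spu_sched_length & 7) == 0 means the next insn lands in a pipe0 slot,
   and (spu_sched_length & 7) == 4 means it lands in a pipe1 slot, which
   is what the two alignment tests above key off.  */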
/* INSN is dependent on DEP_INSN.  */
static int
spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
                       int cost, unsigned int)
{
  rtx set;

  /* The blockage pattern is used to prevent instructions from being
     moved across it and has no cost.  */
  if (INSN_CODE (insn) == CODE_FOR_blockage
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
    return 0;

  if ((INSN_P (insn) && get_attr_length (insn) == 0)
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
    return 0;

  /* Make sure hbrps are spread out.  */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 8;

  /* Make sure hints and hbrps are 2 cycles apart.  */
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
       || INSN_CODE (insn) == CODE_FOR_hbr)
      && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
          || INSN_CODE (dep_insn) == CODE_FOR_hbr))
    return 2;

  /* An hbrp has no real dependency on other insns.  */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 0;

  /* Assuming that it is unlikely an argument register will be used in
     the first cycle of the called function, we reduce the cost for
     slightly better scheduling of dep_insn.  When not hinted, the
     mispredicted branch would hide the cost as well.  */
  if (CALL_P (insn))
    {
      rtx target = get_branch_target (insn);
      if (GET_CODE (target) != REG || !set_of (target, insn))
        return cost - 2;
      return cost;
    }

  /* And when returning from a function, let's assume the return values
     are completed sooner too.  */
  if (CALL_P (dep_insn))
    return cost - 2;

  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued.  */
  if (INSN_CODE (insn) == CODE_FOR__return
      && (set = single_set (dep_insn))
      && GET_CODE (SET_DEST (set)) == REG
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
    return 20;

  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier.  */
  if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;

  return cost;
}
/* Create a CONST_DOUBLE from a string.  */
rtx
spu_float_const (const char *string, machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return const_double_from_real_value (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
          || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
          || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
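/* Worked examples (illustrative, not from the original source):
   0x00001234 fits the signed 16-bit range, so il suffices; 0x00031234
   fits 18 bits unsigned, so ila; 0x12341234 repeats its low halfword,
   so ilh; and 0x12340000 has a zero low halfword, so ilhu.  A value
   like 0x12345678 matches none of these (SPU_NONE) and needs an
   ilhu/iohl pair.  */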
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
             || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
        if (!run)
          {
            start = i;
            if (arr[i] == 3)
              run = 1;
            else if (arr[i] == 2 && arr[i + 1] == 3)
              run = 2;
            else if (arr[i] == 0)
              {
                while (arr[i + run] == run && i + run < 16)
                  run++;
                if (run != 4 && run != 8)
                  cpat = 0;
              }
            else
              cpat = 0;
            if ((i & (run - 1)) != 0)
              cpat = 0;
            i += run;
          }
        else
          cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
        run = 1;
      if (prun)
        *prun = run;
      if (pstart)
        *pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
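/* Illustrative example (not from the original source): cwd builds the
   16-byte insertion pattern
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18, ..., 0x1f }
   for inserting a word at offset 4: every byte is the identity value
   i+16 except the four-byte run 0,1,2,3 at the insertion point.  That
   is exactly what the loop above detects (run == 4, start == 4).  */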
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT.  */
static enum immediate_class
classify_immediate (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok.  */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
    op = unwrap_const_vec_duplicate (op);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      /* We can never know if the resulting address fits in 18 bits and can be
         loaded with ila.  For now, assume the address will not overflow if
         the displacement is "small" (fits 'K' constraint).  */
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
        {
          rtx sym = XEXP (XEXP (op, 0), 0);
          rtx cst = XEXP (XEXP (op, 0), 1);

          if (GET_CODE (sym) == SYMBOL_REF
              && GET_CODE (cst) == CONST_INT
              && satisfies_constraint_K (cst))
            return IC_IL1s;
        }
      return IC_IL2s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
        if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
            && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
          return IC_POOL;
      /* Fall through.  */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical.  */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
        for (j = 0; j < 4; j++)
          if (arr[j] != arr[i + j])
            repeated = 0;

      if (repeated)
        {
          val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
          val = trunc_int_for_mode (val, SImode);

          if (which_immediate_load (val) != SPU_NONE)
            return IC_IL1;
        }

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction.  */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
        if (arr[i] != 0 && repeat == 0)
          repeat = arr[i];
        else if (arr[i] != 0 && arr[i] != repeat)
          fsmbi = 0;
      if (fsmbi)
        return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
        return IC_CPAT;

      if (repeated)
        return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
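/* Illustrative examples (not from the original source): a V4SI vector of
   four 0x00000003 words loads with one il (IC_IL1); a byte pattern of
   only 0x00 and 0xff bytes is a mask fsmbi can build (IC_FSMBI);
   0x12345678 repeated in every word slot matches no single instruction
   and classifies as IC_IL2 (ilhu + iohl); and four distinct words such
   as {1,2,3,4} fall back to the constant pool (IC_POOL).  */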
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
        return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
        {
          val = trunc_int_for_mode (val, QImode);
          if (val >= -0x200 && val <= 0x1ff)
            return SPU_ORBI;
        }
    }
  return SPU_NONE;
}
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, machine_mode mode,
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  bytes = GET_MODE_UNIT_SIZE (mode);
  mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same.  */
bool
exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
{
  machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = int_mode_for_mode (mode).require ();

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode.  */
  gcc_assert (mode == SFmode);
  if (mode == SFmode)
    {
      int exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0
             && exp >= low && exp <= high;
    }
  return false;
}
/* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
static bool
ea_symbol_ref_p (const_rtx x)
{
  tree decl;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    {
      rtx plus = XEXP (x, 0);
      rtx op0 = XEXP (plus, 0);
      rtx op1 = XEXP (plus, 1);
      if (GET_CODE (op1) == CONST_INT)
        x = op0;
    }

  return (GET_CODE (x) == SYMBOL_REF
          && (decl = SYMBOL_REF_DECL (x)) != 0
          && TREE_CODE (decl) == VAR_DECL
          && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}
/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match.  */
bool
spu_legitimate_constant_p (machine_mode mode, rtx x)
{
  subrtx_iterator::array_type array;
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Reject any __ea qualified reference.  These can't appear in
     instructions but must be forced to the constant pool.  */
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (ea_symbol_ref_p (*iter))
      return 0;

  /* V4SI with all identical symbols is valid.  */
  if (!flag_pic
      && mode == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return const_vec_duplicate_p (x);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode.  */
static bool
spu_legitimate_address_p (machine_mode mode,
                          rtx x, bool reg_ok_strict)
{
  int aligned = GET_MODE_SIZE (mode) >= 16;
  if (aligned
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case SYMBOL_REF:
    case CONST:
      /* Keep __ea references until reload so that spu_expand_mov can see them
         in MEMs.  */
      if (ea_symbol_ref_p (x))
        return !reload_in_progress && !reload_completed;
      return !TARGET_LARGE_MEM;

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      if (!REG_P (x))
        return 0;
      /* FALLTHRU */

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
        rtx op0 = XEXP (x, 0);
        rtx op1 = XEXP (x, 1);
        if (GET_CODE (op0) == SUBREG)
          op0 = XEXP (op0, 0);
        if (GET_CODE (op1) == SUBREG)
          op1 = XEXP (op1, 0);
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == CONST_INT
            && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
                /* If virtual registers are involved, the displacement will
                   change later on anyway, so checking would be premature.
                   Reload will make sure the final displacement after
                   register elimination is OK.  */
                || op0 == arg_pointer_rtx
                || op0 == frame_pointer_rtx
                || op0 == virtual_stack_vars_rtx)
            && (!aligned || (INTVAL (op1) & 15) == 0))
          return true;
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == REG
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
          return true;
      }
      break;

    default:
      break;
    }
  return false;
}
/* Like spu_legitimate_address_p, except with named addresses.  */
static bool
spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
                                     bool reg_ok_strict, addr_space_t as)
{
  if (as == ADDR_SPACE_EA)
    return (REG_P (x) && (GET_MODE (x) == EAmode));

  else if (as != ADDR_SPACE_GENERIC)
    gcc_unreachable ();

  return spu_legitimate_address_p (mode, x, reg_ok_strict);
}
/* When the address is reg + const_int, force the const_int into a
   register.  */
static rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                        machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx op0, op1;
  /* Make sure both operands are registers.  */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
        {
          op0 = force_reg (Pmode, op0);
          mark_reg_pointer (op0, 128);
        }
      else if (GET_CODE (op0) != REG)
        op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
        {
          op1 = force_reg (Pmode, op1);
          mark_reg_pointer (op1, 128);
        }
      else if (GET_CODE (op1) != REG)
        op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
    }
  return x;
}
/* Like spu_legitimize_address, except with named address support.  */
static rtx
spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
                                   addr_space_t as)
{
  if (as != ADDR_SPACE_GENERIC)
    return x;

  return spu_legitimize_address (x, oldx, mode);
}
/* Reload reg + const_int for out-of-range displacements.  */
rtx
spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
                               int opnum, int type)
{
  bool removed_and = false;

  if (GET_CODE (ad) == AND
      && CONST_INT_P (XEXP (ad, 1))
      && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
    {
      ad = XEXP (ad, 0);
      removed_and = true;
    }

  if (GET_CODE (ad) == PLUS
      && REG_P (XEXP (ad, 0))
      && CONST_INT_P (XEXP (ad, 1))
      && !(INTVAL (XEXP (ad, 1)) >= -0x2000
           && INTVAL (XEXP (ad, 1)) <= 0x1fff))
    {
      /* Unshare the sum.  */
      ad = copy_rtx (ad);

      /* Reload the displacement.  */
      push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
                   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);

      /* Add back AND for alignment if we stripped it.  */
      if (removed_and)
        ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));

      return ad;
    }

  return NULL_RTX;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */
static tree
spu_handle_fndecl_attribute (tree * node,
                             tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle the "vector" attribute.  */
static tree
spu_handle_vector_attribute (tree * node, tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case E_DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case E_SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case E_HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case E_QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case E_SFmode:
      result = V4SF_type_node;
      break;
    case E_DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (!result)
    warning (0, "%qE attribute ignored", name);
  else
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!crtl->is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size ();
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
           + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  else
    gcc_unreachable ();
}
rtx
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
{
  machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
        {
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
                                                gen_rtx_REG (TImode,
                                                             FIRST_RETURN_REGNUM
                                                             + i),
                                                GEN_INT (UNITS_PER_WORD * i));
          byte_size -= UNITS_PER_WORD;
        }

      if (n < nregs)
        {
          smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
          RTVEC_ELT (v, n) =
            gen_rtx_EXPR_LIST (VOIDmode,
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
                               GEN_INT (UNITS_PER_WORD * n));
        }
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
static rtx
spu_function_arg (cumulative_args_t cum_v,
                  machine_mode mode,
                  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  int byte_size;

  if (*cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack.  */
  if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      machine_mode smode;
      rtx gr_reg;
      if (byte_size < 4)
        byte_size = 4;
      smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
                                  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }
  else
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
}
static void
spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
                          const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
           ? 1
           : mode == BLKmode
           ? ((int_size_in_bytes (type) + 15) / 16)
           : mode == VOIDmode
           ? 1
           : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
}
/* Implement TARGET_FUNCTION_ARG_PADDING.  */

static pad_direction
spu_function_arg_padding (machine_mode, const_tree)
{
  return PAD_UPWARD;
}
/* Variable sized types are passed by reference.  */
static bool
spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
                       machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            void *__args __attribute__((__aligned(16)));
            void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes.  */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (BUILTINS_LOCATION,
                TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (BUILTINS_LOCATION,
                       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (BUILTINS_LOCATION,
                       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  SET_DECL_ALIGN (f_args, 128);
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  SET_DECL_ALIGN (f_skip, 128);
  DECL_USER_ALIGN (f_skip) = 1;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  DECL_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an internal
     type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info;
       the CUMULATIVE_ARGS for this function

     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

static void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  valist = build_simple_mem_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (crtl->args.pretend_args_size > 0)
    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
                                       - STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
        && VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
                          gimple_seq * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");

  /* If an object is dynamically sized, a pointer to it is passed
     instead of the object itself.  */
  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
                                           false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* Build conditional expression to calculate addr.  The expression
     will be gimplified later.  */
  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
                build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
                build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
                        unshare_expr (skip)));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
                fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
                unshare_expr (args));

  gimplify_assign (addr, tmp, pre_p);

  /* Update VALIST.__args.  */
  tmp = fold_build_pointer_plus_hwi (addr, rsize);
  gimplify_assign (unshare_expr (args), tmp, pre_p);

  addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
                       addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameters.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers.  */
static void
spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
                            tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *get_cumulative_args (cum);

      /* cum currently points to the last named argument, we want to
         start at the next argument.  */
      spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
        {
          tmp = gen_frame_mem (V4SImode,
                               plus_constant (Pmode, virtual_incoming_args_rtx,
                                              offset));
          emit_move_insn (tmp,
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
          offset += 16;
        }
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}
/* This is called any time we inspect the alignment of a register for
   addresses.  */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1.  */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT.  */
static int
store_with_one_insn_p (rtx mem)
{
  machine_mode mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  if (mode == BLKmode)
    return 0;
  if (GET_MODE_SIZE (mode) >= 16)
    return 1;
  /* Only static objects.  */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types?  */
      tree decl = MEM_EXPR (mem);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
    }
  return 0;
}
/* Return 1 when the address is not valid for a simple load and store as
   required by the '_mov*' patterns.  We could make this less strict
   for loads, but we prefer mems to look the same so they are more
   likely to be merged.  */
static int
address_needs_split (rtx mem)
{
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
          || !(store_with_one_insn_p (mem)
               || mem_is_padded_component_ref (mem))))
    return 1;

  return 0;
}
static GTY(()) rtx cache_fetch;           /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty;     /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */

/* MEM is known to be an __ea qualified memory access.  Emit a call to
   fetch the ppu memory to local store, and return its address in local
   store.  */

static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  if (is_store)
    {
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
      if (!cache_fetch_dirty)
        cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
                               ea_addr, EAmode, ndirty, SImode);
    }
  else
    {
      if (!cache_fetch)
        cache_fetch = init_one_libfunc ("__cache_fetch");
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
                               ea_addr, EAmode);
    }
}
/* Like ea_load_store, but do the cache tag comparison and, for stores,
   dirty bit marking, inline.

   The cache control data structure is an array of

   struct __cache_tag_array
     {
        unsigned int tag_lo[4];
        unsigned int tag_hi[4];
        void *data_pointer[4];
        int reserved[4];
        vector unsigned short dirty_bits[4];
     }  */

static void
ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  rtx ea_addr_si;
  HOST_WIDE_INT v;
  rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
  rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
  rtx index_mask = gen_reg_rtx (SImode);
  rtx tag_arr = gen_reg_rtx (Pmode);
  rtx splat_mask = gen_reg_rtx (TImode);
  rtx splat = gen_reg_rtx (V4SImode);
  rtx splat_hi = NULL_RTX;
  rtx tag_index = gen_reg_rtx (Pmode);
  rtx block_off = gen_reg_rtx (SImode);
  rtx tag_addr = gen_reg_rtx (Pmode);
  rtx tag = gen_reg_rtx (V4SImode);
  rtx cache_tag = gen_reg_rtx (V4SImode);
  rtx cache_tag_hi = NULL_RTX;
  rtx cache_ptrs = gen_reg_rtx (TImode);
  rtx cache_ptrs_si = gen_reg_rtx (SImode);
  rtx tag_equal = gen_reg_rtx (V4SImode);
  rtx tag_equal_hi = NULL_RTX;
  rtx tag_eq_pack = gen_reg_rtx (V4SImode);
  rtx tag_eq_pack_si = gen_reg_rtx (SImode);
  rtx eq_index = gen_reg_rtx (SImode);
  rtx bcomp, hit_label, hit_ref, cont_label;
  rtx_insn *insn;

  if (spu_ea_model != 32)
    {
      splat_hi = gen_reg_rtx (V4SImode);
      cache_tag_hi = gen_reg_rtx (V4SImode);
      tag_equal_hi = gen_reg_rtx (V4SImode);
    }

  emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
  emit_move_insn (tag_arr, tag_arr_sym);
  v = 0x0001020300010203LL;
  emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
  ea_addr_si = ea_addr;
  if (spu_ea_model != 32)
    ea_addr_si = convert_to_mode (SImode, ea_addr, 1);

  /* tag_index = ea_addr & (tag_array_size - 128)  */
  emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));

  /* splat ea_addr to all 4 slots.  */
  emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
  /* Similarly for high 32 bits of ea_addr.  */
  if (spu_ea_model != 32)
    emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));

  /* block_off = ea_addr & 127  */
  emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));

  /* tag_addr = tag_arr + tag_index  */
  emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));

  /* Read cache tags.  */
  emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
  if (spu_ea_model != 32)
    emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
                                               plus_constant (Pmode,
                                                              tag_addr, 16)));

  /* tag = ea_addr & -128  */
  emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));

  /* Read all four cache data pointers.  */
  emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
                                           plus_constant (Pmode,
                                                          tag_addr, 32)));

  /* Compare tags.  */
  emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
  if (spu_ea_model != 32)
    {
      emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
      emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
    }

  /* At most one of the tags compare equal, so tag_equal has one
     32-bit slot set to all 1's, with the other slots all zero.
     gbb picks off low bit from each byte in the 128-bit registers,
     so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
     we have a hit.  */
  emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
  emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));

  /* So counting leading zeros will set eq_index to 16, 20, 24 or 28.  */
  emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));

  /* Allowing us to rotate the corresponding cache data pointer to slot0.
     (rotating eq_index mod 16 bytes).  */
  emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
  emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));

  /* Add block offset to form final data address.  */
  emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));

  /* Check that we did hit.  */
  hit_label = gen_label_rtx ();
  hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
  bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
  insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
                                      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
                                                            hit_ref, pc_rtx)));
  /* Say that this branch is very likely to happen.  */
  add_reg_br_prob_note (insn, profile_probability::very_likely ());

  ea_load_store (mem, is_store, ea_addr, data_addr);
  cont_label = gen_label_rtx ();
  emit_jump_insn (gen_jump (cont_label));
  emit_barrier ();

  emit_label (hit_label);

  if (is_store)
    {
      HOST_WIDE_INT v_hi;
      rtx dirty_bits = gen_reg_rtx (TImode);
      rtx dirty_off = gen_reg_rtx (SImode);
      rtx dirty_128 = gen_reg_rtx (TImode);
      rtx neg_block_off = gen_reg_rtx (SImode);

      /* Set up mask with one dirty bit per byte of the mem we are
         writing, starting from top bit.  */
      v_hi = v = -1;
      v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
      if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
        {
          v_hi = v;
          v = 0;
        }
      emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));

      /* Form index into cache dirty_bits.  eq_index is one of
         0x10, 0x14, 0x18 or 0x1c.  Multiplying by 4 gives us
         0x40, 0x50, 0x60 or 0x70 which just happens to be the
         offset to each of the four dirty_bits elements.  */
      emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));

      emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));

      /* Rotate bit mask to proper bit.  */
      emit_insn (gen_negsi2 (neg_block_off, block_off));
      emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
      emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));

      /* Or in the new dirty bits.  */
      emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));

      /* Store.  */
      emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
    }

  emit_label (cont_label);
}
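/* Worked example of the hit path above (illustrative, not from the
   original source): if the tag in slot 2 matches, tag_equal is
   {0, 0, -1, 0}, gbb packs that to tag_eq_pack_si = 0x00f0, clz gives
   eq_index = 24, rotating the pointer quadword by 24 mod 16 = 8 bytes
   moves data_pointer[2] into slot 0, and for a store
   eq_index << 2 = 0x60 indexes dirty_bits[2].  */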
static rtx
expand_ea_mem (rtx mem, bool is_store)
{
  rtx ea_addr;
  rtx data_addr = gen_reg_rtx (Pmode);
  rtx new_mem;

  ea_addr = force_reg (EAmode, XEXP (mem, 0));
  if (optimize_size || optimize == 0)
    ea_load_store (mem, is_store, ea_addr, data_addr);
  else
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);

  if (ea_alias_set == -1)
    ea_alias_set = new_alias_set ();

  /* We generate a new MEM RTX to refer to the copy of the data
     in the cache.  We do not copy memory attributes (except the
     alignment) from the original MEM, as they may no longer apply
     to the cache copy.  */
  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
  set_mem_alias_set (new_mem, ea_alias_set);
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));

  return new_mem;
}
int
spu_expand_mov (rtx * ops, machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    {
      /* Perform the move in the destination SUBREG's inner mode.  */
      ops[0] = SUBREG_REG (ops[0]);
      mode = GET_MODE (ops[0]);
      ops[1] = gen_lowpart_common (mode, ops[1]);
      gcc_assert (ops[1]);
    }

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
                  && GET_MODE_CLASS (imode) == MODE_INT
                  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
        imode = SImode;
      if (imode != GET_MODE (from))
        from = gen_rtx_SUBREG (imode, from, 0);

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
        {
          enum insn_code icode = convert_optab_handler (trunc_optab,
                                                        mode, imode);
          emit_insn (GEN_FCN (icode) (ops[0], from));
        }
      else
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
        return spu_split_immediate (ops);
      return 0;
    }

  /* Catch the SImode immediates greater than 0x7fffffff, and sign
     extend them.  */
  if (GET_CODE (ops[1]) == CONST_INT)
    {
      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
      if (val != INTVAL (ops[1]))
        {
          emit_move_insn (ops[0], GEN_INT (val));
          return 1;
        }
    }
  if (MEM_P (ops[0]))
    {
      if (MEM_ADDR_SPACE (ops[0]))
        ops[0] = expand_ea_mem (ops[0], true);
      return spu_split_store (ops);
    }
  if (MEM_P (ops[1]))
    {
      if (MEM_ADDR_SPACE (ops[1]))
        ops[1] = expand_ea_mem (ops[1], false);
      return spu_split_load (ops);
    }

  return 0;
}
static void
spu_convert_move (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  machine_mode int_mode = int_mode_for_mode (mode).require ();
  rtx reg;
  gcc_assert (GET_MODE (src) == TImode);
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
  emit_insn (gen_rtx_SET (reg,
                          gen_rtx_TRUNCATE (int_mode,
                                            gen_rtx_LSHIFTRT (TImode, src,
                                                              GEN_INT (int_mode == DImode ? 64 : 96)))));
  if (int_mode != mode)
    {
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
      emit_move_insn (dst, reg);
    }
}
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
   the address from SRC and SRC+16.  Return a REG or CONST_INT that
   specifies how many bytes to rotate the loaded registers, plus any
   extra from EXTRA_ROTQBY.  The address and rotate amounts are
   normalized to improve merging of loads and rotate computations.  */
static rtx
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
{
  rtx addr = XEXP (src, 0);
  rtx p0, p1, rot, addr0, addr1;
  int rot_amt;

  rot = 0;
  rot_amt = 0;

  if (MEM_ALIGN (src) >= 128)
    /* Address is already aligned; simply perform a TImode load.  */ ;
  else if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (!reg_aligned_for_addr (p0))
        {
          if (REG_P (p1) && !reg_aligned_for_addr (p1))
            {
              rot = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (rot, p0, p1));
            }
          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              if (INTVAL (p1) > 0
                  && REG_POINTER (p0)
                  && INTVAL (p1) * BITS_PER_UNIT
                     < REGNO_POINTER_ALIGN (REGNO (p0)))
                {
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = p0;
                }
              else
                {
                  rtx x = gen_reg_rtx (SImode);
                  emit_move_insn (x, p1);
                  if (!spu_arith_operand (p1, SImode))
                    p1 = x;
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = gen_rtx_PLUS (Pmode, p0, x);
                }
            }
          else
            rot = p0;
        }
      else
        {
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              rot_amt = INTVAL (p1) & 15;
              if (INTVAL (p1) & -16)
                {
                  p1 = GEN_INT (INTVAL (p1) & -16);
                  addr = gen_rtx_PLUS (SImode, p0, p1);
                }
              else
                addr = p0;
            }
          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
            rot = p1;
        }
    }
  else if (REG_P (addr))
    {
      if (!reg_aligned_for_addr (addr))
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
          if (rot_amt & -16)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (rot_amt & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
        }
      else
        {
          rot = gen_reg_rtx (Pmode);
          emit_move_insn (rot, addr);
        }
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    {
      rot = gen_reg_rtx (Pmode);
      emit_move_insn (rot, addr);
    }

  rot_amt += extra_rotby;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      rtx x = gen_reg_rtx (SImode);
      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
      rot = x;
      rot_amt = 0;
    }
  if (!rot && rot_amt)
    rot = GEN_INT (rot_amt);

  addr0 = copy_rtx (addr);
  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));

  if (dst1)
    {
      addr1 = plus_constant (SImode, copy_rtx (addr), 16);
      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
    }

  return rot;
}
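/* Usage sketch (illustrative, not from the original source): for a load
   from `reg + 2' where reg is 16-byte aligned, the code above leaves
   addr = reg (the offset's low bits become rot_amt = 2), emits a single
   lqd from the masked address, and returns the rotate amount so the
   caller can apply one rotqby/rotqbyi to put the requested bytes in the
   preferred slot.  */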
int
spu_split_load (rtx * ops)
{
  machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot;
  int rot_amt;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[1], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[1]))
    {
      ops[1] = change_address (ops[1], TImode, addr);
      load = gen_reg_rtx (TImode);
      emit_insn (gen__movti (load, ops[1]));
      spu_convert_move (ops[0], load);
      return 1;
    }

  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;

  load = gen_reg_rtx (TImode);
  rot = spu_expand_load (load, 0, ops[1], rot_amt);

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));

  spu_convert_move (ops[0], load);
  return 1;
}
int
spu_split_store (rtx * ops)
{
  machine_mode mode = GET_MODE (ops[0]);
  rtx reg;
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[0], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[0]))
    {
      reg = gen_reg_rtx (TImode);
      emit_insn (gen_spu_convert (reg, ops[1]));
      ops[0] = change_address (ops[0], TImode, addr);
      emit_move_insn (ops[0], reg);
      return 1;
    }

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
	 aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
	 aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
	 aligned reg   + aligned const   => lqd, c?d, shuf, stqx
	 aligned reg   + unaligned const => lqd, c?d, shuf, stqx
	 unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
	 unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
	 unaligned reg + aligned const   => lqd, c?d, shuf, stqx
	 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
      */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (p1) & 15);
	  if (reg_aligned_for_addr (p0))
	    {
	      p1 = GEN_INT (INTVAL (p1) & -16);
	      if (p1 == const0_rtx)
		addr = p0;
	      else
		addr = gen_rtx_PLUS (SImode, p0, p1);
	    }
	  else
	    {
	      rtx x = gen_reg_rtx (SImode);
	      emit_move_insn (x, p1);
	      addr = gen_rtx_PLUS (SImode, p0, x);
	    }
	}
    }
  else if (REG_P (addr))
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;			/* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
	p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST
	       && GET_CODE (XEXP (addr, 0)) == PLUS
	       && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
	       && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
	{
	  HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
	  if (v & -16)
	    addr = gen_rtx_CONST (Pmode,
				  gen_rtx_PLUS (Pmode,
						XEXP (XEXP (addr, 0), 0),
						GEN_INT (v & -16)));
	  else
	    addr = XEXP (XEXP (addr, 0), 0);
	  p1_lo = GEN_INT (v & 15);
	}
      else if (GET_CODE (addr) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (addr) & 15);
	  addr = GEN_INT (INTVAL (addr) & -16);
	}
      else
	{
	  p1_lo = gen_reg_rtx (SImode);
	  emit_move_insn (p1_lo, addr);
	}
    }

  gcc_assert (aform == 0 || aform == 1);
  reg = gen_reg_rtx (TImode);

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
	 We don't because we want this load to be optimized away if
	 possible, and copying the flags will prevent that in certain
	 cases, e.g. consider the volatile flag.  */

      rtx pat = gen_reg_rtx (TImode);
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_aligned_for_addr (p0))
	p0 = stack_pointer_rtx;
      if (!p1_lo)
	p1_lo = const0_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
	emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
	abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_ashlti3
	       (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));

  smem = change_address (ops[0], TImode, copy_rtx (addr));
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types.  */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
  return 1;
}
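
/* A rough illustration (not from the original source): storing a
   misaligned SImode value in $rv through the address in $ra becomes a
   read-modify-write of the enclosing quadword, approximately

	lqd	$old, 0($ra)		load the surrounding 16 bytes
	cwd	$pat, 0($ra)		build the 4-byte insertion mask
	shufb	$new, $rv, $old, $pat	splice the value into the quadword
	stqd	$new, 0($ra)		store the quadword back

   matching the "lqd, c?d, shuf, stqx"-style sequences tabulated in the
   comment above.  */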
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs.  */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode.  */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded.  */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
/* Parse the -mfixed-range= option string.  */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}.  REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
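
/* Illustrative usage (an assumption, based on decode_reg_name accepting
   the port's register names): an option such as -mfixed-range=$80-$85
   would mark $80 through $85 as fixed and call-used, so the allocator
   never assigns them; several comma-separated ranges may be given.  */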
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi.  */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}
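
/* Illustrative note (not from the original source): fsmbi expands each
   of its 16 immediate bits into a byte of 0x00 or 0xff, so a constant
   such as the V4SI vector { -1, 0, 0, -1 } corresponds to the mask
   0xf00f and is classified IC_FSMBI above.  */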
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
int
cpat_const_p (rtx x, machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
	  && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
	    (GET_CODE (ops[1]) ==
	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}
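
/* Illustrative example (not from the original source): for a 4-byte
   insert at offset 4 (what cwd would generate) the pattern built above
   is

	10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f

   assuming the shufb convention that pattern bytes 0x00-0x0f select
   from the first source operand and 0x10-0x1f from the second: bytes
   4..7 take the scalar, every other byte copies the old quadword.  */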
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array.  */
void
constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
	  && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
	val = const_double_to_hwint (x);
      else
	val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      /* Splat the constant across the whole array.  */
      for (j = 0, i = first + 1; i < 16; i++)
	{
	  arr[i] = arr[j];
	  j = (j == first) ? 0 : j + 1;
	}
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
	{
	  elt = CONST_VECTOR_ELT (x, i);
	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
	    {
	      if (GET_CODE (elt) == CONST_DOUBLE)
		val = const_double_to_hwint (elt);
	      else
		val = INTVAL (elt);
	      first = GET_MODE_SIZE (mode) - 1;
	      if (first + i * GET_MODE_SIZE (mode) > 16)
		abort ();
	      for (j = first; j >= 0; j--)
		{
		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
		  val >>= 8;
		}
	    }
	}
    }
  else
    abort ();
}
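
/* Illustrative example (not from the original source):
   constant_to_array (HImode, GEN_INT (0x1234), arr) first stores the two
   big-endian bytes 0x12 0x34 and then the splat loop repeats them, so
   arr becomes 12 34 12 34 ... across all 16 bytes.  */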
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3].  */
rtx
array_to_constant (machine_mode mode, const unsigned char arr[16])
{
  machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
	val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
	high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
	val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      for (i = 0, val = 0; i < 8; i++)
	val = (val << 8) | arr[i];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
	val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}
static void
reloc_diagnostic (rtx x)
{
  tree decl = 0;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL.  */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
    {
      location_t loc;
      /* We use last_assemble_variable_decl to get line information.  It's
	 not always going to be right and might not even be close, but will
	 be right for the more common cases.  */
      if (!last_assemble_variable_decl || in_section == ctors_section)
	loc = DECL_SOURCE_LOCATION (decl);
      else
	loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);

      if (TARGET_WARN_RELOC)
	warning_at (loc, 0,
		    "creating run-time relocation for %qD", decl);
      else
	error_at (loc,
		  "creating run-time relocation for %qD", decl);
    }
  else
    {
      if (TARGET_WARN_RELOC)
	warning_at (input_location, 0, "creating run-time relocation");
      else
	error_at (input_location, "creating run-time relocation");
    }
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them.  */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
	       int opno ATTRIBUTE_UNUSED, int *total,
	       bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough.  */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
	*total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
	{
	  *total = COSTS_N_INSNS (9);
	  return true;
	}
      break;

    case MULT:
      cost =
	GET_CODE (XEXP (x, 0)) ==
	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	      cost = COSTS_N_INSNS (14);
	      if ((val & 0xffff) == 0)
		cost = COSTS_N_INSNS (9);
	      else if (val > 0 && val < 0x10000)
		cost = COSTS_N_INSNS (11);
	    }
	}
      *total = cost;
      return true;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;
    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
	*total = COSTS_N_INSNS (0);
      else
	*total = COSTS_N_INSNS (4);
      return true;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
  *total = cost;
  return true;
}

static scalar_int_mode
spu_unwind_word_mode (void)
{
  return SImode;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.)  */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again.  */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
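
/* A rough note on the code above (not from the original comments): the
   shuffle constant repeats byte indices 00 01 02 03, so shufb copies
   the allocation size from op1's preferred word slot into every word
   of SPLATTED; the single vector subtract then decrements both the
   stack pointer (word 0) and the Available Stack Size (word 1).  */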
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain.  */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
rtx
spu_gen_subreg (machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
	  && ((type) == 0
	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	      || int_size_in_bytes (type) >
	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions.  */
enum spu_function_code
{
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"
#undef DEF_BUILTIN
  NUM_SPU_BUILTINS
};

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];

/* Returns the spu builtin decl for CODE.  */
static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= NUM_SPU_BUILTINS)
    return error_mark_node;

  return spu_builtin_decls[code];
}
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
			(void_type_node,
			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared.  */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];		/* build_function will make a copy.  */
      int parm;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
	;

      p = void_list_node;
      while (parm > 1)
	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      spu_builtin_decls[i] =
	add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
	TREE_READONLY (spu_builtin_decls[i]) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (spu_builtin_decls[i]) = 1;
    }
}
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
void
spu_builtin_splats (rtx ops[])
{
  machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
	  && GET_CODE (ops[1]) != SUBREG)
	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
	{
	case E_V2DImode:
	case E_V2DFmode:
	  shuf =
	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
				TImode);
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	  shuf =
	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
				TImode);
	  break;
	case E_V8HImode:
	  shuf =
	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
				TImode);
	  break;
	case E_V16QImode:
	  shuf =
	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
				TImode);
	  break;
	default:
	  abort ();
	}
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
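
/* A rough note on the shuffle constants above (not from the original
   comments): each one repeats the byte indices of the scalar's
   preferred slot.  For V4SI the pattern 00 01 02 03 repeated copies
   bytes 0-3 of ops[1] into all four word slots; V8HI repeats 02 03
   (the preferred halfword), V16QI repeats 03, and V2DI/V2DF use
   00-07 and 10-17 to pick up the doubleword from both shufb sources.  */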
void
spu_builtin_extract (rtx ops[])
{
  machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
	{
	case E_V16QImode:
	  emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
	  break;
	case E_V8HImode:
	  emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
	  break;
	case E_V4SFmode:
	  emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
	  break;
	case E_V4SImode:
	  emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
	  break;
	case E_V2DImode:
	  emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
	  break;
	case E_V2DFmode:
	  emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
	  break;
	default:
	  abort ();
	}
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case E_V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case E_V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case E_V4SFmode:
    case E_V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case E_V2DImode:
    case E_V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
void
spu_builtin_insert (rtx ops[])
{
  machine_mode mode = GET_MODE (ops[0]);
  machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
void
spu_builtin_promote (rtx ops[])
{
  machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case E_V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case E_V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case E_V4SFmode:
	case E_V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case E_V2DImode:
	case E_V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
static void
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      static unsigned char const shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      static unsigned char const insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (mem, bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      static unsigned char const insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);
    }
  emit_insn (gen_sync ());
}
static bool
spu_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !spu_naked_function_p (decl);
}
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case E_HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case E_SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case E_DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
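
/* A rough illustration (not from the original source): sign-extending
   SImode to DImode builds the shuffle pattern

	10 10 10 10  00 01 02 03  10 10 10 10  10 10 10 10

   where bytes 0x00-0x03 copy the four value bytes from ops[1] and
   0x10 replicates byte 0 of the sign word produced by the 31-bit
   arithmetic shift, assuming the usual shufb source-selection
   convention.  */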
/* expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	++n_var;
      else
	{
	  if (first_constant == NULL_RTX)
	    first_constant = x;
	}
      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* fill empty slots with the first constant, this increases
	     our chance of using splats in the recursive call below.  */
	  for (i = 0; i < n_elts; ++i)
	    {
	      x = XVECEXP (constant_parts_rtx, 0, i);
	      if (!(CONST_INT_P (x)
		    || GET_CODE (x) == CONST_DOUBLE
		    || GET_CODE (x) == CONST_FIXED))
		XVECEXP (constant_parts_rtx, 0, i) = first_constant;
	    }

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!(CONST_INT_P (x)
		|| GET_CODE (x) == CONST_DOUBLE
		|| GET_CODE (x) == CONST_FIXED))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
		  machine_mode dest_mode,
		  machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  machine_mode dest_mode;
  machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions uses destination V4SImode.
     Double floating point vector compare instructions uses destination V2DImode.
     Move destination to appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	case UNEQ:
	case UNLE:
	case UNLT:
	case UNGE:
	case UNGT:
	case UNORDERED:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum rtx_code rev_code;
	    enum insn_code nor_code;
	    rtx rev_mask;

	    rev_code = reverse_condition_maybe_unordered (rcode);
	    rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);

	    nor_code = optab_handler (one_cmpl_optab, dest_mode);
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE:  new_code = GT;  break;
	      case GEU: new_code = GTU; break;
	      case LE:  new_code = LT;  break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, dest_mode);
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case LTGT:
	  /* Try LT OR GT */
	  {
	    rtx lt_rtx, gt_rtx;
	    enum insn_code ior_code;

	    lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
	    gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, dest_mode);
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case ORDERED:
	  /* Implement as (A==A) & (B==B) */
	  {
	    rtx a_rtx, b_rtx;
	    enum insn_code and_code;

	    a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
	    b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);

	    and_code = optab_handler (and_optab, dest_mode);
	    gcc_assert (and_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	std::swap (op0, op1);
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}
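
/* A rough summary of the strategy above (not from the original
   comments): the SPU only provides eq/gt/gtu vector compares, so GE is
   synthesized as (A > B) | (A == B), LT by swapping the operands of
   GT, NE by complementing EQ, LTGT as LT | GT, and ORDERED as
   (A == A) & (B == B).  */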
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
static rtx
spu_force_reg (machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d]",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa, and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned.  */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr.  */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored", lsbits,
		 d->name);
    }
}
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = (enum insn_code) d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      gcc_assert (arg != NULL_TREE);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  gcc_assert (i == insn_data[icode].n_generator_args);
  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = (enum insn_code) d->icode;
  machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist.  */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {

      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand it's args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  return spu_builtin_decls[SPU_MASK_FOR_LOAD];
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
    case vec_perm:
    case vec_promote_demote:
      return 1;

    case scalar_load:
    case scalar_store:
      /* Load + rotate.  */
      return 2;

    case unaligned_load:
      return 2;

    case cond_branch_taken:
      return 6;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.init_cost.  */
static void *
spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */
static void
spu_finish_cost (void *data, unsigned *prologue_cost,
		 unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost     = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */
static void
spu_destroy_cost_data (void *data)
{
  free (data);
}
/* Return true iff, data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how may iterations are required to reach desired alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Return the appropriate mode for a named address pointer.  */
static scalar_int_mode
spu_addr_space_pointer_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return ptr_mode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate mode for a named address address.  */
static scalar_int_mode
spu_addr_space_address_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return Pmode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}
/* Determine if one named address space is a subset of another.  */

static bool
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);

  if (subset == superset)
    return true;

  /* If we have -mno-address-space-conversion, treat __ea and generic as not
     being subsets but instead as disjoint address spaces.  */
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
    return false;

  else
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
}
/* Convert from one address space to another.  */
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
				  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

      return result;
    }

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
			  gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
				  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
	emit_insn (gen_addsi3 (result, op, ls));
      else
	emit_insn (gen_adddi3 (result, op, ls));

      return result;
    }

  else
    gcc_unreachable ();
}
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
   -2 are instructions that can go in pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx_insn *insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 i,
		 insn_data[INSN_CODE(insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
void
spu_init_expanders (void)
{
  if (cfun)
    {
      rtx r0, r1;
      /* HARD_FRAME_REGISTER is only 128 bit aligned when
	 frame_pointer_needed is true.  We don't know that until we're
	 expanding the prologue.  */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
	 LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
	 to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
		  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static scalar_int_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static scalar_int_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for shift counts.  */
  return SImode;
}
/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
	 doesn't like them as they are not DECLs.  Also, we need to set
	 flags in that case.  */
      if (!DECL_P (decl))
	return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}
/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.  */
rtx
spu_gen_exp2 (machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
	 __vector float m = (__vector float) spu_splats (exp);  */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}
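
/* Illustrative check (not from the original source): for scale = 3 the
   biased exponent is 127 + 3 = 130 (0x82), so each word's first two
   bytes are 0x82 >> 1 = 0x41 and (0x82 << 7) & 0xff = 0x00, giving the
   single-precision pattern 0x41000000 = 8.0f = 2^3 in every slot.  */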
/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}

void
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
}

/* Implement targetm.ref_may_alias_errno.  */
static bool
spu_ref_may_alias_errno (ao_ref *ref)
{
  tree base = ao_ref_base (ref);

  /* With SPU newlib, errno is defined as something like
	 _impure_data._errno
     The default implementation of this target macro does not
     recognize such expressions, so special-code for it here.  */

  if (TREE_CODE (base) == VAR_DECL
      && !TREE_STATIC (base)
      && DECL_EXTERNAL (base)
      && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
      && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
		 "_impure_data") == 0
      /* _errno is the first member of _impure_data.  */
      && ref->offset == 0)
    return true;

  return default_ref_may_alias_errno (ref);
}
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
	output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
	{
	  output_asm_insn ("il\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%3", op);
	  output_asm_insn ("iohl\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
	output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
	{
	  output_asm_insn ("il\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%5", op);
	  output_asm_insn ("iohl\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
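
/* Illustration of the thunk output (assuming FIRST_ARG_REGNUM is
   register 3, so the this pointer arrives in $3): for delta == 16 and
   vcall_offset == 0 the whole thunk is just

     ai	$3,$3,16
     br	target

   since 16 fits ai's 10-bit signed immediate and no vtable load is
   needed; larger deltas fall through to the il or ilhu/iohl forms that
   build the constant in the $79 temporary first.  */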
/* Canonicalize a comparison from one we don't have to one we do have.  */
static void
spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  if (!op0_preserve_value
      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
    {
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }
}
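
/* Illustration: given (lt x y) with op0_preserve_value clear, the
   operands are exchanged and swap_condition maps LT to GT, yielding
   the equivalent (gt y x); likewise LE->GE, LTU->GTU and LEU->GEU,
   turning less-than forms into the greater-than forms the instruction
   set supports directly.  */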
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  */
void
spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		      rtx orig_before, rtx orig_after)
{
  machine_mode mode = GET_MODE (mem);
  rtx before = orig_before, after = orig_after;

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_move_insn (before, mem);

  if (code == MULT)  /* NAND operation */
    {
      rtx x = expand_simple_binop (mode, AND, before, val,
				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    after = expand_simple_binop (mode, code, before, val,
				 after, 1, OPTAB_LIB_WIDEN);

  emit_move_insn (mem, after);

  if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
/* Implement TARGET_MODES_TIEABLE_P.  */
static bool
spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  GCC assumes that modes are
   in the lowpart of a register, which is only true for SPU.  */
static bool
spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
{
  return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
	  || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
	  || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
}
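
/* Illustration, purely from the size checks above: SImode <-> SFmode
   (both 4 bytes) and V4SImode <-> V16QImode (both 16 bytes) are
   allowed, as is QImode <-> SImode (both at most 4 bytes); a change
   such as DImode (8 bytes) to SImode (4 bytes) is rejected.  */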
/* Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,             0, 0, false, false, false, NULL, false }
};
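
/* Hypothetical usage of the attributes registered above: "naked"
   requires a declaration (decl_req is true), e.g.

     void handler (void) __attribute__ ((naked));

   while "spu_vector" requires a type (type_req is true) and is
   processed by spu_handle_vector_attribute.  */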
/*  TARGET overrides.  */

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false
#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P spu_modes_tieable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class

struct gcc_target targetm = TARGET_INITIALIZER;