/* Copyright (C) 2006-2015 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "insn-codes.h"
#include "cfgcleanup.h"
#include "diagnostic-core.h"
#include "langhooks.h"
#include "sched-int.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tm-constrs.h"

/* This file should be included last.  */
#include "target-def.h"
/* Builtin types, data and prototypes.  */

enum spu_builtin_type_index
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these.  */
  /* A 16-byte type.  (Implemented with V16QI_type_node)  */
  /* These all correspond to intSI_type_node.  */
  /* These correspond to the standard types.  */

#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},            /* SPU_BTI_7     */
  {-0x40ll, 0x3fll},            /* SPU_BTI_S7    */
  {0ll, 0x7fll},                /* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},          /* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},        /* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},              /* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},        /* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},        /* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},      /* SPU_BTI_S16_2 */
  {0ll, 0xffffll},              /* SPU_BTI_U16   */
  {0ll, 0x3ffffll},             /* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},             /* SPU_BTI_U18   */
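
/* For reference: each entry above gives the [low, high] bounds accepted for
   the corresponding immediate operand type, as the names suggest.  E.g. the
   SPU_BTI_S10 entry {-0x200, 0x1ff} is a signed 10-bit field (-512..511),
   while SPU_BTI_U16 {0, 0xffff} is an unsigned 16-bit field.  A few entries,
   such as SPU_BTI_7 with {-0x40, 0x7f}, deliberately accept a wider span
   than a plain signed field would.  */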
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs.  */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);
/* Which instruction set architecture to use.  */

/* Which cpu are we tuning for.  */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
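
/* For illustration: distances here are in bytes (4 bytes per insn), so the
   default requires 8*4 - 2*4 = 24 bytes of real instructions between the
   hint and the branch, with up to 2 nops filling the remaining 8 bytes.  */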
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,

/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
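
/* For illustration: when the 64-bit __ea model is in effect
   (spu_ea_model != 32), __ea pointers are DImode values; with the 32-bit
   model they are ordinary SImode pointers, the same width as local-store
   addresses.  */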
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
/* Implement TARGET_OPTION_OVERRIDE.  */
spu_option_override (void)
{
  /* Set up function hooks.  */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (strcmp (&spu_arch_string[0], "cell") == 0)
    spu_arch = PROCESSOR_CELL;
  else if (strcmp (&spu_arch_string[0], "celledp") == 0)
    spu_arch = PROCESSOR_CELLEDP;
  else
    error ("bad value (%s) for -march= switch", spu_arch_string);

  /* Determine processor to tune for.  */
  if (strcmp (&spu_tune_string[0], "cell") == 0)
    spu_tune = PROCESSOR_CELL;
  else if (strcmp (&spu_tune_string[0], "celledp") == 0)
    spu_tune = PROCESSOR_CELLEDP;
  else
    error ("bad value (%s) for -mtune= switch", spu_tune_string);

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
spu_scalar_mode_supported_p (machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
spu_vector_mode_supported_p (machine_mode mode)

/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
valid_subreg (rtx op)
{
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
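
/* For illustration (reading the conditions above): an SImode SUBREG of a
   QImode register is considered valid because both modes are at most 4
   bytes wide, while a DImode SUBREG of an SImode register is not, since
   the sizes differ and neither the "both <= 4" nor the "both >= 16" case
   applies.  */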
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);

  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
      *start += 32 - op_size;

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

      /* First, determine if we need 1 TImode load or 2.  We need only 1
	 if the bits being extracted do not cross the alignment boundary
	 as determined by the MEM and its address.  */
      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load.  */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  emit_insn (gen_rotqby_ti (s0, s0, r0));
	}

	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);

	  gcc_assert (start + width <= 128);

	  rtx r1 = gen_reg_rtx (SImode);
	  mask = gen_reg_rtx (TImode);
	  emit_move_insn (mask, GEN_INT (-1));
	  emit_insn (gen_rotqby_ti (s0, s0, r0));
	  emit_insn (gen_rotqby_ti (s1, s1, r0));
	  if (GET_CODE (r0) == CONST_INT)
	    r1 = GEN_INT (INTVAL (r0) & 15);
	    emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	  emit_insn (gen_shlqby_ti (mask, mask, r1));
	  emit_insn (gen_selb (s0, s1, s0, mask));

  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
	emit_move_insn (s0, src);
    }

      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);

  /* Now s0 is TImode and contains the bits to extract at start.  */

    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
471 spu_expand_insv (rtx ops
[])
473 HOST_WIDE_INT width
= INTVAL (ops
[1]);
474 HOST_WIDE_INT start
= INTVAL (ops
[2]);
475 HOST_WIDE_INT maskbits
;
476 machine_mode dst_mode
;
477 rtx dst
= ops
[0], src
= ops
[3];
484 if (GET_CODE (ops
[0]) == MEM
)
485 dst
= gen_reg_rtx (TImode
);
487 dst
= adjust_operand (dst
, &start
);
488 dst_mode
= GET_MODE (dst
);
489 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
491 if (CONSTANT_P (src
))
494 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
495 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
497 src
= adjust_operand (src
, 0);
499 mask
= gen_reg_rtx (dst_mode
);
500 shift_reg
= gen_reg_rtx (dst_mode
);
501 shift
= dst_size
- start
- width
;
503 /* It's not safe to use subreg here because the compiler assumes
504 that the SUBREG_REG is right justified in the SUBREG. */
505 convert_move (shift_reg
, src
, 1);
512 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
515 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
518 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
530 maskbits
= (-1ll << (32 - width
- start
));
532 maskbits
+= (1ll << (32 - start
));
533 emit_move_insn (mask
, GEN_INT (maskbits
));
536 maskbits
= (-1ll << (64 - width
- start
));
538 maskbits
+= (1ll << (64 - start
));
539 emit_move_insn (mask
, GEN_INT (maskbits
));
543 unsigned char arr
[16];
545 memset (arr
, 0, sizeof (arr
));
546 arr
[i
] = 0xff >> (start
& 7);
547 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
549 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
550 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
556 if (GET_CODE (ops
[0]) == MEM
)
558 rtx low
= gen_reg_rtx (SImode
);
559 rtx rotl
= gen_reg_rtx (SImode
);
560 rtx mask0
= gen_reg_rtx (TImode
);
566 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
567 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
568 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
569 emit_insn (gen_negsi2 (rotl
, low
));
570 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
571 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
572 mem
= change_address (ops
[0], TImode
, addr0
);
573 set_mem_alias_set (mem
, 0);
574 emit_move_insn (dst
, mem
);
575 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
576 if (start
+ width
> MEM_ALIGN (ops
[0]))
578 rtx shl
= gen_reg_rtx (SImode
);
579 rtx mask1
= gen_reg_rtx (TImode
);
580 rtx dst1
= gen_reg_rtx (TImode
);
582 addr1
= plus_constant (Pmode
, addr
, 16);
583 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
584 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
585 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
586 mem1
= change_address (ops
[0], TImode
, addr1
);
587 set_mem_alias_set (mem1
, 0);
588 emit_move_insn (dst1
, mem1
);
589 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
590 emit_move_insn (mem1
, dst1
);
592 emit_move_insn (mem
, dst
);
595 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}

	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)

	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
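
/* Note on the tail handling above: the final, partially filled quadword is
   merged with a byte select.  ARR acts as a byte mask (presumably 0xff for
   each of the remaining bytes - offset bytes), so gen_selb takes those
   bytes from the source quadword and keeps the destination's existing
   bytes everywhere else.  */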
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
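
/* For reference: the twelve rows above cover, in order, QImode, HImode,
   SImode, DImode, TImode, SFmode, DFmode, V16QImode, V8HImode, V4SImode,
   V4SFmode and V2DFmode comparisons, and the three columns are indexed by
   SPU_EQ, SPU_GT and SPU_GTU.  A zero entry means there is no
   unsigned-greater-than pattern for that floating-point mode.  */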
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE; we can generate better code in most cases if we do it
   ourselves.  */
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  machine_mode comp_mode;
  machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible.  */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
    {
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  op_mode = GET_MODE (op0);
747 if (HONOR_NANS (op_mode
))
762 if (HONOR_NANS (op_mode
))
854 comp_mode
= V4SImode
;
858 comp_mode
= V2DImode
;
865 if (GET_MODE (op1
) == DFmode
866 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
869 if (is_set
== 0 && op1
== const0_rtx
870 && (GET_MODE (op0
) == SImode
871 || GET_MODE (op0
) == HImode
872 || GET_MODE (op0
) == QImode
) && scode
== SPU_EQ
)
874 /* Don't need to set a register with the result when we are
875 comparing against zero and branching. */
876 reverse_test
= !reverse_test
;
877 compare_result
= op0
;
881 compare_result
= gen_reg_rtx (comp_mode
);
890 if (spu_comp_icode
[index
][scode
] == 0)
893 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
895 op0
= force_reg (op_mode
, op0
);
896 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
898 op1
= force_reg (op_mode
, op1
);
899 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
903 emit_insn (comp_rtx
);
907 eq_result
= gen_reg_rtx (comp_mode
);
908 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
913 ior_code
= optab_handler (ior_optab
, comp_mode
);
914 gcc_assert (ior_code
!= CODE_FOR_nothing
);
915 emit_insn (GEN_FCN (ior_code
)
916 (compare_result
, compare_result
, eq_result
));
925 /* We don't have branch on QI compare insns, so we convert the
926 QI compare result to a HI result. */
927 if (comp_mode
== QImode
)
929 rtx old_res
= compare_result
;
930 compare_result
= gen_reg_rtx (HImode
);
932 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
936 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
938 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
940 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
941 emit_jump_insn (gen_rtx_SET (pc_rtx
,
942 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
945 else if (is_set
== 2)
947 rtx target
= operands
[0];
948 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
949 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
950 machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
952 rtx op_t
= operands
[2];
953 rtx op_f
= operands
[3];
955 /* The result of the comparison can be SI, HI or QI mode. Create a
956 mask based on that result. */
957 if (target_size
> compare_size
)
959 select_mask
= gen_reg_rtx (mode
);
960 emit_insn (gen_extend_compare (select_mask
, compare_result
));
962 else if (target_size
< compare_size
)
964 gen_rtx_SUBREG (mode
, compare_result
,
965 (compare_size
- target_size
) / BITS_PER_UNIT
);
966 else if (comp_mode
!= mode
)
967 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
969 select_mask
= compare_result
;
971 if (GET_MODE (target
) != GET_MODE (op_t
)
972 || GET_MODE (target
) != GET_MODE (op_f
))
976 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
978 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
982 rtx target
= operands
[0];
984 emit_insn (gen_rtx_SET (compare_result
,
985 gen_rtx_NOT (comp_mode
, compare_result
)));
986 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
987 emit_insn (gen_extendhisi2 (target
, compare_result
));
988 else if (GET_MODE (target
) == SImode
989 && GET_MODE (compare_result
) == QImode
)
990 emit_insn (gen_extend_compare (target
, compare_result
));
992 emit_move_insn (target
, compare_result
);
const_double_to_hwint (rtx x)
{
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = (val << 32) | (l[1] & 0xffffffff);
    }

hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
{
  gcc_assert (mode == SFmode || mode == DFmode);

    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
      tv[1] = (v << 32) >> 32;

  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
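
/* For illustration: these helpers move a target bit pattern between a
   CONST_DOUBLE and a HOST_WIDE_INT.  The (v << 32) >> 32 truncation keeps
   only the low 32 bits (the single-precision case), while the DFmode path
   assembles the full 64-bit pattern from two 32-bit target words, as the
   (val << 32) | (l[1] & 0xffffffff) combination above shows.  */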
1038 print_operand_address (FILE * file
, register rtx addr
)
1043 if (GET_CODE (addr
) == AND
1044 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1045 && INTVAL (XEXP (addr
, 1)) == -16)
1046 addr
= XEXP (addr
, 0);
1048 switch (GET_CODE (addr
))
1051 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1055 reg
= XEXP (addr
, 0);
1056 offset
= XEXP (addr
, 1);
1057 if (GET_CODE (offset
) == REG
)
1059 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1060 reg_names
[REGNO (offset
)]);
1062 else if (GET_CODE (offset
) == CONST_INT
)
1064 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1065 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1075 output_addr_const (file
, addr
);
1085 print_operand (FILE * file
, rtx x
, int code
)
1087 machine_mode mode
= GET_MODE (x
);
1089 unsigned char arr
[16];
1090 int xcode
= GET_CODE (x
);
1092 if (GET_MODE (x
) == VOIDmode
)
1095 case 'L': /* 128 bits, signed */
1096 case 'm': /* 128 bits, signed */
1097 case 'T': /* 128 bits, signed */
1098 case 't': /* 128 bits, signed */
1101 case 'K': /* 64 bits, signed */
1102 case 'k': /* 64 bits, signed */
1103 case 'D': /* 64 bits, signed */
1104 case 'd': /* 64 bits, signed */
1107 case 'J': /* 32 bits, signed */
1108 case 'j': /* 32 bits, signed */
1109 case 's': /* 32 bits, signed */
1110 case 'S': /* 32 bits, signed */
1117 case 'j': /* 32 bits, signed */
1118 case 'k': /* 64 bits, signed */
1119 case 'm': /* 128 bits, signed */
1120 if (xcode
== CONST_INT
1121 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1123 gcc_assert (logical_immediate_p (x
, mode
));
1124 constant_to_array (mode
, x
, arr
);
1125 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1126 val
= trunc_int_for_mode (val
, SImode
);
1127 switch (which_logical_immediate (val
))
1132 fprintf (file
, "h");
1135 fprintf (file
, "b");
1145 case 'J': /* 32 bits, signed */
1146 case 'K': /* 64 bits, signed */
1147 case 'L': /* 128 bits, signed */
1148 if (xcode
== CONST_INT
1149 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1151 gcc_assert (logical_immediate_p (x
, mode
)
1152 || iohl_immediate_p (x
, mode
));
1153 constant_to_array (mode
, x
, arr
);
1154 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1155 val
= trunc_int_for_mode (val
, SImode
);
1156 switch (which_logical_immediate (val
))
1162 val
= trunc_int_for_mode (val
, HImode
);
1165 val
= trunc_int_for_mode (val
, QImode
);
1170 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1176 case 't': /* 128 bits, signed */
1177 case 'd': /* 64 bits, signed */
1178 case 's': /* 32 bits, signed */
1181 enum immediate_class c
= classify_immediate (x
, mode
);
1185 constant_to_array (mode
, x
, arr
);
1186 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1187 val
= trunc_int_for_mode (val
, SImode
);
1188 switch (which_immediate_load (val
))
1193 fprintf (file
, "a");
1196 fprintf (file
, "h");
1199 fprintf (file
, "hu");
1206 constant_to_array (mode
, x
, arr
);
1207 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1209 fprintf (file
, "b");
1211 fprintf (file
, "h");
1213 fprintf (file
, "w");
1215 fprintf (file
, "d");
1218 if (xcode
== CONST_VECTOR
)
1220 x
= CONST_VECTOR_ELT (x
, 0);
1221 xcode
= GET_CODE (x
);
1223 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1224 fprintf (file
, "a");
1225 else if (xcode
== HIGH
)
1226 fprintf (file
, "hu");
1240 case 'T': /* 128 bits, signed */
1241 case 'D': /* 64 bits, signed */
1242 case 'S': /* 32 bits, signed */
1245 enum immediate_class c
= classify_immediate (x
, mode
);
1249 constant_to_array (mode
, x
, arr
);
1250 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1251 val
= trunc_int_for_mode (val
, SImode
);
1252 switch (which_immediate_load (val
))
1259 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1264 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1267 constant_to_array (mode
, x
, arr
);
1269 for (i
= 0; i
< 16; i
++)
1274 print_operand (file
, GEN_INT (val
), 0);
1277 constant_to_array (mode
, x
, arr
);
1278 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1279 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1284 if (GET_CODE (x
) == CONST_VECTOR
)
1285 x
= CONST_VECTOR_ELT (x
, 0);
1286 output_addr_const (file
, x
);
1288 fprintf (file
, "@h");
1302 if (xcode
== CONST_INT
)
1304 /* Only 4 least significant bits are relevant for generate
1305 control word instructions. */
1306 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1311 case 'M': /* print code for c*d */
1312 if (GET_CODE (x
) == CONST_INT
)
1316 fprintf (file
, "b");
1319 fprintf (file
, "h");
1322 fprintf (file
, "w");
1325 fprintf (file
, "d");
1334 case 'N': /* Negate the operand */
1335 if (xcode
== CONST_INT
)
1336 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1337 else if (xcode
== CONST_VECTOR
)
1338 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1339 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1342 case 'I': /* enable/disable interrupts */
1343 if (xcode
== CONST_INT
)
1344 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1347 case 'b': /* branch modifiers */
1349 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1350 else if (COMPARISON_P (x
))
1351 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1354 case 'i': /* indirect call */
1357 if (GET_CODE (XEXP (x
, 0)) == REG
)
1358 /* Used in indirect function calls. */
1359 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1361 output_address (XEXP (x
, 0));
1365 case 'p': /* load/store */
1369 xcode
= GET_CODE (x
);
1374 xcode
= GET_CODE (x
);
1377 fprintf (file
, "d");
1378 else if (xcode
== CONST_INT
)
1379 fprintf (file
, "a");
1380 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1381 fprintf (file
, "r");
1382 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1384 if (GET_CODE (XEXP (x
, 1)) == REG
)
1385 fprintf (file
, "x");
1387 fprintf (file
, "d");
1392 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1394 output_addr_const (file
, GEN_INT (val
));
1398 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1400 output_addr_const (file
, GEN_INT (val
));
1404 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1406 output_addr_const (file
, GEN_INT (val
));
1410 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1411 val
= (val
>> 3) & 0x1f;
1412 output_addr_const (file
, GEN_INT (val
));
1416 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1419 output_addr_const (file
, GEN_INT (val
));
1423 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1426 output_addr_const (file
, GEN_INT (val
));
1430 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1433 output_addr_const (file
, GEN_INT (val
));
1437 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1438 val
= -(val
& -8ll);
1439 val
= (val
>> 3) & 0x1f;
1440 output_addr_const (file
, GEN_INT (val
));
1445 constant_to_array (mode
, x
, arr
);
1446 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1447 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1452 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1453 else if (xcode
== MEM
)
1454 output_address (XEXP (x
, 0));
1455 else if (xcode
== CONST_VECTOR
)
1456 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1458 output_addr_const (file
, x
);
1465 output_operand_lossage ("invalid %%xn code");
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */

  if (!reload_completed && !reload_in_progress)

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx.  */
  if (!cfun->machine->pic_reg)
    {
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
      else
	cfun->machine->pic_reg = pic_offset_table_rtx;
    }

  return cfun->machine->pic_reg;
1496 /* Split constant addresses to handle cases that are too large.
1497 Add in the pic register when in PIC mode.
1498 Split immediates that require more than 1 instruction. */
1500 spu_split_immediate (rtx
* ops
)
1502 machine_mode mode
= GET_MODE (ops
[0]);
1503 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1509 unsigned char arrhi
[16];
1510 unsigned char arrlo
[16];
1511 rtx to
, temp
, hi
, lo
;
1513 machine_mode imode
= mode
;
1514 /* We need to do reals as ints because the constant used in the
1515 IOR might not be a legitimate real constant. */
1516 imode
= int_mode_for_mode (mode
);
1517 constant_to_array (mode
, ops
[1], arrhi
);
1519 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1522 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1523 for (i
= 0; i
< 16; i
+= 4)
1525 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1526 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1527 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1528 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1530 hi
= array_to_constant (imode
, arrhi
);
1531 lo
= array_to_constant (imode
, arrlo
);
1532 emit_move_insn (temp
, hi
);
1533 emit_insn (gen_rtx_SET (to
, gen_rtx_IOR (imode
, temp
, lo
)));
1538 unsigned char arr_fsmbi
[16];
1539 unsigned char arr_andbi
[16];
1540 rtx to
, reg_fsmbi
, reg_and
;
1542 machine_mode imode
= mode
;
1543 /* We need to do reals as ints because the constant used in the
1544 * AND might not be a legitimate real constant. */
1545 imode
= int_mode_for_mode (mode
);
1546 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1548 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1551 for (i
= 0; i
< 16; i
++)
1552 if (arr_fsmbi
[i
] != 0)
1554 arr_andbi
[0] = arr_fsmbi
[i
];
1555 arr_fsmbi
[i
] = 0xff;
1557 for (i
= 1; i
< 16; i
++)
1558 arr_andbi
[i
] = arr_andbi
[0];
1559 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1560 reg_and
= array_to_constant (imode
, arr_andbi
);
1561 emit_move_insn (to
, reg_fsmbi
);
1562 emit_insn (gen_rtx_SET (to
, gen_rtx_AND (imode
, to
, reg_and
)));
1566 if (reload_in_progress
|| reload_completed
)
1568 rtx mem
= force_const_mem (mode
, ops
[1]);
1569 if (TARGET_LARGE_MEM
)
1571 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1572 emit_move_insn (addr
, XEXP (mem
, 0));
1573 mem
= replace_equiv_address (mem
, addr
);
1575 emit_move_insn (ops
[0], mem
);
1581 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1585 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1586 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1589 emit_insn (gen_pic (ops
[0], ops
[1]));
1592 rtx pic_reg
= get_pic_reg ();
1593 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1595 return flag_pic
|| c
== IC_IL2s
;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))

/* This function is only correct starting with local register
   allocation.  */
spu_saved_regs_size (void)
{
  int reg_save_size = 0;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}

frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
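
/* For illustration: registers are saved and restored as whole 16-byte
   quadwords (V4SImode), which is why spu_saved_regs_size reserves 0x10
   bytes of stack for every register that needs saving.  */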
/* This happens after reload, so we need to expand it.  */
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  if (satisfies_constraint_K (GEN_INT (imm)))
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));

      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
/* Return nonzero if this function is known to have a null epilogue.  */
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)

/*
   The stack frame looks like this:

     AP ->  +-------------+
  prev SP   | back chain  |
	    |  reg save   |  crtl->args.pretend_args_size bytes
	    | saved regs  |  spu_saved_regs_size() bytes
     FP ->  +-------------+
	    |    vars     |  get_frame_size() bytes
    HFP ->  +-------------+
	    |    args     |  crtl->outgoing_args_size bytes
     SP ->  +-------------+
*/
1724 spu_expand_prologue (void)
1726 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1727 HOST_WIDE_INT total_size
;
1728 HOST_WIDE_INT saved_regs_size
;
1729 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1730 rtx scratch_reg_0
, scratch_reg_1
;
1734 if (flag_pic
&& optimize
== 0 && !cfun
->machine
->pic_reg
)
1735 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1737 if (spu_naked_function_p (current_function_decl
))
1740 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1741 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1743 saved_regs_size
= spu_saved_regs_size ();
1744 total_size
= size
+ saved_regs_size
1745 + crtl
->outgoing_args_size
1746 + crtl
->args
.pretend_args_size
;
1749 || cfun
->calls_alloca
|| total_size
> 0)
1750 total_size
+= STACK_POINTER_OFFSET
;
1752 /* Save this first because code after this might use the link
1753 register as a scratch register. */
1756 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1757 RTX_FRAME_RELATED_P (insn
) = 1;
1762 offset
= -crtl
->args
.pretend_args_size
;
1763 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1764 if (need_to_save_reg (regno
, 1))
1767 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1768 RTX_FRAME_RELATED_P (insn
) = 1;
1772 if (flag_pic
&& cfun
->machine
->pic_reg
)
1774 rtx pic_reg
= cfun
->machine
->pic_reg
;
1775 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1776 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1781 if (flag_stack_check
)
1783 /* We compare against total_size-1 because
1784 ($sp >= total_size) <=> ($sp > total_size-1) */
1785 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1786 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1787 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1788 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1790 emit_move_insn (scratch_v4si
, size_v4si
);
1791 size_v4si
= scratch_v4si
;
1793 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1794 emit_insn (gen_vec_extractv4si
1795 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1796 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1799 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1800 the value of the previous $sp because we save it as the back
1802 if (total_size
<= 2000)
1804 /* In this case we save the back chain first. */
1805 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1807 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1811 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1813 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1815 RTX_FRAME_RELATED_P (insn
) = 1;
1816 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1817 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1819 if (total_size
> 2000)
1821 /* Save the back chain ptr */
1822 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1825 if (frame_pointer_needed
)
1827 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1828 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1829 + crtl
->outgoing_args_size
;
1830 /* Set the new frame_pointer */
1831 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1832 RTX_FRAME_RELATED_P (insn
) = 1;
1833 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1834 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1835 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1839 if (flag_stack_usage_info
)
1840 current_function_static_stack_size
= total_size
;
1844 spu_expand_epilogue (bool sibcall_p
)
1846 int size
= get_frame_size (), offset
, regno
;
1847 HOST_WIDE_INT saved_regs_size
, total_size
;
1848 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1851 if (spu_naked_function_p (current_function_decl
))
1854 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1856 saved_regs_size
= spu_saved_regs_size ();
1857 total_size
= size
+ saved_regs_size
1858 + crtl
->outgoing_args_size
1859 + crtl
->args
.pretend_args_size
;
1862 || cfun
->calls_alloca
|| total_size
> 0)
1863 total_size
+= STACK_POINTER_OFFSET
;
1867 if (cfun
->calls_alloca
)
1868 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1870 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1873 if (saved_regs_size
> 0)
1875 offset
= -crtl
->args
.pretend_args_size
;
1876 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1877 if (need_to_save_reg (regno
, 1))
1880 frame_emit_load (regno
, sp_reg
, offset
);
1886 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1890 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1891 emit_jump_insn (gen__return ());
1896 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1900 /* This is inefficient because it ends up copying to a save-register
1901 which then gets saved even though $lr has already been saved. But
1902 it does generate better code for leaf functions and we don't need
1903 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1904 used for __builtin_return_address anyway, so maybe we don't care if
1905 it's inefficient. */
1906 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits.  */
spu_const (machine_mode mode, HOST_WIDE_INT val)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
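
/* For illustration: spu_const (V4SImode, 1) builds the vector constant
   {1, 1, 1, 1}, and spu_const (SFmode, 0x3f800000) builds the SFmode
   constant whose bit pattern is 1.0f, since VAL is taken as the bit
   representation for float modes.  */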
/* Create a MODE vector constant from 4 ints.  */
spu_const_from_ints (machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
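
/* For illustration: the four ints are laid out big-endian, most
   significant byte first, so spu_const_from_ints (V4SImode, 0x01020304, b,
   c, d) places 0x01 in arr[0] and 0x04 in arr[3].  */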
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks.  */

  rtx_insn *prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block.  */

static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary.  */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)

/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
emit_nop_for_insn (rtx_insn *insn)
{
  /* We need to handle JUMP_TABLE_DATA separately.  */
  if (JUMP_TABLE_DATA_P (insn))
      new_insn = emit_insn_after (gen_lnop(), insn);
      recog_memoized (new_insn);
      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;

  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2029 /* Insert nops in basic blocks to meet dual issue alignment
2030 requirements. Also make sure hbrp and hint instructions are at least
2031 one cycle apart, possibly inserting a nop. */
2035 rtx_insn
*insn
, *next_insn
, *prev_insn
, *hbr_insn
= 0;
2039 /* This sets up INSN_ADDRESSES. */
2040 shorten_branches (get_insns ());
2042 /* Keep track of length added by nops. */
2046 insn
= get_insns ();
2047 if (!active_insn_p (insn
))
2048 insn
= next_active_insn (insn
);
2049 for (; insn
; insn
= next_insn
)
2051 next_insn
= next_active_insn (insn
);
2052 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2053 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2057 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2058 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2059 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2062 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2063 PUT_MODE (prev_insn
, GET_MODE (insn
));
2064 PUT_MODE (insn
, TImode
);
2065 INSN_LOCATION (prev_insn
) = INSN_LOCATION (insn
);
2071 if (INSN_CODE (insn
) == CODE_FOR_blockage
&& next_insn
)
2073 if (GET_MODE (insn
) == TImode
)
2074 PUT_MODE (next_insn
, TImode
);
2076 next_insn
= next_active_insn (insn
);
2078 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2079 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2081 if (((addr
+ length
) & 7) != 0)
2083 emit_nop_for_insn (prev_insn
);
2087 else if (GET_MODE (insn
) == TImode
2088 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2089 || get_attr_type (insn
) == TYPE_MULTI0
)
2090 && ((addr
+ length
) & 7) != 0)
2092 /* prev_insn will always be set because the first insn is
2093 always 8-byte aligned. */
2094 emit_nop_for_insn (prev_insn
);
2102 /* Routines for branch hints. */
2105 spu_emit_branch_hint (rtx_insn
*before
, rtx_insn
*branch
, rtx target
,
2106 int distance
, sbitmap blocks
)
2108 rtx branch_label
= 0;
2111 rtx_jump_table_data
*table
;
2113 if (before
== 0 || branch
== 0 || target
== 0)
2116 /* While scheduling we require hints to be no further than 600, so
2117 we need to enforce that here too */
2121 /* If we have a Basic block note, emit it after the basic block note. */
2122 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2123 before
= NEXT_INSN (before
);
2125 branch_label
= gen_label_rtx ();
2126 LABEL_NUSES (branch_label
)++;
2127 LABEL_PRESERVE_P (branch_label
) = 1;
2128 insn
= emit_label_before (branch_label
, branch
);
2129 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2130 bitmap_set_bit (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2132 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2133 recog_memoized (hint
);
2134 INSN_LOCATION (hint
) = INSN_LOCATION (branch
);
2135 HINTED_P (branch
) = 1;
2137 if (GET_CODE (target
) == LABEL_REF
)
2138 HINTED_P (XEXP (target
, 0)) = 1;
2139 else if (tablejump_p (branch
, 0, &table
))
2143 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2144 vec
= XVEC (PATTERN (table
), 0);
2146 vec
= XVEC (PATTERN (table
), 1);
2147 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2148 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2151 if (distance
>= 588)
	  /* Make sure the hint isn't scheduled any earlier than this point,
	     which could make it too far for the branch offset to fit.  */
2155 insn
= emit_insn_before (gen_blockage (), hint
);
2156 recog_memoized (insn
);
2157 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2159 else if (distance
<= 8 * 4)
2161 /* To guarantee at least 8 insns between the hint and branch we
2164 for (d
= distance
; d
< 8 * 4; d
+= 4)
2167 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2168 recog_memoized (insn
);
2169 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2172 /* Make sure any nops inserted aren't scheduled before the hint. */
2173 insn
= emit_insn_after (gen_blockage (), hint
);
2174 recog_memoized (insn
);
2175 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2177 /* Make sure any nops inserted aren't scheduled after the call. */
2178 if (CALL_P (branch
) && distance
< 8 * 4)
2180 insn
= emit_insn_before (gen_blockage (), branch
);
2181 recog_memoized (insn
);
2182 INSN_LOCATION (insn
) = INSN_LOCATION (branch
);
2187 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2188 the rtx for the branch target. */
2190 get_branch_target (rtx_insn
*branch
)
2192 if (JUMP_P (branch
))
2196 /* Return statements */
2197 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2198 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2201 if (extract_asm_operands (PATTERN (branch
)) != NULL
)
2204 set
= single_set (branch
);
2205 src
= SET_SRC (set
);
2206 if (GET_CODE (SET_DEST (set
)) != PC
)
2209 if (GET_CODE (src
) == IF_THEN_ELSE
)
2212 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2215 /* If the more probable case is not a fall through, then
2216 try a branch hint. */
2217 int prob
= XINT (note
, 0);
2218 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2219 && GET_CODE (XEXP (src
, 1)) != PC
)
2220 lab
= XEXP (src
, 1);
2221 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2222 && GET_CODE (XEXP (src
, 2)) != PC
)
2223 lab
= XEXP (src
, 2);
2227 if (GET_CODE (lab
) == RETURN
)
2228 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2236 else if (CALL_P (branch
))
2239 /* All of our call patterns are in a PARALLEL and the CALL is
2240 the first pattern in the PARALLEL. */
2241 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2243 call
= XVECEXP (PATTERN (branch
), 0, 0);
2244 if (GET_CODE (call
) == SET
)
2245 call
= SET_SRC (call
);
2246 if (GET_CODE (call
) != CALL
)
2248 return XEXP (XEXP (call
, 0), 0);
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
insn_clobbers_hbr (rtx_insn *insn)
{
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);

      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to

   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.
2289 insert_hbrp_for_ilb_runout (rtx_insn
*first
)
2291 rtx_insn
*insn
, *before_4
= 0, *before_16
= 0;
2292 int addr
= 0, length
, first_addr
= -1;
2293 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2294 int insert_lnop_after
= 0;
2295 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2298 if (first_addr
== -1)
2299 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2300 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2301 length
= get_attr_length (insn
);
2303 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2305 /* We test for 14 instructions because the first hbrp will add
2306 up to 2 instructions. */
2307 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2310 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2312 /* Make sure an hbrp is at least 2 cycles away from a hint.
2313 Insert an lnop after the hbrp when necessary. */
2314 if (before_4
== 0 && addr
> 0)
2317 insert_lnop_after
|= 1;
2319 else if (before_4
&& addr
<= 4 * 4)
2320 insert_lnop_after
|= 1;
2321 if (before_16
== 0 && addr
> 10 * 4)
2324 insert_lnop_after
|= 2;
2326 else if (before_16
&& addr
<= 14 * 4)
2327 insert_lnop_after
|= 2;
2330 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2332 if (addr
< hbrp_addr0
)
2334 else if (addr
< hbrp_addr1
)
2338 if (CALL_P (insn
) || JUMP_P (insn
))
2340 if (HINTED_P (insn
))
2343 /* Any branch after the first 15 insns should be on an even
2344 address to avoid a special case branch. There might be
2345 some nops and/or hbrps inserted, so we test after 10
2348 SCHED_ON_EVEN_P (insn
) = 1;
2351 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2355 if (addr
+ length
>= 32 * 4)
2357 gcc_assert (before_4
&& before_16
);
2358 if (hbrp_addr0
> 4 * 4)
2361 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2362 recog_memoized (insn
);
2363 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2364 INSN_ADDRESSES_NEW (insn
,
2365 INSN_ADDRESSES (INSN_UID (before_4
)));
2366 PUT_MODE (insn
, GET_MODE (before_4
));
2367 PUT_MODE (before_4
, TImode
);
2368 if (insert_lnop_after
& 1)
2370 insn
= emit_insn_before (gen_lnop (), before_4
);
2371 recog_memoized (insn
);
2372 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2373 INSN_ADDRESSES_NEW (insn
,
2374 INSN_ADDRESSES (INSN_UID (before_4
)));
2375 PUT_MODE (insn
, TImode
);
2378 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2379 && hbrp_addr1
> 16 * 4)
2382 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2383 recog_memoized (insn
);
2384 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2385 INSN_ADDRESSES_NEW (insn
,
2386 INSN_ADDRESSES (INSN_UID (before_16
)));
2387 PUT_MODE (insn
, GET_MODE (before_16
));
2388 PUT_MODE (before_16
, TImode
);
2389 if (insert_lnop_after
& 2)
2391 insn
= emit_insn_before (gen_lnop (), before_16
);
2392 recog_memoized (insn
);
2393 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2394 INSN_ADDRESSES_NEW (insn
,
2395 INSN_ADDRESSES (INSN_UID
2397 PUT_MODE (insn
, TImode
);
2403 else if (BARRIER_P (insn
))
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target.  */
)
2424 shorten_branches (get_insns ());
2425 /* Insert hbrp at beginning of function */
2426 insn
= next_active_insn (get_insns ());
2428 insert_hbrp_for_ilb_runout (insn
);
2429 /* Insert hbrp after hinted targets. */
2430 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2431 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2432 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2436 static int in_spu_reorg
;
2439 spu_var_tracking (void)
2441 if (flag_var_tracking
)
2444 timevar_push (TV_VAR_TRACKING
);
2445 variable_tracking_main ();
2446 timevar_pop (TV_VAR_TRACKING
);
2447 df_finish_pass (false);
2451 /* Insert branch hints. There are no branch optimizations after this
2452 pass, so it's safe to set our branch hints now. */
2454 spu_machine_dependent_reorg (void)
2458 rtx_insn
*branch
, *insn
;
2459 rtx branch_target
= 0;
2460 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2464 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2466 /* We still do it for unoptimized code because an external
2467 function might have hinted a call or return. */
2468 compute_bb_for_insn ();
2471 spu_var_tracking ();
2472 free_bb_for_insn ();
2476 blocks
= sbitmap_alloc (last_basic_block_for_fn (cfun
));
2477 bitmap_clear (blocks
);
2480 compute_bb_for_insn ();
2482 /* (Re-)discover loops so that bb->loop_father can be used
2483 in the analysis below. */
2484 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2489 (struct spu_bb_info
*) xcalloc (n_basic_blocks_for_fn (cfun
),
2490 sizeof (struct spu_bb_info
));
2492 /* We need exact insn addresses and lengths. */
2493 shorten_branches (get_insns ());
2495 for (i
= n_basic_blocks_for_fn (cfun
) - 1; i
>= 0; i
--)
2497 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2499 if (spu_bb_info
[i
].prop_jump
)
2501 branch
= spu_bb_info
[i
].prop_jump
;
2502 branch_target
= get_branch_target (branch
);
2503 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2504 required_dist
= spu_hint_dist
;
2506 /* Search from end of a block to beginning. In this loop, find
2507 jumps which need a branch and emit them only when:
2508 - it's an indirect branch and we're at the insn which sets
2510 - we're at an insn that will invalidate the hint. e.g., a
2511 call, another hint insn, inline asm that clobbers $hbr, and
2512 some inlined operations (divmodsi4). Don't consider jumps
2513 because they are only at the end of a block and are
2514 considered when we are deciding whether to propagate
2515 - we're getting too far away from the branch. The hbr insns
2516 only have a signed 10 bit offset
2517 We go back as far as possible so the branch will be considered
2518 for propagation when we get to the beginning of the block. */
2519 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2523 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2525 && ((GET_CODE (branch_target
) == REG
2526 && set_of (branch_target
, insn
) != NULL_RTX
)
2527 || insn_clobbers_hbr (insn
)
2528 || branch_addr
- insn_addr
> 600))
2530 rtx_insn
*next
= NEXT_INSN (insn
);
2531 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2532 if (insn
!= BB_END (bb
)
2533 && branch_addr
- next_addr
>= required_dist
)
2537 "hint for %i in block %i before %i\n",
2538 INSN_UID (branch
), bb
->index
,
2540 spu_emit_branch_hint (next
, branch
, branch_target
,
2541 branch_addr
- next_addr
, blocks
);
2546 /* JUMP_P will only be true at the end of a block. When
2547 branch is already set it means we've previously decided
2548 to propagate a hint for that branch into this block. */
2549 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2552 if ((branch_target
= get_branch_target (insn
)))
2555 branch_addr
= insn_addr
;
2556 required_dist
= spu_hint_dist
;
2560 if (insn
== BB_HEAD (bb
))
2566 /* If we haven't emitted a hint for this branch yet, it might
2567 be profitable to emit it in one of the predecessor blocks,
2568 especially for loops. */
2570 basic_block prev
= 0, prop
= 0, prev2
= 0;
2571 int loop_exit
= 0, simple_loop
= 0;
2572 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2574 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2575 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2576 prev
= EDGE_PRED (bb
, j
)->src
;
2578 prev2
= EDGE_PRED (bb
, j
)->src
;
2580 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2581 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2583 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2586 /* If this branch is a loop exit then propagate to previous
2587 fallthru block. This catches the cases when it is a simple
2588 loop or when there is an initial branch into the loop. */
2589 if (prev
&& (loop_exit
|| simple_loop
)
2590 && bb_loop_depth (prev
) <= bb_loop_depth (bb
))
	  /* If there is only one adjacent predecessor, don't propagate
	     outside this loop.  */
2595 else if (prev
&& single_pred_p (bb
)
2596 && prev
->loop_father
== bb
->loop_father
)
2599 /* If this is the JOIN block of a simple IF-THEN then
2600 propagate the hint to the HEADER block. */
2601 else if (prev
&& prev2
2602 && EDGE_COUNT (bb
->preds
) == 2
2603 && EDGE_COUNT (prev
->preds
) == 1
2604 && EDGE_PRED (prev
, 0)->src
== prev2
2605 && prev2
->loop_father
== bb
->loop_father
2606 && GET_CODE (branch_target
) != REG
)
2609 /* Don't propagate when:
2610 - this is a simple loop and the hint would be too far
2611 - this is not a simple loop and there are 16 insns in
2613 - the predecessor block ends in a branch that will be
2615 - the predecessor block ends in an insn that invalidates
2619 && (bbend
= BB_END (prop
))
2620 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2621 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2622 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2625 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2626 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2627 bb
->index
, prop
->index
, bb_loop_depth (bb
),
2628 INSN_UID (branch
), loop_exit
, simple_loop
,
2629 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2631 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2632 spu_bb_info
[prop
->index
].bb_index
= i
;
2634 else if (branch_addr
- next_addr
>= required_dist
)
2637 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2638 INSN_UID (branch
), bb
->index
,
2639 INSN_UID (NEXT_INSN (insn
)));
2640 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2641 branch_addr
- next_addr
, blocks
);
  if (!bitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok. */
  FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again. */
  schedule_insns ();
  df_finish_pass (true);

  insert_hbrp ();

  pad_bb ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
      {
	/* Adjust the LABEL_REF in a hint when we have inserted a nop
	   between its branch label and the branch.  We don't move the
	   label because GCC expects it at the beginning of the block. */
	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
	rtx label_ref = XVECEXP (unspec, 0, 0);
	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
	int offset = 0;

	for (branch = NEXT_INSN (label);
	     !JUMP_P (branch) && !CALL_P (branch);
	     branch = NEXT_INSN (branch))
	  if (NONJUMP_INSN_P (branch))
	    offset += get_attr_length (branch);
	if (offset > 0)
	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
      }

  spu_var_tracking ();

  loop_optimizer_finalize ();

  free_bb_for_insn ();

  in_spu_reorg = 0;
}

/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
	  || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}
2711 get_pipe (rtx_insn
*insn
)
2714 /* Handle inline asm */
2715 if (INSN_CODE (insn
) == -1)
2717 t
= get_attr_type (insn
);
2742 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder(). */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
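/* Roughly how these are used by spu_sched_variable_issue and
   spu_sched_reorder below: spu_ls_first records the value of
   spu_sched_length at the start of the current run of load/store insns,
   and once spu_sched_length - spu_ls_first reaches 4 * 15 (15 consecutive
   4-byte insns on the load/store unit) with another load/store about to
   issue, spu_sched_reorder emits an hbrp (iprefetch) to give the
   instruction fetch a free cycle.  */
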
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}

static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		int max_ready ATTRIBUTE_UNUSED)
{
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
    {
      /* When any block might be at least 8-byte aligned, assume they
	 will all be at least 8-byte aligned to make sure dual issue
	 works out correctly. */
      spu_sched_length = 0;
    }
  spu_ls_first = INT_MAX;
  prev_clock_var = -1;
}
2806 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2807 int verbose ATTRIBUTE_UNUSED
,
2808 rtx_insn
*insn
, int more
)
2812 if (GET_CODE (PATTERN (insn
)) == USE
2813 || GET_CODE (PATTERN (insn
)) == CLOBBER
2814 || (len
= get_attr_length (insn
)) == 0)
2817 spu_sched_length
+= len
;
2819 /* Reset on inline asm */
2820 if (INSN_CODE (insn
) == -1)
2822 spu_ls_first
= INT_MAX
;
2827 p
= get_pipe (insn
);
2829 pipe0_clock
= clock_var
;
2831 pipe1_clock
= clock_var
;
2835 if (clock_var
- prev_ls_clock
> 1
2836 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2837 spu_ls_first
= INT_MAX
;
2838 if (uses_ls_unit (insn
))
2840 if (spu_ls_first
== INT_MAX
)
2841 spu_ls_first
= spu_sched_length
;
2842 prev_ls_clock
= clock_var
;
2845 /* The scheduler hasn't inserted the nop, but we will later on.
2846 Include those nops in spu_sched_length. */
2847 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2848 spu_sched_length
+= 4;
2849 prev_clock_var
= clock_var
;
2851 /* more is -1 when called from spu_sched_reorder for new insns
2852 that don't have INSN_PRIORITY */
2854 prev_priority
= INSN_PRIORITY (insn
);
2857 /* Always try issuing more insns. spu_sched_reorder will decide
2858 when the cycle should be advanced. */
2862 /* This function is called for both TARGET_SCHED_REORDER and
2863 TARGET_SCHED_REORDER2. */
2865 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2866 rtx_insn
**ready
, int *nreadyp
, int clock
)
2868 int i
, nready
= *nreadyp
;
2869 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2874 if (nready
<= 0 || pipe1_clock
>= clock
)
2877 /* Find any rtl insns that don't generate assembly insns and schedule
2879 for (i
= nready
- 1; i
>= 0; i
--)
2882 if (INSN_CODE (insn
) == -1
2883 || INSN_CODE (insn
) == CODE_FOR_blockage
2884 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2886 ready
[i
] = ready
[nready
- 1];
2887 ready
[nready
- 1] = insn
;
2892 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2893 for (i
= 0; i
< nready
; i
++)
2894 if (INSN_CODE (ready
[i
]) != -1)
2897 switch (get_attr_type (insn
))
2922 case TYPE_IPREFETCH
:
2928 /* In the first scheduling phase, schedule loads and stores together
2929 to increase the chance they will get merged during postreload CSE. */
2930 if (!reload_completed
&& pipe_ls
>= 0)
2932 insn
= ready
[pipe_ls
];
2933 ready
[pipe_ls
] = ready
[nready
- 1];
2934 ready
[nready
- 1] = insn
;
2938 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2942 /* When we have loads/stores in every cycle of the last 15 insns and
2943 we are about to schedule another load/store, emit an hbrp insn
2946 && spu_sched_length
- spu_ls_first
>= 4 * 15
2947 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2949 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2950 recog_memoized (insn
);
2951 if (pipe0_clock
< clock
)
2952 PUT_MODE (insn
, TImode
);
2953 spu_sched_variable_issue (file
, verbose
, insn
, -1);
  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to adjust it.  */
  if (in_spu_reorg && spu_dual_nops < 10)
2964 /* When we are at an even address and we are not issuing nops to
2965 improve scheduling then we need to advance the cycle. */
2966 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2967 && (spu_dual_nops
== 0
2970 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2973 /* When at an odd address, schedule the highest priority insn
2974 without considering pipeline. */
2975 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2976 && (spu_dual_nops
== 0
2978 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2983 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2984 pipe0 insn in the ready list, schedule it. */
2985 if (pipe0_clock
< clock
&& pipe_0
>= 0)
2986 schedule_i
= pipe_0
;
2988 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2989 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2991 schedule_i
= pipe_1
;
2993 if (schedule_i
> -1)
2995 insn
= ready
[schedule_i
];
2996 ready
[schedule_i
] = ready
[nready
- 1];
2997 ready
[nready
- 1] = insn
;
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  rtx set;

3009 /* The blockage pattern is used to prevent instructions from being
3010 moved across it and has no cost. */
3011 if (INSN_CODE (insn
) == CODE_FOR_blockage
3012 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3015 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3016 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3019 /* Make sure hbrps are spread out. */
3020 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3021 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3024 /* Make sure hints and hbrps are 2 cycles apart. */
3025 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3026 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3027 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3031 /* An hbrp has no real dependency on other insns. */
3032 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3033 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3036 /* Assuming that it is unlikely an argument register will be used in
3037 the first cycle of the called function, we reduce the cost for
3038 slightly better scheduling of dep_insn. When not hinted, the
3039 mispredicted branch would hide the cost as well. */
3042 rtx target
= get_branch_target (insn
);
3043 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3048 /* And when returning from a function, let's assume the return values
3049 are completed sooner too. */
3050 if (CALL_P (dep_insn
))
  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued.  */
3056 if (INSN_CODE (insn
) == CODE_FOR__return
3057 && (set
= single_set (dep_insn
))
3058 && GET_CODE (SET_DEST (set
)) == REG
3059 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3062 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3063 scheduler makes every insn in a block anti-dependent on the final
3064 jump_insn. We adjust here so higher cost insns will get scheduled
3066 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3067 return insn_cost (dep_insn
) - 3;
/* Create a CONST_DOUBLE from a string. */
rtx
spu_float_const (const char *string, machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
3120 /* Return true if the first SIZE bytes of arr is a constant that can be
3121 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3122 represent the size and offset of the instruction to use. */
3124 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3126 int cpat
, run
, i
, start
;
3130 for (i
= 0; i
< size
&& cpat
; i
++)
3138 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3140 else if (arr
[i
] == 0)
3142 while (arr
[i
+run
] == run
&& i
+run
< 16)
3144 if (run
!= 4 && run
!= 8)
3149 if ((i
& (run
-1)) != 0)
3156 if (cpat
&& (run
|| size
< 16))
3163 *pstart
= start
== -1 ? 16-run
: start
;
3169 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3170 it into a register. MODE is only valid when OP is a CONST_INT. */
3171 static enum immediate_class
3172 classify_immediate (rtx op
, machine_mode mode
)
3175 unsigned char arr
[16];
3176 int i
, j
, repeated
, fsmbi
, repeat
;
3178 gcc_assert (CONSTANT_P (op
));
3180 if (GET_MODE (op
) != VOIDmode
)
3181 mode
= GET_MODE (op
);
3183 /* A V4SI const_vector with all identical symbols is ok. */
3186 && GET_CODE (op
) == CONST_VECTOR
3187 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3188 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3189 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3190 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3191 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3192 op
= CONST_VECTOR_ELT (op
, 0);
3194 switch (GET_CODE (op
))
3198 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3201 /* We can never know if the resulting address fits in 18 bits and can be
3202 loaded with ila. For now, assume the address will not overflow if
3203 the displacement is "small" (fits 'K' constraint). */
3204 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3206 rtx sym
= XEXP (XEXP (op
, 0), 0);
3207 rtx cst
= XEXP (XEXP (op
, 0), 1);
3209 if (GET_CODE (sym
) == SYMBOL_REF
3210 && GET_CODE (cst
) == CONST_INT
3211 && satisfies_constraint_K (cst
))
3220 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3221 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3222 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3228 constant_to_array (mode
, op
, arr
);
3230 /* Check that each 4-byte slot is identical. */
3232 for (i
= 4; i
< 16; i
+= 4)
3233 for (j
= 0; j
< 4; j
++)
3234 if (arr
[j
] != arr
[i
+ j
])
3239 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3240 val
= trunc_int_for_mode (val
, SImode
);
3242 if (which_immediate_load (val
) != SPU_NONE
)
3246 /* Any mode of 2 bytes or smaller can be loaded with an il
3248 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3252 for (i
= 0; i
< 16 && fsmbi
; i
++)
3253 if (arr
[i
] != 0 && repeat
== 0)
3255 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3258 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3260 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}

int
logical_immediate_p (rtx op, machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
3344 iohl_immediate_p (rtx op
, machine_mode mode
)
3347 unsigned char arr
[16];
3350 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3351 || GET_CODE (op
) == CONST_VECTOR
);
3353 if (GET_CODE (op
) == CONST_VECTOR
3354 && !const_vector_immediate_p (op
))
3357 if (GET_MODE (op
) != VOIDmode
)
3358 mode
= GET_MODE (op
);
3360 constant_to_array (mode
, op
, arr
);
3362 /* Check that bytes are repeated. */
3363 for (i
= 4; i
< 16; i
+= 4)
3364 for (j
= 0; j
< 4; j
++)
3365 if (arr
[j
] != arr
[i
+ j
])
3368 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3369 val
= trunc_int_for_mode (val
, SImode
);
3371 return val
>= 0 && val
<= 0xffff;
3375 arith_immediate_p (rtx op
, machine_mode mode
,
3376 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3379 unsigned char arr
[16];
3382 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3383 || GET_CODE (op
) == CONST_VECTOR
);
3385 if (GET_CODE (op
) == CONST_VECTOR
3386 && !const_vector_immediate_p (op
))
3389 if (GET_MODE (op
) != VOIDmode
)
3390 mode
= GET_MODE (op
);
3392 constant_to_array (mode
, op
, arr
);
3394 bytes
= GET_MODE_UNIT_SIZE (mode
);
3395 mode
= mode_for_size (GET_MODE_UNIT_BITSIZE (mode
), MODE_INT
, 0);
3397 /* Check that bytes are repeated. */
3398 for (i
= bytes
; i
< 16; i
+= bytes
)
3399 for (j
= 0; j
< bytes
; j
++)
3400 if (arr
[j
] != arr
[i
+ j
])
3404 for (j
= 1; j
< bytes
; j
++)
3405 val
= (val
<< 8) | arr
[j
];
3407 val
= trunc_int_for_mode (val
, mode
);
3409 return val
>= low
&& val
<= high
;
3412 /* TRUE when op is an immediate and an exact power of 2, and given that
3413 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3414 all entries must be the same. */
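/* For example (illustrative, SFmode as asserted below): 4.0f has the bit
   pattern 0x40800000; its mantissa bits are all zero and
   (0x40800000 >> 23) - 127 == 2, so it is accepted exactly when
   low <= 2 <= high.  */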
3416 exp2_immediate_p (rtx op
, machine_mode mode
, int low
, int high
)
3418 machine_mode int_mode
;
3420 unsigned char arr
[16];
3423 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3424 || GET_CODE (op
) == CONST_VECTOR
);
3426 if (GET_CODE (op
) == CONST_VECTOR
3427 && !const_vector_immediate_p (op
))
3430 if (GET_MODE (op
) != VOIDmode
)
3431 mode
= GET_MODE (op
);
3433 constant_to_array (mode
, op
, arr
);
3435 mode
= GET_MODE_INNER (mode
);
3437 bytes
= GET_MODE_SIZE (mode
);
3438 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3440 /* Check that bytes are repeated. */
3441 for (i
= bytes
; i
< 16; i
+= bytes
)
3442 for (j
= 0; j
< bytes
; j
++)
3443 if (arr
[j
] != arr
[i
+ j
])
3447 for (j
= 1; j
< bytes
; j
++)
3448 val
= (val
<< 8) | arr
[j
];
3450 val
= trunc_int_for_mode (val
, int_mode
);
3452 /* Currently, we only handle SFmode */
3453 gcc_assert (mode
== SFmode
);
3456 int exp
= (val
>> 23) - 127;
3457 return val
> 0 && (val
& 0x007fffff) == 0
3458 && exp
>= low
&& exp
<= high
;
3463 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3466 ea_symbol_ref_p (const_rtx x
)
3470 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3472 rtx plus
= XEXP (x
, 0);
3473 rtx op0
= XEXP (plus
, 0);
3474 rtx op1
= XEXP (plus
, 1);
3475 if (GET_CODE (op1
) == CONST_INT
)
3479 return (GET_CODE (x
) == SYMBOL_REF
3480 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3481 && TREE_CODE (decl
) == VAR_DECL
3482 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)));
/* A constant is "legitimate" if it can be loaded directly into a register.
   We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match.  */
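/* For instance (illustrative examples only): the DImode constant
   0x1234567812345678 qualifies because its two 32-bit halves match, and a
   V4SI CONST_VECTOR whose four elements are all 0x12345678 qualifies for
   the same reason, while a 64-bit constant whose halves differ is not
   accepted by this predicate.  */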
3492 spu_legitimate_constant_p (machine_mode mode
, rtx x
)
3494 subrtx_iterator::array_type array
;
3495 if (GET_CODE (x
) == HIGH
)
3498 /* Reject any __ea qualified reference. These can't appear in
3499 instructions but must be forced to the constant pool. */
3500 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3501 if (ea_symbol_ref_p (*iter
))
3504 /* V4SI with all identical symbols is valid. */
3507 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3508 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3509 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3510 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3511 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3512 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3514 if (GET_CODE (x
) == CONST_VECTOR
3515 && !const_vector_immediate_p (x
))
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode.  */
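/* For example, with a 16-byte access "reg + 24" would be truncated by
   lqd/stqd to "reg + 16" and silently touch the wrong quadword, which is
   why the PLUS case below insists on a 16-byte aligned displacement for
   these modes.  */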
3530 spu_legitimate_address_p (machine_mode mode
,
3531 rtx x
, bool reg_ok_strict
)
3533 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3535 && GET_CODE (x
) == AND
3536 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3537 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3539 switch (GET_CODE (x
))
3542 return !TARGET_LARGE_MEM
;
3546 /* Keep __ea references until reload so that spu_expand_mov can see them
3548 if (ea_symbol_ref_p (x
))
3549 return !reload_in_progress
&& !reload_completed
;
3550 return !TARGET_LARGE_MEM
;
3553 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3561 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3566 rtx op0
= XEXP (x
, 0);
3567 rtx op1
= XEXP (x
, 1);
3568 if (GET_CODE (op0
) == SUBREG
)
3569 op0
= XEXP (op0
, 0);
3570 if (GET_CODE (op1
) == SUBREG
)
3571 op1
= XEXP (op1
, 0);
3572 if (GET_CODE (op0
) == REG
3573 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3574 && GET_CODE (op1
) == CONST_INT
3575 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3576 /* If virtual registers are involved, the displacement will
3577 change later on anyway, so checking would be premature.
3578 Reload will make sure the final displacement after
3579 register elimination is OK. */
3580 || op0
== arg_pointer_rtx
3581 || op0
== frame_pointer_rtx
3582 || op0
== virtual_stack_vars_rtx
)
3583 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3585 if (GET_CODE (op0
) == REG
3586 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3587 && GET_CODE (op1
) == REG
3588 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3599 /* Like spu_legitimate_address_p, except with named addresses. */
3601 spu_addr_space_legitimate_address_p (machine_mode mode
, rtx x
,
3602 bool reg_ok_strict
, addr_space_t as
)
3604 if (as
== ADDR_SPACE_EA
)
3605 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3607 else if (as
!= ADDR_SPACE_GENERIC
)
3610 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3613 /* When the address is reg + const_int, force the const_int into a
3616 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3617 machine_mode mode ATTRIBUTE_UNUSED
)
3620 /* Make sure both operands are registers. */
3621 if (GET_CODE (x
) == PLUS
)
3625 if (ALIGNED_SYMBOL_REF_P (op0
))
3627 op0
= force_reg (Pmode
, op0
);
3628 mark_reg_pointer (op0
, 128);
3630 else if (GET_CODE (op0
) != REG
)
3631 op0
= force_reg (Pmode
, op0
);
3632 if (ALIGNED_SYMBOL_REF_P (op1
))
3634 op1
= force_reg (Pmode
, op1
);
3635 mark_reg_pointer (op1
, 128);
3637 else if (GET_CODE (op1
) != REG
)
3638 op1
= force_reg (Pmode
, op1
);
3639 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3644 /* Like spu_legitimate_address, except with named address support. */
3646 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
,
3649 if (as
!= ADDR_SPACE_GENERIC
)
3652 return spu_legitimize_address (x
, oldx
, mode
);
3655 /* Reload reg + const_int for out-of-range displacements. */
3657 spu_legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
3658 int opnum
, int type
)
3660 bool removed_and
= false;
3662 if (GET_CODE (ad
) == AND
3663 && CONST_INT_P (XEXP (ad
, 1))
3664 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3670 if (GET_CODE (ad
) == PLUS
3671 && REG_P (XEXP (ad
, 0))
3672 && CONST_INT_P (XEXP (ad
, 1))
3673 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3674 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3676 /* Unshare the sum. */
3679 /* Reload the displacement. */
3680 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3681 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3682 opnum
, (enum reload_type
) type
);
3684 /* Add back AND for alignment if we stripped it. */
3686 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3694 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3695 struct attribute_spec.handler. */
3697 spu_handle_fndecl_attribute (tree
* node
,
3699 tree args ATTRIBUTE_UNUSED
,
3700 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3702 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3704 warning (0, "%qE attribute only applies to functions",
3706 *no_add_attrs
= true;
3712 /* Handle the "vector" attribute. */
3714 spu_handle_vector_attribute (tree
* node
, tree name
,
3715 tree args ATTRIBUTE_UNUSED
,
3716 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3718 tree type
= *node
, result
= NULL_TREE
;
3722 while (POINTER_TYPE_P (type
)
3723 || TREE_CODE (type
) == FUNCTION_TYPE
3724 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3725 type
= TREE_TYPE (type
);
3727 mode
= TYPE_MODE (type
);
3729 unsigned_p
= TYPE_UNSIGNED (type
);
3733 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3736 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3739 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3742 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3745 result
= V4SF_type_node
;
3748 result
= V2DF_type_node
;
3754 /* Propagate qualifiers attached to the element type
3755 onto the vector type. */
3756 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3757 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3759 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3762 warning (0, "%qE attribute ignored", name
);
3764 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
3783 spu_initial_elimination_offset (int from
, int to
)
3785 int saved_regs_size
= spu_saved_regs_size ();
3787 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3788 || get_frame_size () || saved_regs_size
)
3789 sp_offset
= STACK_POINTER_OFFSET
;
3790 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3791 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3792 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3793 return get_frame_size ();
3794 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3795 return sp_offset
+ crtl
->outgoing_args_size
3796 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3797 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3798 return get_frame_size () + saved_regs_size
+ sp_offset
;
3804 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3806 machine_mode mode
= TYPE_MODE (type
);
3807 int byte_size
= ((mode
== BLKmode
)
3808 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3810 /* Make sure small structs are left justified in a register. */
3811 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3812 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3817 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3818 int n
= byte_size
/ UNITS_PER_WORD
;
3819 v
= rtvec_alloc (nregs
);
3820 for (i
= 0; i
< n
; i
++)
3822 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3823 gen_rtx_REG (TImode
,
3826 GEN_INT (UNITS_PER_WORD
* i
));
3827 byte_size
-= UNITS_PER_WORD
;
3835 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3837 gen_rtx_EXPR_LIST (VOIDmode
,
3838 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3839 GEN_INT (UNITS_PER_WORD
* n
));
3841 return gen_rtx_PARALLEL (mode
, v
);
3843 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3847 spu_function_arg (cumulative_args_t cum_v
,
3849 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3851 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3854 if (*cum
>= MAX_REGISTER_ARGS
)
3857 byte_size
= ((mode
== BLKmode
)
3858 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3860 /* The ABI does not allow parameters to be passed partially in
3861 reg and partially in stack. */
3862 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3865 /* Make sure small structs are left justified in a register. */
3866 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3867 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3873 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3874 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3875 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3877 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3880 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3884 spu_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
3885 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3887 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3889 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3892 ? ((int_size_in_bytes (type
) + 15) / 16)
3895 : HARD_REGNO_NREGS (cum
, mode
));
3898 /* Variable sized types are passed by reference. */
3900 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
3901 machine_mode mode ATTRIBUTE_UNUSED
,
3902 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3904 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3910 /* Create and return the va_list datatype.
3912 On SPU, va_list is an array type equivalent to
3914 typedef struct __va_list_tag
3916 void *__args __attribute__((__aligned(16)));
3917 void *__skip __attribute__((__aligned(16)));
3921 where __args points to the arg that will be returned by the next
3922 va_arg(), and __skip points to the previous stack frame such that
3923 when __args == __skip we should advance __args by 32 bytes. */
3925 spu_build_builtin_va_list (void)
3927 tree f_args
, f_skip
, record
, type_decl
;
3930 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3933 build_decl (BUILTINS_LOCATION
,
3934 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3936 f_args
= build_decl (BUILTINS_LOCATION
,
3937 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3938 f_skip
= build_decl (BUILTINS_LOCATION
,
3939 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3941 DECL_FIELD_CONTEXT (f_args
) = record
;
3942 DECL_ALIGN (f_args
) = 128;
3943 DECL_USER_ALIGN (f_args
) = 1;
3945 DECL_FIELD_CONTEXT (f_skip
) = record
;
3946 DECL_ALIGN (f_skip
) = 128;
3947 DECL_USER_ALIGN (f_skip
) = 1;
3949 TYPE_STUB_DECL (record
) = type_decl
;
3950 TYPE_NAME (record
) = type_decl
;
3951 TYPE_FIELDS (record
) = f_args
;
3952 DECL_CHAIN (f_args
) = f_skip
;
3954 /* We know this is being padded and we want it too. It is an internal
3955 type so hide the warnings from the user. */
3957 warn_padded
= false;
3959 layout_type (record
);
3963 /* The correct type is an array type of one element. */
3964 return build_array_type (record
, build_index_type (size_zero_node
));
3967 /* Implement va_start by filling the va_list structure VALIST.
3968 NEXTARG points to the first anonymous stack argument.
3970 The following global variables are used to initialize
3971 the va_list structure:
3974 the CUMULATIVE_ARGS for this function
3976 crtl->args.arg_offset_rtx:
3977 holds the offset of the first anonymous stack argument
3978 (relative to the virtual arg pointer). */
3981 spu_va_start (tree valist
, rtx nextarg
)
3983 tree f_args
, f_skip
;
3986 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3987 f_skip
= DECL_CHAIN (f_args
);
3989 valist
= build_simple_mem_ref (valist
);
3991 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3993 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3995 /* Find the __args area. */
3996 t
= make_tree (TREE_TYPE (args
), nextarg
);
3997 if (crtl
->args
.pretend_args_size
> 0)
3998 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
3999 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4000 TREE_SIDE_EFFECTS (t
) = 1;
4001 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4003 /* Find the __skip area. */
4004 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4005 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
4006 - STACK_POINTER_OFFSET
));
4007 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4008 TREE_SIDE_EFFECTS (t
) = 1;
4009 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4012 /* Gimplify va_arg by updating the va_list structure
4013 VALIST as required to retrieve an argument of type
4014 TYPE, and returning that argument.
4016 ret = va_arg(VALIST, TYPE);
4018 generates code equivalent to:
4020 paddedsize = (sizeof(TYPE) + 15) & -16;
4021 if (VALIST.__args + paddedsize > VALIST.__skip
4022 && VALIST.__args <= VALIST.__skip)
4023 addr = VALIST.__skip + 32;
4025 addr = VALIST.__args;
4026 VALIST.__args = addr + paddedsize;
4027 ret = *(TYPE *)addr;
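
   For example (illustrative arithmetic only): for a 4-byte int argument,
   paddedsize = (4 + 15) & -16 = 16, so when stepping __args forward by 16
   would move it past __skip while __args is still <= __skip, the argument
   is read starting at __skip + 32 instead of at __args.  */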
4030 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4031 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4033 tree f_args
, f_skip
;
4035 HOST_WIDE_INT size
, rsize
;
4037 bool pass_by_reference_p
;
4039 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4040 f_skip
= DECL_CHAIN (f_args
);
4043 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4045 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4047 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4049 /* if an object is dynamically sized, a pointer to it is passed
4050 instead of the object itself. */
4051 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4053 if (pass_by_reference_p
)
4054 type
= build_pointer_type (type
);
4055 size
= int_size_in_bytes (type
);
4056 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4058 /* build conditional expression to calculate addr. The expression
4059 will be gimplified later. */
4060 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4061 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4062 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4063 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4064 unshare_expr (skip
)));
4066 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4067 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4068 unshare_expr (args
));
4070 gimplify_assign (addr
, tmp
, pre_p
);
4072 /* update VALIST.__args */
4073 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4074 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4076 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4079 if (pass_by_reference_p
)
4080 addr
= build_va_arg_indirect_ref (addr
);
4082 return build_va_arg_indirect_ref (addr
);
4085 /* Save parameter registers starting with the register that corresponds
4086 to the first unnamed parameters. If the first unnamed parameter is
4087 in the stack then save no registers. Set pretend_args_size to the
4088 amount of space needed to save the registers. */
4090 spu_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4091 tree type
, int *pretend_size
, int no_rtl
)
4098 int ncum
= *get_cumulative_args (cum
);
      /* cum currently points to the last named argument; we want to
	 start at the next argument.  */
4102 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4104 offset
= -STACK_POINTER_OFFSET
;
4105 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4107 tmp
= gen_frame_mem (V4SImode
,
4108 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4110 emit_move_insn (tmp
,
4111 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4114 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}
/* This is called any time we inspect the alignment of a register for
   addresses.  */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1.  */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
4152 /* Return TRUE if we are certain the mem refers to a complete object
4153 which is both 16-byte aligned and padded to a 16-byte boundary. This
4154 would make it safe to store with a single instruction.
4155 We guarantee the alignment and padding for static objects by aligning
4156 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4157 FIXME: We currently cannot guarantee this for objects on the stack
4158 because assign_parm_setup_stack calls assign_stack_local with the
4159 alignment of the parameter mode and in that case the alignment never
4160 gets adjusted by LOCAL_ALIGNMENT. */
4162 store_with_one_insn_p (rtx mem
)
4164 machine_mode mode
= GET_MODE (mem
);
4165 rtx addr
= XEXP (mem
, 0);
4166 if (mode
== BLKmode
)
4168 if (GET_MODE_SIZE (mode
) >= 16)
4170 /* Only static objects. */
4171 if (GET_CODE (addr
) == SYMBOL_REF
)
4173 /* We use the associated declaration to make sure the access is
4174 referring to the whole object.
4175 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4176 if it is necessary. Will there be cases where one exists, and
4177 the other does not? Will there be cases where both exist, but
4178 have different types? */
4179 tree decl
= MEM_EXPR (mem
);
4181 && TREE_CODE (decl
) == VAR_DECL
4182 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4184 decl
= SYMBOL_REF_DECL (addr
);
4186 && TREE_CODE (decl
) == VAR_DECL
4187 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4193 /* Return 1 when the address is not valid for a simple load and store as
4194 required by the '_mov*' patterns. We could make this less strict
4195 for loads, but we prefer mem's to look the same so they are more
4196 likely to be merged. */
4198 address_needs_split (rtx mem
)
4200 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4201 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4202 || !(store_with_one_insn_p (mem
)
4203 || mem_is_padded_component_ref (mem
))))
4209 static GTY(()) rtx cache_fetch
; /* __cache_fetch function */
4210 static GTY(()) rtx cache_fetch_dirty
; /* __cache_fetch_dirty function */
4211 static alias_set_type ea_alias_set
= -1; /* alias set for __ea memory */
4213 /* MEM is known to be an __ea qualified memory access. Emit a call to
4214 fetch the ppu memory to local store, and return its address in local
4218 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4222 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4223 if (!cache_fetch_dirty
)
4224 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4225 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4226 2, ea_addr
, EAmode
, ndirty
, SImode
);
4231 cache_fetch
= init_one_libfunc ("__cache_fetch");
4232 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4233 1, ea_addr
, EAmode
);
4237 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4238 dirty bit marking, inline.
4240 The cache control data structure is an array of
4242 struct __cache_tag_array
4244 unsigned int tag_lo[4];
4245 unsigned int tag_hi[4];
4246 void *data_pointer[4];
4248 vector unsigned short dirty_bits[4];
4252 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4256 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4257 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4258 rtx index_mask
= gen_reg_rtx (SImode
);
4259 rtx tag_arr
= gen_reg_rtx (Pmode
);
4260 rtx splat_mask
= gen_reg_rtx (TImode
);
4261 rtx splat
= gen_reg_rtx (V4SImode
);
4262 rtx splat_hi
= NULL_RTX
;
4263 rtx tag_index
= gen_reg_rtx (Pmode
);
4264 rtx block_off
= gen_reg_rtx (SImode
);
4265 rtx tag_addr
= gen_reg_rtx (Pmode
);
4266 rtx tag
= gen_reg_rtx (V4SImode
);
4267 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4268 rtx cache_tag_hi
= NULL_RTX
;
4269 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4270 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4271 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4272 rtx tag_equal_hi
= NULL_RTX
;
4273 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4274 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4275 rtx eq_index
= gen_reg_rtx (SImode
);
4276 rtx bcomp
, hit_label
, hit_ref
, cont_label
;
4279 if (spu_ea_model
!= 32)
4281 splat_hi
= gen_reg_rtx (V4SImode
);
4282 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4283 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4286 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4287 emit_move_insn (tag_arr
, tag_arr_sym
);
4288 v
= 0x0001020300010203LL
;
4289 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4290 ea_addr_si
= ea_addr
;
4291 if (spu_ea_model
!= 32)
4292 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4294 /* tag_index = ea_addr & (tag_array_size - 128) */
4295 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4297 /* splat ea_addr to all 4 slots. */
4298 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4299 /* Similarly for high 32 bits of ea_addr. */
4300 if (spu_ea_model
!= 32)
4301 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4303 /* block_off = ea_addr & 127 */
4304 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4306 /* tag_addr = tag_arr + tag_index */
4307 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4309 /* Read cache tags. */
4310 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4311 if (spu_ea_model
!= 32)
4312 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4313 plus_constant (Pmode
,
4316 /* tag = ea_addr & -128 */
4317 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4319 /* Read all four cache data pointers. */
4320 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4321 plus_constant (Pmode
,
4325 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4326 if (spu_ea_model
!= 32)
4328 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4329 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4332 /* At most one of the tags compare equal, so tag_equal has one
4333 32-bit slot set to all 1's, with the other slots all zero.
4334 gbb picks off low bit from each byte in the 128-bit registers,
4335 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4337 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4338 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4340 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4341 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4343 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4344 (rotating eq_index mod 16 bytes). */
4345 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4346 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4348 /* Add block offset to form final data address. */
4349 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4351 /* Check that we did hit. */
4352 hit_label
= gen_label_rtx ();
4353 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4354 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4355 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
,
4356 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4358 /* Say that this branch is very likely to happen. */
4359 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4360 add_int_reg_note (insn
, REG_BR_PROB
, v
);
4362 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4363 cont_label
= gen_label_rtx ();
4364 emit_jump_insn (gen_jump (cont_label
));
4367 emit_label (hit_label
);
4372 rtx dirty_bits
= gen_reg_rtx (TImode
);
4373 rtx dirty_off
= gen_reg_rtx (SImode
);
4374 rtx dirty_128
= gen_reg_rtx (TImode
);
4375 rtx neg_block_off
= gen_reg_rtx (SImode
);
4377 /* Set up mask with one dirty bit per byte of the mem we are
4378 writing, starting from top bit. */
4380 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4381 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4386 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4388 /* Form index into cache dirty_bits. eq_index is one of
4389 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4390 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4391 offset to each of the four dirty_bits elements. */
4392 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4394 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4396 /* Rotate bit mask to proper bit. */
4397 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4398 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4399 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4401 /* Or in the new dirty bits. */
4402 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4405 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4408 emit_label (cont_label
);
4412 expand_ea_mem (rtx mem
, bool is_store
)
4415 rtx data_addr
= gen_reg_rtx (Pmode
);
4418 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4419 if (optimize_size
|| optimize
== 0)
4420 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4422 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4424 if (ea_alias_set
== -1)
4425 ea_alias_set
= new_alias_set ();
4427 /* We generate a new MEM RTX to refer to the copy of the data
4428 in the cache. We do not copy memory attributes (except the
4429 alignment) from the original MEM, as they may no longer apply
4430 to the cache copy. */
4431 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4432 set_mem_alias_set (new_mem
, ea_alias_set
);
4433 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
4439 spu_expand_mov (rtx
* ops
, machine_mode mode
)
4441 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4443 /* Perform the move in the destination SUBREG's inner mode. */
4444 ops
[0] = SUBREG_REG (ops
[0]);
4445 mode
= GET_MODE (ops
[0]);
4446 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4447 gcc_assert (ops
[1]);
4450 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4452 rtx from
= SUBREG_REG (ops
[1]);
4453 machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4455 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4456 && GET_MODE_CLASS (imode
) == MODE_INT
4457 && subreg_lowpart_p (ops
[1]));
4459 if (GET_MODE_SIZE (imode
) < 4)
4461 if (imode
!= GET_MODE (from
))
4462 from
= gen_rtx_SUBREG (imode
, from
, 0);
4464 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4466 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4468 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4471 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4475 /* At least one of the operands needs to be a register. */
4476 if ((reload_in_progress
| reload_completed
) == 0
4477 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4479 rtx temp
= force_reg (mode
, ops
[1]);
4480 emit_move_insn (ops
[0], temp
);
4483 if (reload_in_progress
|| reload_completed
)
4485 if (CONSTANT_P (ops
[1]))
4486 return spu_split_immediate (ops
);
4490 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4492 if (GET_CODE (ops
[1]) == CONST_INT
)
4494 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4495 if (val
!= INTVAL (ops
[1]))
4497 emit_move_insn (ops
[0], GEN_INT (val
));
4503 if (MEM_ADDR_SPACE (ops
[0]))
4504 ops
[0] = expand_ea_mem (ops
[0], true);
4505 return spu_split_store (ops
);
4509 if (MEM_ADDR_SPACE (ops
[1]))
4510 ops
[1] = expand_ea_mem (ops
[1], false);
4511 return spu_split_load (ops
);
4518 spu_convert_move (rtx dst
, rtx src
)
4520 machine_mode mode
= GET_MODE (dst
);
4521 machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4523 gcc_assert (GET_MODE (src
) == TImode
);
4524 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4525 emit_insn (gen_rtx_SET (reg
,
4526 gen_rtx_TRUNCATE (int_mode
,
4527 gen_rtx_LSHIFTRT (TImode
, src
,
4528 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4529 if (int_mode
!= mode
)
4531 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4532 emit_move_insn (dst
, reg
);
4536 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4537 the address from SRC and SRC+16. Return a REG or CONST_INT that
4538 specifies how many bytes to rotate the loaded registers, plus any
4539 extra from EXTRA_ROTQBY. The address and rotate amounts are
4540 normalized to improve merging of loads and rotate computations. */
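/* As a sketch of the common case (illustrative only): to load a word from
   reg + 8 where reg is 16-byte aligned, the aligned quadword at reg is
   fetched with lqd/lqx and then rotated left by 8 bytes with rotqby,
   which moves the wanted word into the preferred slot.  */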
4542 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4544 rtx addr
= XEXP (src
, 0);
4545 rtx p0
, p1
, rot
, addr0
, addr1
;
4551 if (MEM_ALIGN (src
) >= 128)
4552 /* Address is already aligned; simply perform a TImode load. */ ;
4553 else if (GET_CODE (addr
) == PLUS
)
4556 aligned reg + aligned reg => lqx
4557 aligned reg + unaligned reg => lqx, rotqby
4558 aligned reg + aligned const => lqd
4559 aligned reg + unaligned const => lqd, rotqbyi
4560 unaligned reg + aligned reg => lqx, rotqby
4561 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4562 unaligned reg + aligned const => lqd, rotqby
4563 unaligned reg + unaligned const -> not allowed by legitimate address
4565 p0
= XEXP (addr
, 0);
4566 p1
= XEXP (addr
, 1);
4567 if (!reg_aligned_for_addr (p0
))
4569 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4571 rot
= gen_reg_rtx (SImode
);
4572 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4574 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4578 && INTVAL (p1
) * BITS_PER_UNIT
4579 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4581 rot
= gen_reg_rtx (SImode
);
4582 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4587 rtx x
= gen_reg_rtx (SImode
);
4588 emit_move_insn (x
, p1
);
4589 if (!spu_arith_operand (p1
, SImode
))
4591 rot
= gen_reg_rtx (SImode
);
4592 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4593 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4601 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4603 rot_amt
= INTVAL (p1
) & 15;
4604 if (INTVAL (p1
) & -16)
4606 p1
= GEN_INT (INTVAL (p1
) & -16);
4607 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4612 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4616 else if (REG_P (addr
))
4618 if (!reg_aligned_for_addr (addr
))
4621 else if (GET_CODE (addr
) == CONST
)
4623 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4624 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4625 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4627 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4629 addr
= gen_rtx_CONST (Pmode
,
4630 gen_rtx_PLUS (Pmode
,
4631 XEXP (XEXP (addr
, 0), 0),
4632 GEN_INT (rot_amt
& -16)));
4634 addr
= XEXP (XEXP (addr
, 0), 0);
4638 rot
= gen_reg_rtx (Pmode
);
4639 emit_move_insn (rot
, addr
);
4642 else if (GET_CODE (addr
) == CONST_INT
)
4644 rot_amt
= INTVAL (addr
);
4645 addr
= GEN_INT (rot_amt
& -16);
4647 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4649 rot
= gen_reg_rtx (Pmode
);
4650 emit_move_insn (rot
, addr
);
4653 rot_amt
+= extra_rotby
;
4659 rtx x
= gen_reg_rtx (SImode
);
4660 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4664 if (!rot
&& rot_amt
)
4665 rot
= GEN_INT (rot_amt
);
4667 addr0
= copy_rtx (addr
);
4668 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4669 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4673 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4674 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4675 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4682 spu_split_load (rtx
* ops
)
4684 machine_mode mode
= GET_MODE (ops
[0]);
4685 rtx addr
, load
, rot
;
4688 if (GET_MODE_SIZE (mode
) >= 16)
4691 addr
= XEXP (ops
[1], 0);
4692 gcc_assert (GET_CODE (addr
) != AND
);
4694 if (!address_needs_split (ops
[1]))
4696 ops
[1] = change_address (ops
[1], TImode
, addr
);
4697 load
= gen_reg_rtx (TImode
);
4698 emit_insn (gen__movti (load
, ops
[1]));
4699 spu_convert_move (ops
[0], load
);
4703 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4705 load
= gen_reg_rtx (TImode
);
4706 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4709 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4711 spu_convert_move (ops
[0], load
);
spu_split_store (rtx * ops)
  machine_mode mode = GET_MODE (ops[0]);
  rtx addr, p0, p1, p1_lo, smem;

  if (GET_MODE_SIZE (mode) >= 16)

  addr = XEXP (ops[0], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[0]))
      reg = gen_reg_rtx (TImode);
      emit_insn (gen_spu_convert (reg, ops[1]));
      ops[0] = change_address (ops[0], TImode, addr);
      emit_move_insn (ops[0], reg);

  if (GET_CODE (addr) == PLUS)
      /* aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const => lqx, c?d, shuf, stqx  */
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
          p1_lo = GEN_INT (INTVAL (p1) & 15);
          if (reg_aligned_for_addr (p0))
              p1 = GEN_INT (INTVAL (p1) & -16);
              if (p1 == const0_rtx)
                addr = gen_rtx_PLUS (SImode, p0, p1);
              rtx x = gen_reg_rtx (SImode);
              emit_move_insn (x, p1);
              addr = gen_rtx_PLUS (SImode, p0, x);
  else if (REG_P (addr))
      p1 = p1_lo = const0_rtx;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;   /* aform doesn't use p1 */
      if (ALIGNED_SYMBOL_REF_P (addr))
      else if (GET_CODE (addr) == CONST
               && GET_CODE (XEXP (addr, 0)) == PLUS
               && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
               && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
          HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
          addr = gen_rtx_CONST (Pmode,
                                gen_rtx_PLUS (Pmode,
                                              XEXP (XEXP (addr, 0), 0),
                                              GEN_INT (v & -16)));
          addr = XEXP (XEXP (addr, 0), 0);
          p1_lo = GEN_INT (v & 15);
      else if (GET_CODE (addr) == CONST_INT)
          p1_lo = GEN_INT (INTVAL (addr) & 15);
          addr = GEN_INT (INTVAL (addr) & -16);
          p1_lo = gen_reg_rtx (SImode);
          emit_move_insn (p1_lo, addr);

  gcc_assert (aform == 0 || aform == 1);
  reg = gen_reg_rtx (TImode);

  scalar = store_with_one_insn_p (ops[0]);
      /* We could copy the flags from the ops[0] MEM to mem here.  We don't
         because we want this load to be optimized away if possible, and
         copying the flags will prevent that in certain cases, e.g. consider
         the volatile flag.  */
      rtx pat = gen_reg_rtx (TImode);
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_aligned_for_addr (p0))
        p0 = stack_pointer_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));

      if (GET_CODE (ops[1]) == REG)
        emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));

      if (GET_MODE_SIZE (mode) < 4 && scalar)
        emit_insn (gen_ashlti3
                   (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));

  smem = change_address (ops[0], TImode, copy_rtx (addr));
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types.  */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
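  /* Sketch (added for clarity, not part of the original code): a split
     store is a read-modify-write of the containing quadword, roughly

         lqd/lqx          reg, [addr & -16]    load the existing 16 bytes
         cbd/chd/cwd/cdd  pat, p1_lo(p0)       build the insertion pattern
         shufb            reg, value, reg, pat merge the new value in
         stqd/stqx        reg, [addr & -16]    store the quadword back

     which is what the gen_movti / gen_cpat / gen_shufb / gen_movti calls
     above expand to.  */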
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
mem_is_padded_component_ref (rtx x)
  tree t = MEM_EXPR (x);

  if (!t || TREE_CODE (t) != COMPONENT_REF)
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))

  /* Only do this for RECORD_TYPEs, not UNION_TYPEs.  */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)

  /* Make sure they are the same mode.  */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))

  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     ...  */
  if (TREE_CHAIN (t) == 0)

  /* If the following field is also aligned then this field will be
     ...  */
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
/* Parse the -mfixed-range= option string.  */
fix_range (const char *const_str)
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

      dash = strchr (str, '-');
          warning (0, "value of -mfixed-range must have form REG1-REG2");
      comma = strchr (dash + 1, ',');

      first = decode_reg_name (str);
          warning (0, "unknown register name: %s", str);

      last = decode_reg_name (dash + 1);
          warning (0, "unknown register name: %s", dash + 1);

          warning (0, "%s-%s is an empty range", str, dash + 1);

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;
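/* Example (added; the register numbers are only illustrative):
   -mfixed-range=75-79 marks registers 75 through 79 as fixed, and
   -mfixed-range=75-79,110-111 handles several comma-separated ranges.  */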
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
fsmbi_const_p (rtx x)
  /* We can always choose TImode for CONST_INT because the high bits
     of an SImode will always be all 1s, i.e., valid for fsmbi.  */
  enum immediate_class c = classify_immediate (x, TImode);
  return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
cpat_const_p (rtx x, machine_mode mode)
  enum immediate_class c = classify_immediate (x, mode);
  return c == IC_CPAT;
gen_cpat_const (rtx * ops)
  unsigned char dst[16];
  int i, offset, shift, isize;

  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
          && GET_CODE (ops[1]) != REG))
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))

  for (i = 0; i < 16; i++)
  isize = INTVAL (ops[3]);
  else if (isize == 2)
  offset = (INTVAL (ops[2]) +
            (GET_CODE (ops[1]) == CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
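/* Note (added): the TImode constant returned here plays the same role as
   the control word produced by the cbd/chd/cwd/cdd instructions -- a
   shuffle pattern that tells a following shufb where in the quadword to
   insert a 1-, 2-, 4- or 8-byte value.  */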
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array.  */
constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
          && (mode == SFmode || mode == DFmode)))
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
        val = const_double_to_hwint (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
          arr[i] = val & 0xff;
      /* Splat the constant across the whole array.  */
      for (j = 0, i = first + 1; i < 16; i++)
          j = (j == first) ? 0 : j + 1;
  else if (GET_CODE (x) == CONST_DOUBLE)
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
          arr[i] = val & 0xff;
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
          arr[i] = val & 0xff;
  else if (GET_CODE (x) == CONST_VECTOR)
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
          elt = CONST_VECTOR_ELT (x, i);
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
              if (GET_CODE (elt) == CONST_DOUBLE)
                val = const_double_to_hwint (elt);
              first = GET_MODE_SIZE (mode) - 1;
              if (first + i * GET_MODE_SIZE (mode) > 16)
              for (j = first; j >= 0; j--)
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
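/* Example (added): with mode SImode and x == 0x01020304 the code above
   first stores the bytes big-endian into arr[0..3] = {01,02,03,04} and
   then splats them, so all 16 bytes read {01,02,03,04, 01,02,03,04, ...}.
   A QImode constant is repeated into every byte.  */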
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3].  */
array_to_constant (machine_mode mode, const unsigned char arr[16])
  machine_mode inner_mode;
  int units, size, i, j, k;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
        val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);

      for (i = high = 0; i < 8; i++)
        high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
        val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);

      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);

      for (i = 0, val = 0; i < 8; i++)
        val = (val << 8) | arr[i];
      return hwint_to_const_double (DFmode, val);

  if (!VECTOR_MODE_P (mode))

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
      for (j = 0; j < size; j++, k++)
        val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));

  return gen_rtx_CONST_VECTOR (mode, v);
reloc_diagnostic (rtx x)
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL.  */
  if (decl && !DECL_P (decl))

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
      /* We use last_assemble_variable_decl to get line information.  It's
         not always going to be right and might not even be close, but will
         be right for the more common cases.  */
      if (!last_assemble_variable_decl || in_section == ctors_section)
        loc = DECL_SOURCE_LOCATION (decl);
        loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);

      if (TARGET_WARN_RELOC)
                    "creating run-time relocation for %qD", decl);
                  "creating run-time relocation for %qD", decl);

  if (TARGET_WARN_RELOC)
    warning_at (input_location, 0, "creating run-time relocation");
    error_at (input_location, "creating run-time relocation");
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them.  */
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);

spu_asm_globalize_label (FILE * file, const char *name)
  fputs ("\t.global\t", file);
  assemble_name (file, name);
spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
               int opno ATTRIBUTE_UNUSED, int *total,
               bool speed ATTRIBUTE_UNUSED)
  int code = GET_CODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough.  */
      *total = COSTS_N_INSNS (13);
      *total = COSTS_N_INSNS (6);
      if (satisfies_constraint_K (x))
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
        *total = COSTS_N_INSNS (3);
      *total = COSTS_N_INSNS (3);
      *total = COSTS_N_INSNS (0);
      *total = COSTS_N_INSNS (5);
    case FLOAT_TRUNCATE:
    case UNSIGNED_FLOAT:
      *total = COSTS_N_INSNS (7);
      *total = COSTS_N_INSNS (9);
        GET_CODE (XEXP (x, 0)) == REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
      *total = COSTS_N_INSNS (20);
      *total = COSTS_N_INSNS (4);
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
        *total = COSTS_N_INSNS (4);

  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0).  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
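  /* Note (added): with the size scaling above, an integer operation in a
     mode twice as wide as SImode (e.g. DImode) ends up costing four times
     the base cost, since the size ratio is squared.  */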
spu_unwind_word_mode (void)

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
  return decl && !TARGET_LARGE_MEM;
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.)  */
spu_allocate_stack (rtx op0, rtx op1)
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* Copy the back chain so we can save it back again.  */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
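  /* Note (added): the shuffle pattern 0x00010203 repeated in every word
     slot splats the SImode allocation size across the vector, so the
     single vector subtract decrements both the stack pointer in slot 0
     and the Available Stack Size in slot 1 at once.  */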
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain.  */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
spu_init_libfuncs (void)
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
spu_gen_subreg (machine_mode mode, rtx x)
  if (GET_CODE (x) == SUBREG)
  if (GET_MODE (x) == mode)
  return gen_rtx_SUBREG (mode, x, 0);

spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
  return (TYPE_MODE (type) == BLKmode
          || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
          || int_size_in_bytes (type) >
          (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
/* Create the built-in types and functions.  */
enum spu_function_code
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params},
#include "spu-builtins.def"

static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];

/* Returns the spu builtin decl for CODE.  */
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
  if (code >= NUM_SPU_BUILTINS)
    return error_mark_node;

  return spu_builtin_decls[code];
spu_init_builtins (void)
  struct spu_builtin_description *d;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared.  */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
      char name[64];    /* build_function will make a copy.  */

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)

        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      spu_builtin_decls[i] =
        add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (spu_builtin_decls[i]) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (spu_builtin_decls[i]) = 1;
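  /* Note (added): each prototype is assembled from the parm[] array of
     the table entry -- parm[0] names the return type and the remaining
     entries the argument types, which are consed onto the list from last
     to first so the resulting TREE_LIST comes out in declaration order.  */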
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);

spu_safe_dma (HOST_WIDE_INT channel)
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
spu_builtin_splats (rtx ops[])
  machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));

      rtx reg = gen_reg_rtx (TImode);

      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);

            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,

      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
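  /* Note (added): the four TImode constants above appear to be shufb
     patterns keyed to the element size -- bytes 0-7 repeated (DImode/
     DFmode), word 0 repeated (SImode/SFmode), the halfword at bytes 2-3
     repeated (HImode), and byte 3 repeated (QImode) -- so the shufb
     copies the scalar from its preferred slot in ops[1] into every
     element of the result.  */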
spu_builtin_extract (rtx ops[])
  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));

  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
spu_builtin_insert (rtx ops[])
  machine_mode mode = GET_MODE (ops[0]);
  machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
spu_builtin_promote (rtx ops[])
  machine_mode mode, imode;
  rtx rot, from, offset;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);

      offset = gen_reg_rtx (SImode);
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));

  emit_insn (gen_rotqby_ti (rot, from, offset));
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      static unsigned char const shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      static unsigned char const insna[16] = {
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (mem, bi);

      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      static unsigned char const insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

  emit_insn (gen_sync ());
spu_warn_func_return (tree decl)
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !spu_naked_function_p (decl);
spu_expand_sign_extend (rtx ops[])
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);

  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
      for (i = 0; i < 16; i++)
      switch (GET_MODE (ops[1]))
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last - 1] = 0x02;
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;

  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
spu_expand_vector_init (rtx target, rtx vals)
  machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
          if (first_constant == NULL_RTX)
      if (i > 0 && !rtx_equal_p (x, first))

  /* If all elements are the same, use splats to repeat elements.  */
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));

  /* Load constant parts.  */
  if (n_var != n_elts)
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* Fill empty slots with the first constant, this increases
             our chance of using splats in the recursive call below.  */
          for (i = 0; i < n_elts; ++i)
              x = XVECEXP (constant_parts_rtx, 0, i);
              if (!(CONST_INT_P (x)
                    || GET_CODE (x) == CONST_DOUBLE
                    || GET_CODE (x) == CONST_FIXED))
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;

          spu_expand_vector_init (target, constant_parts_rtx);

  /* Load variable parts.  */
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
          x = XVECEXP (vals, 0, i);
          if (!(CONST_INT_P (x)
                || GET_CODE (x) == CONST_DOUBLE
                || GET_CODE (x) == CONST_FIXED))
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
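  /* Example (added): initializing { x, 1, 2, 3 } with a variable x first
     emits the constant vector { 1, 1, 2, 3 } (the empty slot is padded
     with the first constant, which may enable a splat), and then a single
     element insert of x at position 0.  */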
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
get_vec_cmp_insn (enum rtx_code code,
                  machine_mode dest_mode,
                  machine_mode op_mode)
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is the expected destination mode.  This is a recursive function.  */
spu_emit_vector_compare (enum rtx_code rcode,
  machine_mode dest_mode;
  machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination V4SImode.
     Double floating point vector compare instructions use destination
     V2DImode.  Move destination to the appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
      bool swap_operands = false;
      bool try_again = false;
          swap_operands = true;
          swap_operands = true;
          /* Treat A != B as ~(A==B).  */
            enum rtx_code rev_code;
            enum insn_code nor_code;

            rev_code = reverse_condition_maybe_unordered (rcode);
            rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);

            nor_code = optab_handler (one_cmpl_optab, dest_mode);
            gcc_assert (nor_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
            if (dmode != dest_mode)
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);

          /* Try GT/GTU/LT/LTU OR EQ.  */
            enum insn_code ior_code;
            enum rtx_code new_code;

              case GE:  new_code = GT;  break;
              case GEU: new_code = GTU; break;
              case LE:  new_code = LT;  break;
              case LEU: new_code = LTU; break;

            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, dest_mode);
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
            if (dmode != dest_mode)
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);

            enum insn_code ior_code;

            lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
            gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, dest_mode);
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
            if (dmode != dest_mode)
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);

          /* Implement as (A==A) & (B==B).  */
            enum insn_code and_code;

            a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
            b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);

            and_code = optab_handler (and_optab, dest_mode);
            gcc_assert (and_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
            if (dmode != dest_mode)
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);

      /* You only get two chances.  */
        vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
  machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));
spu_force_reg (machine_mode mode, rtx op)
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
  HOST_WIDE_INT v = 0;

  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d]",
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
      else if (GET_CODE (op) == CONST_INT)
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
               spu_builtin_range[range].low, spu_builtin_range[range].high,

          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned.  */

          /* This is used for lqr and stqr.  */

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored", lsbits,
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
  enum insn_code icode = (enum insn_code) d->icode;

  /* Expand the arguments into rtl.  */
  if (d->parm[0] != SPU_BTI_VOID)

  for (a = 0; d->parm[a + 1] != SPU_BTI_END_OF_PARAMS; i++, a++)
      tree arg = CALL_EXPR_ARG (exp, a);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);

  gcc_assert (i == insn_data[icode].n_generator_args);
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
  enum insn_code icode = (enum insn_code) d->icode;
  machine_mode mode, tmode;

  /* Set up ops[] with values from arglist.  */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  if (d->parm[0] != SPU_BTI_VOID)
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         ...  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))

  if (d->fcode == SPU_MASK_FOR_LOAD)
      machine_mode mode = insn_data[icode].operand[1].mode;

      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);

  /* Ignore align_hint, but still expand its args in case they have
     ...  */
  if (icode == CODE_FOR_spu_align_hint)

  /* Handle the rest of the operands.  */
  for (p = 1; i < n_operands; i++, p++)
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
              rtx reg = gen_reg_rtx (mode);
              machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
              emit_insn (gen_spu_splats (reg, ops[i]));

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);

      pat = GEN_FCN (icode) (0);
      pat = GEN_FCN (icode) (ops[0]);
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
      emit_jump_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
spu_expand_builtin (tree exp,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);

/* Implement targetm.vectorize.builtin_mask_for_load.  */
spu_builtin_mask_for_load (void)
  return spu_builtin_decls[SPU_MASK_FOR_LOAD];
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                int misalign ATTRIBUTE_UNUSED)
  switch (type_of_cost)
      case cond_branch_not_taken:
      case vec_promote_demote:

      /* Load + rotate.  */
      case unaligned_load:
      case cond_branch_taken:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;
/* Implement targetm.vectorize.init_cost.  */
spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;

/* Implement targetm.vectorize.add_stmt_cost.  */
spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                   struct _stmt_vec_info *stmt_info, int misalign,
                   enum vect_cost_model_location where)
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;

/* Implement targetm.vectorize.finish_cost.  */
spu_finish_cost (void *data, unsigned *prologue_cost,
                 unsigned *body_cost, unsigned *epilogue_cost)
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];

/* Implement targetm.vectorize.destroy_cost_data.  */
spu_destroy_cost_data (void *data)
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach the desired alignment.  */
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
  /* All other types are naturally aligned.  */

/* Return the appropriate mode for a named address pointer.  */
spu_addr_space_pointer_mode (addr_space_t addrspace)
    case ADDR_SPACE_GENERIC:

/* Return the appropriate mode for a named address address.  */
spu_addr_space_address_mode (addr_space_t addrspace)
    case ADDR_SPACE_GENERIC:

/* Determine if one named address space is a subset of another.  */
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);

  if (subset == superset)

  /* If we have -mno-address-space-conversion, treat __ea and generic as not
     being subsets but instead as disjoint address spaces.  */
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)

    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
/* Convert from one address space to another.  */
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
        emit_insn (gen_addsi3 (result, op, ls));
        emit_insn (gen_adddi3 (result, op, ls));
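      /* Note (added): the two directions mirror each other -- an __ea
         pointer is converted to a generic one by subtracting the
         __ea_local_store base (unless the pointer is null), and a generic
         pointer becomes an __ea pointer by adding that base back.  */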
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
   -2 are instructions that can go in pipe0 or pipe1.  */
spu_sms_res_mii (struct ddg *g)
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
      rtx_insn *insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);

      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 insn_data[INSN_CODE (insn)].name,

    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
spu_init_expanders (void)
      /* HARD_FRAME_REGISTER is only 128 bit aligned when
         frame_pointer_needed is true.  We don't know that until we're
         expanding the prologue.  */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
         to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);

spu_libgcc_cmp_return_mode (void)
  /* For SPU word mode is TI mode so it is better to use SImode
     for compare returns.  */

spu_libgcc_shift_count_mode (void)
  /* For SPU word mode is TI mode so it is better to use SImode
     for shift counts.  */
/* Implement targetm.section_type_flags.  */
spu_section_type_flags (tree decl, const char *name, int reloc)
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);

/* Implement targetm.select_section.  */
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
      /* We might get called with string constants, but get_named_section
         doesn't like them as they are not DECLs.  Also, we need to set
         flags in that case.  */
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);

  return default_elf_select_section (decl, reloc, align);

/* Implement targetm.unique_section.  */
spu_unique_section (tree decl, int reloc)
  /* We don't support unique section names in the __ea address
     space.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)

  default_unique_section (decl, reloc);
/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.  */
spu_gen_exp2 (machine_mode mode, rtx scale)
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
      /* unsigned int exp = (127 + scale) << 23;
         __vector float m = (__vector float) spu_splats (exp);  */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul,
                                 gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));

      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
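  /* Note (added): each byte group above builds an IEEE single with a
     biased exponent of 127 + SCALE and a zero mantissa, i.e. exactly
     2^SCALE in every lane; arr[0] and arr[1] split the exponent across
     the sign/exponent byte boundary, matching (127 + scale) << 23.  */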
/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
spu_split_convert (rtx ops[])
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));

spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
/* Implement targetm.ref_may_alias_errno.  */
spu_ref_may_alias_errno (ao_ref *ref)
  tree base = ao_ref_base (ref);

  /* With SPU newlib, errno is defined as something like
     ...
     The default implementation of this target macro does not
     recognize such expressions, so special-code for it here.  */

  if (TREE_CODE (base) == VAR_DECL
      && !TREE_STATIC (base)
      && DECL_EXTERNAL (base)
      && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
      && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
                 "_impure_data") == 0
      /* _errno is the first member of _impure_data.  */
      && ref->offset == 0)

  return default_ref_may_alias_errno (ref);
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
	output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
	{
	  output_asm_insn ("il\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%3", op);
	  output_asm_insn ("iohl\t%6,%2", op);
	  output_asm_insn ("a\t%1,%1,%6", op);
	}
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
	output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
	{
	  output_asm_insn ("il\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}
      else
	{
	  output_asm_insn ("ilhu\t%6,%5", op);
	  output_asm_insn ("iohl\t%6,%4", op);
	  output_asm_insn ("a\t%7,%7,%6", op);
	}

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
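/* Worked example for the delta adjustment above, with illustrative values
   (assuming the 'this' pointer sits in the first argument register $3 and
   the %6 scratch is $79, as set up above): for delta = 0x12345 neither the
   10-bit "ai" nor the 16-bit "il" immediate fits, so %3/%2 hold the
   high/low halfwords 0x0001/0x2345 and the thunk conceptually emits

       ilhu  $79,0x0001     # $79 = 0x00010000
       iohl  $79,0x2345     # $79 |= 0x2345 -> 0x00012345
       a     $3,$3,$79      # this += delta

   while a small delta such as 300 fits the signed 10-bit immediate of
   "ai" and needs a single instruction.  In the vcall path, the lqd/rotqby
   pairs load the 16-byte aligned quadword containing the pointer and
   rotate the addressed word into the preferred slot before adding.  */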
/* Canonicalize a comparison from one we don't have to one we do have.  */
static void
spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  if (!op0_preserve_value
      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
    {
      /* Swap the operands and use the reversed condition instead.  */
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int)swap_condition ((enum rtx_code)*code);
    }
}
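/* For example, a comparison "a < b" (LT) arrives with *op0 == a and
   *op1 == b; because the SPU compare instructions are equality and
   greater-than forms (ceq, cgt, clgt), the hook swaps the operands and
   rewrites the code with swap_condition, so the test is emitted as
   "b > a" (GT).  The swap is skipped when the caller requires *op0 to
   keep its value (op0_preserve_value).  */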
/*  Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,             0, 0, false, false, false, NULL, false }
};
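/* Usage sketch for the table above (assumed user-level spellings, shown
   only for illustration):

       void isr (void) __attribute__ ((naked));         -- decl_req: functions
       typedef int vint __attribute__ ((spu_vector));   -- type_req: types

   Neither attribute takes arguments (min_len = max_len = 0); "naked" must
   appear on a function declaration and is checked by
   spu_handle_fndecl_attribute, while "spu_vector" applies to types and is
   handled by spu_handle_vector_attribute.  */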
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION  spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
struct gcc_target targetm = TARGET_INITIALIZER;