/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with this file; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
/* Builtin types, data and prototypes. */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
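/* A sketch of how these bounds are used, assuming the SPU_BTI_* enum
   values index this table in declaration order: a builtin operand
   typed SPU_BTI_S10 accepts immediates in [-0x200, 0x1ff], so a
   hypothetical range check looks like

     if (val < spu_builtin_range[SPU_BTI_S10].low
	 || val > spu_builtin_range[SPU_BTI_S10].high)
       error ("argument out of range");
*/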
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs. */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (int *cum, enum machine_mode mode,
					    tree type, unsigned char named);
static tree spu_build_builtin_va_list (void);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (tree type, tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
enum spu_immediate
{
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};

enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

/* Built in types. */
tree spu_builtin_types[SPU_BTI_MAX];
/* TARGET overrides. */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops)
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  /* Override some of the default param values.  With so many registers
     larger values are better for these params. */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default. */
  flag_rename_registers = 1;
}
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed. */
void
spu_override_options (void)
{
  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* Table of machine attributes. */
const struct attribute_spec spu_attribute_table[] = {
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
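/* A sketch of intended usage: a function declared as

     void handler (void) __attribute__ ((naked));

   gets no prologue or epilogue code; spu_expand_prologue and
   spu_expand_epilogue below return early when spu_naked_function_p
   is true for the current function.  */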
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case DFmode:
    case TImode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREG's where this is correct. */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}
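/* For example, (subreg:SI (reg:HI) 0) is accepted: both modes fit in a
   4-byte preferred slot, so the data really is in the least significant
   bytes.  A paradoxical (subreg:TI (reg:SI) 0) is rejected, because on
   SPU the SI value lives in the preferred slot, i.e., the most
   significant word of the TI register.  */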
/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;
  /* Strip any SUBREG */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, assure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  if (start > 0)
    {
      s = gen_reg_rtx (src_mode);
      switch (src_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
	  break;
	default:
	  abort ();
	}
      src = s;
    }

  if (width < src_size)
    {
      rtx pat;
      int icode;
      switch (src_mode)
	{
	case SImode:
	  icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
	  break;
	case DImode:
	  icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
	  break;
	case TImode:
	  icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
	  break;
	default:
	  abort ();
	}
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }
  else if (shift < 0)
    abort ();

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
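      /* Worked example (a sketch, not from the original source): for
	 width = 8 and start = 4, 32 - width - start = 20, so
	 -1ll << 20 gives ...fff00000; since start != 0, adding
	 1ll << 28 clears the sign bits above the field, leaving
	 maskbits = 0x0ff00000 -- exactly the 8 bits that sit 4 bits
	 below the MSB of the 32-bit destination.  */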
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, dst, shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes - offset)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[8][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {0, 0, 0},
  {CODE_FOR_ceq_vec, 0, 0},
};
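/* Rows are indexed by operand mode (QI, HI, SI, DI, TI, SF, DF,
   vector), columns by spu_comp_code, matching the
   spu_comp_icode[index][scode] lookups in spu_emit_branch_or_set
   below.  DFmode comparisons have no direct compare insns here; they
   are rewritten below as a subtract followed by a compare against
   zero.  */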
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves. */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result;
  rtx comp_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode;
  int index;

  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  switch (code)
    {
    case GE:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GT;
      break;
    case LE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GT;
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  comp_mode = SImode;
  op_mode = GET_MODE (spu_compare_op0);

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      index = 7;
      break;
    default:
      abort ();
    }

  if (GET_MODE (spu_compare_op1) == DFmode)
    {
      rtx reg = gen_reg_rtx (DFmode);
      if (!flag_unsafe_math_optimizations
	  || (scode != SPU_GT && scode != SPU_EQ))
	abort ();
      if (reverse_compare)
	emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
      else
	emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
      reverse_compare = 0;
      spu_compare_op0 = reg;
      spu_compare_op1 = CONST0_RTX (DFmode);
    }

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
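/* Round-trip sketch (not in the original source): the IEEE-754 single
   1.0f has bit pattern 0x3f800000, so
   hwint_to_const_double (SFmode, 0x3f800000) builds the CONST_DOUBLE
   for 1.0, and const_double_to_hwint maps it back to 0x3f800000.  */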
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;

    default:
      abort ();
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	gcc_unreachable ();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	gcc_unreachable ();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
		break;
	      case SPU_ILA:
		fprintf (file, "a");
		break;
	      case SPU_ILH:
		fprintf (file, "h");
		break;
	      case SPU_ILHU:
		fprintf (file, "hu");
		break;
	      default:
		gcc_unreachable ();
	      }
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");
	    break;
	  case IC_IL1s:
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
	      case SPU_ILA:
		break;
	      case SPU_ILH:
	      case SPU_ILHU:
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		break;
	      default:
		gcc_unreachable ();
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    break;
	  case IC_FSMBI:
	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      {
		val <<= 1;
		val |= arr[i] & 1;
	      }
	    print_operand (file, GEN_INT (val), 0);
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	    break;
	  case IC_IL1s:
	    if (xcode == HIGH)
	      x = XEXP (x, 0);
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (xcode == HIGH)
	      fprintf (file, "@h");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      }
      return;

    case 'C':
      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    gcc_unreachable ();
	  }
      else
	gcc_unreachable ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':			/* negated 'e' */
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':			/* negated 'f' */
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':			/* negated 'g' */
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':			/* negated 'h' */
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
  gcc_unreachable ();
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, hi, lo;
	int i;
	constant_to_array (mode, ops[1], arrhi);
	to = no_new_pseudos ? ops[0] : gen_reg_rtx (mode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (mode, arrhi);
	lo = array_to_constant (mode, arrlo);
	emit_move_insn (to, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
	return 1;
      }
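      /* Illustrative sketch: loading the SImode constant 0x12345678
	 this way becomes "ilhu rT,0x1234" (the hi constant, with the
	 low halfwords zeroed) followed by "iohl rT,0x5678" (the lo
	 constant OR'd into the low halfwords).  */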
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      current_function_uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || current_function_uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation. */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
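/* Each saved register occupies a full 16-byte quadword slot (they are
   stored with V4SImode moves in frame_emit_store below), so a function
   that must preserve three registers reserves 0x30 bytes here.  */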
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it. */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + current_function_outgoing_args_size
	      + current_function_pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:
         +-------------+
         |  incoming   |
         |    args     |
   AP -> +-------------+
         | $lr save    |
         +-------------+
 prev SP | back chain  |
         +-------------+
         |  reg save   | current_function_pretend_args_size bytes
         +-------------+
         | saved regs  | spu_saved_regs_size() bytes
         +-------------+
   FP -> | vars        | get_frame_size() bytes
         +-------------+
         | args        | current_function_outgoing_args_size bytes
         +-------------+
         | back chain  |
   SP -> +-------------+
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    current_function_uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -current_function_pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && current_function_uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + current_function_outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  REG_NOTES (insn) =
	    gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       real, REG_NOTES (insn));
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain. */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + current_function_outgoing_args_size
    + current_function_pretend_args_size;

  if (!current_function_is_leaf
      || current_function_calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (current_function_calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -current_function_pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_insn (gen_rtx_USE
		 (VOIDmode, gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
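/* Usage sketch: spu_const (V4SImode, 1) yields the CONST_VECTOR
   [1, 1, 1, 1]; spu_const (V4SFmode, 0x3f800000) yields a vector of
   four 1.0f elements, VAL being interpreted as the float's bit
   pattern.  */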
/* branch hint stuff */

/* The hardware requires 8 insns between a hint and the branch it
   effects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.) */
int spu_hint_dist = (8 * 4);

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  basic_block bb;		/* the original block. */
};
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it. */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
{
  rtx branch_label;
  rtx hint, insn, prev, next;

  if (before == 0 || branch == 0 || target == 0)
    return;

  if (distance > 600)
    return;

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);

  /* If the previous insn is pipe0, make the hbr dual issue with it.  If
     the current insn is pipe0, dual issue with it. */
  prev = prev_active_insn (before);
  if (prev && get_pipe (prev) == 0)
    hint = emit_insn_before (gen_hbr (branch_label, target), before);
  else if (get_pipe (before) == 0 && distance > spu_hint_dist)
    {
      next = next_active_insn (before);
      hint = emit_insn_after (gen_hbr (branch_label, target), before);
      if (next)
	PUT_MODE (next, TImode);
    }
  else
    {
      hint = emit_insn_before (gen_hbr (branch_label, target), before);
      PUT_MODE (hint, TImode);
    }
  recog_memoized (hint);
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint. */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
static void
insert_branch_hints (void)
{
  struct spu_bb_info *spu_bb_info;
  rtx branch, insn, next;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, head_addr;
  basic_block bb;
  unsigned int j;

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (last_basic_block + 1,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths. */
  shorten_branches (get_insns ());

  FOR_EACH_BB_REVERSE (bb)
  {
    head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
    branch = 0;
    if (spu_bb_info[bb->index].prop_jump)
      {
	branch = spu_bb_info[bb->index].prop_jump;
	branch_target = get_branch_target (branch);
	branch_addr = INSN_ADDRESSES (INSN_UID (branch));
      }
    /* Search from end of a block to beginning.  In this loop, find
       jumps which need a branch and emit them only when:
       - it's an indirect branch and we're at the insn which sets
       the register
       - we're at an insn that will invalidate the hint. e.g., a
       call, another hint insn, inline asm that clobbers $hbr, and
       some inlined operations (divmodsi4).  Don't consider jumps
       because they are only at the end of a block and are
       considered when we are deciding whether to propagate
       - we're getting too far away from the branch.  The hbr insns
       only have a signed 10-bit offset
       We go back as far as possible so the branch will be considered
       for propagation when we get to the beginning of the block. */
    next = 0;
    for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
      {
	if (INSN_P (insn))
	  {
	    insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	    if (branch
		&& ((GET_CODE (branch_target) == REG
		     && set_of (branch_target, insn) != NULL_RTX)
		    || insn_clobbers_hbr (insn)
		    || branch_addr - insn_addr > 600))
	      {
		int next_addr = INSN_ADDRESSES (INSN_UID (next));
		if (insn != BB_END (bb)
		    && branch_addr - next_addr >= spu_hint_dist)
		  {
		    if (dump_file)
		      fprintf (dump_file,
			       "hint for %i in block %i before %i\n",
			       INSN_UID (branch), bb->index, INSN_UID (next));
		    spu_emit_branch_hint (next, branch, branch_target,
					  branch_addr - next_addr);
		  }
		branch = 0;
	      }

	    /* JUMP_P will only be true at the end of a block.  When
	       branch is already set it means we've previously decided
	       to propagate a hint for that branch into this block. */
	    if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      {
		branch = 0;
		if ((branch_target = get_branch_target (insn)))
		  {
		    branch = insn;
		    branch_addr = insn_addr;
		  }
	      }

	    /* When a branch hint is emitted it will be inserted
	       before "next".  Make sure next is the beginning of a
	       cycle to minimize impact on the scheduled insns. */
	    if (GET_MODE (insn) == TImode)
	      next = insn;
	  }
	if (insn == BB_HEAD (bb))
	  break;
      }

    if (branch)
      {
	/* If we haven't emitted a hint for this branch yet, it might
	   be profitable to emit it in one of the predecessor blocks,
	   especially for loops. */
	rtx bbend;
	basic_block prev = 0, prop = 0, prev2 = 0;
	int loop_exit = 0, simple_loop = 0;
	int next_addr = 0;
	if (next)
	  next_addr = INSN_ADDRESSES (INSN_UID (next));

	for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	  if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	    prev = EDGE_PRED (bb, j)->src;
	  else
	    prev2 = EDGE_PRED (bb, j)->src;

	for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	  if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    loop_exit = 1;
	  else if (EDGE_SUCC (bb, j)->dest == bb)
	    simple_loop = 1;

	/* If this branch is a loop exit then propagate to previous
	   fallthru block. This catches the cases when it is a simple
	   loop or when there is an initial branch into the loop. */
	if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
	  prop = prev;

	/* If there is only one adjacent predecessor.  Don't propagate
	   outside this loop.  This loop_depth test isn't perfect, but
	   I'm not sure the loop_father member is valid at this point. */
	else if (prev && single_pred_p (bb)
		 && prev->loop_depth == bb->loop_depth)
	  prop = prev;

	/* If this is the JOIN block of a simple IF-THEN then
	   propagate the hint to the HEADER block. */
	else if (prev && prev2
		 && EDGE_COUNT (bb->preds) == 2
		 && EDGE_COUNT (prev->preds) == 1
		 && EDGE_PRED (prev, 0)->src == prev2
		 && prev2->loop_depth == bb->loop_depth
		 && GET_CODE (branch_target) != REG)
	  prop = prev;

	/* Don't propagate when:
	   - this is a simple loop and the hint would be too far
	   - this is not a simple loop and there are 16 insns in
	   this block already
	   - the predecessor block ends in a branch that will be
	   hinted
	   - the predecessor block ends in an insn that invalidates
	   the hint */
	if (prop
	    && prop->index >= 0
	    && (bbend = BB_END (prop))
	    && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	    (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	    && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
		       "for %i (loop_exit %i simple_loop %i dist %i)\n",
		       bb->index, prop->index, bb->loop_depth,
		       INSN_UID (branch), loop_exit, simple_loop,
		       branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	    spu_bb_info[prop->index].prop_jump = branch;
	    spu_bb_info[prop->index].bb = bb;
	  }
	else if (next && branch_addr - next_addr >= spu_hint_dist)
	  {
	    if (dump_file)
	      fprintf (dump_file, "hint for %i in block %i before %i\n",
		       INSN_UID (branch), bb->index, INSN_UID (next));
	    spu_emit_branch_hint (next, branch, branch_target,
				  branch_addr - next_addr);
	  }
	branch = 0;
      }
  }
  free (spu_bb_info);
}
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 and inline asm. */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements. */
static void
insert_nops (void)
{
  rtx insn, next_insn, prev_insn;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  for (insn = get_insns (); insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if (GET_MODE (insn) == TImode
	  && next_insn
	  && GET_MODE (next_insn) != TImode
	  && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
static void
spu_machine_dependent_reorg (void)
{
  if (TARGET_BRANCH_HINTS)
    insert_branch_hints ();
  insert_nops ();
}
/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn,
			  int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && get_pipe (insn) != -2)
    can_issue_more--;
  return can_issue_more;
}
static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
      return 0;

    case TYPE_LNOP:
    case TYPE_BR:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_MULTI1:
    case TYPE_HBR:
    case TYPE_IPREFETCH:
      return 1;
    default:
      abort ();
    }
}
static int
spu_sched_adjust_priority (rtx insn, int pri)
{
  int p = get_pipe (insn);
  /* Schedule UNSPEC_CONVERT's early so they have less effect on
     scheduling. */
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || p == -2)
    return pri + 100;
  /* Schedule pipe0 insns early for greedier dual issue. */
  if (p != 1)
    return pri + 50;
  return pri;
}
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
		       rtx dep_insn ATTRIBUTE_UNUSED, int cost)
{
  if (GET_CODE (insn) == CALL_INSN)
    return cost - 2;
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;
  return cost;
}
/* Create a CONST_DOUBLE from a string. */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
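/* Usage sketch: spu_float_const ("0.5", SFmode) builds the
   CONST_DOUBLE rtx for 0.5f, suitable wherever a float constant is
   needed during expansion.  */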
/* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
   CONST_INT fits constraint 'K', i.e., is small. */
static int
legitimate_const (rtx x, int aligned)
{
  rtx sym, cst;

  /* We can never know if the resulting address fits in 18 bits and can be
     loaded with ila.  Instead we should use the HI and LO relocations to
     load a 32-bit address. */
  if (TARGET_LARGE_MEM)
    return 0;

  gcc_assert (GET_CODE (x) == CONST);

  if (GET_CODE (XEXP (x, 0)) != PLUS)
    return 0;
  sym = XEXP (XEXP (x, 0), 0);
  cst = XEXP (XEXP (x, 0), 1);
  if (GET_CODE (sym) != SYMBOL_REF || GET_CODE (cst) != CONST_INT)
    return 0;
  if (aligned && ((INTVAL (cst) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym)))
    return 0;
  return satisfies_constraint_K (cst);
}
int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;
  return SPU_NONE;
}
2409 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2411 immediate_load_p (rtx op
, enum machine_mode mode
)
2413 if (CONSTANT_P (op
))
2415 enum immediate_class c
= classify_immediate (op
, mode
);
2416 return c
== IC_IL1
|| c
== IC_IL1s
2417 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use. */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i + 1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i + run] == run && i + run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run - 1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
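/* A sketch of the pattern being recognized: "chd" (generate halfword
   insertion controls) at offset 2 produces the 16 control bytes
   10 11 02 03 14 15 ... 1f -- every byte i is 0x10+i except the run
   {2,3} marking the insertion slot -- so cpat_info reports run = 2,
   start = 2 for that array.  */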
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok. */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      return TARGET_LARGE_MEM
	|| !legitimate_const (op, 0) ? IC_IL2s : IC_IL1s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
	  return IC_POOL;
      /* Fall through. */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical. */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
	for (j = 0; j < 4; j++)
	  if (arr[j] != arr[i + j])
	    repeated = 0;

      if (repeated)
	{
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);

	  if (which_immediate_load (val) != SPU_NONE)
	    return IC_IL1;
	}

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction. */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
	if (arr[i] != 0 && repeat == 0)
	  repeat = arr[i];
	else if (arr[i] != 0 && arr[i] != repeat)
	  fsmbi = 0;
      if (fsmbi)
	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
	return IC_CPAT;

      if (repeated)
	return IC_IL2;

      return IC_POOL;

    default:
      break;
    }
  gcc_unreachable ();
}
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
        return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
        {
          val = trunc_int_for_mode (val, QImode);
          if (val >= -0x200 && val <= 0x1ff)
            return SPU_ORBI;
        }
    }
  return SPU_NONE;
}

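/* Worked example (illustrative note, not part of the original source):
   val = 0x00050005 fails the ori and iohl range checks but repeats in
   both halfwords and fits the 10-bit immediate once truncated to
   HImode, so SPU_ORHI is returned; val = 0x0000ffff lies within
   0..0xffff and classifies as SPU_IOHL.  */
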
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs. */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
        && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}

int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}

int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}

int
arith_immediate_p (rtx op, enum machine_mode mode,
                   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
              || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
        return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}

/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match. */
int
spu_legitimate_constant_p (rtx x)
{
  if (GET_CODE (x) == HIGH)
    return 1;
  /* V4SI with all identical symbols is valid. */
  if (!flag_pic
      && GET_MODE (x) == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
          || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
           && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
           && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}

/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const, where either reg or const is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  (TODO: It might be
   preferable to allow any alignment and fix it up when splitting.) */
int
spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx x, int reg_ok_strict)
{
  if (mode == TImode && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case CONST:
      return !TARGET_LARGE_MEM && legitimate_const (x, 0);

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      gcc_assert (GET_CODE (x) == REG);
      /* Fall through. */

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
        rtx op0 = XEXP (x, 0);
        rtx op1 = XEXP (x, 1);
        if (GET_CODE (op0) == SUBREG)
          op0 = XEXP (op0, 0);
        if (GET_CODE (op1) == SUBREG)
          op1 = XEXP (op1, 0);
        /* We can't just accept any aligned register because CSE can
           change it to a register that is not marked aligned and then
           recog will fail.   So we only accept frame registers because
           they will only be changed to other frame registers. */
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == CONST_INT
            && INTVAL (op1) >= -0x2000
            && INTVAL (op1) <= 0x1fff
            && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
          return 1;
        if (GET_CODE (op0) == REG
            && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
            && GET_CODE (op1) == REG
            && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
          return 1;
      }
      break;

    default:
      break;
    }
  return 0;
}

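/* Illustrative examples (not from the original source):
     (reg sp)                        valid, any base register
     (plus (reg sp) (const_int 32))  valid, offset is 16-byte aligned
     (plus (reg sp) (const_int 3))   valid only because sp is a frame
                                     register known to be aligned
     (plus (reg r) (const_int 3))    rejected for an ordinary register;
                                     lqd/stqd would silently drop the
                                     low 4 bits of the offset
     (plus (reg r) (reg s))          valid, alignment is irrelevant  */
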
/* When the address is reg + const_int, force the const_int into a
   register.  */
rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                        enum machine_mode mode)
{
  rtx op0, op1;
  /* Make sure both operands are registers. */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
        {
          op0 = force_reg (Pmode, op0);
          mark_reg_pointer (op0, 128);
        }
      else if (GET_CODE (op0) != REG)
        op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
        {
          op1 = force_reg (Pmode, op1);
          mark_reg_pointer (op1, 128);
        }
      else if (GET_CODE (op1) != REG)
        op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
      if (spu_legitimate_address (mode, x, 0))
        return x;
    }
  return NULL_RTX;
}

/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */
static tree
spu_handle_fndecl_attribute (tree * node,
                             tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "`%s' attribute only applies to functions",
               IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle the "vector" attribute. */
static tree
spu_handle_vector_attribute (tree * node, tree name,
                             tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
         || TREE_CODE (type) == FUNCTION_TYPE
         || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type. */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;         /* No need to hang on to the attribute. */

  if (!result)
    warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
  else
    *node = reconstruct_complex_type (*node, result);

  return NULL_TREE;
}

/* Return nonzero if FUNC is a naked function. */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}

int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!current_function_is_leaf || current_function_outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return (sp_offset + current_function_outgoing_args_size);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return 0;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + current_function_outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;

  return 0;
}

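/* Illustrative frame sketch (an assumption drawn from the offsets
   above, not a drawing from the original source):

        incoming args                   <- eliminated arg pointer
        get_frame_size () + saved regs
        outgoing args
        STACK_POINTER_OFFSET pad        <- stack pointer (leaf functions
                                           with no frame skip the pad)

   so AP->SP elimination adds all four components while FP->SP only
   skips past the outgoing args and the pad.  */
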
rtx
spu_function_value (tree type, tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
                   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      enum machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
        {
          RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
                                                gen_rtx_REG (TImode,
                                                             FIRST_RETURN_REGNUM
                                                             + i),
                                                GEN_INT (UNITS_PER_WORD * i));
          byte_size -= UNITS_PER_WORD;
        }

      if (n < nregs)
        {
          smode =
            smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
          RTVEC_ELT (v, n) =
            gen_rtx_EXPR_LIST (VOIDmode,
                               gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
                               GEN_INT (UNITS_PER_WORD * n));
        }
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}

rtx
spu_function_arg (CUMULATIVE_ARGS cum,
                  enum machine_mode mode,
                  tree type, int named ATTRIBUTE_UNUSED)
{
  int byte_size;

  if (cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
               ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack. */
  if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register. */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      enum machine_mode smode;
      rtx gr_reg;
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
                                  gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
                                  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }

  return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
}

/* Variable sized types are passed by reference. */
static bool
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}

/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            void *__args __attribute__((__aligned(16)));
            void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes. */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it that way.  It is an
     internal type so hide the warnings from the user. */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element. */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     current_function_args_info;
       the CUMULATIVE_ARGS for this function

     current_function_arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer). */
void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build_va_arg_indirect_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area. */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (current_function_pretend_args_size > 0)
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
                size_int (-STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area. */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
              size_int (current_function_pretend_args_size
                        - STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}

/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
        && VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
                          tree * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree paddedsize, addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  /* if an object is dynamically sized, a pointer to it is passed
     instead of the object itself. */
  pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
                                               false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* build conditional expression to calculate addr. The expression
     will be gimplified later. */
  paddedsize = size_int (rsize);
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
                build2 (GT_EXPR, boolean_type_node, tmp, skip),
                build2 (LE_EXPR, boolean_type_node, args, skip));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
                build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
                        size_int (32)), args);

  tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
  gimplify_and_add (tmp, pre_p);

  /* update VALIST.__args */
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
  tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
  gimplify_and_add (tmp, pre_p);

  addr = fold_convert (build_pointer_type (type), addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}

/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameters.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers. */
static void
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
                            tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *cum;

      /* cum currently points to the last named argument, we want to
         start at the next argument. */
      FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
        {
          tmp = gen_frame_mem (V4SImode,
                               plus_constant (virtual_incoming_args_rtx,
                                              offset));
          emit_move_insn (tmp,
                          gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
          offset += 16;
        }
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}

void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}

/* This is called to decide when we can simplify a load instruction.  We
   must only return true for registers which we know will always be
   aligned, taking into account that CSE might replace this reg with
   another one that has not been marked aligned.
   So this is really only true for frame, stack and virtual registers,
   which we know are always aligned and should not be adversely affected
   by CSE. */
static int
regno_aligned_for_load (int regno)
{
  return regno == FRAME_POINTER_REGNUM
    || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
    || regno == STACK_POINTER_REGNUM
    || (regno >= FIRST_VIRTUAL_REGISTER
        && regno <= LAST_VIRTUAL_REGISTER);
}

/* Return TRUE when mem is known to be 16-byte aligned. */
int
aligned_mem_p (rtx mem)
{
  if (MEM_ALIGN (mem) >= 128)
    return 1;
  if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
    return 1;
  if (GET_CODE (XEXP (mem, 0)) == PLUS)
    {
      rtx p0 = XEXP (XEXP (mem, 0), 0);
      rtx p1 = XEXP (XEXP (mem, 0), 1);
      if (regno_aligned_for_load (REGNO (p0)))
        {
          if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
            return 1;
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
            return 1;
        }
    }
  else if (GET_CODE (XEXP (mem, 0)) == REG)
    {
      if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
        return 1;
    }
  else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
    return 1;
  else if (GET_CODE (XEXP (mem, 0)) == CONST)
    {
      rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
      rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
      if (GET_CODE (p0) == SYMBOL_REF
          && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
        return 1;
    }
  return 0;
}

/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS. */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1. */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}

/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT. */
static int
store_with_one_insn_p (rtx mem)
{
  rtx addr = XEXP (mem, 0);
  if (GET_MODE (mem) == BLKmode)
    return 0;
  /* Only static objects. */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types?  */
      tree decl = MEM_EXPR (mem);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
          && TREE_CODE (decl) == VAR_DECL
          && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
        return 1;
    }
  return 0;
}

int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    abort ();

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      enum machine_mode imode = GET_MODE (from);

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
                  && GET_MODE_CLASS (imode) == MODE_INT
                  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
        {
          from = gen_rtx_SUBREG (SImode, from, 0);
          imode = SImode;
        }

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
        {
          enum insn_code icode = trunc_optab->handlers[mode][imode].insn_code;
          emit_insn (GEN_FCN (icode) (ops[0], from));
        }
      else
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register. */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
        return spu_split_immediate (ops);
      return 0;
    }
  else
    {
      if (GET_CODE (ops[0]) == MEM)
        {
          if (!spu_valid_move (ops))
            {
              emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
                                    gen_reg_rtx (TImode)));
              return 1;
            }
        }
      else if (GET_CODE (ops[1]) == MEM)
        {
          if (!spu_valid_move (ops))
            {
              emit_insn (gen_load
                         (ops[0], ops[1], gen_reg_rtx (TImode),
                          gen_reg_rtx (SImode)));
              return 1;
            }
        }
      /* Catch the SImode immediates greater than 0x7fffffff, and sign
         extend them. */
      if (GET_CODE (ops[1]) == CONST_INT)
        {
          HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
          if (val != INTVAL (ops[1]))
            {
              emit_move_insn (ops[0], GEN_INT (val));
              return 1;
            }
        }
    }
  return 0;
}

/* For now, only frame registers are known to be aligned at all times.
   We can't trust REGNO_POINTER_ALIGN because optimization will move
   registers around, potentially changing an "aligned" register in an
   address to an unaligned register, which would result in an invalid
   address. */
static int
reg_align (rtx reg)
{
  int regno = REGNO (reg);
  return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
}

void
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot, mem, p0, p1;
  int rot_amt;

  addr = XEXP (ops[1], 0);

  rot = 0;
  rot_amt = 0;
  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (reg_align (p0) < 128)
        {
          if (GET_CODE (p1) == REG && reg_align (p1) < 128)
            {
              emit_insn (gen_addsi3 (ops[3], p0, p1));
              rot = ops[3];
            }
          else
            rot = p0;
        }
      else
        {
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              rot_amt = INTVAL (p1) & 15;
              p1 = GEN_INT (INTVAL (p1) & -16);
              addr = gen_rtx_PLUS (SImode, p0, p1);
            }
          else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
            rot = p1;
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      if (reg_align (addr) < 128)
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
          if (rot_amt & -16)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (rot_amt & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
        }
      else
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    rot = addr;

  if (GET_MODE_SIZE (mode) < 4)
    rot_amt += GET_MODE_SIZE (mode) - 4;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
      rot = ops[3];
      rot_amt = 0;
    }

  load = ops[2];

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  mem = change_address (ops[1], TImode, addr);

  emit_insn (gen_movti (load, mem));

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));
  else if (rot_amt)
    emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));

  if (reload_completed)
    emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
  else
    emit_insn (gen_spu_convert (ops[0], load));
}

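/* Illustrative expansion (an assumption, not from the original source):
   a SImode load through an unaligned base register R becomes roughly

        lqx     load, R, 0      # fetch the enclosing quadword
        rotqby  load, load, R   # rotate the wanted word into the slot

   while "aligned reg + unaligned const" only needs lqd plus a constant
   rotqbyi, matching the 8-case table at the top of this function.  */
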
void
spu_split_store (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx pat = ops[2];
  rtx reg = ops[3];
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  addr = XEXP (ops[0], 0);

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (p1) & 15);
          p1 = GEN_INT (INTVAL (p1) & -16);
          addr = gen_rtx_PLUS (SImode, p0, p1);
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;                   /* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
        p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST)
        {
          if (GET_CODE (XEXP (addr, 0)) == PLUS
              && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
              && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
            {
              HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
              if (v & -16)
                addr = gen_rtx_CONST (Pmode,
                                      gen_rtx_PLUS (Pmode,
                                                    XEXP (XEXP (addr, 0), 0),
                                                    GEN_INT (v & -16)));
              else
                addr = XEXP (XEXP (addr, 0), 0);
              p1_lo = GEN_INT (v & 15);
            }
        }
      else if (GET_CODE (addr) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (addr) & 15);
          addr = GEN_INT (INTVAL (addr) & -16);
        }
    }

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
         We don't because we want this load to be optimized away if
         possible, and copying the flags will prevent that in certain
         cases, e.g. consider the volatile flag. */

      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_align (p0) >= 128)
        p0 = stack_pointer_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else if (reload_completed)
    {
      if (GET_CODE (ops[1]) == REG)
        emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_move_insn (reg,
                        gen_rtx_REG (GET_MODE (reg),
                                     REGNO (SUBREG_REG (ops[1]))));
      else
        abort ();
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
        emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
        abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_shlqby_ti
               (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));

  smem = change_address (ops[0], TImode, addr);
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types. */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
}

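/* Illustrative expansion (an assumption, not from the original source):
   storing a SImode value V at sp+4 becomes roughly

        lqd     q, 0(sp)        # read the enclosing quadword
        cwd     m, 4(sp)        # build the insertion mask for offset 4
        shufb   q, V, q, m      # merge V into the quadword
        stqd    q, 0(sp)        # write the quadword back

   which is the lqd, c?d, shuf, stqx shape listed in the table above.  */
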
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded. */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too. */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded. */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}

/* Parse the -mfixed-range= option string. */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler. */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}

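/* Usage example (illustrative, not from the original source): compiling
   with
        spu-gcc -mfixed-range=75-79 file.c
   marks registers 75 through 79 as fixed and call-used so the register
   allocator never assigns them; several ranges may be given separated
   by commas, e.g. -mfixed-range=75-79,110-127.  */
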
int
spu_valid_move (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    return 0;

  /* init_expr_once tries to recog against load and store insns to set
     the direct_load[] and direct_store[] arrays.  We always want to
     consider those loads and stores valid.  init_expr_once is called in
     the context of a dummy function which does not have a decl. */
  if (cfun->decl == 0)
    return 1;

  /* Don't allow loads/stores which would require more than 1 insn.
     During and after reload we assume loads and stores only take 1
     insn. */
  if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (ops[0]) == MEM
          && (GET_MODE_SIZE (mode) < 4
              || !(store_with_one_insn_p (ops[0])
                   || mem_is_padded_component_ref (ops[0]))))
        return 0;
      if (GET_CODE (ops[1]) == MEM
          && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
        return 0;
    }
  return 1;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction. */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi. */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction. */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}

rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
          && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
            (GET_CODE (ops[1]) ==
             CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}

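/* Worked example (illustrative, not from the original source): with
   size 4 in ops[3], offset 4 in ops[2] and base 0 in ops[1], shift is
   0 and the function returns the TImode constant
     { 16,17,18,19,  0,1,2,3,  24,25,26,27,  28,29,30,31 }
   i.e. the same control word the hardware cwd instruction computes.  */
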
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array. */
void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
          && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
        val = const_double_to_hwint (x);
      else
        val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      /* Splat the constant across the whole array. */
      for (j = 0, i = first + 1; i < 16; i++)
        {
          arr[i] = arr[j];
          j = (j == first) ? 0 : j + 1;
        }
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
        {
          elt = CONST_VECTOR_ELT (x, i);
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
            {
              if (GET_CODE (elt) == CONST_DOUBLE)
                val = const_double_to_hwint (elt);
              else
                val = INTVAL (elt);
              first = GET_MODE_SIZE (mode) - 1;
              if (first + i * GET_MODE_SIZE (mode) > 16)
                abort ();
              for (j = first; j >= 0; j--)
                {
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
                  val >>= 8;
                }
            }
        }
    }
  else
    gcc_unreachable ();
}

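/* Worked example (illustrative, not from the original source): the
   HImode CONST_INT 0x1234 first fills arr[0..1] with 0x12, 0x34 (most
   significant byte first) and the splat loop then repeats the pair, so
   arr becomes { 0x12,0x34, 0x12,0x34, ... } for all 16 bytes.  */
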
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3]. */
rtx
array_to_constant (enum machine_mode mode, unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
        val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
        high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
        val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val <<= 32;
      val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
        val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}

static void
reloc_diagnostic (rtx x)
{
  tree loc_decl, decl = 0;
  const char *msg;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL. */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* We use last_assemble_variable_decl to get line information.  It's
     not always going to be right and might not even be close, but will
     be right for the more common cases. */
  if (!last_assemble_variable_decl || in_section == ctors_section)
    loc_decl = decl;
  else
    loc_decl = last_assemble_variable_decl;

  /* The decl could be a string constant. */
  if (decl && DECL_P (decl))
    msg = "%Jcreating run-time relocation for %qD";
  else
    msg = "creating run-time relocation";

  if (TARGET_WARN_RELOC)
    warning (0, msg, loc_decl, decl);
  else
    error (msg, loc_decl, decl);
}

/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them. */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't. */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}

static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P (x)) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough. */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
        *total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
        {
          *total = COSTS_N_INSNS (9);
          return true;
        }
      break;

    case MULT:
      cost =
        GET_CODE (XEXP (x, 0)) ==
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
            }
        }
      break;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    default:
      break;
    }

  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0). */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));

  *total = cost;
  return true;
}

static enum machine_mode
spu_eh_return_filter_mode (void)
{
  /* We would like this to be SImode, but sjlj exceptions seems to work
     only with word_mode. */
  return word_mode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call. */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}

/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.) */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}

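/* Note on the splat constant (illustrative, not from the original
   source): as a shufb control word, 0x0001020300010203 repeated in
   both halves selects bytes 0-3 of op1 into every word slot, so the
   requested size is subtracted from both the stack pointer in slot 0
   and the Available Stack Size in slot 1 with a single subv4si3.  */
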
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second. */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}

static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
}

/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
static rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (tree type, tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}

/* Create the built-in types and functions */

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared. */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];            /* build_function will make a copy. */
      int parm;

      if (d->name == 0)
        continue;

      /* find last parm */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
        ;

      p = void_list_node;
      while (parm > 1)
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
        add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
                              NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (d->fndecl) = 1;
    }
}

void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp. */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp. */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return (channel >= 21 && channel <= 27);
}

void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
    {
      rtvec v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = ops[1];
      RTVEC_ELT (v, 1) = ops[1];
      RTVEC_ELT (v, 2) = ops[1];
      RTVEC_ELT (v, 3) = ops[1];
      emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
        {
        case V2DImode:
        case V2DFmode:
          shuf =
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
                                TImode);
          break;
        case V4SImode:
        case V4SFmode:
          shuf =
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
                                TImode);
          break;
        case V8HImode:
          shuf =
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
                                TImode);
          break;
        case V16QImode:
          shuf =
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
                                TImode);
          break;
        default:
          abort ();
        }
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}

void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}

void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}

void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}

void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_rotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage. */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
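
/* A sketch of the trampoline image stored above; the decoding is an
   interpretation of the insna/shufa tables, not additional code.
   Without -mlarge-mem the 16 bytes at tramp decode roughly as

     ila  $STATIC_CHAIN_REGNUM, cxt    # 0x42 row or'd with scxt
     bra  fnaddr                       # 0x30 row or'd with sfnaddr

   With -mlarge-mem, ilhu/iohl pairs (the 0x41 and 0x60/0x80 rows)
   materialize both 32-bit values, and the extra word written at
   tramp+16, 0x35000000 + (79 << 7), is a bi through register 79.  */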
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;

  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
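
/* A worked example of the shuffle pattern built above (SImode source
   sign-extended to a DImode destination, so last == 7):

     arr[] = { 0x10, 0x10, 0x10, 0x10,  0x00, 0x01, 0x02, 0x03,
               0x10, 0x10, 0x10, 0x10,  0x10, 0x10, 0x10, 0x10 };

   Bytes 4..7 copy the four value bytes of ops[1]; every other byte
   replicates byte 0 of `sign', which holds 32 copies of the sign bit
   after the arithmetic shift right by 31.  */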
/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        ++n_var;
      else if (first_constant == NULL_RTX)
        first_constant = x;
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* Fill empty slots with the first constant; this increases
             our chance of using splats in the recursive call below.  */
          for (i = 0; i < n_elts; ++i)
            if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
              XVECEXP (constant_parts_rtx, 0, i) = first_constant;

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!CONSTANT_P (x))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}
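
/* A worked example (hypothetical values): initializing the V4SImode
   vector { x, 1, 2, 3 }, where x is in a register, first recurses on
   the constant image { 1, 1, 2, 3 } (the empty slot filled with
   first_constant), which is emitted as a single CONST_VECTOR move,
   and then issues one spu_builtin_insert of x into element 0.  Had
   all four elements been x, the all_same path would have emitted a
   single splat instead.  */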
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;

  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
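
/* Behavior sketch (examples hypothetical): spu_force_reg (SImode,
   GEN_INT (5)) takes the VOIDmode branch and reduces to force_reg
   after conversion; a same-size request such as SFmode from SImode is
   satisfied with a subreg; a size-changing request such as V4SImode
   from SImode falls through to spu_convert, which shuffles the value
   into the destination's preferred slot.  */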
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;

  /* Check the range of immediate operands. */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d].",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range. */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. ("
               HOST_WIDE_INT_PRINT_DEC ")",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);

      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned. */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr. */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
                 d->name);
    }
}
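
/* An example of the diagnostics above (usage hypothetical; the bounds
   come from spu_builtin_range):

     qword r = si_ai (a, 600);

   is rejected with "si_ai expects an integer literal in the range
   [-512, 511]." because SPU_BTI_S10 spans -0x200..0x1ff, while an
   lqa/stqa immediate with any of its low 4 bits set only draws the
   "least significant bits ... ignored" warning.  */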
static void
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl. */
  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
        abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }
}
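
/* Layout note: after expand_builtin_args, ops[] mirrors the insn's
   operand order.  For a value-returning builtin (d->parm[0] !=
   SPU_BTI_VOID) ops[0] is the target and the call arguments occupy
   ops[1..n_operands-1]; for a void builtin the arguments start at
   ops[0].  */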
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from arglist. */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0. */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype. */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects. */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands. */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels. */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar. */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype. */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
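
/* A usage sketch of the splat conversion above (assuming the
   overloaded spu_add generic intrinsic; variables hypothetical):

     vector signed int v, r;
     r = spu_add (v, 3);

   The scalar 3 reaches this code as a CONST_INT while the insn
   operand's mode is V4SImode, so spu_const materializes the constant
   { 3, 3, 3, 3 }; a non-constant scalar instead takes the
   gen_spu_splats branch after any needed mode conversion.  */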
rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even. */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULE_0].fndecl;
      else
        return spu_builtins[SPU_MULE_1].fndecl;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mul_widen_odd. */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULO_1].fndecl;
      else
        return spu_builtins[SPU_MULO_0].fndecl;
    default:
      return NULL_TREE;
    }
}
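
/* Usage note (hedged): the vectorizer queries these two hooks as a
   pair when widening a V8HImode multiply, one builtin producing the
   products of the even-indexed halfwords and the other the
   odd-indexed ones; the SPU_MULE_*/SPU_MULO_* entries merely select
   the signed or unsigned fndecl for each.  */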
/* Implement targetm.vectorize.builtin_mask_for_load. */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];

  gcc_assert (d);
  return d->fndecl;
}
void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
   * frame_pointer_needed is true.  We don't know that until we're
   * expanding the prologue. */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}