gcc/config/spu/spu.c
842ae815 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
644459d0 3
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
038d1e19 6 Software Foundation; either version 3 of the License, or (at your option)
644459d0 7 any later version.
8
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
038d1e19 15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
644459d0 17
18#include "config.h"
19#include "system.h"
20#include "coretypes.h"
21#include "tm.h"
22#include "rtl.h"
23#include "regs.h"
24#include "hard-reg-set.h"
644459d0 25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
0b205f4c 39#include "diagnostic-core.h"
644459d0 40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
644459d0 50#include "machmode.h"
75a70cf9 51#include "gimple.h"
644459d0 52#include "tm-constrs.h"
d52fd16a 53#include "ddg.h"
5a976006 54#include "sbitmap.h"
55#include "timevar.h"
56#include "df.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
c2233b46 59
60enum spu_builtin_type_index
61{
62 SPU_BTI_END_OF_PARAMS,
63
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
75
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
78
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
92
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
98
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
103
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
106
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
109
110 SPU_BTI_MAX
111};
112
113#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
123
124static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
125
6352eedf 126struct spu_builtin_range
127{
128 int low, high;
129};
130
131static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
144};
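/* Illustrative note (editorial addition, not part of the original source):
   each entry above is the inclusive [low, high] range accepted for the
   corresponding immediate argument kind, indexed from SPU_BTI_7; e.g.
   SPU_BTI_S10 is the signed 10-bit range -512..511 and SPU_BTI_U7 the
   unsigned 7-bit range 0..127.  A minimal sketch of such a range check
   (the helper name is hypothetical, and the block is not compiled):  */
#if 0
static int
spu_immediate_fits_p (HOST_WIDE_INT val, enum spu_builtin_type_index kind)
{
  struct spu_builtin_range r = spu_builtin_range[kind - SPU_BTI_7];
  return val >= r.low && val <= r.high;
}
#endif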
145
644459d0 146\f
147/* Target specific attribute specifications. */
148char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
149
150/* Prototypes and external defs. */
4c834714 151static void spu_option_override (void);
686e2769 152static void spu_option_default_params (void);
644459d0 153static void spu_init_builtins (void);
e6925042 154static tree spu_builtin_decl (unsigned, bool);
b62e30b8 155static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 157static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 158static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
644459d0 160static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161static rtx get_pic_reg (void);
162static int need_to_save_reg (int regno, int saving);
163static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167static void emit_nop_for_insn (rtx insn);
168static bool insn_clobbers_hbr (rtx insn);
169static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 170 int distance, sbitmap blocks);
5474166e 171static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
644459d0 173static rtx get_branch_target (rtx branch);
644459d0 174static void spu_machine_dependent_reorg (void);
175static int spu_sched_issue_rate (void);
176static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178static int get_pipe (rtx insn);
644459d0 179static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 180static void spu_sched_init_global (FILE *, int, int);
181static void spu_sched_init (FILE *, int, int);
182static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 183static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
b62e30b8 185 bool *no_add_attrs);
644459d0 186static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
b62e30b8 188 bool *no_add_attrs);
644459d0 189static int spu_naked_function_p (tree func);
b62e30b8 190static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
ee9034d4 192static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
644459d0 196static tree spu_build_builtin_va_list (void);
8a58ed0a 197static void spu_va_start (tree, rtx);
75a70cf9 198static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 200static int store_with_one_insn_p (rtx mem);
644459d0 201static int mem_is_padded_component_ref (rtx x);
9d98604b 202static int reg_aligned_for_addr (rtx x);
644459d0 203static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 205static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 208static void spu_init_libfuncs (void);
fb80456a 209static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 210static void fix_range (const char *);
69ced2d6 211static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 212static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 213static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
e99f512d 215static tree spu_builtin_mul_widen_even (tree);
216static tree spu_builtin_mul_widen_odd (tree);
a76866d3 217static tree spu_builtin_mask_for_load (void);
0822b158 218static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 219static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 220static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
5a976006 226static void asm_file_start (void);
a08dfd55 227static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 228static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229static void spu_unique_section (tree, int);
9d98604b 230static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 231static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 232static void spu_conditional_register_usage (void);
329c1e4e 233static bool spu_ref_may_alias_errno (ao_ref *);
f17d2d13 234static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
235 HOST_WIDE_INT, tree);
644459d0 236
5474166e 237/* Which instruction set architecture to use. */
238int spu_arch;
239/* Which cpu are we tuning for. */
240int spu_tune;
241
5a976006 242/* The hardware requires 8 insns between a hint and the branch it
 243 affects. This variable describes how many rtl instructions the
 244 compiler needs to see before inserting a hint, and then the compiler
 245 will insert enough nops to make it at least 8 insns. The default is
 246 for the compiler to allow up to 2 nops to be emitted. The nops are
 247 inserted in pairs, so we round down. */
248int spu_hint_dist = (8*4) - (2*4);
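/* Worked example (editorial note, not in the original source): SPU
   instructions are 4 bytes each, so 8 instructions of separation is
   8*4 = 32 bytes; allowing up to 2 nops to be inserted leaves
   32 - 2*4 = 24 bytes as the distance the compiler must see, which is the
   default above.  spu_option_override later recomputes this as
   8*4 - spu_max_nops*4.  */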
249
644459d0 250enum spu_immediate {
251 SPU_NONE,
252 SPU_IL,
253 SPU_ILA,
254 SPU_ILH,
255 SPU_ILHU,
256 SPU_ORI,
257 SPU_ORHI,
258 SPU_ORBI,
99369027 259 SPU_IOHL
644459d0 260};
dea01258 261enum immediate_class
262{
263 IC_POOL, /* constant pool */
264 IC_IL1, /* one il* instruction */
265 IC_IL2, /* both ilhu and iohl instructions */
266 IC_IL1s, /* one il* instruction */
267 IC_IL2s, /* both ilhu and iohl instructions */
268 IC_FSMBI, /* the fsmbi instruction */
269 IC_CPAT, /* one of the c*d instructions */
5df189be 270 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 271};
644459d0 272
273static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
274static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 275static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
276static enum immediate_class classify_immediate (rtx op,
277 enum machine_mode mode);
644459d0 278
1bd43494 279static enum machine_mode spu_unwind_word_mode (void);
280
ea32e033 281static enum machine_mode
282spu_libgcc_cmp_return_mode (void);
283
284static enum machine_mode
285spu_libgcc_shift_count_mode (void);
6cf5579e 286
287/* Pointer mode for __ea references. */
288#define EAmode (spu_ea_model != 32 ? DImode : SImode)
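/* Editorial note (not part of the original source): spu_ea_model records the
   selected width of __ea pointers, so EAmode evaluates to DImode when 64-bit
   __ea pointers are in use and to SImode when they are 32 bits; it is the
   mode the __ea address-space hooks above work with.  */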
289
ef51d1e3 290\f
291/* Table of machine attributes. */
292static const struct attribute_spec spu_attribute_table[] =
293{
ac86af5d 294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
297 false },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
299 false },
300 { NULL, 0, 0, false, false, false, NULL, false }
ef51d1e3 301};
644459d0 302\f
303/* TARGET overrides. */
304
6cf5579e 305#undef TARGET_ADDR_SPACE_POINTER_MODE
306#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
307
308#undef TARGET_ADDR_SPACE_ADDRESS_MODE
309#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
310
311#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
312#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
313 spu_addr_space_legitimate_address_p
314
315#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
316#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
317
318#undef TARGET_ADDR_SPACE_SUBSET_P
319#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
320
321#undef TARGET_ADDR_SPACE_CONVERT
322#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
323
644459d0 324#undef TARGET_INIT_BUILTINS
325#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 326#undef TARGET_BUILTIN_DECL
327#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 328
644459d0 329#undef TARGET_EXPAND_BUILTIN
330#define TARGET_EXPAND_BUILTIN spu_expand_builtin
331
1bd43494 332#undef TARGET_UNWIND_WORD_MODE
333#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 334
41e3a0c7 335#undef TARGET_LEGITIMIZE_ADDRESS
336#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
337
6cf5579e 338/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
339 and .quad for the debugger. When it is known that the assembler is fixed,
340 these can be removed. */
341#undef TARGET_ASM_UNALIGNED_SI_OP
342#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
343
344#undef TARGET_ASM_ALIGNED_DI_OP
345#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
346
644459d0 347/* The .8byte directive doesn't seem to work well for a 32 bit
348 architecture. */
349#undef TARGET_ASM_UNALIGNED_DI_OP
350#define TARGET_ASM_UNALIGNED_DI_OP NULL
351
352#undef TARGET_RTX_COSTS
353#define TARGET_RTX_COSTS spu_rtx_costs
354
355#undef TARGET_ADDRESS_COST
f529eb25 356#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 357
358#undef TARGET_SCHED_ISSUE_RATE
359#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
360
5a976006 361#undef TARGET_SCHED_INIT_GLOBAL
362#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
363
364#undef TARGET_SCHED_INIT
365#define TARGET_SCHED_INIT spu_sched_init
366
644459d0 367#undef TARGET_SCHED_VARIABLE_ISSUE
368#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
369
5a976006 370#undef TARGET_SCHED_REORDER
371#define TARGET_SCHED_REORDER spu_sched_reorder
372
373#undef TARGET_SCHED_REORDER2
374#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 375
376#undef TARGET_SCHED_ADJUST_COST
377#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
378
644459d0 379#undef TARGET_ATTRIBUTE_TABLE
380#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
381
382#undef TARGET_ASM_INTEGER
383#define TARGET_ASM_INTEGER spu_assemble_integer
384
385#undef TARGET_SCALAR_MODE_SUPPORTED_P
386#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
387
388#undef TARGET_VECTOR_MODE_SUPPORTED_P
389#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
390
391#undef TARGET_FUNCTION_OK_FOR_SIBCALL
392#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
393
394#undef TARGET_ASM_GLOBALIZE_LABEL
395#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
396
397#undef TARGET_PASS_BY_REFERENCE
398#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
399
ee9034d4 400#undef TARGET_FUNCTION_ARG
401#define TARGET_FUNCTION_ARG spu_function_arg
402
403#undef TARGET_FUNCTION_ARG_ADVANCE
404#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
405
644459d0 406#undef TARGET_MUST_PASS_IN_STACK
407#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
408
409#undef TARGET_BUILD_BUILTIN_VA_LIST
410#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
411
8a58ed0a 412#undef TARGET_EXPAND_BUILTIN_VA_START
413#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
414
644459d0 415#undef TARGET_SETUP_INCOMING_VARARGS
416#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
417
418#undef TARGET_MACHINE_DEPENDENT_REORG
419#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
420
421#undef TARGET_GIMPLIFY_VA_ARG_EXPR
422#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
423
644459d0 424#undef TARGET_INIT_LIBFUNCS
425#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
426
427#undef TARGET_RETURN_IN_MEMORY
428#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
429
69ced2d6 430#undef TARGET_ENCODE_SECTION_INFO
431#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
432
e99f512d 433#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
434#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
435
436#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
437#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
438
a76866d3 439#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
440#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
441
a28df51d 442#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
443#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
444
202d6e5f 445#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
446#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 447
a0515226 448#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
449#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
450
ea32e033 451#undef TARGET_LIBGCC_CMP_RETURN_MODE
452#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
453
454#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
456
d52fd16a 457#undef TARGET_SCHED_SMS_RES_MII
458#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
459
5a976006 460#undef TARGET_ASM_FILE_START
461#define TARGET_ASM_FILE_START asm_file_start
462
a08dfd55 463#undef TARGET_SECTION_TYPE_FLAGS
464#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
465
6cf5579e 466#undef TARGET_ASM_SELECT_SECTION
467#define TARGET_ASM_SELECT_SECTION spu_select_section
468
469#undef TARGET_ASM_UNIQUE_SECTION
470#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
471
fd50b071 472#undef TARGET_LEGITIMATE_ADDRESS_P
473#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
474
ca316360 475#undef TARGET_LEGITIMATE_CONSTANT_P
476#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
477
e96f2783 478#undef TARGET_TRAMPOLINE_INIT
479#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
480
4c834714 481#undef TARGET_OPTION_OVERRIDE
482#define TARGET_OPTION_OVERRIDE spu_option_override
483
686e2769 484#undef TARGET_OPTION_DEFAULT_PARAMS
485#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
486
b2d7ede1 487#undef TARGET_CONDITIONAL_REGISTER_USAGE
488#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
489
329c1e4e 490#undef TARGET_REF_MAY_ALIAS_ERRNO
491#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
492
f17d2d13 493#undef TARGET_ASM_OUTPUT_MI_THUNK
494#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
495#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
497
8a42230a 498/* Variable tracking should be run after all optimizations which
499 change order of insns. It also needs a valid CFG. */
500#undef TARGET_DELAY_VARTRACK
501#define TARGET_DELAY_VARTRACK true
502
644459d0 503struct gcc_target targetm = TARGET_INITIALIZER;
504
686e2769 505/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
506static void
507spu_option_default_params (void)
508{
509 /* Override some of the default param values. With so many registers
510 larger values are better for these params. */
511 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
512}
513
4c834714 514/* Implement TARGET_OPTION_OVERRIDE. */
515static void
516spu_option_override (void)
644459d0 517{
14d408d9 518 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it is
 519 more important to keep code small by default. */
686e2769 520 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 521 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 522 global_options.x_param_values,
523 global_options_set.x_param_values);
14d408d9 524
644459d0 525 flag_omit_frame_pointer = 1;
526
5a976006 527 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 528 if (align_functions < 8)
529 align_functions = 8;
c7b91b14 530
5a976006 531 spu_hint_dist = 8*4 - spu_max_nops*4;
532 if (spu_hint_dist < 0)
533 spu_hint_dist = 0;
534
c7b91b14 535 if (spu_fixed_range_string)
536 fix_range (spu_fixed_range_string);
5474166e 537
538 /* Determine processor architectural level. */
539 if (spu_arch_string)
540 {
541 if (strcmp (&spu_arch_string[0], "cell") == 0)
542 spu_arch = PROCESSOR_CELL;
543 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
544 spu_arch = PROCESSOR_CELLEDP;
545 else
8e181c9d 546 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 547 }
548
549 /* Determine processor to tune for. */
550 if (spu_tune_string)
551 {
552 if (strcmp (&spu_tune_string[0], "cell") == 0)
553 spu_tune = PROCESSOR_CELL;
554 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
555 spu_tune = PROCESSOR_CELLEDP;
556 else
8e181c9d 557 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 558 }
98bbec1e 559
13684256 560 /* Change defaults according to the processor architecture. */
561 if (spu_arch == PROCESSOR_CELLEDP)
562 {
563 /* If no command line option has been otherwise specified, change
564 the default to -mno-safe-hints on celledp -- only the original
565 Cell/B.E. processors require this workaround. */
566 if (!(target_flags_explicit & MASK_SAFE_HINTS))
567 target_flags &= ~MASK_SAFE_HINTS;
568 }
569
98bbec1e 570 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 571}
572\f
573/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
574 struct attribute_spec.handler. */
575
644459d0 576/* True if MODE is valid for the target. By "valid", we mean able to
577 be manipulated in non-trivial ways. In particular, this means all
578 the arithmetic is supported. */
579static bool
580spu_scalar_mode_supported_p (enum machine_mode mode)
581{
582 switch (mode)
583 {
584 case QImode:
585 case HImode:
586 case SImode:
587 case SFmode:
588 case DImode:
589 case TImode:
590 case DFmode:
591 return true;
592
593 default:
594 return false;
595 }
596}
597
598/* Similarly for vector modes. "Supported" here is less strict. At
599 least some operations are supported; need to check optabs or builtins
600 for further details. */
601static bool
602spu_vector_mode_supported_p (enum machine_mode mode)
603{
604 switch (mode)
605 {
606 case V16QImode:
607 case V8HImode:
608 case V4SImode:
609 case V2DImode:
610 case V4SFmode:
611 case V2DFmode:
612 return true;
613
614 default:
615 return false;
616 }
617}
618
619/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
620 least significant bytes of the outer mode. This function returns
 621 TRUE for the SUBREGs where this is correct. */
622int
623valid_subreg (rtx op)
624{
625 enum machine_mode om = GET_MODE (op);
626 enum machine_mode im = GET_MODE (SUBREG_REG (op));
627 return om != VOIDmode && im != VOIDmode
628 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 629 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
630 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 631}
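/* Worked example (editorial note, not in the original source): under the
   rule above, (subreg:HI (reg:SI) ...) is valid because both modes are at
   most 4 bytes, and (subreg:V4SI (reg:TI) ...) is valid because both are at
   least 16 bytes, but (subreg:DI (reg:SI) ...) is rejected since the 8-byte
   and 4-byte modes fall into different size classes.  */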
632
 633/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 634 and adjust the start offset. */
644459d0 635static rtx
636adjust_operand (rtx op, HOST_WIDE_INT * start)
637{
638 enum machine_mode mode;
639 int op_size;
38aca5eb 640 /* Strip any paradoxical SUBREG. */
641 if (GET_CODE (op) == SUBREG
642 && (GET_MODE_BITSIZE (GET_MODE (op))
643 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 644 {
645 if (start)
646 *start -=
647 GET_MODE_BITSIZE (GET_MODE (op)) -
648 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
649 op = SUBREG_REG (op);
650 }
 651 /* If it is smaller than SI, widen it to SImode (a SUBREG is added below). */
652 op_size = GET_MODE_BITSIZE (GET_MODE (op));
653 if (op_size < 32)
654 {
655 if (start)
656 *start += 32 - op_size;
657 op_size = 32;
658 }
659 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
660 mode = mode_for_size (op_size, MODE_INT, 0);
661 if (mode != GET_MODE (op))
662 op = gen_rtx_SUBREG (mode, op, 0);
663 return op;
664}
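/* Worked example (editorial note, not in the original source): given
   OP = (subreg:TI (reg:SI) 0), the paradoxical SUBREG is stripped and *START
   is reduced by 128 - 32 = 96 bits, leaving the SImode register unchanged;
   for a bare QImode register the second step instead adds 32 - 8 = 24 to
   *START and wraps the register in an SImode SUBREG.  */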
665
666void
667spu_expand_extv (rtx ops[], int unsignedp)
668{
9d98604b 669 rtx dst = ops[0], src = ops[1];
644459d0 670 HOST_WIDE_INT width = INTVAL (ops[2]);
671 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 672 HOST_WIDE_INT align_mask;
673 rtx s0, s1, mask, r0;
644459d0 674
9d98604b 675 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 676
9d98604b 677 if (MEM_P (src))
644459d0 678 {
9d98604b 679 /* First, determine if we need 1 TImode load or 2. We need only 1
680 if the bits being extracted do not cross the alignment boundary
681 as determined by the MEM and its address. */
682
683 align_mask = -MEM_ALIGN (src);
684 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 685 {
9d98604b 686 /* Alignment is sufficient for 1 load. */
687 s0 = gen_reg_rtx (TImode);
688 r0 = spu_expand_load (s0, 0, src, start / 8);
689 start &= 7;
690 if (r0)
691 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 692 }
9d98604b 693 else
694 {
695 /* Need 2 loads. */
696 s0 = gen_reg_rtx (TImode);
697 s1 = gen_reg_rtx (TImode);
698 r0 = spu_expand_load (s0, s1, src, start / 8);
699 start &= 7;
700
701 gcc_assert (start + width <= 128);
702 if (r0)
703 {
704 rtx r1 = gen_reg_rtx (SImode);
705 mask = gen_reg_rtx (TImode);
706 emit_move_insn (mask, GEN_INT (-1));
707 emit_insn (gen_rotqby_ti (s0, s0, r0));
708 emit_insn (gen_rotqby_ti (s1, s1, r0));
709 if (GET_CODE (r0) == CONST_INT)
710 r1 = GEN_INT (INTVAL (r0) & 15);
711 else
712 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
713 emit_insn (gen_shlqby_ti (mask, mask, r1));
714 emit_insn (gen_selb (s0, s1, s0, mask));
715 }
716 }
717
718 }
719 else if (GET_CODE (src) == SUBREG)
720 {
721 rtx r = SUBREG_REG (src);
722 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
723 s0 = gen_reg_rtx (TImode);
724 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
725 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
726 else
727 emit_move_insn (s0, src);
728 }
729 else
730 {
731 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
732 s0 = gen_reg_rtx (TImode);
733 emit_move_insn (s0, src);
644459d0 734 }
735
9d98604b 736 /* Now s0 is TImode and contains the bits to extract at start. */
737
738 if (start)
739 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
740
741 if (128 - width)
f5ff0b21 742 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 743
9d98604b 744 emit_move_insn (dst, s0);
644459d0 745}
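/* Worked example (editorial note, not in the original source): with
   MEM_ALIGN (src) == 128 bits, align_mask is -128; extracting 32 bits at bit
   offset 40 gives (40 & -128) == (71 & -128) == 0, so a single TImode load
   plus a rotate suffices, while offset 112 gives 0 != (143 & -128) == 128,
   so the two-load path that merges s0 and s1 with selb is used.  */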
746
747void
748spu_expand_insv (rtx ops[])
749{
750 HOST_WIDE_INT width = INTVAL (ops[1]);
751 HOST_WIDE_INT start = INTVAL (ops[2]);
752 HOST_WIDE_INT maskbits;
4cbad5bb 753 enum machine_mode dst_mode;
644459d0 754 rtx dst = ops[0], src = ops[3];
4cbad5bb 755 int dst_size;
644459d0 756 rtx mask;
757 rtx shift_reg;
758 int shift;
759
760
761 if (GET_CODE (ops[0]) == MEM)
762 dst = gen_reg_rtx (TImode);
763 else
764 dst = adjust_operand (dst, &start);
765 dst_mode = GET_MODE (dst);
766 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
767
768 if (CONSTANT_P (src))
769 {
770 enum machine_mode m =
771 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
772 src = force_reg (m, convert_to_mode (m, src, 0));
773 }
774 src = adjust_operand (src, 0);
644459d0 775
776 mask = gen_reg_rtx (dst_mode);
777 shift_reg = gen_reg_rtx (dst_mode);
778 shift = dst_size - start - width;
779
780 /* It's not safe to use subreg here because the compiler assumes
781 that the SUBREG_REG is right justified in the SUBREG. */
782 convert_move (shift_reg, src, 1);
783
784 if (shift > 0)
785 {
786 switch (dst_mode)
787 {
788 case SImode:
789 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
790 break;
791 case DImode:
792 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
793 break;
794 case TImode:
795 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
796 break;
797 default:
798 abort ();
799 }
800 }
801 else if (shift < 0)
802 abort ();
803
804 switch (dst_size)
805 {
806 case 32:
807 maskbits = (-1ll << (32 - width - start));
808 if (start)
809 maskbits += (1ll << (32 - start));
810 emit_move_insn (mask, GEN_INT (maskbits));
811 break;
812 case 64:
813 maskbits = (-1ll << (64 - width - start));
814 if (start)
815 maskbits += (1ll << (64 - start));
816 emit_move_insn (mask, GEN_INT (maskbits));
817 break;
818 case 128:
819 {
820 unsigned char arr[16];
821 int i = start / 8;
822 memset (arr, 0, sizeof (arr));
823 arr[i] = 0xff >> (start & 7);
824 for (i++; i <= (start + width - 1) / 8; i++)
825 arr[i] = 0xff;
826 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
827 emit_move_insn (mask, array_to_constant (TImode, arr));
828 }
829 break;
830 default:
831 abort ();
832 }
833 if (GET_CODE (ops[0]) == MEM)
834 {
644459d0 835 rtx low = gen_reg_rtx (SImode);
644459d0 836 rtx rotl = gen_reg_rtx (SImode);
837 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 838 rtx addr;
839 rtx addr0;
840 rtx addr1;
644459d0 841 rtx mem;
842
9d98604b 843 addr = force_reg (Pmode, XEXP (ops[0], 0));
844 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 845 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
846 emit_insn (gen_negsi2 (rotl, low));
847 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
848 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 849 mem = change_address (ops[0], TImode, addr0);
644459d0 850 set_mem_alias_set (mem, 0);
851 emit_move_insn (dst, mem);
852 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 853 if (start + width > MEM_ALIGN (ops[0]))
854 {
855 rtx shl = gen_reg_rtx (SImode);
856 rtx mask1 = gen_reg_rtx (TImode);
857 rtx dst1 = gen_reg_rtx (TImode);
858 rtx mem1;
9d98604b 859 addr1 = plus_constant (addr, 16);
860 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 861 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
862 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 863 mem1 = change_address (ops[0], TImode, addr1);
644459d0 864 set_mem_alias_set (mem1, 0);
865 emit_move_insn (dst1, mem1);
866 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
867 emit_move_insn (mem1, dst1);
868 }
9d98604b 869 emit_move_insn (mem, dst);
644459d0 870 }
871 else
71cd778d 872 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 873}
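/* Worked example (editorial note, not in the original source): inserting an
   8-bit field at bit 4 of a 32-bit destination computes
   maskbits = (-1 << 20) + (1 << 28) = 0x0ff00000, a mask whose set bits are
   exactly the 8 bits starting 4 bits below the most significant bit, so the
   final selb picks the shifted source only for the field being written.  */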
874
875
876int
877spu_expand_block_move (rtx ops[])
878{
879 HOST_WIDE_INT bytes, align, offset;
880 rtx src, dst, sreg, dreg, target;
881 int i;
882 if (GET_CODE (ops[2]) != CONST_INT
883 || GET_CODE (ops[3]) != CONST_INT
48eb4342 884 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 885 return 0;
886
887 bytes = INTVAL (ops[2]);
888 align = INTVAL (ops[3]);
889
890 if (bytes <= 0)
891 return 1;
892
893 dst = ops[0];
894 src = ops[1];
895
896 if (align == 16)
897 {
898 for (offset = 0; offset + 16 <= bytes; offset += 16)
899 {
900 dst = adjust_address (ops[0], V16QImode, offset);
901 src = adjust_address (ops[1], V16QImode, offset);
902 emit_move_insn (dst, src);
903 }
904 if (offset < bytes)
905 {
906 rtx mask;
907 unsigned char arr[16] = { 0 };
908 for (i = 0; i < bytes - offset; i++)
909 arr[i] = 0xff;
910 dst = adjust_address (ops[0], V16QImode, offset);
911 src = adjust_address (ops[1], V16QImode, offset);
912 mask = gen_reg_rtx (V16QImode);
913 sreg = gen_reg_rtx (V16QImode);
914 dreg = gen_reg_rtx (V16QImode);
915 target = gen_reg_rtx (V16QImode);
916 emit_move_insn (mask, array_to_constant (V16QImode, arr));
917 emit_move_insn (dreg, dst);
918 emit_move_insn (sreg, src);
919 emit_insn (gen_selb (target, dreg, sreg, mask));
920 emit_move_insn (dst, target);
921 }
922 return 1;
923 }
924 return 0;
925}
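/* Worked example (editorial note, not in the original source): a 40-byte
   copy with 16-byte alignment becomes two full V16QImode moves for bytes
   0-31; for the 8-byte tail a mask of eight 0xff bytes followed by zeros is
   built, selb takes the first eight bytes from the source quadword while
   keeping the remaining destination bytes, and the merged quadword is
   stored back.  */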
926
927enum spu_comp_code
928{ SPU_EQ, SPU_GT, SPU_GTU };
929
5474166e 930int spu_comp_icode[12][3] = {
931 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
932 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
933 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
934 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
935 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
936 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
937 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
938 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
939 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
940 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
941 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
942 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 943};
944
 945/* Generate a compare for CODE. Return a brand-new rtx that represents
 946 the result of the compare. GCC can figure this out too if we don't
 947 provide all variations of compares, but because GCC always wants to use
 948 WORD_MODE we can generate better code in most cases if we do it
 949 ourselves. */
950void
74f4459c 951spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 952{
953 int reverse_compare = 0;
954 int reverse_test = 0;
5d70b918 955 rtx compare_result, eq_result;
956 rtx comp_rtx, eq_rtx;
644459d0 957 enum machine_mode comp_mode;
958 enum machine_mode op_mode;
b9c74b4d 959 enum spu_comp_code scode, eq_code;
960 enum insn_code ior_code;
74f4459c 961 enum rtx_code code = GET_CODE (cmp);
962 rtx op0 = XEXP (cmp, 0);
963 rtx op1 = XEXP (cmp, 1);
644459d0 964 int index;
5d70b918 965 int eq_test = 0;
644459d0 966
74f4459c 967 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 968 and so on, to keep the constant in operand 1. */
74f4459c 969 if (GET_CODE (op1) == CONST_INT)
644459d0 970 {
74f4459c 971 HOST_WIDE_INT val = INTVAL (op1) - 1;
972 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 973 switch (code)
974 {
975 case GE:
74f4459c 976 op1 = GEN_INT (val);
644459d0 977 code = GT;
978 break;
979 case LT:
74f4459c 980 op1 = GEN_INT (val);
644459d0 981 code = LE;
982 break;
983 case GEU:
74f4459c 984 op1 = GEN_INT (val);
644459d0 985 code = GTU;
986 break;
987 case LTU:
74f4459c 988 op1 = GEN_INT (val);
644459d0 989 code = LEU;
990 break;
991 default:
992 break;
993 }
994 }
995
5d70b918 996 comp_mode = SImode;
74f4459c 997 op_mode = GET_MODE (op0);
5d70b918 998
644459d0 999 switch (code)
1000 {
1001 case GE:
644459d0 1002 scode = SPU_GT;
07027691 1003 if (HONOR_NANS (op_mode))
5d70b918 1004 {
1005 reverse_compare = 0;
1006 reverse_test = 0;
1007 eq_test = 1;
1008 eq_code = SPU_EQ;
1009 }
1010 else
1011 {
1012 reverse_compare = 1;
1013 reverse_test = 1;
1014 }
644459d0 1015 break;
1016 case LE:
644459d0 1017 scode = SPU_GT;
07027691 1018 if (HONOR_NANS (op_mode))
5d70b918 1019 {
1020 reverse_compare = 1;
1021 reverse_test = 0;
1022 eq_test = 1;
1023 eq_code = SPU_EQ;
1024 }
1025 else
1026 {
1027 reverse_compare = 0;
1028 reverse_test = 1;
1029 }
644459d0 1030 break;
1031 case LT:
1032 reverse_compare = 1;
1033 reverse_test = 0;
1034 scode = SPU_GT;
1035 break;
1036 case GEU:
1037 reverse_compare = 1;
1038 reverse_test = 1;
1039 scode = SPU_GTU;
1040 break;
1041 case LEU:
1042 reverse_compare = 0;
1043 reverse_test = 1;
1044 scode = SPU_GTU;
1045 break;
1046 case LTU:
1047 reverse_compare = 1;
1048 reverse_test = 0;
1049 scode = SPU_GTU;
1050 break;
1051 case NE:
1052 reverse_compare = 0;
1053 reverse_test = 1;
1054 scode = SPU_EQ;
1055 break;
1056
1057 case EQ:
1058 scode = SPU_EQ;
1059 break;
1060 case GT:
1061 scode = SPU_GT;
1062 break;
1063 case GTU:
1064 scode = SPU_GTU;
1065 break;
1066 default:
1067 scode = SPU_EQ;
1068 break;
1069 }
1070
644459d0 1071 switch (op_mode)
1072 {
1073 case QImode:
1074 index = 0;
1075 comp_mode = QImode;
1076 break;
1077 case HImode:
1078 index = 1;
1079 comp_mode = HImode;
1080 break;
1081 case SImode:
1082 index = 2;
1083 break;
1084 case DImode:
1085 index = 3;
1086 break;
1087 case TImode:
1088 index = 4;
1089 break;
1090 case SFmode:
1091 index = 5;
1092 break;
1093 case DFmode:
1094 index = 6;
1095 break;
1096 case V16QImode:
5474166e 1097 index = 7;
1098 comp_mode = op_mode;
1099 break;
644459d0 1100 case V8HImode:
5474166e 1101 index = 8;
1102 comp_mode = op_mode;
1103 break;
644459d0 1104 case V4SImode:
5474166e 1105 index = 9;
1106 comp_mode = op_mode;
1107 break;
644459d0 1108 case V4SFmode:
5474166e 1109 index = 10;
1110 comp_mode = V4SImode;
1111 break;
644459d0 1112 case V2DFmode:
5474166e 1113 index = 11;
1114 comp_mode = V2DImode;
644459d0 1115 break;
5474166e 1116 case V2DImode:
644459d0 1117 default:
1118 abort ();
1119 }
1120
74f4459c 1121 if (GET_MODE (op1) == DFmode
07027691 1122 && (scode != SPU_GT && scode != SPU_EQ))
1123 abort ();
644459d0 1124
74f4459c 1125 if (is_set == 0 && op1 == const0_rtx
1126 && (GET_MODE (op0) == SImode
1127 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1128 {
1129 /* Don't need to set a register with the result when we are
1130 comparing against zero and branching. */
1131 reverse_test = !reverse_test;
74f4459c 1132 compare_result = op0;
644459d0 1133 }
1134 else
1135 {
1136 compare_result = gen_reg_rtx (comp_mode);
1137
1138 if (reverse_compare)
1139 {
74f4459c 1140 rtx t = op1;
1141 op1 = op0;
1142 op0 = t;
644459d0 1143 }
1144
1145 if (spu_comp_icode[index][scode] == 0)
1146 abort ();
1147
1148 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1149 (op0, op_mode))
1150 op0 = force_reg (op_mode, op0);
644459d0 1151 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1152 (op1, op_mode))
1153 op1 = force_reg (op_mode, op1);
644459d0 1154 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1155 op0, op1);
644459d0 1156 if (comp_rtx == 0)
1157 abort ();
1158 emit_insn (comp_rtx);
1159
5d70b918 1160 if (eq_test)
1161 {
1162 eq_result = gen_reg_rtx (comp_mode);
1163 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1164 op0, op1);
5d70b918 1165 if (eq_rtx == 0)
1166 abort ();
1167 emit_insn (eq_rtx);
d6bf3b14 1168 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1169 gcc_assert (ior_code != CODE_FOR_nothing);
1170 emit_insn (GEN_FCN (ior_code)
1171 (compare_result, compare_result, eq_result));
1172 }
644459d0 1173 }
1174
1175 if (is_set == 0)
1176 {
1177 rtx bcomp;
1178 rtx loc_ref;
1179
1180 /* We don't have branch on QI compare insns, so we convert the
1181 QI compare result to a HI result. */
1182 if (comp_mode == QImode)
1183 {
1184 rtx old_res = compare_result;
1185 compare_result = gen_reg_rtx (HImode);
1186 comp_mode = HImode;
1187 emit_insn (gen_extendqihi2 (compare_result, old_res));
1188 }
1189
1190 if (reverse_test)
1191 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1192 else
1193 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1194
74f4459c 1195 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1196 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1197 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1198 loc_ref, pc_rtx)));
1199 }
1200 else if (is_set == 2)
1201 {
74f4459c 1202 rtx target = operands[0];
644459d0 1203 int compare_size = GET_MODE_BITSIZE (comp_mode);
1204 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1205 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1206 rtx select_mask;
1207 rtx op_t = operands[2];
1208 rtx op_f = operands[3];
1209
1210 /* The result of the comparison can be SI, HI or QI mode. Create a
1211 mask based on that result. */
1212 if (target_size > compare_size)
1213 {
1214 select_mask = gen_reg_rtx (mode);
1215 emit_insn (gen_extend_compare (select_mask, compare_result));
1216 }
1217 else if (target_size < compare_size)
1218 select_mask =
1219 gen_rtx_SUBREG (mode, compare_result,
1220 (compare_size - target_size) / BITS_PER_UNIT);
1221 else if (comp_mode != mode)
1222 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1223 else
1224 select_mask = compare_result;
1225
1226 if (GET_MODE (target) != GET_MODE (op_t)
1227 || GET_MODE (target) != GET_MODE (op_f))
1228 abort ();
1229
1230 if (reverse_test)
1231 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1232 else
1233 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1234 }
1235 else
1236 {
74f4459c 1237 rtx target = operands[0];
644459d0 1238 if (reverse_test)
1239 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1240 gen_rtx_NOT (comp_mode, compare_result)));
1241 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1242 emit_insn (gen_extendhisi2 (target, compare_result));
1243 else if (GET_MODE (target) == SImode
1244 && GET_MODE (compare_result) == QImode)
1245 emit_insn (gen_extend_compare (target, compare_result));
1246 else
1247 emit_move_insn (target, compare_result);
1248 }
1249}
1250
1251HOST_WIDE_INT
1252const_double_to_hwint (rtx x)
1253{
1254 HOST_WIDE_INT val;
1255 REAL_VALUE_TYPE rv;
1256 if (GET_MODE (x) == SFmode)
1257 {
1258 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1259 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1260 }
1261 else if (GET_MODE (x) == DFmode)
1262 {
1263 long l[2];
1264 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1265 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1266 val = l[0];
1267 val = (val << 32) | (l[1] & 0xffffffff);
1268 }
1269 else
1270 abort ();
1271 return val;
1272}
1273
1274rtx
1275hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1276{
1277 long tv[2];
1278 REAL_VALUE_TYPE rv;
1279 gcc_assert (mode == SFmode || mode == DFmode);
1280
1281 if (mode == SFmode)
1282 tv[0] = (v << 32) >> 32;
1283 else if (mode == DFmode)
1284 {
1285 tv[1] = (v << 32) >> 32;
1286 tv[0] = v >> 32;
1287 }
1288 real_from_target (&rv, tv, mode);
1289 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1290}
1291
1292void
1293print_operand_address (FILE * file, register rtx addr)
1294{
1295 rtx reg;
1296 rtx offset;
1297
e04cf423 1298 if (GET_CODE (addr) == AND
1299 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1300 && INTVAL (XEXP (addr, 1)) == -16)
1301 addr = XEXP (addr, 0);
1302
644459d0 1303 switch (GET_CODE (addr))
1304 {
1305 case REG:
1306 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1307 break;
1308
1309 case PLUS:
1310 reg = XEXP (addr, 0);
1311 offset = XEXP (addr, 1);
1312 if (GET_CODE (offset) == REG)
1313 {
1314 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1315 reg_names[REGNO (offset)]);
1316 }
1317 else if (GET_CODE (offset) == CONST_INT)
1318 {
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1320 INTVAL (offset), reg_names[REGNO (reg)]);
1321 }
1322 else
1323 abort ();
1324 break;
1325
1326 case CONST:
1327 case LABEL_REF:
1328 case SYMBOL_REF:
1329 case CONST_INT:
1330 output_addr_const (file, addr);
1331 break;
1332
1333 default:
1334 debug_rtx (addr);
1335 abort ();
1336 }
1337}
1338
1339void
1340print_operand (FILE * file, rtx x, int code)
1341{
1342 enum machine_mode mode = GET_MODE (x);
1343 HOST_WIDE_INT val;
1344 unsigned char arr[16];
1345 int xcode = GET_CODE (x);
dea01258 1346 int i, info;
644459d0 1347 if (GET_MODE (x) == VOIDmode)
1348 switch (code)
1349 {
644459d0 1350 case 'L': /* 128 bits, signed */
1351 case 'm': /* 128 bits, signed */
1352 case 'T': /* 128 bits, signed */
1353 case 't': /* 128 bits, signed */
1354 mode = TImode;
1355 break;
644459d0 1356 case 'K': /* 64 bits, signed */
1357 case 'k': /* 64 bits, signed */
1358 case 'D': /* 64 bits, signed */
1359 case 'd': /* 64 bits, signed */
1360 mode = DImode;
1361 break;
644459d0 1362 case 'J': /* 32 bits, signed */
1363 case 'j': /* 32 bits, signed */
1364 case 's': /* 32 bits, signed */
1365 case 'S': /* 32 bits, signed */
1366 mode = SImode;
1367 break;
1368 }
1369 switch (code)
1370 {
1371
1372 case 'j': /* 32 bits, signed */
1373 case 'k': /* 64 bits, signed */
1374 case 'm': /* 128 bits, signed */
1375 if (xcode == CONST_INT
1376 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1377 {
1378 gcc_assert (logical_immediate_p (x, mode));
1379 constant_to_array (mode, x, arr);
1380 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1381 val = trunc_int_for_mode (val, SImode);
1382 switch (which_logical_immediate (val))
1383 {
1384 case SPU_ORI:
1385 break;
1386 case SPU_ORHI:
1387 fprintf (file, "h");
1388 break;
1389 case SPU_ORBI:
1390 fprintf (file, "b");
1391 break;
1392 default:
1393 gcc_unreachable();
1394 }
1395 }
1396 else
1397 gcc_unreachable();
1398 return;
1399
1400 case 'J': /* 32 bits, signed */
1401 case 'K': /* 64 bits, signed */
1402 case 'L': /* 128 bits, signed */
1403 if (xcode == CONST_INT
1404 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1405 {
1406 gcc_assert (logical_immediate_p (x, mode)
1407 || iohl_immediate_p (x, mode));
1408 constant_to_array (mode, x, arr);
1409 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1410 val = trunc_int_for_mode (val, SImode);
1411 switch (which_logical_immediate (val))
1412 {
1413 case SPU_ORI:
1414 case SPU_IOHL:
1415 break;
1416 case SPU_ORHI:
1417 val = trunc_int_for_mode (val, HImode);
1418 break;
1419 case SPU_ORBI:
1420 val = trunc_int_for_mode (val, QImode);
1421 break;
1422 default:
1423 gcc_unreachable();
1424 }
1425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1426 }
1427 else
1428 gcc_unreachable();
1429 return;
1430
1431 case 't': /* 128 bits, signed */
1432 case 'd': /* 64 bits, signed */
1433 case 's': /* 32 bits, signed */
dea01258 1434 if (CONSTANT_P (x))
644459d0 1435 {
dea01258 1436 enum immediate_class c = classify_immediate (x, mode);
1437 switch (c)
1438 {
1439 case IC_IL1:
1440 constant_to_array (mode, x, arr);
1441 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1442 val = trunc_int_for_mode (val, SImode);
1443 switch (which_immediate_load (val))
1444 {
1445 case SPU_IL:
1446 break;
1447 case SPU_ILA:
1448 fprintf (file, "a");
1449 break;
1450 case SPU_ILH:
1451 fprintf (file, "h");
1452 break;
1453 case SPU_ILHU:
1454 fprintf (file, "hu");
1455 break;
1456 default:
1457 gcc_unreachable ();
1458 }
1459 break;
1460 case IC_CPAT:
1461 constant_to_array (mode, x, arr);
1462 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1463 if (info == 1)
1464 fprintf (file, "b");
1465 else if (info == 2)
1466 fprintf (file, "h");
1467 else if (info == 4)
1468 fprintf (file, "w");
1469 else if (info == 8)
1470 fprintf (file, "d");
1471 break;
1472 case IC_IL1s:
1473 if (xcode == CONST_VECTOR)
1474 {
1475 x = CONST_VECTOR_ELT (x, 0);
1476 xcode = GET_CODE (x);
1477 }
1478 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1479 fprintf (file, "a");
1480 else if (xcode == HIGH)
1481 fprintf (file, "hu");
1482 break;
1483 case IC_FSMBI:
5df189be 1484 case IC_FSMBI2:
dea01258 1485 case IC_IL2:
1486 case IC_IL2s:
1487 case IC_POOL:
1488 abort ();
1489 }
644459d0 1490 }
644459d0 1491 else
1492 gcc_unreachable ();
1493 return;
1494
1495 case 'T': /* 128 bits, signed */
1496 case 'D': /* 64 bits, signed */
1497 case 'S': /* 32 bits, signed */
dea01258 1498 if (CONSTANT_P (x))
644459d0 1499 {
dea01258 1500 enum immediate_class c = classify_immediate (x, mode);
1501 switch (c)
644459d0 1502 {
dea01258 1503 case IC_IL1:
1504 constant_to_array (mode, x, arr);
1505 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1506 val = trunc_int_for_mode (val, SImode);
1507 switch (which_immediate_load (val))
1508 {
1509 case SPU_IL:
1510 case SPU_ILA:
1511 break;
1512 case SPU_ILH:
1513 case SPU_ILHU:
1514 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1515 break;
1516 default:
1517 gcc_unreachable ();
1518 }
1519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1520 break;
1521 case IC_FSMBI:
1522 constant_to_array (mode, x, arr);
1523 val = 0;
1524 for (i = 0; i < 16; i++)
1525 {
1526 val <<= 1;
1527 val |= arr[i] & 1;
1528 }
1529 print_operand (file, GEN_INT (val), 0);
1530 break;
1531 case IC_CPAT:
1532 constant_to_array (mode, x, arr);
1533 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1535 break;
dea01258 1536 case IC_IL1s:
dea01258 1537 if (xcode == HIGH)
5df189be 1538 x = XEXP (x, 0);
1539 if (GET_CODE (x) == CONST_VECTOR)
1540 x = CONST_VECTOR_ELT (x, 0);
1541 output_addr_const (file, x);
1542 if (xcode == HIGH)
1543 fprintf (file, "@h");
644459d0 1544 break;
dea01258 1545 case IC_IL2:
1546 case IC_IL2s:
5df189be 1547 case IC_FSMBI2:
dea01258 1548 case IC_POOL:
1549 abort ();
644459d0 1550 }
c8befdb9 1551 }
644459d0 1552 else
1553 gcc_unreachable ();
1554 return;
1555
644459d0 1556 case 'C':
1557 if (xcode == CONST_INT)
1558 {
 1559 /* Only the 4 least significant bits are relevant for generating
 1560 control word instructions. */
1561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1562 return;
1563 }
1564 break;
1565
1566 case 'M': /* print code for c*d */
1567 if (GET_CODE (x) == CONST_INT)
1568 switch (INTVAL (x))
1569 {
1570 case 1:
1571 fprintf (file, "b");
1572 break;
1573 case 2:
1574 fprintf (file, "h");
1575 break;
1576 case 4:
1577 fprintf (file, "w");
1578 break;
1579 case 8:
1580 fprintf (file, "d");
1581 break;
1582 default:
1583 gcc_unreachable();
1584 }
1585 else
1586 gcc_unreachable();
1587 return;
1588
1589 case 'N': /* Negate the operand */
1590 if (xcode == CONST_INT)
1591 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1592 else if (xcode == CONST_VECTOR)
1593 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1594 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1595 return;
1596
1597 case 'I': /* enable/disable interrupts */
1598 if (xcode == CONST_INT)
1599 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1600 return;
1601
1602 case 'b': /* branch modifiers */
1603 if (xcode == REG)
1604 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1605 else if (COMPARISON_P (x))
1606 fprintf (file, "%s", xcode == NE ? "n" : "");
1607 return;
1608
1609 case 'i': /* indirect call */
1610 if (xcode == MEM)
1611 {
1612 if (GET_CODE (XEXP (x, 0)) == REG)
1613 /* Used in indirect function calls. */
1614 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1615 else
1616 output_address (XEXP (x, 0));
1617 }
1618 return;
1619
1620 case 'p': /* load/store */
1621 if (xcode == MEM)
1622 {
1623 x = XEXP (x, 0);
1624 xcode = GET_CODE (x);
1625 }
e04cf423 1626 if (xcode == AND)
1627 {
1628 x = XEXP (x, 0);
1629 xcode = GET_CODE (x);
1630 }
644459d0 1631 if (xcode == REG)
1632 fprintf (file, "d");
1633 else if (xcode == CONST_INT)
1634 fprintf (file, "a");
1635 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1636 fprintf (file, "r");
1637 else if (xcode == PLUS || xcode == LO_SUM)
1638 {
1639 if (GET_CODE (XEXP (x, 1)) == REG)
1640 fprintf (file, "x");
1641 else
1642 fprintf (file, "d");
1643 }
1644 return;
1645
5df189be 1646 case 'e':
1647 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1648 val &= 0x7;
1649 output_addr_const (file, GEN_INT (val));
1650 return;
1651
1652 case 'f':
1653 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654 val &= 0x1f;
1655 output_addr_const (file, GEN_INT (val));
1656 return;
1657
1658 case 'g':
1659 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1660 val &= 0x3f;
1661 output_addr_const (file, GEN_INT (val));
1662 return;
1663
1664 case 'h':
1665 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1666 val = (val >> 3) & 0x1f;
1667 output_addr_const (file, GEN_INT (val));
1668 return;
1669
1670 case 'E':
1671 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1672 val = -val;
1673 val &= 0x7;
1674 output_addr_const (file, GEN_INT (val));
1675 return;
1676
1677 case 'F':
1678 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1679 val = -val;
1680 val &= 0x1f;
1681 output_addr_const (file, GEN_INT (val));
1682 return;
1683
1684 case 'G':
1685 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1686 val = -val;
1687 val &= 0x3f;
1688 output_addr_const (file, GEN_INT (val));
1689 return;
1690
1691 case 'H':
1692 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1693 val = -(val & -8ll);
1694 val = (val >> 3) & 0x1f;
1695 output_addr_const (file, GEN_INT (val));
1696 return;
1697
56c7bfc2 1698 case 'v':
1699 case 'w':
1700 constant_to_array (mode, x, arr);
1701 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1702 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1703 return;
1704
644459d0 1705 case 0:
1706 if (xcode == REG)
1707 fprintf (file, "%s", reg_names[REGNO (x)]);
1708 else if (xcode == MEM)
1709 output_address (XEXP (x, 0));
1710 else if (xcode == CONST_VECTOR)
dea01258 1711 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1712 else
1713 output_addr_const (file, x);
1714 return;
1715
f6a0d06f 1716 /* unused letters
56c7bfc2 1717 o qr u yz
5df189be 1718 AB OPQR UVWXYZ */
644459d0 1719 default:
1720 output_operand_lossage ("invalid %%xn code");
1721 }
1722 gcc_unreachable ();
1723}
1724
644459d0 1725/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1726 caller saved register. For leaf functions it is more efficient to
1727 use a volatile register because we won't need to save and restore the
1728 pic register. This routine is only valid after register allocation
1729 is completed, so we can pick an unused register. */
1730static rtx
1731get_pic_reg (void)
1732{
1733 rtx pic_reg = pic_offset_table_rtx;
1734 if (!reload_completed && !reload_in_progress)
1735 abort ();
87a95921 1736 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1737 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1738 return pic_reg;
1739}
1740
5df189be 1741/* Split constant addresses to handle cases that are too large.
1742 Add in the pic register when in PIC mode.
1743 Split immediates that require more than 1 instruction. */
dea01258 1744int
1745spu_split_immediate (rtx * ops)
c8befdb9 1746{
dea01258 1747 enum machine_mode mode = GET_MODE (ops[0]);
1748 enum immediate_class c = classify_immediate (ops[1], mode);
1749
1750 switch (c)
c8befdb9 1751 {
dea01258 1752 case IC_IL2:
1753 {
1754 unsigned char arrhi[16];
1755 unsigned char arrlo[16];
98bbec1e 1756 rtx to, temp, hi, lo;
dea01258 1757 int i;
98bbec1e 1758 enum machine_mode imode = mode;
1759 /* We need to do reals as ints because the constant used in the
1760 IOR might not be a legitimate real constant. */
1761 imode = int_mode_for_mode (mode);
dea01258 1762 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1763 if (imode != mode)
1764 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1765 else
1766 to = ops[0];
1767 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1768 for (i = 0; i < 16; i += 4)
1769 {
1770 arrlo[i + 2] = arrhi[i + 2];
1771 arrlo[i + 3] = arrhi[i + 3];
1772 arrlo[i + 0] = arrlo[i + 1] = 0;
1773 arrhi[i + 2] = arrhi[i + 3] = 0;
1774 }
98bbec1e 1775 hi = array_to_constant (imode, arrhi);
1776 lo = array_to_constant (imode, arrlo);
1777 emit_move_insn (temp, hi);
dea01258 1778 emit_insn (gen_rtx_SET
98bbec1e 1779 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1780 return 1;
1781 }
5df189be 1782 case IC_FSMBI2:
1783 {
1784 unsigned char arr_fsmbi[16];
1785 unsigned char arr_andbi[16];
1786 rtx to, reg_fsmbi, reg_and;
1787 int i;
1788 enum machine_mode imode = mode;
1789 /* We need to do reals as ints because the constant used in the
1790 * AND might not be a legitimate real constant. */
1791 imode = int_mode_for_mode (mode);
1792 constant_to_array (mode, ops[1], arr_fsmbi);
1793 if (imode != mode)
1794 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1795 else
1796 to = ops[0];
1797 for (i = 0; i < 16; i++)
1798 if (arr_fsmbi[i] != 0)
1799 {
1800 arr_andbi[0] = arr_fsmbi[i];
1801 arr_fsmbi[i] = 0xff;
1802 }
1803 for (i = 1; i < 16; i++)
1804 arr_andbi[i] = arr_andbi[0];
1805 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1806 reg_and = array_to_constant (imode, arr_andbi);
1807 emit_move_insn (to, reg_fsmbi);
1808 emit_insn (gen_rtx_SET
1809 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1810 return 1;
1811 }
dea01258 1812 case IC_POOL:
1813 if (reload_in_progress || reload_completed)
1814 {
1815 rtx mem = force_const_mem (mode, ops[1]);
1816 if (TARGET_LARGE_MEM)
1817 {
1818 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1819 emit_move_insn (addr, XEXP (mem, 0));
1820 mem = replace_equiv_address (mem, addr);
1821 }
1822 emit_move_insn (ops[0], mem);
1823 return 1;
1824 }
1825 break;
1826 case IC_IL1s:
1827 case IC_IL2s:
1828 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1829 {
1830 if (c == IC_IL2s)
1831 {
5df189be 1832 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1833 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1834 }
1835 else if (flag_pic)
1836 emit_insn (gen_pic (ops[0], ops[1]));
1837 if (flag_pic)
1838 {
1839 rtx pic_reg = get_pic_reg ();
1840 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1841 crtl->uses_pic_offset_table = 1;
dea01258 1842 }
1843 return flag_pic || c == IC_IL2s;
1844 }
1845 break;
1846 case IC_IL1:
1847 case IC_FSMBI:
1848 case IC_CPAT:
1849 break;
c8befdb9 1850 }
dea01258 1851 return 0;
c8befdb9 1852}
1853
644459d0 1854/* SAVING is TRUE when we are generating the actual load and store
1855 instructions for REGNO. When determining the size of the stack
	 1856	   needed for saving registers, we must allocate enough space for the
1857 worst case, because we don't always have the information early enough
1858 to not allocate it. But we can at least eliminate the actual loads
1859 and stores during the prologue/epilogue. */
1860static int
1861need_to_save_reg (int regno, int saving)
1862{
3072d30e 1863 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1864 return 1;
1865 if (flag_pic
1866 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1867 && (!saving || crtl->uses_pic_offset_table)
644459d0 1868 && (!saving
3072d30e 1869 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1870 return 1;
1871 return 0;
1872}
1873
1874/* This function is only correct starting with local register
1875 allocation */
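/* For example, a function that uses three call-saved registers reserves
   3 * 0x10 = 0x30 bytes here; every saved register gets a full 16-byte
   quadword slot.  */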
1876int
1877spu_saved_regs_size (void)
1878{
1879 int reg_save_size = 0;
1880 int regno;
1881
1882 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1883 if (need_to_save_reg (regno, 0))
1884 reg_save_size += 0x10;
1885 return reg_save_size;
1886}
1887
1888static rtx
1889frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1890{
1891 rtx reg = gen_rtx_REG (V4SImode, regno);
1892 rtx mem =
1893 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1894 return emit_insn (gen_movv4si (mem, reg));
1895}
1896
1897static rtx
1898frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1899{
1900 rtx reg = gen_rtx_REG (V4SImode, regno);
1901 rtx mem =
1902 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1903 return emit_insn (gen_movv4si (reg, mem));
1904}
1905
1906/* This happens after reload, so we need to expand it. */
1907static rtx
1908frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1909{
1910 rtx insn;
1911 if (satisfies_constraint_K (GEN_INT (imm)))
1912 {
1913 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1914 }
1915 else
1916 {
3072d30e 1917 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1918 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1919 if (REGNO (src) == REGNO (scratch))
1920 abort ();
1921 }
644459d0 1922 return insn;
1923}
1924
1925/* Return nonzero if this function is known to have a null epilogue. */
1926
1927int
1928direct_return (void)
1929{
1930 if (reload_completed)
1931 {
1932 if (cfun->static_chain_decl == 0
1933 && (spu_saved_regs_size ()
1934 + get_frame_size ()
abe32cce 1935 + crtl->outgoing_args_size
1936 + crtl->args.pretend_args_size == 0)
644459d0 1937 && current_function_is_leaf)
1938 return 1;
1939 }
1940 return 0;
1941}
1942
1943/*
1944 The stack frame looks like this:
1945 +-------------+
1946 | incoming |
a8e019fa 1947 | args |
1948 AP -> +-------------+
644459d0 1949 | $lr save |
1950 +-------------+
1951 prev SP | back chain |
1952 +-------------+
1953 | var args |
abe32cce 1954 | reg save | crtl->args.pretend_args_size bytes
644459d0 1955 +-------------+
1956 | ... |
1957 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1958 FP -> +-------------+
644459d0 1959 | ... |
a8e019fa 1960 | vars | get_frame_size() bytes
1961 HFP -> +-------------+
644459d0 1962 | ... |
1963 | outgoing |
abe32cce 1964 | args | crtl->outgoing_args_size bytes
644459d0 1965 +-------------+
1966 | $lr of next |
1967 | frame |
1968 +-------------+
a8e019fa 1969 | back chain |
1970 SP -> +-------------+
644459d0 1971
1972*/
1973void
1974spu_expand_prologue (void)
1975{
1976 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1977 HOST_WIDE_INT total_size;
1978 HOST_WIDE_INT saved_regs_size;
1979 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1980 rtx scratch_reg_0, scratch_reg_1;
1981 rtx insn, real;
1982
644459d0 1983 if (flag_pic && optimize == 0)
18d50ae6 1984 crtl->uses_pic_offset_table = 1;
644459d0 1985
1986 if (spu_naked_function_p (current_function_decl))
1987 return;
1988
1989 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1990 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1991
1992 saved_regs_size = spu_saved_regs_size ();
1993 total_size = size + saved_regs_size
abe32cce 1994 + crtl->outgoing_args_size
1995 + crtl->args.pretend_args_size;
644459d0 1996
1997 if (!current_function_is_leaf
18d50ae6 1998 || cfun->calls_alloca || total_size > 0)
644459d0 1999 total_size += STACK_POINTER_OFFSET;
2000
2001 /* Save this first because code after this might use the link
2002 register as a scratch register. */
2003 if (!current_function_is_leaf)
2004 {
2005 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2006 RTX_FRAME_RELATED_P (insn) = 1;
2007 }
2008
2009 if (total_size > 0)
2010 {
abe32cce 2011 offset = -crtl->args.pretend_args_size;
644459d0 2012 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2013 if (need_to_save_reg (regno, 1))
2014 {
2015 offset -= 16;
2016 insn = frame_emit_store (regno, sp_reg, offset);
2017 RTX_FRAME_RELATED_P (insn) = 1;
2018 }
2019 }
2020
18d50ae6 2021 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2022 {
2023 rtx pic_reg = get_pic_reg ();
2024 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2025 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2026 }
2027
2028 if (total_size > 0)
2029 {
2030 if (flag_stack_check)
2031 {
d819917f 2032 /* We compare against total_size-1 because
644459d0 2033 ($sp >= total_size) <=> ($sp > total_size-1) */
2034 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2035 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2036 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2037 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2038 {
2039 emit_move_insn (scratch_v4si, size_v4si);
2040 size_v4si = scratch_v4si;
2041 }
2042 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2043 emit_insn (gen_vec_extractv4si
2044 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2045 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2046 }
2047
2048 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2049 the value of the previous $sp because we save it as the back
2050 chain. */
2051 if (total_size <= 2000)
2052 {
2053 /* In this case we save the back chain first. */
2054 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2055 insn =
2056 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2057 }
644459d0 2058 else
2059 {
2060 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2061 insn =
2062 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2063 }
2064 RTX_FRAME_RELATED_P (insn) = 1;
2065 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2066 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2067
2068 if (total_size > 2000)
2069 {
2070 /* Save the back chain ptr */
2071 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2072 }
2073
2074 if (frame_pointer_needed)
2075 {
2076 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2077 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2078 + crtl->outgoing_args_size;
644459d0 2079 /* Set the new frame_pointer */
d8dfeb55 2080 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2081 RTX_FRAME_RELATED_P (insn) = 1;
2082 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2083 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2084 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2085 }
2086 }
2087
8c0dd614 2088 if (flag_stack_usage_info)
a512540d 2089 current_function_static_stack_size = total_size;
644459d0 2090}
2091
2092void
2093spu_expand_epilogue (bool sibcall_p)
2094{
2095 int size = get_frame_size (), offset, regno;
2096 HOST_WIDE_INT saved_regs_size, total_size;
2097 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 2098 rtx scratch_reg_0;
644459d0 2099
644459d0 2100 if (spu_naked_function_p (current_function_decl))
2101 return;
2102
2103 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2104
2105 saved_regs_size = spu_saved_regs_size ();
2106 total_size = size + saved_regs_size
abe32cce 2107 + crtl->outgoing_args_size
2108 + crtl->args.pretend_args_size;
644459d0 2109
2110 if (!current_function_is_leaf
18d50ae6 2111 || cfun->calls_alloca || total_size > 0)
644459d0 2112 total_size += STACK_POINTER_OFFSET;
2113
2114 if (total_size > 0)
2115 {
18d50ae6 2116 if (cfun->calls_alloca)
644459d0 2117 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2118 else
2119 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2120
2121
2122 if (saved_regs_size > 0)
2123 {
abe32cce 2124 offset = -crtl->args.pretend_args_size;
644459d0 2125 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2126 if (need_to_save_reg (regno, 1))
2127 {
2128 offset -= 0x10;
2129 frame_emit_load (regno, sp_reg, offset);
2130 }
2131 }
2132 }
2133
2134 if (!current_function_is_leaf)
2135 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2136
2137 if (!sibcall_p)
2138 {
18b42941 2139 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 2140 emit_jump_insn (gen__return ());
644459d0 2141 }
644459d0 2142}
2143
2144rtx
2145spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2146{
2147 if (count != 0)
2148 return 0;
2149 /* This is inefficient because it ends up copying to a save-register
2150 which then gets saved even though $lr has already been saved. But
2151 it does generate better code for leaf functions and we don't need
2152 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2153 used for __builtin_return_address anyway, so maybe we don't care if
2154 it's inefficient. */
2155 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2156}
2157\f
2158
2159/* Given VAL, generate a constant appropriate for MODE.
2160 If MODE is a vector mode, every element will be VAL.
2161 For TImode, VAL will be zero extended to 128 bits. */
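/* For example, spu_const (V4SImode, -1) yields the CONST_VECTOR
   { -1, -1, -1, -1 }, and spu_const (V8HImode, 3) a vector of eight
   HImode 3s.  */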
2162rtx
2163spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2164{
2165 rtx inner;
2166 rtvec v;
2167 int units, i;
2168
2169 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2170 || GET_MODE_CLASS (mode) == MODE_FLOAT
2171 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2172 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2173
2174 if (GET_MODE_CLASS (mode) == MODE_INT)
2175 return immed_double_const (val, 0, mode);
2176
2177 /* val is the bit representation of the float */
2178 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2179 return hwint_to_const_double (mode, val);
2180
2181 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2182 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2183 else
2184 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2185
2186 units = GET_MODE_NUNITS (mode);
2187
2188 v = rtvec_alloc (units);
2189
2190 for (i = 0; i < units; ++i)
2191 RTVEC_ELT (v, i) = inner;
2192
2193 return gen_rtx_CONST_VECTOR (mode, v);
2194}
644459d0 2195
5474166e 2196/* Create a MODE vector constant from 4 ints. */
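/* For example, spu_const_from_ints (V4SImode, 1, 2, 3, 4) builds the
   constant vector { 1, 2, 3, 4 }; A fills bytes 0-3, B bytes 4-7, and
   so on, each stored big-endian.  */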
2197rtx
2198spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2199{
2200 unsigned char arr[16];
2201 arr[0] = (a >> 24) & 0xff;
2202 arr[1] = (a >> 16) & 0xff;
2203 arr[2] = (a >> 8) & 0xff;
2204 arr[3] = (a >> 0) & 0xff;
2205 arr[4] = (b >> 24) & 0xff;
2206 arr[5] = (b >> 16) & 0xff;
2207 arr[6] = (b >> 8) & 0xff;
2208 arr[7] = (b >> 0) & 0xff;
2209 arr[8] = (c >> 24) & 0xff;
2210 arr[9] = (c >> 16) & 0xff;
2211 arr[10] = (c >> 8) & 0xff;
2212 arr[11] = (c >> 0) & 0xff;
2213 arr[12] = (d >> 24) & 0xff;
2214 arr[13] = (d >> 16) & 0xff;
2215 arr[14] = (d >> 8) & 0xff;
2216 arr[15] = (d >> 0) & 0xff;
2217 return array_to_constant(mode, arr);
2218}
5a976006 2219\f
2220/* branch hint stuff */
5474166e 2221
644459d0 2222/* An array of these is used to propagate hints to predecessor blocks. */
2223struct spu_bb_info
2224{
5a976006 2225 rtx prop_jump; /* propagated from another block */
2226 int bb_index; /* the original block. */
644459d0 2227};
5a976006 2228static struct spu_bb_info *spu_bb_info;
644459d0 2229
5a976006 2230#define STOP_HINT_P(INSN) \
2231 (GET_CODE(INSN) == CALL_INSN \
2232 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2233 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2234
2235/* 1 when RTX is a hinted branch or its target. We keep track of
2236 what has been hinted so the safe-hint code can test it easily. */
2237#define HINTED_P(RTX) \
2238 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2239
2240/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2241#define SCHED_ON_EVEN_P(RTX) \
2242 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2243
2244/* Emit a nop for INSN such that the two will dual issue. This assumes
2245 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2246 We check for TImode to handle a MULTI1 insn which has dual issued its
2247 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2248 ADDR_VEC insns. */
2249static void
2250emit_nop_for_insn (rtx insn)
644459d0 2251{
5a976006 2252 int p;
2253 rtx new_insn;
2254 p = get_pipe (insn);
2255 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2256 new_insn = emit_insn_after (gen_lnop (), insn);
2257 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2258 {
5a976006 2259 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2260 PUT_MODE (new_insn, TImode);
2261 PUT_MODE (insn, VOIDmode);
2262 }
2263 else
2264 new_insn = emit_insn_after (gen_lnop (), insn);
2265 recog_memoized (new_insn);
2fbdf9ef 2266 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2267}
2268
2269/* Insert nops in basic blocks to meet dual issue alignment
2270 requirements. Also make sure hbrp and hint instructions are at least
2271 one cycle apart, possibly inserting a nop. */
2272static void
2273pad_bb(void)
2274{
2275 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2276 int length;
2277 int addr;
2278
2279 /* This sets up INSN_ADDRESSES. */
2280 shorten_branches (get_insns ());
2281
2282 /* Keep track of length added by nops. */
2283 length = 0;
2284
2285 prev_insn = 0;
2286 insn = get_insns ();
2287 if (!active_insn_p (insn))
2288 insn = next_active_insn (insn);
2289 for (; insn; insn = next_insn)
2290 {
2291 next_insn = next_active_insn (insn);
2292 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2293 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2294 {
5a976006 2295 if (hbr_insn)
2296 {
2297 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2298 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2299 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 || (a1 - a0 == 4))
2301 {
2302 prev_insn = emit_insn_before (gen_lnop (), insn);
2303 PUT_MODE (prev_insn, GET_MODE (insn));
2304 PUT_MODE (insn, TImode);
2fbdf9ef 2305 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2306 length += 4;
2307 }
2308 }
2309 hbr_insn = insn;
2310 }
2311 if (INSN_CODE (insn) == CODE_FOR_blockage)
2312 {
2313 if (GET_MODE (insn) == TImode)
2314 PUT_MODE (next_insn, TImode);
2315 insn = next_insn;
2316 next_insn = next_active_insn (insn);
2317 }
2318 addr = INSN_ADDRESSES (INSN_UID (insn));
2319 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2320 {
2321 if (((addr + length) & 7) != 0)
2322 {
2323 emit_nop_for_insn (prev_insn);
2324 length += 4;
2325 }
644459d0 2326 }
5a976006 2327 else if (GET_MODE (insn) == TImode
2328 && ((next_insn && GET_MODE (next_insn) != TImode)
2329 || get_attr_type (insn) == TYPE_MULTI0)
2330 && ((addr + length) & 7) != 0)
2331 {
2332 /* prev_insn will always be set because the first insn is
2333 always 8-byte aligned. */
2334 emit_nop_for_insn (prev_insn);
2335 length += 4;
2336 }
2337 prev_insn = insn;
644459d0 2338 }
644459d0 2339}
2340
5a976006 2341\f
2342/* Routines for branch hints. */
2343
644459d0 2344static void
5a976006 2345spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2346 int distance, sbitmap blocks)
644459d0 2347{
5a976006 2348 rtx branch_label = 0;
2349 rtx hint;
2350 rtx insn;
2351 rtx table;
644459d0 2352
2353 if (before == 0 || branch == 0 || target == 0)
2354 return;
2355
5a976006	 2356	  /* While scheduling we require hints to be no further than 600 bytes
	 2357	     from the branch, so we need to enforce that here too.  */
644459d0 2358 if (distance > 600)
2359 return;
2360
5a976006	 2361	  /* If we have a basic block note, emit the hint after the note.  */
37534923 2362 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2363 before = NEXT_INSN (before);
644459d0 2364
2365 branch_label = gen_label_rtx ();
2366 LABEL_NUSES (branch_label)++;
2367 LABEL_PRESERVE_P (branch_label) = 1;
2368 insn = emit_label_before (branch_label, branch);
2369 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2370 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2371
2372 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2373 recog_memoized (hint);
2fbdf9ef 2374 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2375 HINTED_P (branch) = 1;
644459d0 2376
5a976006 2377 if (GET_CODE (target) == LABEL_REF)
2378 HINTED_P (XEXP (target, 0)) = 1;
2379 else if (tablejump_p (branch, 0, &table))
644459d0 2380 {
5a976006 2381 rtvec vec;
2382 int j;
2383 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2384 vec = XVEC (PATTERN (table), 0);
2385 else
2386 vec = XVEC (PATTERN (table), 1);
2387 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2388 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2389 }
5a976006 2390
2391 if (distance >= 588)
644459d0 2392 {
5a976006 2393 /* Make sure the hint isn't scheduled any earlier than this point,
	 2394	 which could make it too far for the branch offset to fit.  */
2fbdf9ef 2395 insn = emit_insn_before (gen_blockage (), hint);
2396 recog_memoized (insn);
2397 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2398 }
2399 else if (distance <= 8 * 4)
2400 {
2401 /* To guarantee at least 8 insns between the hint and branch we
2402 insert nops. */
2403 int d;
2404 for (d = distance; d < 8 * 4; d += 4)
2405 {
2406 insn =
2407 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2408 recog_memoized (insn);
2fbdf9ef 2409 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2410 }
2411
2412 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2413 insn = emit_insn_after (gen_blockage (), hint);
2414 recog_memoized (insn);
2415 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2416
2417 /* Make sure any nops inserted aren't scheduled after the call. */
2418 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2419 {
2420 insn = emit_insn_before (gen_blockage (), branch);
2421 recog_memoized (insn);
2422 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2423 }
644459d0 2424 }
644459d0 2425}
2426
2427/* Returns 0 if we don't want a hint for this branch. Otherwise return
2428 the rtx for the branch target. */
2429static rtx
2430get_branch_target (rtx branch)
2431{
2432 if (GET_CODE (branch) == JUMP_INSN)
2433 {
2434 rtx set, src;
2435
2436 /* Return statements */
2437 if (GET_CODE (PATTERN (branch)) == RETURN)
2438 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2439
2440 /* jump table */
2441 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2442 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2443 return 0;
2444
fcc31b99 2445 /* ASM GOTOs. */
604157f6 2446 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2447 return NULL;
2448
644459d0 2449 set = single_set (branch);
2450 src = SET_SRC (set);
2451 if (GET_CODE (SET_DEST (set)) != PC)
2452 abort ();
2453
2454 if (GET_CODE (src) == IF_THEN_ELSE)
2455 {
2456 rtx lab = 0;
2457 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2458 if (note)
2459 {
2460 /* If the more probable case is not a fall through, then
2461 try a branch hint. */
2462 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2463 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2464 && GET_CODE (XEXP (src, 1)) != PC)
2465 lab = XEXP (src, 1);
2466 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2467 && GET_CODE (XEXP (src, 2)) != PC)
2468 lab = XEXP (src, 2);
2469 }
2470 if (lab)
2471 {
2472 if (GET_CODE (lab) == RETURN)
2473 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2474 return lab;
2475 }
2476 return 0;
2477 }
2478
2479 return src;
2480 }
2481 else if (GET_CODE (branch) == CALL_INSN)
2482 {
2483 rtx call;
2484 /* All of our call patterns are in a PARALLEL and the CALL is
2485 the first pattern in the PARALLEL. */
2486 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2487 abort ();
2488 call = XVECEXP (PATTERN (branch), 0, 0);
2489 if (GET_CODE (call) == SET)
2490 call = SET_SRC (call);
2491 if (GET_CODE (call) != CALL)
2492 abort ();
2493 return XEXP (XEXP (call, 0), 0);
2494 }
2495 return 0;
2496}
2497
5a976006 2498/* The special $hbr register is used to prevent the insn scheduler from
2499 moving hbr insns across instructions which invalidate them. It
2500 should only be used in a clobber, and this function searches for
2501 insns which clobber it. */
2502static bool
2503insn_clobbers_hbr (rtx insn)
2504{
2505 if (INSN_P (insn)
2506 && GET_CODE (PATTERN (insn)) == PARALLEL)
2507 {
2508 rtx parallel = PATTERN (insn);
2509 rtx clobber;
2510 int j;
2511 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2512 {
2513 clobber = XVECEXP (parallel, 0, j);
2514 if (GET_CODE (clobber) == CLOBBER
2515 && GET_CODE (XEXP (clobber, 0)) == REG
2516 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2517 return 1;
2518 }
2519 }
2520 return 0;
2521}
2522
2523/* Search up to 32 insns starting at FIRST:
2524 - at any kind of hinted branch, just return
2525 - at any unconditional branch in the first 15 insns, just return
2526 - at a call or indirect branch, after the first 15 insns, force it to
2527 an even address and return
2528 - at any unconditional branch, after the first 15 insns, force it to
2529 an even address.
	 2530	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2531 and an hbrp within 16 instructions of FIRST.
2532 */
644459d0 2533static void
5a976006 2534insert_hbrp_for_ilb_runout (rtx first)
644459d0 2535{
5a976006 2536 rtx insn, before_4 = 0, before_16 = 0;
2537 int addr = 0, length, first_addr = -1;
2538 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2539 int insert_lnop_after = 0;
2540 for (insn = first; insn; insn = NEXT_INSN (insn))
2541 if (INSN_P (insn))
2542 {
2543 if (first_addr == -1)
2544 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2545 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2546 length = get_attr_length (insn);
2547
2548 if (before_4 == 0 && addr + length >= 4 * 4)
2549 before_4 = insn;
2550 /* We test for 14 instructions because the first hbrp will add
2551 up to 2 instructions. */
2552 if (before_16 == 0 && addr + length >= 14 * 4)
2553 before_16 = insn;
2554
2555 if (INSN_CODE (insn) == CODE_FOR_hbr)
2556 {
2557 /* Make sure an hbrp is at least 2 cycles away from a hint.
2558 Insert an lnop after the hbrp when necessary. */
2559 if (before_4 == 0 && addr > 0)
2560 {
2561 before_4 = insn;
2562 insert_lnop_after |= 1;
2563 }
2564 else if (before_4 && addr <= 4 * 4)
2565 insert_lnop_after |= 1;
2566 if (before_16 == 0 && addr > 10 * 4)
2567 {
2568 before_16 = insn;
2569 insert_lnop_after |= 2;
2570 }
2571 else if (before_16 && addr <= 14 * 4)
2572 insert_lnop_after |= 2;
2573 }
644459d0 2574
5a976006 2575 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2576 {
2577 if (addr < hbrp_addr0)
2578 hbrp_addr0 = addr;
2579 else if (addr < hbrp_addr1)
2580 hbrp_addr1 = addr;
2581 }
644459d0 2582
5a976006 2583 if (CALL_P (insn) || JUMP_P (insn))
2584 {
2585 if (HINTED_P (insn))
2586 return;
2587
2588 /* Any branch after the first 15 insns should be on an even
2589 address to avoid a special case branch. There might be
2590 some nops and/or hbrps inserted, so we test after 10
2591 insns. */
2592 if (addr > 10 * 4)
2593 SCHED_ON_EVEN_P (insn) = 1;
2594 }
644459d0 2595
5a976006 2596 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2597 return;
2598
2599
2600 if (addr + length >= 32 * 4)
644459d0 2601 {
5a976006 2602 gcc_assert (before_4 && before_16);
2603 if (hbrp_addr0 > 4 * 4)
644459d0 2604 {
5a976006 2605 insn =
2606 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2607 recog_memoized (insn);
2fbdf9ef 2608 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2609 INSN_ADDRESSES_NEW (insn,
2610 INSN_ADDRESSES (INSN_UID (before_4)));
2611 PUT_MODE (insn, GET_MODE (before_4));
2612 PUT_MODE (before_4, TImode);
2613 if (insert_lnop_after & 1)
644459d0 2614 {
5a976006 2615 insn = emit_insn_before (gen_lnop (), before_4);
2616 recog_memoized (insn);
2fbdf9ef 2617 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2618 INSN_ADDRESSES_NEW (insn,
2619 INSN_ADDRESSES (INSN_UID (before_4)));
2620 PUT_MODE (insn, TImode);
644459d0 2621 }
644459d0 2622 }
5a976006 2623 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2624 && hbrp_addr1 > 16 * 4)
644459d0 2625 {
5a976006 2626 insn =
2627 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2628 recog_memoized (insn);
2fbdf9ef 2629 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2630 INSN_ADDRESSES_NEW (insn,
2631 INSN_ADDRESSES (INSN_UID (before_16)));
2632 PUT_MODE (insn, GET_MODE (before_16));
2633 PUT_MODE (before_16, TImode);
2634 if (insert_lnop_after & 2)
644459d0 2635 {
5a976006 2636 insn = emit_insn_before (gen_lnop (), before_16);
2637 recog_memoized (insn);
2fbdf9ef 2638 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2639 INSN_ADDRESSES_NEW (insn,
2640 INSN_ADDRESSES (INSN_UID
2641 (before_16)));
2642 PUT_MODE (insn, TImode);
644459d0 2643 }
2644 }
5a976006 2645 return;
644459d0 2646 }
644459d0 2647 }
5a976006 2648 else if (BARRIER_P (insn))
2649 return;
644459d0 2650
644459d0 2651}
5a976006 2652
2653/* The SPU might hang when it executes 48 inline instructions after a
2654 hinted branch jumps to its hinted target. The beginning of a
851d9296 2655 function and the return from a call might have been hinted, and
2656 must be handled as well. To prevent a hang we insert 2 hbrps. The
2657 first should be within 6 insns of the branch target. The second
2658 should be within 22 insns of the branch target. When determining
2659 if hbrps are necessary, we look for only 32 inline instructions,
	 2660	   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2661 when inserting new hbrps, we insert them within 4 and 16 insns of
2662 the target. */
644459d0 2663static void
5a976006 2664insert_hbrp (void)
644459d0 2665{
5a976006 2666 rtx insn;
2667 if (TARGET_SAFE_HINTS)
644459d0 2668 {
5a976006 2669 shorten_branches (get_insns ());
2670 /* Insert hbrp at beginning of function */
2671 insn = next_active_insn (get_insns ());
2672 if (insn)
2673 insert_hbrp_for_ilb_runout (insn);
2674 /* Insert hbrp after hinted targets. */
2675 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2676 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2677 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2678 }
644459d0 2679}
2680
5a976006 2681static int in_spu_reorg;
2682
8a42230a 2683static void
2684spu_var_tracking (void)
2685{
2686 if (flag_var_tracking)
2687 {
2688 df_analyze ();
2689 timevar_push (TV_VAR_TRACKING);
2690 variable_tracking_main ();
2691 timevar_pop (TV_VAR_TRACKING);
2692 df_finish_pass (false);
2693 }
2694}
2695
5a976006 2696/* Insert branch hints. There are no branch optimizations after this
2697 pass, so it's safe to set our branch hints now. */
644459d0 2698static void
5a976006 2699spu_machine_dependent_reorg (void)
644459d0 2700{
5a976006 2701 sbitmap blocks;
2702 basic_block bb;
2703 rtx branch, insn;
2704 rtx branch_target = 0;
2705 int branch_addr = 0, insn_addr, required_dist = 0;
2706 int i;
2707 unsigned int j;
644459d0 2708
5a976006 2709 if (!TARGET_BRANCH_HINTS || optimize == 0)
2710 {
2711 /* We still do it for unoptimized code because an external
2712 function might have hinted a call or return. */
2713 insert_hbrp ();
2714 pad_bb ();
8a42230a 2715 spu_var_tracking ();
5a976006 2716 return;
2717 }
644459d0 2718
5a976006 2719 blocks = sbitmap_alloc (last_basic_block);
2720 sbitmap_zero (blocks);
644459d0 2721
5a976006 2722 in_spu_reorg = 1;
2723 compute_bb_for_insn ();
2724
2725 compact_blocks ();
2726
2727 spu_bb_info =
2728 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2729 sizeof (struct spu_bb_info));
2730
2731 /* We need exact insn addresses and lengths. */
2732 shorten_branches (get_insns ());
2733
2734 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2735 {
5a976006 2736 bb = BASIC_BLOCK (i);
2737 branch = 0;
2738 if (spu_bb_info[i].prop_jump)
644459d0 2739 {
5a976006 2740 branch = spu_bb_info[i].prop_jump;
2741 branch_target = get_branch_target (branch);
2742 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2743 required_dist = spu_hint_dist;
2744 }
2745 /* Search from end of a block to beginning. In this loop, find
	 2746	 jumps which need a branch hint and emit the hint only when:
2747 - it's an indirect branch and we're at the insn which sets
2748 the register
2749 - we're at an insn that will invalidate the hint. e.g., a
2750 call, another hint insn, inline asm that clobbers $hbr, and
2751 some inlined operations (divmodsi4). Don't consider jumps
2752 because they are only at the end of a block and are
2753 considered when we are deciding whether to propagate
2754 - we're getting too far away from the branch. The hbr insns
2755 only have a signed 10 bit offset
2756 We go back as far as possible so the branch will be considered
2757 for propagation when we get to the beginning of the block. */
2758 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2759 {
2760 if (INSN_P (insn))
2761 {
2762 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2763 if (branch
2764 && ((GET_CODE (branch_target) == REG
2765 && set_of (branch_target, insn) != NULL_RTX)
2766 || insn_clobbers_hbr (insn)
2767 || branch_addr - insn_addr > 600))
2768 {
2769 rtx next = NEXT_INSN (insn);
2770 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2771 if (insn != BB_END (bb)
2772 && branch_addr - next_addr >= required_dist)
2773 {
2774 if (dump_file)
2775 fprintf (dump_file,
2776 "hint for %i in block %i before %i\n",
2777 INSN_UID (branch), bb->index,
2778 INSN_UID (next));
2779 spu_emit_branch_hint (next, branch, branch_target,
2780 branch_addr - next_addr, blocks);
2781 }
2782 branch = 0;
2783 }
2784
2785 /* JUMP_P will only be true at the end of a block. When
2786 branch is already set it means we've previously decided
2787 to propagate a hint for that branch into this block. */
2788 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2789 {
2790 branch = 0;
2791 if ((branch_target = get_branch_target (insn)))
2792 {
2793 branch = insn;
2794 branch_addr = insn_addr;
2795 required_dist = spu_hint_dist;
2796 }
2797 }
2798 }
2799 if (insn == BB_HEAD (bb))
2800 break;
2801 }
2802
2803 if (branch)
2804 {
2805 /* If we haven't emitted a hint for this branch yet, it might
2806 be profitable to emit it in one of the predecessor blocks,
2807 especially for loops. */
2808 rtx bbend;
2809 basic_block prev = 0, prop = 0, prev2 = 0;
2810 int loop_exit = 0, simple_loop = 0;
2811 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2812
2813 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2814 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2815 prev = EDGE_PRED (bb, j)->src;
2816 else
2817 prev2 = EDGE_PRED (bb, j)->src;
2818
2819 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2820 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2821 loop_exit = 1;
2822 else if (EDGE_SUCC (bb, j)->dest == bb)
2823 simple_loop = 1;
2824
2825 /* If this branch is a loop exit then propagate to previous
2826 fallthru block. This catches the cases when it is a simple
2827 loop or when there is an initial branch into the loop. */
2828 if (prev && (loop_exit || simple_loop)
2829 && prev->loop_depth <= bb->loop_depth)
2830 prop = prev;
2831
	 2832	  /* If there is only one adjacent predecessor, don't propagate
2833 outside this loop. This loop_depth test isn't perfect, but
2834 I'm not sure the loop_father member is valid at this point. */
2835 else if (prev && single_pred_p (bb)
2836 && prev->loop_depth == bb->loop_depth)
2837 prop = prev;
2838
2839 /* If this is the JOIN block of a simple IF-THEN then
	 2840	     propagate the hint to the HEADER block. */
2841 else if (prev && prev2
2842 && EDGE_COUNT (bb->preds) == 2
2843 && EDGE_COUNT (prev->preds) == 1
2844 && EDGE_PRED (prev, 0)->src == prev2
2845 && prev2->loop_depth == bb->loop_depth
2846 && GET_CODE (branch_target) != REG)
2847 prop = prev;
2848
2849 /* Don't propagate when:
2850 - this is a simple loop and the hint would be too far
2851 - this is not a simple loop and there are 16 insns in
2852 this block already
2853 - the predecessor block ends in a branch that will be
2854 hinted
2855 - the predecessor block ends in an insn that invalidates
2856 the hint */
2857 if (prop
2858 && prop->index >= 0
2859 && (bbend = BB_END (prop))
2860 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2861 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2862 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2863 {
2864 if (dump_file)
2865 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2866 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2867 bb->index, prop->index, bb->loop_depth,
2868 INSN_UID (branch), loop_exit, simple_loop,
2869 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2870
2871 spu_bb_info[prop->index].prop_jump = branch;
2872 spu_bb_info[prop->index].bb_index = i;
2873 }
2874 else if (branch_addr - next_addr >= required_dist)
2875 {
2876 if (dump_file)
2877 fprintf (dump_file, "hint for %i in block %i before %i\n",
2878 INSN_UID (branch), bb->index,
2879 INSN_UID (NEXT_INSN (insn)));
2880 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2881 branch_addr - next_addr, blocks);
2882 }
2883 branch = 0;
644459d0 2884 }
644459d0 2885 }
5a976006 2886 free (spu_bb_info);
644459d0 2887
5a976006 2888 if (!sbitmap_empty_p (blocks))
2889 find_many_sub_basic_blocks (blocks);
2890
2891 /* We have to schedule to make sure alignment is ok. */
2892 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2893
2894 /* The hints need to be scheduled, so call it again. */
2895 schedule_insns ();
2fbdf9ef 2896 df_finish_pass (true);
5a976006 2897
2898 insert_hbrp ();
2899
2900 pad_bb ();
2901
8f1d58ad 2902 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2903 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2904 {
2905 /* Adjust the LABEL_REF in a hint when we have inserted a nop
	 2906	 between its branch label and the branch.  We don't move the
2907 label because GCC expects it at the beginning of the block. */
2908 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2909 rtx label_ref = XVECEXP (unspec, 0, 0);
2910 rtx label = XEXP (label_ref, 0);
2911 rtx branch;
2912 int offset = 0;
2913 for (branch = NEXT_INSN (label);
2914 !JUMP_P (branch) && !CALL_P (branch);
2915 branch = NEXT_INSN (branch))
2916 if (NONJUMP_INSN_P (branch))
2917 offset += get_attr_length (branch);
2918 if (offset > 0)
2919 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2920 }
5a976006 2921
8a42230a 2922 spu_var_tracking ();
5a976006 2923
2924 free_bb_for_insn ();
2925
2926 in_spu_reorg = 0;
644459d0 2927}
2928\f
2929
2930/* Insn scheduling routines, primarily for dual issue. */
2931static int
2932spu_sched_issue_rate (void)
2933{
2934 return 2;
2935}
2936
2937static int
5a976006 2938uses_ls_unit(rtx insn)
644459d0 2939{
5a976006 2940 rtx set = single_set (insn);
2941 if (set != 0
2942 && (GET_CODE (SET_DEST (set)) == MEM
2943 || GET_CODE (SET_SRC (set)) == MEM))
2944 return 1;
2945 return 0;
644459d0 2946}
2947
2948static int
2949get_pipe (rtx insn)
2950{
2951 enum attr_type t;
2952 /* Handle inline asm */
2953 if (INSN_CODE (insn) == -1)
2954 return -1;
2955 t = get_attr_type (insn);
2956 switch (t)
2957 {
2958 case TYPE_CONVERT:
2959 return -2;
2960 case TYPE_MULTI0:
2961 return -1;
2962
2963 case TYPE_FX2:
2964 case TYPE_FX3:
2965 case TYPE_SPR:
2966 case TYPE_NOP:
2967 case TYPE_FXB:
2968 case TYPE_FPD:
2969 case TYPE_FP6:
2970 case TYPE_FP7:
644459d0 2971 return 0;
2972
2973 case TYPE_LNOP:
2974 case TYPE_SHUF:
2975 case TYPE_LOAD:
2976 case TYPE_STORE:
2977 case TYPE_BR:
2978 case TYPE_MULTI1:
2979 case TYPE_HBR:
5a976006 2980 case TYPE_IPREFETCH:
644459d0 2981 return 1;
2982 default:
2983 abort ();
2984 }
2985}
2986
5a976006 2987
2988/* haifa-sched.c has a static variable that keeps track of the current
2989 cycle. It is passed to spu_sched_reorder, and we record it here for
2990 use by spu_sched_variable_issue. It won't be accurate if the
	 2991	 scheduler updates its clock_var between the two calls. */
2992static int clock_var;
2993
2994/* This is used to keep track of insn alignment. Set to 0 at the
2995 beginning of each block and increased by the "length" attr of each
2996 insn scheduled. */
2997static int spu_sched_length;
2998
2999/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3000 ready list appropriately in spu_sched_reorder(). */
3001static int pipe0_clock;
3002static int pipe1_clock;
3003
3004static int prev_clock_var;
3005
3006static int prev_priority;
3007
3008/* The SPU needs to load the next ilb sometime during the execution of
3009 the previous ilb. There is a potential conflict if every cycle has a
3010 load or store. To avoid the conflict we make sure the load/store
3011 unit is free for at least one cycle during the execution of insns in
3012 the previous ilb. */
3013static int spu_ls_first;
3014static int prev_ls_clock;
3015
3016static void
3017spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3018 int max_ready ATTRIBUTE_UNUSED)
3019{
3020 spu_sched_length = 0;
3021}
3022
3023static void
3024spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3025 int max_ready ATTRIBUTE_UNUSED)
3026{
3027 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3028 {
3029 /* When any block might be at least 8-byte aligned, assume they
3030 will all be at least 8-byte aligned to make sure dual issue
3031 works out correctly. */
3032 spu_sched_length = 0;
3033 }
3034 spu_ls_first = INT_MAX;
3035 clock_var = -1;
3036 prev_ls_clock = -1;
3037 pipe0_clock = -1;
3038 pipe1_clock = -1;
3039 prev_clock_var = -1;
3040 prev_priority = -1;
3041}
3042
644459d0 3043static int
5a976006 3044spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3045 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3046{
5a976006 3047 int len;
3048 int p;
644459d0 3049 if (GET_CODE (PATTERN (insn)) == USE
3050 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3051 || (len = get_attr_length (insn)) == 0)
3052 return more;
3053
3054 spu_sched_length += len;
3055
3056 /* Reset on inline asm */
3057 if (INSN_CODE (insn) == -1)
3058 {
3059 spu_ls_first = INT_MAX;
3060 pipe0_clock = -1;
3061 pipe1_clock = -1;
3062 return 0;
3063 }
3064 p = get_pipe (insn);
3065 if (p == 0)
3066 pipe0_clock = clock_var;
3067 else
3068 pipe1_clock = clock_var;
3069
3070 if (in_spu_reorg)
3071 {
3072 if (clock_var - prev_ls_clock > 1
3073 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3074 spu_ls_first = INT_MAX;
3075 if (uses_ls_unit (insn))
3076 {
3077 if (spu_ls_first == INT_MAX)
3078 spu_ls_first = spu_sched_length;
3079 prev_ls_clock = clock_var;
3080 }
3081
3082 /* The scheduler hasn't inserted the nop, but we will later on.
3083 Include those nops in spu_sched_length. */
3084 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3085 spu_sched_length += 4;
3086 prev_clock_var = clock_var;
3087
3088 /* more is -1 when called from spu_sched_reorder for new insns
3089 that don't have INSN_PRIORITY */
3090 if (more >= 0)
3091 prev_priority = INSN_PRIORITY (insn);
3092 }
3093
	 3094	  /* Always try issuing more insns.  spu_sched_reorder will decide
3095 when the cycle should be advanced. */
3096 return 1;
3097}
3098
3099/* This function is called for both TARGET_SCHED_REORDER and
3100 TARGET_SCHED_REORDER2. */
3101static int
3102spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3103 rtx *ready, int *nreadyp, int clock)
3104{
3105 int i, nready = *nreadyp;
3106 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3107 rtx insn;
3108
3109 clock_var = clock;
3110
3111 if (nready <= 0 || pipe1_clock >= clock)
3112 return 0;
3113
3114 /* Find any rtl insns that don't generate assembly insns and schedule
3115 them first. */
3116 for (i = nready - 1; i >= 0; i--)
3117 {
3118 insn = ready[i];
3119 if (INSN_CODE (insn) == -1
3120 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3121 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3122 {
3123 ready[i] = ready[nready - 1];
3124 ready[nready - 1] = insn;
3125 return 1;
3126 }
3127 }
3128
3129 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3130 for (i = 0; i < nready; i++)
3131 if (INSN_CODE (ready[i]) != -1)
3132 {
3133 insn = ready[i];
3134 switch (get_attr_type (insn))
3135 {
3136 default:
3137 case TYPE_MULTI0:
3138 case TYPE_CONVERT:
3139 case TYPE_FX2:
3140 case TYPE_FX3:
3141 case TYPE_SPR:
3142 case TYPE_NOP:
3143 case TYPE_FXB:
3144 case TYPE_FPD:
3145 case TYPE_FP6:
3146 case TYPE_FP7:
3147 pipe_0 = i;
3148 break;
3149 case TYPE_LOAD:
3150 case TYPE_STORE:
3151 pipe_ls = i;
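	  /* Loads and stores also issue on pipe 1, so fall through.  */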
3152 case TYPE_LNOP:
3153 case TYPE_SHUF:
3154 case TYPE_BR:
3155 case TYPE_MULTI1:
3156 case TYPE_HBR:
3157 pipe_1 = i;
3158 break;
3159 case TYPE_IPREFETCH:
3160 pipe_hbrp = i;
3161 break;
3162 }
3163 }
3164
3165 /* In the first scheduling phase, schedule loads and stores together
3166 to increase the chance they will get merged during postreload CSE. */
3167 if (!reload_completed && pipe_ls >= 0)
3168 {
3169 insn = ready[pipe_ls];
3170 ready[pipe_ls] = ready[nready - 1];
3171 ready[nready - 1] = insn;
3172 return 1;
3173 }
3174
3175 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3176 if (pipe_hbrp >= 0)
3177 pipe_1 = pipe_hbrp;
3178
3179 /* When we have loads/stores in every cycle of the last 15 insns and
3180 we are about to schedule another load/store, emit an hbrp insn
3181 instead. */
3182 if (in_spu_reorg
3183 && spu_sched_length - spu_ls_first >= 4 * 15
3184 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3185 {
3186 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3187 recog_memoized (insn);
3188 if (pipe0_clock < clock)
3189 PUT_MODE (insn, TImode);
3190 spu_sched_variable_issue (file, verbose, insn, -1);
3191 return 0;
3192 }
3193
3194 /* In general, we want to emit nops to increase dual issue, but dual
3195 issue isn't faster when one of the insns could be scheduled later
	 3196	 without affecting the critical path.  We look at INSN_PRIORITY to
3197 make a good guess, but it isn't perfect so -mdual-nops=n can be
	 3198	 used to affect it. */
3199 if (in_spu_reorg && spu_dual_nops < 10)
3200 {
	 3201	      /* When we are at an even address and we are not issuing nops to
3202 improve scheduling then we need to advance the cycle. */
3203 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3204 && (spu_dual_nops == 0
3205 || (pipe_1 != -1
3206 && prev_priority >
3207 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3208 return 0;
3209
3210 /* When at an odd address, schedule the highest priority insn
3211 without considering pipeline. */
3212 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3213 && (spu_dual_nops == 0
3214 || (prev_priority >
3215 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3216 return 1;
3217 }
3218
3219
	 3220	  /* We haven't issued a pipe0 insn yet this cycle; if there is a
3221 pipe0 insn in the ready list, schedule it. */
3222 if (pipe0_clock < clock && pipe_0 >= 0)
3223 schedule_i = pipe_0;
3224
3225 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3226 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3227 else
3228 schedule_i = pipe_1;
3229
3230 if (schedule_i > -1)
3231 {
3232 insn = ready[schedule_i];
3233 ready[schedule_i] = ready[nready - 1];
3234 ready[nready - 1] = insn;
3235 return 1;
3236 }
3237 return 0;
644459d0 3238}
3239
3240/* INSN is dependent on DEP_INSN. */
3241static int
5a976006 3242spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3243{
5a976006 3244 rtx set;
3245
3246 /* The blockage pattern is used to prevent instructions from being
3247 moved across it and has no cost. */
3248 if (INSN_CODE (insn) == CODE_FOR_blockage
3249 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3250 return 0;
3251
9d98604b 3252 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3253 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3254 return 0;
3255
3256 /* Make sure hbrps are spread out. */
3257 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3258 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3259 return 8;
3260
3261 /* Make sure hints and hbrps are 2 cycles apart. */
3262 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3263 || INSN_CODE (insn) == CODE_FOR_hbr)
3264 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3265 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3266 return 2;
3267
3268 /* An hbrp has no real dependency on other insns. */
3269 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3270 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3271 return 0;
3272
3273 /* Assuming that it is unlikely an argument register will be used in
3274 the first cycle of the called function, we reduce the cost for
3275 slightly better scheduling of dep_insn. When not hinted, the
3276 mispredicted branch would hide the cost as well. */
3277 if (CALL_P (insn))
3278 {
3279 rtx target = get_branch_target (insn);
3280 if (GET_CODE (target) != REG || !set_of (target, insn))
3281 return cost - 2;
3282 return cost;
3283 }
3284
3285 /* And when returning from a function, let's assume the return values
3286 are completed sooner too. */
3287 if (CALL_P (dep_insn))
644459d0 3288 return cost - 2;
5a976006 3289
	 3290	  /* Make sure an instruction that loads from the back chain is scheduled
3291 away from the return instruction so a hint is more likely to get
3292 issued. */
3293 if (INSN_CODE (insn) == CODE_FOR__return
3294 && (set = single_set (dep_insn))
3295 && GET_CODE (SET_DEST (set)) == REG
3296 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3297 return 20;
3298
644459d0 3299 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3300 scheduler makes every insn in a block anti-dependent on the final
3301 jump_insn. We adjust here so higher cost insns will get scheduled
3302 earlier. */
5a976006 3303 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3304 return insn_cost (dep_insn) - 3;
5a976006 3305
644459d0 3306 return cost;
3307}
3308\f
3309/* Create a CONST_DOUBLE from a string. */
842ae815 3310rtx
644459d0 3311spu_float_const (const char *string, enum machine_mode mode)
3312{
3313 REAL_VALUE_TYPE value;
3314 value = REAL_VALUE_ATOF (string, mode);
3315 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3316}
3317
644459d0 3318int
3319spu_constant_address_p (rtx x)
3320{
3321 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3322 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3323 || GET_CODE (x) == HIGH);
3324}
3325
3326static enum spu_immediate
3327which_immediate_load (HOST_WIDE_INT val)
3328{
3329 gcc_assert (val == trunc_int_for_mode (val, SImode));
3330
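  /* Roughly: il takes a signed 16-bit immediate, ila an unsigned 18-bit
     immediate, ilh a repeated halfword, and ilhu a value whose low
     halfword is zero.  For example, -1 -> SPU_IL, 0x20000 -> SPU_ILA,
     0x00050005 -> SPU_ILH, 0x12340000 -> SPU_ILHU.  */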
3331 if (val >= -0x8000 && val <= 0x7fff)
3332 return SPU_IL;
3333 if (val >= 0 && val <= 0x3ffff)
3334 return SPU_ILA;
3335 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3336 return SPU_ILH;
3337 if ((val & 0xffff) == 0)
3338 return SPU_ILHU;
3339
3340 return SPU_NONE;
3341}
3342
dea01258 3343/* Return true when OP can be loaded by one of the il instructions, or
3344 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3345int
3346immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3347{
3348 if (CONSTANT_P (op))
3349 {
3350 enum immediate_class c = classify_immediate (op, mode);
5df189be 3351 return c == IC_IL1 || c == IC_IL1s
3072d30e 3352 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3353 }
3354 return 0;
3355}
3356
	 3357	/* Return true if the first SIZE bytes of ARR form a constant that can be
3358 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3359 represent the size and offset of the instruction to use. */
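/* For example, the byte array { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03,
   0x18,0x19, ... ,0x1f } is accepted with *PRUN == 4 and *PSTART == 4,
   which is the kind of control word cwd generates for inserting a word
   at byte offset 4.  */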
3360static int
3361cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3362{
3363 int cpat, run, i, start;
3364 cpat = 1;
3365 run = 0;
3366 start = -1;
3367 for (i = 0; i < size && cpat; i++)
3368 if (arr[i] != i+16)
3369 {
3370 if (!run)
3371 {
3372 start = i;
3373 if (arr[i] == 3)
3374 run = 1;
3375 else if (arr[i] == 2 && arr[i+1] == 3)
3376 run = 2;
3377 else if (arr[i] == 0)
3378 {
3379 while (arr[i+run] == run && i+run < 16)
3380 run++;
3381 if (run != 4 && run != 8)
3382 cpat = 0;
3383 }
3384 else
3385 cpat = 0;
3386 if ((i & (run-1)) != 0)
3387 cpat = 0;
3388 i += run;
3389 }
3390 else
3391 cpat = 0;
3392 }
b01a6dc3 3393 if (cpat && (run || size < 16))
dea01258 3394 {
3395 if (run == 0)
3396 run = 1;
3397 if (prun)
3398 *prun = run;
3399 if (pstart)
3400 *pstart = start == -1 ? 16-run : start;
3401 return 1;
3402 }
3403 return 0;
3404}
3405
3406/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3407 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3408static enum immediate_class
3409classify_immediate (rtx op, enum machine_mode mode)
644459d0 3410{
3411 HOST_WIDE_INT val;
3412 unsigned char arr[16];
5df189be 3413 int i, j, repeated, fsmbi, repeat;
dea01258 3414
3415 gcc_assert (CONSTANT_P (op));
3416
644459d0 3417 if (GET_MODE (op) != VOIDmode)
3418 mode = GET_MODE (op);
3419
dea01258 3420 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3421 if (!flag_pic
3422 && mode == V4SImode
dea01258 3423 && GET_CODE (op) == CONST_VECTOR
3424 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3425 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3426 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3427 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3428 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3429 op = CONST_VECTOR_ELT (op, 0);
644459d0 3430
dea01258 3431 switch (GET_CODE (op))
3432 {
3433 case SYMBOL_REF:
3434 case LABEL_REF:
3435 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3436
dea01258 3437 case CONST:
0cfc65d4 3438 /* We can never know if the resulting address fits in 18 bits and can be
3439 loaded with ila. For now, assume the address will not overflow if
3440 the displacement is "small" (fits 'K' constraint). */
3441 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3442 {
3443 rtx sym = XEXP (XEXP (op, 0), 0);
3444 rtx cst = XEXP (XEXP (op, 0), 1);
3445
3446 if (GET_CODE (sym) == SYMBOL_REF
3447 && GET_CODE (cst) == CONST_INT
3448 && satisfies_constraint_K (cst))
3449 return IC_IL1s;
3450 }
3451 return IC_IL2s;
644459d0 3452
dea01258 3453 case HIGH:
3454 return IC_IL1s;
3455
3456 case CONST_VECTOR:
3457 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3458 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3459 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3460 return IC_POOL;
3461 /* Fall through. */
3462
3463 case CONST_INT:
3464 case CONST_DOUBLE:
3465 constant_to_array (mode, op, arr);
644459d0 3466
dea01258 3467 /* Check that each 4-byte slot is identical. */
3468 repeated = 1;
3469 for (i = 4; i < 16; i += 4)
3470 for (j = 0; j < 4; j++)
3471 if (arr[j] != arr[i + j])
3472 repeated = 0;
3473
3474 if (repeated)
3475 {
3476 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3477 val = trunc_int_for_mode (val, SImode);
3478
3479 if (which_immediate_load (val) != SPU_NONE)
3480 return IC_IL1;
3481 }
3482
3483 /* Any mode of 2 bytes or smaller can be loaded with an il
3484 instruction. */
3485 gcc_assert (GET_MODE_SIZE (mode) > 2);
3486
3487 fsmbi = 1;
5df189be 3488 repeat = 0;
dea01258 3489 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3490 if (arr[i] != 0 && repeat == 0)
3491 repeat = arr[i];
3492 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3493 fsmbi = 0;
3494 if (fsmbi)
5df189be 3495 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3496
3497 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3498 return IC_CPAT;
3499
3500 if (repeated)
3501 return IC_IL2;
3502
3503 return IC_POOL;
3504 default:
3505 break;
3506 }
3507 gcc_unreachable ();
644459d0 3508}
3509
3510static enum spu_immediate
3511which_logical_immediate (HOST_WIDE_INT val)
3512{
3513 gcc_assert (val == trunc_int_for_mode (val, SImode));
3514
3515 if (val >= -0x200 && val <= 0x1ff)
3516 return SPU_ORI;
3517 if (val >= 0 && val <= 0xffff)
3518 return SPU_IOHL;
3519 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3520 {
3521 val = trunc_int_for_mode (val, HImode);
3522 if (val >= -0x200 && val <= 0x1ff)
3523 return SPU_ORHI;
3524 if ((val & 0xff) == ((val >> 8) & 0xff))
3525 {
3526 val = trunc_int_for_mode (val, QImode);
3527 if (val >= -0x200 && val <= 0x1ff)
3528 return SPU_ORBI;
3529 }
3530 }
3531 return SPU_NONE;
3532}
3533
5df189be 3534/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3535 CONST_DOUBLEs. */
3536static int
3537const_vector_immediate_p (rtx x)
3538{
3539 int i;
3540 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3541 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3542 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3543 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3544 return 0;
3545 return 1;
3546}
3547
644459d0 3548int
3549logical_immediate_p (rtx op, enum machine_mode mode)
3550{
3551 HOST_WIDE_INT val;
3552 unsigned char arr[16];
3553 int i, j;
3554
3555 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3556 || GET_CODE (op) == CONST_VECTOR);
3557
5df189be 3558 if (GET_CODE (op) == CONST_VECTOR
3559 && !const_vector_immediate_p (op))
3560 return 0;
3561
644459d0 3562 if (GET_MODE (op) != VOIDmode)
3563 mode = GET_MODE (op);
3564
3565 constant_to_array (mode, op, arr);
3566
3567 /* Check that bytes are repeated. */
3568 for (i = 4; i < 16; i += 4)
3569 for (j = 0; j < 4; j++)
3570 if (arr[j] != arr[i + j])
3571 return 0;
3572
3573 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3574 val = trunc_int_for_mode (val, SImode);
3575
3576 i = which_logical_immediate (val);
3577 return i != SPU_NONE && i != SPU_IOHL;
3578}
3579
3580int
3581iohl_immediate_p (rtx op, enum machine_mode mode)
3582{
3583 HOST_WIDE_INT val;
3584 unsigned char arr[16];
3585 int i, j;
3586
3587 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3588 || GET_CODE (op) == CONST_VECTOR);
3589
5df189be 3590 if (GET_CODE (op) == CONST_VECTOR
3591 && !const_vector_immediate_p (op))
3592 return 0;
3593
644459d0 3594 if (GET_MODE (op) != VOIDmode)
3595 mode = GET_MODE (op);
3596
3597 constant_to_array (mode, op, arr);
3598
3599 /* Check that bytes are repeated. */
3600 for (i = 4; i < 16; i += 4)
3601 for (j = 0; j < 4; j++)
3602 if (arr[j] != arr[i + j])
3603 return 0;
3604
3605 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3606 val = trunc_int_for_mode (val, SImode);
3607
3608 return val >= 0 && val <= 0xffff;
3609}
3610
3611int
3612arith_immediate_p (rtx op, enum machine_mode mode,
3613 HOST_WIDE_INT low, HOST_WIDE_INT high)
3614{
3615 HOST_WIDE_INT val;
3616 unsigned char arr[16];
3617 int bytes, i, j;
3618
3619 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3620 || GET_CODE (op) == CONST_VECTOR);
3621
5df189be 3622 if (GET_CODE (op) == CONST_VECTOR
3623 && !const_vector_immediate_p (op))
3624 return 0;
3625
644459d0 3626 if (GET_MODE (op) != VOIDmode)
3627 mode = GET_MODE (op);
3628
3629 constant_to_array (mode, op, arr);
3630
3631 if (VECTOR_MODE_P (mode))
3632 mode = GET_MODE_INNER (mode);
3633
3634 bytes = GET_MODE_SIZE (mode);
3635 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3636
3637 /* Check that bytes are repeated. */
3638 for (i = bytes; i < 16; i += bytes)
3639 for (j = 0; j < bytes; j++)
3640 if (arr[j] != arr[i + j])
3641 return 0;
3642
3643 val = arr[0];
3644 for (j = 1; j < bytes; j++)
3645 val = (val << 8) | arr[j];
3646
3647 val = trunc_int_for_mode (val, mode);
3648
3649 return val >= low && val <= high;
3650}
3651
56c7bfc2	 3652	/* TRUE when OP is an immediate that is an exact power of 2, i.e.
	 3653	   OP == 2^SCALE with SCALE >= LOW && SCALE <= HIGH.  When OP is a vector,
3654 all entries must be the same. */
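/* For example, the SFmode constant 2.0f has bit pattern 0x40000000: the
   mantissa bits are zero and (0x40000000 >> 23) - 127 == 1, so it is
   accepted whenever LOW <= 1 and 1 <= HIGH.  */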
3655bool
3656exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3657{
3658 enum machine_mode int_mode;
3659 HOST_WIDE_INT val;
3660 unsigned char arr[16];
3661 int bytes, i, j;
3662
3663 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3664 || GET_CODE (op) == CONST_VECTOR);
3665
3666 if (GET_CODE (op) == CONST_VECTOR
3667 && !const_vector_immediate_p (op))
3668 return 0;
3669
3670 if (GET_MODE (op) != VOIDmode)
3671 mode = GET_MODE (op);
3672
3673 constant_to_array (mode, op, arr);
3674
3675 if (VECTOR_MODE_P (mode))
3676 mode = GET_MODE_INNER (mode);
3677
3678 bytes = GET_MODE_SIZE (mode);
3679 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3680
3681 /* Check that bytes are repeated. */
3682 for (i = bytes; i < 16; i += bytes)
3683 for (j = 0; j < bytes; j++)
3684 if (arr[j] != arr[i + j])
3685 return 0;
3686
3687 val = arr[0];
3688 for (j = 1; j < bytes; j++)
3689 val = (val << 8) | arr[j];
3690
3691 val = trunc_int_for_mode (val, int_mode);
3692
3693 /* Currently, we only handle SFmode */
3694 gcc_assert (mode == SFmode);
3695 if (mode == SFmode)
3696 {
3697 int exp = (val >> 23) - 127;
3698 return val > 0 && (val & 0x007fffff) == 0
3699 && exp >= low && exp <= high;
3700 }
3701 return FALSE;
3702}
3703
6cf5579e 3704/* for_each_rtx callback: return true if *PX is a SYMBOL_REF to an __ea
   qualified variable. */
3705
3706static int
3707ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3708{
3709 rtx x = *px;
3710 tree decl;
3711
3712 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3713 {
3714 rtx plus = XEXP (x, 0);
3715 rtx op0 = XEXP (plus, 0);
3716 rtx op1 = XEXP (plus, 1);
3717 if (GET_CODE (op1) == CONST_INT)
3718 x = op0;
3719 }
3720
3721 return (GET_CODE (x) == SYMBOL_REF
3722 && (decl = SYMBOL_REF_DECL (x)) != 0
3723 && TREE_CODE (decl) == VAR_DECL
3724 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3725}
3726
644459d0 3727/* We accept:
5b865faf 3728 - any 32-bit constant (SImode, SFmode)
644459d0 3729 - any constant that can be generated with fsmbi (any mode)
5b865faf 3730 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3731 (DImode, DFmode)
5b865faf 3732 - a 128-bit constant where the four 32-bit words match. */
ca316360 3733bool
3734spu_legitimate_constant_p (enum machine_mode mode, rtx x)
644459d0 3735{
5df189be 3736 if (GET_CODE (x) == HIGH)
3737 x = XEXP (x, 0);
6cf5579e 3738
3739 /* Reject any __ea qualified reference. These can't appear in
3740 instructions but must be forced to the constant pool. */
3741 if (for_each_rtx (&x, ea_symbol_ref, 0))
3742 return 0;
3743
644459d0 3744 /* V4SI with all identical symbols is valid. */
5df189be 3745 if (!flag_pic
ca316360 3746 && mode == V4SImode
644459d0 3747 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3748 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3749 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3750 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3751 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3752 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3753
5df189be 3754 if (GET_CODE (x) == CONST_VECTOR
3755 && !const_vector_immediate_p (x))
3756 return 0;
644459d0 3757 return 1;
3758}
3759
3760/* Valid addresses are:
3761 - symbol_ref, label_ref, const
3762 - reg
9d98604b 3763 - reg + const_int, where const_int is 16 byte aligned
644459d0 3764 - reg + reg, alignment doesn't matter
3765 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3766 ignore the 4 least significant bits of the const. We only care about
3767 16 byte modes because the expand phase will change all smaller MEM
3768 references to TImode. */
3769static bool
3770spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3771 rtx x, bool reg_ok_strict)
644459d0 3772{
9d98604b 3773 int aligned = GET_MODE_SIZE (mode) >= 16;
3774 if (aligned
3775 && GET_CODE (x) == AND
644459d0 3776 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3777 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3778 x = XEXP (x, 0);
3779 switch (GET_CODE (x))
3780 {
644459d0 3781 case LABEL_REF:
6cf5579e 3782 return !TARGET_LARGE_MEM;
3783
9d98604b 3784 case SYMBOL_REF:
644459d0 3785 case CONST:
6cf5579e 3786 /* Keep __ea references until reload so that spu_expand_mov can see them
3787 in MEMs. */
3788 if (ea_symbol_ref (&x, 0))
3789 return !reload_in_progress && !reload_completed;
9d98604b 3790 return !TARGET_LARGE_MEM;
644459d0 3791
3792 case CONST_INT:
3793 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3794
3795 case SUBREG:
3796 x = XEXP (x, 0);
9d98604b 3797 if (REG_P (x))
3798 return 0;
644459d0 3799
3800 case REG:
3801 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3802
3803 case PLUS:
3804 case LO_SUM:
3805 {
3806 rtx op0 = XEXP (x, 0);
3807 rtx op1 = XEXP (x, 1);
3808 if (GET_CODE (op0) == SUBREG)
3809 op0 = XEXP (op0, 0);
3810 if (GET_CODE (op1) == SUBREG)
3811 op1 = XEXP (op1, 0);
644459d0 3812 if (GET_CODE (op0) == REG
3813 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3814 && GET_CODE (op1) == CONST_INT
3815 && INTVAL (op1) >= -0x2000
3816 && INTVAL (op1) <= 0x1fff
9d98604b 3817 && (!aligned || (INTVAL (op1) & 15) == 0))
3818 return TRUE;
644459d0 3819 if (GET_CODE (op0) == REG
3820 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3821 && GET_CODE (op1) == REG
3822 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3823 return TRUE;
644459d0 3824 }
3825 break;
3826
3827 default:
3828 break;
3829 }
9d98604b 3830 return FALSE;
644459d0 3831}
3832
6cf5579e 3833/* Like spu_legitimate_address_p, except with named addresses. */
3834static bool
3835spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3836 bool reg_ok_strict, addr_space_t as)
3837{
3838 if (as == ADDR_SPACE_EA)
3839 return (REG_P (x) && (GET_MODE (x) == EAmode));
3840
3841 else if (as != ADDR_SPACE_GENERIC)
3842 gcc_unreachable ();
3843
3844 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3845}
3846
644459d0 3847/* When the address is reg + const_int, force the const_int into a
fa7637bd 3848 register. */
644459d0 3849rtx
3850spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3851 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3852{
3853 rtx op0, op1;
3854 /* Make sure both operands are registers. */
3855 if (GET_CODE (x) == PLUS)
3856 {
3857 op0 = XEXP (x, 0);
3858 op1 = XEXP (x, 1);
3859 if (ALIGNED_SYMBOL_REF_P (op0))
3860 {
3861 op0 = force_reg (Pmode, op0);
3862 mark_reg_pointer (op0, 128);
3863 }
3864 else if (GET_CODE (op0) != REG)
3865 op0 = force_reg (Pmode, op0);
3866 if (ALIGNED_SYMBOL_REF_P (op1))
3867 {
3868 op1 = force_reg (Pmode, op1);
3869 mark_reg_pointer (op1, 128);
3870 }
3871 else if (GET_CODE (op1) != REG)
3872 op1 = force_reg (Pmode, op1);
3873 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3874 }
41e3a0c7 3875 return x;
644459d0 3876}
3877
6cf5579e 3878/* Like spu_legitimate_address, except with named address support. */
3879static rtx
3880spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3881 addr_space_t as)
3882{
3883 if (as != ADDR_SPACE_GENERIC)
3884 return x;
3885
3886 return spu_legitimize_address (x, oldx, mode);
3887}
3888
644459d0 3889/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3890 struct attribute_spec.handler. */
3891static tree
3892spu_handle_fndecl_attribute (tree * node,
3893 tree name,
3894 tree args ATTRIBUTE_UNUSED,
3895 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3896{
3897 if (TREE_CODE (*node) != FUNCTION_DECL)
3898 {
67a779df 3899 warning (0, "%qE attribute only applies to functions",
3900 name);
644459d0 3901 *no_add_attrs = true;
3902 }
3903
3904 return NULL_TREE;
3905}
3906
3907/* Handle the "vector" attribute. */
3908static tree
3909spu_handle_vector_attribute (tree * node, tree name,
3910 tree args ATTRIBUTE_UNUSED,
3911 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3912{
3913 tree type = *node, result = NULL_TREE;
3914 enum machine_mode mode;
3915 int unsigned_p;
3916
3917 while (POINTER_TYPE_P (type)
3918 || TREE_CODE (type) == FUNCTION_TYPE
3919 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3920 type = TREE_TYPE (type);
3921
3922 mode = TYPE_MODE (type);
3923
3924 unsigned_p = TYPE_UNSIGNED (type);
3925 switch (mode)
3926 {
3927 case DImode:
3928 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3929 break;
3930 case SImode:
3931 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3932 break;
3933 case HImode:
3934 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3935 break;
3936 case QImode:
3937 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3938 break;
3939 case SFmode:
3940 result = V4SF_type_node;
3941 break;
3942 case DFmode:
3943 result = V2DF_type_node;
3944 break;
3945 default:
3946 break;
3947 }
3948
3949 /* Propagate qualifiers attached to the element type
3950 onto the vector type. */
3951 if (result && result != type && TYPE_QUALS (type))
3952 result = build_qualified_type (result, TYPE_QUALS (type));
3953
3954 *no_add_attrs = true; /* No need to hang on to the attribute. */
3955
3956 if (!result)
67a779df 3957 warning (0, "%qE attribute ignored", name);
644459d0 3958 else
d991e6e8 3959 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3960
3961 return NULL_TREE;
3962}
3963
f2b32076 3964/* Return nonzero if FUNC is a naked function. */
644459d0 3965static int
3966spu_naked_function_p (tree func)
3967{
3968 tree a;
3969
3970 if (TREE_CODE (func) != FUNCTION_DECL)
3971 abort ();
3972
3973 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3974 return a != NULL_TREE;
3975}
3976
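/* Return the offset to add when eliminating register FROM in favor of
   register TO, computed from the current function's frame size,
   outgoing argument area and saved-register area.  */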
3977int
3978spu_initial_elimination_offset (int from, int to)
3979{
3980 int saved_regs_size = spu_saved_regs_size ();
3981 int sp_offset = 0;
abe32cce 3982 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3983 || get_frame_size () || saved_regs_size)
3984 sp_offset = STACK_POINTER_OFFSET;
3985 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3986 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3987 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3988 return get_frame_size ();
644459d0 3989 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3990 return sp_offset + crtl->outgoing_args_size
644459d0 3991 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3992 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3993 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3994 else
3995 gcc_unreachable ();
644459d0 3996}
3997
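/* Return an RTX describing where a value of type TYPE is returned.
   Small aggregates are returned left-justified, spread across as many
   return registers (starting at FIRST_RETURN_REGNUM) as they need;
   everything else is returned in the first return register.  */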
3998rtx
fb80456a 3999spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 4000{
4001 enum machine_mode mode = TYPE_MODE (type);
4002 int byte_size = ((mode == BLKmode)
4003 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4004
4005 /* Make sure small structs are left justified in a register. */
4006 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4007 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4008 {
4009 enum machine_mode smode;
4010 rtvec v;
4011 int i;
4012 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4013 int n = byte_size / UNITS_PER_WORD;
4014 v = rtvec_alloc (nregs);
4015 for (i = 0; i < n; i++)
4016 {
4017 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4018 gen_rtx_REG (TImode,
4019 FIRST_RETURN_REGNUM
4020 + i),
4021 GEN_INT (UNITS_PER_WORD * i));
4022 byte_size -= UNITS_PER_WORD;
4023 }
4024
4025 if (n < nregs)
4026 {
4027 if (byte_size < 4)
4028 byte_size = 4;
4029 smode =
4030 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4031 RTVEC_ELT (v, n) =
4032 gen_rtx_EXPR_LIST (VOIDmode,
4033 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4034 GEN_INT (UNITS_PER_WORD * n));
4035 }
4036 return gen_rtx_PARALLEL (mode, v);
4037 }
4038 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4039}
4040
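/* Return the register (or PARALLEL) in which to pass the current
   argument, or 0 if it must be passed on the stack.  Each argument
   occupies a whole register slot starting at FIRST_ARG_REGNUM, and an
   argument is never split between registers and the stack.  */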
ee9034d4 4041static rtx
4042spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4043 enum machine_mode mode,
ee9034d4 4044 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4045{
4046 int byte_size;
4047
a08c5dd0 4048 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4049 return 0;
4050
4051 byte_size = ((mode == BLKmode)
4052 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4053
4054 /* The ABI does not allow parameters to be passed partially in
4055 reg and partially in stack. */
a08c5dd0 4056 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4057 return 0;
4058
4059 /* Make sure small structs are left justified in a register. */
4060 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4061 && byte_size < UNITS_PER_WORD && byte_size > 0)
4062 {
4063 enum machine_mode smode;
4064 rtx gr_reg;
4065 if (byte_size < 4)
4066 byte_size = 4;
4067 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4068 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4069 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4070 const0_rtx);
4071 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4072 }
4073 else
a08c5dd0 4074 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4075}
4076
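/* Advance the argument cursor CUM past the current argument; every
   argument consumes a whole number of 16-byte register slots.  */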
ee9034d4 4077static void
4078spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4079 const_tree type, bool named ATTRIBUTE_UNUSED)
4080{
4081 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4082 ? 1
4083 : mode == BLKmode
4084 ? ((int_size_in_bytes (type) + 15) / 16)
4085 : mode == VOIDmode
4086 ? 1
4087 : HARD_REGNO_NREGS (cum, mode));
4088}
4089
644459d0 4090/* Variable sized types are passed by reference. */
4091static bool
4092spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4093 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4094 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4095{
4096 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4097}
4098\f
4099
4100/* Var args. */
4101
4102/* Create and return the va_list datatype.
4103
4104 On SPU, va_list is an array type equivalent to
4105
4106 typedef struct __va_list_tag
4107 {
4108 void *__args __attribute__((__aligned(16)));
4109 void *__skip __attribute__((__aligned(16)));
4110
4111 } va_list[1];
4112
fa7637bd 4113 where __args points to the arg that will be returned by the next
644459d0 4114 va_arg(), and __skip points to the previous stack frame such that
4115 when __args == __skip we should advance __args by 32 bytes. */
4116static tree
4117spu_build_builtin_va_list (void)
4118{
4119 tree f_args, f_skip, record, type_decl;
4120 bool owp;
4121
4122 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4123
4124 type_decl =
54e46243 4125 build_decl (BUILTINS_LOCATION,
4126 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4127
54e46243 4128 f_args = build_decl (BUILTINS_LOCATION,
4129 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4130 f_skip = build_decl (BUILTINS_LOCATION,
4131 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4132
4133 DECL_FIELD_CONTEXT (f_args) = record;
4134 DECL_ALIGN (f_args) = 128;
4135 DECL_USER_ALIGN (f_args) = 1;
4136
4137 DECL_FIELD_CONTEXT (f_skip) = record;
4138 DECL_ALIGN (f_skip) = 128;
4139 DECL_USER_ALIGN (f_skip) = 1;
4140
bc907808 4141 TYPE_STUB_DECL (record) = type_decl;
644459d0 4142 TYPE_NAME (record) = type_decl;
4143 TYPE_FIELDS (record) = f_args;
1767a056 4144 DECL_CHAIN (f_args) = f_skip;
644459d0 4145
4146 /* We know this is being padded and we want it too. It is an internal
4147 type so hide the warnings from the user. */
4148 owp = warn_padded;
4149 warn_padded = false;
4150
4151 layout_type (record);
4152
4153 warn_padded = owp;
4154
4155 /* The correct type is an array type of one element. */
4156 return build_array_type (record, build_index_type (size_zero_node));
4157}
4158
4159/* Implement va_start by filling the va_list structure VALIST.
4160 NEXTARG points to the first anonymous stack argument.
4161
4162 The following global variables are used to initialize
4163 the va_list structure:
4164
abe32cce 4165 crtl->args.info;
644459d0 4166 the CUMULATIVE_ARGS for this function
4167
abe32cce 4168 crtl->args.arg_offset_rtx:
644459d0 4169 holds the offset of the first anonymous stack argument
4170 (relative to the virtual arg pointer). */
4171
8a58ed0a 4172static void
644459d0 4173spu_va_start (tree valist, rtx nextarg)
4174{
4175 tree f_args, f_skip;
4176 tree args, skip, t;
4177
4178 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4179 f_skip = DECL_CHAIN (f_args);
644459d0 4180
170efcd4 4181 valist = build_simple_mem_ref (valist);
644459d0 4182 args =
4183 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4184 skip =
4185 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4186
4187 /* Find the __args area. */
4188 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4189 if (crtl->args.pretend_args_size > 0)
0de36bdb 4190 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4191 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4192 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4193 TREE_SIDE_EFFECTS (t) = 1;
4194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4195
4196 /* Find the __skip area. */
4197 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4198 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4199 size_int (crtl->args.pretend_args_size
0de36bdb 4200 - STACK_POINTER_OFFSET));
75a70cf9 4201 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4202 TREE_SIDE_EFFECTS (t) = 1;
4203 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4204}
4205
4206/* Gimplify va_arg by updating the va_list structure
4207 VALIST as required to retrieve an argument of type
4208 TYPE, and returning that argument.
4209
4210 ret = va_arg(VALIST, TYPE);
4211
4212 generates code equivalent to:
4213
4214 paddedsize = (sizeof(TYPE) + 15) & -16;
4215 if (VALIST.__args + paddedsize > VALIST.__skip
4216 && VALIST.__args <= VALIST.__skip)
4217 addr = VALIST.__skip + 32;
4218 else
4219 addr = VALIST.__args;
4220 VALIST.__args = addr + paddedsize;
4221 ret = *(TYPE *)addr;
4222 */
4223static tree
75a70cf9 4224spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4225 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4226{
4227 tree f_args, f_skip;
4228 tree args, skip;
4229 HOST_WIDE_INT size, rsize;
4230 tree paddedsize, addr, tmp;
4231 bool pass_by_reference_p;
4232
4233 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4234 f_skip = DECL_CHAIN (f_args);
644459d0 4235
182cf5a9 4236 valist = build_simple_mem_ref (valist);
644459d0 4237 args =
4238 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4239 skip =
4240 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4241
4242 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4243
4244 /* if an object is dynamically sized, a pointer to it is passed
4245 instead of the object itself. */
27a82950 4246 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4247 false);
644459d0 4248 if (pass_by_reference_p)
4249 type = build_pointer_type (type);
4250 size = int_size_in_bytes (type);
4251 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4252
4253 /* build conditional expression to calculate addr. The expression
4254 will be gimplified later. */
0de36bdb 4255 paddedsize = size_int (rsize);
75a70cf9 4256 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4257 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4258 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4259 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4260 unshare_expr (skip)));
644459d0 4261
4262 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4263 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4264 size_int (32)), unshare_expr (args));
644459d0 4265
75a70cf9 4266 gimplify_assign (addr, tmp, pre_p);
644459d0 4267
4268 /* update VALIST.__args */
0de36bdb 4269 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4270 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4271
8115f0af 4272 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4273 addr);
644459d0 4274
4275 if (pass_by_reference_p)
4276 addr = build_va_arg_indirect_ref (addr);
4277
4278 return build_va_arg_indirect_ref (addr);
4279}
4280
4281/* Save parameter registers starting with the register that corresponds
4282 to the first unnamed parameters. If the first unnamed parameter is
4283 in the stack then save no registers. Set pretend_args_size to the
4284 amount of space needed to save the registers. */
4285void
4286spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4287 tree type, int *pretend_size, int no_rtl)
4288{
4289 if (!no_rtl)
4290 {
4291 rtx tmp;
4292 int regno;
4293 int offset;
4294 int ncum = *cum;
4295
4296 /* cum currently points to the last named argument, we want to
4297 start at the next argument. */
ee9034d4 4298 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4299
4300 offset = -STACK_POINTER_OFFSET;
4301 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4302 {
4303 tmp = gen_frame_mem (V4SImode,
4304 plus_constant (virtual_incoming_args_rtx,
4305 offset));
4306 emit_move_insn (tmp,
4307 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4308 offset += 16;
4309 }
4310 *pretend_size = offset + STACK_POINTER_OFFSET;
4311 }
4312}
4313\f
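/* When compiling PIC code, reserve the PIC offset table register so the
   register allocator will not use it for anything else.  */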
b2d7ede1 4314static void
644459d0 4315spu_conditional_register_usage (void)
4316{
4317 if (flag_pic)
4318 {
4319 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4320 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4321 }
644459d0 4322}
4323
9d98604b 4324/* Return nonzero when register X is known to be 16-byte aligned.  This is
 4325   called any time we inspect the alignment of a register used in an address. */
644459d0 4326static int
9d98604b 4327reg_aligned_for_addr (rtx x)
644459d0 4328{
9d98604b 4329 int regno =
4330 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4331 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4332}
4333
69ced2d6 4334/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4335 into its SYMBOL_REF_FLAGS. */
4336static void
4337spu_encode_section_info (tree decl, rtx rtl, int first)
4338{
4339 default_encode_section_info (decl, rtl, first);
4340
4341 /* If a variable has a forced alignment to < 16 bytes, mark it with
4342 SYMBOL_FLAG_ALIGN1. */
4343 if (TREE_CODE (decl) == VAR_DECL
4344 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4345 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4346}
4347
644459d0 4348/* Return TRUE if we are certain the mem refers to a complete object
4349 which is both 16-byte aligned and padded to a 16-byte boundary. This
4350 would make it safe to store with a single instruction.
4351 We guarantee the alignment and padding for static objects by aligning
4352 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4353 FIXME: We currently cannot guarantee this for objects on the stack
4354 because assign_parm_setup_stack calls assign_stack_local with the
4355 alignment of the parameter mode and in that case the alignment never
4356 gets adjusted by LOCAL_ALIGNMENT. */
4357static int
4358store_with_one_insn_p (rtx mem)
4359{
9d98604b 4360 enum machine_mode mode = GET_MODE (mem);
644459d0 4361 rtx addr = XEXP (mem, 0);
9d98604b 4362 if (mode == BLKmode)
644459d0 4363 return 0;
9d98604b 4364 if (GET_MODE_SIZE (mode) >= 16)
4365 return 1;
644459d0 4366 /* Only static objects. */
4367 if (GET_CODE (addr) == SYMBOL_REF)
4368 {
4369 /* We use the associated declaration to make sure the access is
fa7637bd 4370 referring to the whole object.
851d9296 4371 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4372 if it is necessary. Will there be cases where one exists, and
4373 the other does not? Will there be cases where both exist, but
4374 have different types? */
4375 tree decl = MEM_EXPR (mem);
4376 if (decl
4377 && TREE_CODE (decl) == VAR_DECL
4378 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4379 return 1;
4380 decl = SYMBOL_REF_DECL (addr);
4381 if (decl
4382 && TREE_CODE (decl) == VAR_DECL
4383 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4384 return 1;
4385 }
4386 return 0;
4387}
4388
9d98604b 4389/* Return 1 when the address is not valid for a simple load and store as
4390 required by the '_mov*' patterns. We could make this less strict
 4391   for loads, but we prefer MEMs to look the same so they are more
4392 likely to be merged. */
4393static int
4394address_needs_split (rtx mem)
4395{
4396 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4397 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4398 || !(store_with_one_insn_p (mem)
4399 || mem_is_padded_component_ref (mem))))
4400 return 1;
4401
4402 return 0;
4403}
4404
6cf5579e 4405static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4406static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4407static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4408
4409/* MEM is known to be an __ea qualified memory access. Emit a call to
4410 fetch the ppu memory to local store, and return its address in local
4411 store. */
4412
4413static void
4414ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4415{
4416 if (is_store)
4417 {
4418 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4419 if (!cache_fetch_dirty)
4420 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4421 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4422 2, ea_addr, EAmode, ndirty, SImode);
4423 }
4424 else
4425 {
4426 if (!cache_fetch)
4427 cache_fetch = init_one_libfunc ("__cache_fetch");
4428 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4429 1, ea_addr, EAmode);
4430 }
4431}
4432
4433/* Like ea_load_store, but do the cache tag comparison and, for stores,
4434 dirty bit marking, inline.
4435
4436 The cache control data structure is an array of
4437
4438 struct __cache_tag_array
4439 {
4440 unsigned int tag_lo[4];
4441 unsigned int tag_hi[4];
4442 void *data_pointer[4];
4443 int reserved[4];
4444 vector unsigned short dirty_bits[4];
4445 } */
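/* For illustration only -- this is not part of the compiler.  A rough C
   sketch of the lookup that ea_load_store_inline emits as RTL, assuming
   the 32-bit __ea model (the 64-bit model also compares the high word
   against tag_hi):

     idx  = ea_addr & (__cache_tag_array_size - 128);
     line = (struct __cache_tag_array *) ((char *) __cache_tag_array + idx);
     for (way = 0; way < 4; way++)
       if (line->tag_lo[way] == (ea_addr & -128))
         return (char *) line->data_pointer[way] + (ea_addr & 127);
     ... on a miss, fall back to __cache_fetch / __cache_fetch_dirty ...

   The emitted code does the four tag compares in parallel with vector
   instructions and uses a count-leading-zeros of the compare result to
   select the matching data_pointer slot instead of looping.  */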
4446
4447static void
4448ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4449{
4450 rtx ea_addr_si;
4451 HOST_WIDE_INT v;
4452 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4453 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4454 rtx index_mask = gen_reg_rtx (SImode);
4455 rtx tag_arr = gen_reg_rtx (Pmode);
4456 rtx splat_mask = gen_reg_rtx (TImode);
4457 rtx splat = gen_reg_rtx (V4SImode);
4458 rtx splat_hi = NULL_RTX;
4459 rtx tag_index = gen_reg_rtx (Pmode);
4460 rtx block_off = gen_reg_rtx (SImode);
4461 rtx tag_addr = gen_reg_rtx (Pmode);
4462 rtx tag = gen_reg_rtx (V4SImode);
4463 rtx cache_tag = gen_reg_rtx (V4SImode);
4464 rtx cache_tag_hi = NULL_RTX;
4465 rtx cache_ptrs = gen_reg_rtx (TImode);
4466 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4467 rtx tag_equal = gen_reg_rtx (V4SImode);
4468 rtx tag_equal_hi = NULL_RTX;
4469 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4470 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4471 rtx eq_index = gen_reg_rtx (SImode);
4472 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4473
4474 if (spu_ea_model != 32)
4475 {
4476 splat_hi = gen_reg_rtx (V4SImode);
4477 cache_tag_hi = gen_reg_rtx (V4SImode);
4478 tag_equal_hi = gen_reg_rtx (V4SImode);
4479 }
4480
4481 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4482 emit_move_insn (tag_arr, tag_arr_sym);
4483 v = 0x0001020300010203LL;
4484 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4485 ea_addr_si = ea_addr;
4486 if (spu_ea_model != 32)
4487 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4488
4489 /* tag_index = ea_addr & (tag_array_size - 128) */
4490 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4491
4492 /* splat ea_addr to all 4 slots. */
4493 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4494 /* Similarly for high 32 bits of ea_addr. */
4495 if (spu_ea_model != 32)
4496 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4497
4498 /* block_off = ea_addr & 127 */
4499 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4500
4501 /* tag_addr = tag_arr + tag_index */
4502 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4503
4504 /* Read cache tags. */
4505 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4506 if (spu_ea_model != 32)
4507 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4508 plus_constant (tag_addr, 16)));
4509
4510 /* tag = ea_addr & -128 */
4511 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4512
4513 /* Read all four cache data pointers. */
4514 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4515 plus_constant (tag_addr, 32)));
4516
4517 /* Compare tags. */
4518 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4519 if (spu_ea_model != 32)
4520 {
4521 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4522 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4523 }
4524
4525 /* At most one of the tags compare equal, so tag_equal has one
4526 32-bit slot set to all 1's, with the other slots all zero.
4527 gbb picks off low bit from each byte in the 128-bit registers,
4528 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4529 we have a hit. */
4530 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4531 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4532
4533 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4534 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4535
4536 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4537 (rotating eq_index mod 16 bytes). */
4538 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4539 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4540
4541 /* Add block offset to form final data address. */
4542 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4543
4544 /* Check that we did hit. */
4545 hit_label = gen_label_rtx ();
4546 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4547 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4548 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4550 hit_ref, pc_rtx)));
4551 /* Say that this branch is very likely to happen. */
4552 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4553 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4554
4555 ea_load_store (mem, is_store, ea_addr, data_addr);
4556 cont_label = gen_label_rtx ();
4557 emit_jump_insn (gen_jump (cont_label));
4558 emit_barrier ();
4559
4560 emit_label (hit_label);
4561
4562 if (is_store)
4563 {
4564 HOST_WIDE_INT v_hi;
4565 rtx dirty_bits = gen_reg_rtx (TImode);
4566 rtx dirty_off = gen_reg_rtx (SImode);
4567 rtx dirty_128 = gen_reg_rtx (TImode);
4568 rtx neg_block_off = gen_reg_rtx (SImode);
4569
4570 /* Set up mask with one dirty bit per byte of the mem we are
4571 writing, starting from top bit. */
4572 v_hi = v = -1;
4573 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4574 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4575 {
4576 v_hi = v;
4577 v = 0;
4578 }
4579 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4580
4581 /* Form index into cache dirty_bits. eq_index is one of
4582 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4583 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4584 offset to each of the four dirty_bits elements. */
4585 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4586
4587 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4588
4589 /* Rotate bit mask to proper bit. */
4590 emit_insn (gen_negsi2 (neg_block_off, block_off));
4591 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4592 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4593
4594 /* Or in the new dirty bits. */
4595 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4596
4597 /* Store. */
4598 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4599 }
4600
4601 emit_label (cont_label);
4602}
4603
4604static rtx
4605expand_ea_mem (rtx mem, bool is_store)
4606{
4607 rtx ea_addr;
4608 rtx data_addr = gen_reg_rtx (Pmode);
4609 rtx new_mem;
4610
4611 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4612 if (optimize_size || optimize == 0)
4613 ea_load_store (mem, is_store, ea_addr, data_addr);
4614 else
4615 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4616
4617 if (ea_alias_set == -1)
4618 ea_alias_set = new_alias_set ();
4619
4620 /* We generate a new MEM RTX to refer to the copy of the data
4621 in the cache. We do not copy memory attributes (except the
4622 alignment) from the original MEM, as they may no longer apply
4623 to the cache copy. */
4624 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4625 set_mem_alias_set (new_mem, ea_alias_set);
4626 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4627
4628 return new_mem;
4629}
4630
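/* Expand a move between OPS[0] and OPS[1] in MODE.  Return nonzero when
   this function emitted all of the required insns itself, zero to let
   the caller's default move expansion finish the job.  */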
644459d0 4631int
4632spu_expand_mov (rtx * ops, enum machine_mode mode)
4633{
4634 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4635 {
4636 /* Perform the move in the destination SUBREG's inner mode. */
4637 ops[0] = SUBREG_REG (ops[0]);
4638 mode = GET_MODE (ops[0]);
4639 ops[1] = gen_lowpart_common (mode, ops[1]);
4640 gcc_assert (ops[1]);
4641 }
644459d0 4642
4643 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4644 {
4645 rtx from = SUBREG_REG (ops[1]);
8d72495d 4646 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4647
4648 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4649 && GET_MODE_CLASS (imode) == MODE_INT
4650 && subreg_lowpart_p (ops[1]));
4651
4652 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4653 imode = SImode;
4654 if (imode != GET_MODE (from))
4655 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4656
4657 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4658 {
d6bf3b14 4659 enum insn_code icode = convert_optab_handler (trunc_optab,
4660 mode, imode);
644459d0 4661 emit_insn (GEN_FCN (icode) (ops[0], from));
4662 }
4663 else
4664 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4665 return 1;
4666 }
4667
4668 /* At least one of the operands needs to be a register. */
4669 if ((reload_in_progress | reload_completed) == 0
4670 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4671 {
4672 rtx temp = force_reg (mode, ops[1]);
4673 emit_move_insn (ops[0], temp);
4674 return 1;
4675 }
4676 if (reload_in_progress || reload_completed)
4677 {
dea01258 4678 if (CONSTANT_P (ops[1]))
4679 return spu_split_immediate (ops);
644459d0 4680 return 0;
4681 }
9d98604b 4682
4683 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4684 extend them. */
4685 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4686 {
9d98604b 4687 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4688 if (val != INTVAL (ops[1]))
644459d0 4689 {
9d98604b 4690 emit_move_insn (ops[0], GEN_INT (val));
4691 return 1;
644459d0 4692 }
4693 }
9d98604b 4694 if (MEM_P (ops[0]))
6cf5579e 4695 {
4696 if (MEM_ADDR_SPACE (ops[0]))
4697 ops[0] = expand_ea_mem (ops[0], true);
4698 return spu_split_store (ops);
4699 }
9d98604b 4700 if (MEM_P (ops[1]))
6cf5579e 4701 {
4702 if (MEM_ADDR_SPACE (ops[1]))
4703 ops[1] = expand_ea_mem (ops[1], false);
4704 return spu_split_load (ops);
4705 }
9d98604b 4706
644459d0 4707 return 0;
4708}
4709
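/* Copy into DST (whose mode is narrower than TImode) the value held in
   the preferred slot of the TImode register SRC, by shifting the
   quadword right and truncating it to DST's integer mode.  */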
9d98604b 4710static void
4711spu_convert_move (rtx dst, rtx src)
644459d0 4712{
9d98604b 4713 enum machine_mode mode = GET_MODE (dst);
4714 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4715 rtx reg;
4716 gcc_assert (GET_MODE (src) == TImode);
4717 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4718 emit_insn (gen_rtx_SET (VOIDmode, reg,
4719 gen_rtx_TRUNCATE (int_mode,
4720 gen_rtx_LSHIFTRT (TImode, src,
4721 GEN_INT (int_mode == DImode ? 64 : 96)))));
4722 if (int_mode != mode)
4723 {
4724 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4725 emit_move_insn (dst, reg);
4726 }
4727}
644459d0 4728
9d98604b 4729/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4730 the address from SRC and SRC+16. Return a REG or CONST_INT that
4731 specifies how many bytes to rotate the loaded registers, plus any
4732 extra from EXTRA_ROTQBY. The address and rotate amounts are
4733 normalized to improve merging of loads and rotate computations. */
4734static rtx
4735spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4736{
4737 rtx addr = XEXP (src, 0);
4738 rtx p0, p1, rot, addr0, addr1;
4739 int rot_amt;
644459d0 4740
4741 rot = 0;
4742 rot_amt = 0;
9d98604b 4743
4744 if (MEM_ALIGN (src) >= 128)
4745 /* Address is already aligned; simply perform a TImode load. */ ;
4746 else if (GET_CODE (addr) == PLUS)
644459d0 4747 {
4748 /* 8 cases:
4749 aligned reg + aligned reg => lqx
4750 aligned reg + unaligned reg => lqx, rotqby
4751 aligned reg + aligned const => lqd
4752 aligned reg + unaligned const => lqd, rotqbyi
4753 unaligned reg + aligned reg => lqx, rotqby
4754 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4755 unaligned reg + aligned const => lqd, rotqby
4756 unaligned reg + unaligned const -> not allowed by legitimate address
4757 */
4758 p0 = XEXP (addr, 0);
4759 p1 = XEXP (addr, 1);
9d98604b 4760 if (!reg_aligned_for_addr (p0))
644459d0 4761 {
9d98604b 4762 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4763 {
9d98604b 4764 rot = gen_reg_rtx (SImode);
4765 emit_insn (gen_addsi3 (rot, p0, p1));
4766 }
4767 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4768 {
4769 if (INTVAL (p1) > 0
4770 && REG_POINTER (p0)
4771 && INTVAL (p1) * BITS_PER_UNIT
4772 < REGNO_POINTER_ALIGN (REGNO (p0)))
4773 {
4774 rot = gen_reg_rtx (SImode);
4775 emit_insn (gen_addsi3 (rot, p0, p1));
4776 addr = p0;
4777 }
4778 else
4779 {
4780 rtx x = gen_reg_rtx (SImode);
4781 emit_move_insn (x, p1);
4782 if (!spu_arith_operand (p1, SImode))
4783 p1 = x;
4784 rot = gen_reg_rtx (SImode);
4785 emit_insn (gen_addsi3 (rot, p0, p1));
4786 addr = gen_rtx_PLUS (Pmode, p0, x);
4787 }
644459d0 4788 }
4789 else
4790 rot = p0;
4791 }
4792 else
4793 {
4794 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4795 {
4796 rot_amt = INTVAL (p1) & 15;
9d98604b 4797 if (INTVAL (p1) & -16)
4798 {
4799 p1 = GEN_INT (INTVAL (p1) & -16);
4800 addr = gen_rtx_PLUS (SImode, p0, p1);
4801 }
4802 else
4803 addr = p0;
644459d0 4804 }
9d98604b 4805 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4806 rot = p1;
4807 }
4808 }
9d98604b 4809 else if (REG_P (addr))
644459d0 4810 {
9d98604b 4811 if (!reg_aligned_for_addr (addr))
644459d0 4812 rot = addr;
4813 }
4814 else if (GET_CODE (addr) == CONST)
4815 {
4816 if (GET_CODE (XEXP (addr, 0)) == PLUS
4817 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4818 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4819 {
4820 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4821 if (rot_amt & -16)
4822 addr = gen_rtx_CONST (Pmode,
4823 gen_rtx_PLUS (Pmode,
4824 XEXP (XEXP (addr, 0), 0),
4825 GEN_INT (rot_amt & -16)));
4826 else
4827 addr = XEXP (XEXP (addr, 0), 0);
4828 }
4829 else
9d98604b 4830 {
4831 rot = gen_reg_rtx (Pmode);
4832 emit_move_insn (rot, addr);
4833 }
644459d0 4834 }
4835 else if (GET_CODE (addr) == CONST_INT)
4836 {
4837 rot_amt = INTVAL (addr);
4838 addr = GEN_INT (rot_amt & -16);
4839 }
4840 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4841 {
4842 rot = gen_reg_rtx (Pmode);
4843 emit_move_insn (rot, addr);
4844 }
644459d0 4845
9d98604b 4846 rot_amt += extra_rotby;
644459d0 4847
4848 rot_amt &= 15;
4849
4850 if (rot && rot_amt)
4851 {
9d98604b 4852 rtx x = gen_reg_rtx (SImode);
4853 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4854 rot = x;
644459d0 4855 rot_amt = 0;
4856 }
9d98604b 4857 if (!rot && rot_amt)
4858 rot = GEN_INT (rot_amt);
4859
4860 addr0 = copy_rtx (addr);
4861 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4862 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4863
4864 if (dst1)
4865 {
4866 addr1 = plus_constant (copy_rtx (addr), 16);
4867 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4868 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4869 }
644459d0 4870
9d98604b 4871 return rot;
4872}
4873
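/* Split a load into OPS[0] from memory OPS[1] when the mode is narrower
   than a quadword: load the enclosing 16 bytes, rotate the desired
   value into the preferred slot and extract it.  Return 1 when insns
   were emitted, 0 when the mode is a full quadword and no splitting is
   needed.  */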
4874int
4875spu_split_load (rtx * ops)
4876{
4877 enum machine_mode mode = GET_MODE (ops[0]);
4878 rtx addr, load, rot;
4879 int rot_amt;
644459d0 4880
9d98604b 4881 if (GET_MODE_SIZE (mode) >= 16)
4882 return 0;
644459d0 4883
9d98604b 4884 addr = XEXP (ops[1], 0);
4885 gcc_assert (GET_CODE (addr) != AND);
4886
4887 if (!address_needs_split (ops[1]))
4888 {
4889 ops[1] = change_address (ops[1], TImode, addr);
4890 load = gen_reg_rtx (TImode);
4891 emit_insn (gen__movti (load, ops[1]));
4892 spu_convert_move (ops[0], load);
4893 return 1;
4894 }
4895
4896 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4897
4898 load = gen_reg_rtx (TImode);
4899 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4900
4901 if (rot)
4902 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4903
9d98604b 4904 spu_convert_move (ops[0], load);
4905 return 1;
644459d0 4906}
4907
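/* Split a store of OPS[1] to memory OPS[0] when the mode is narrower
   than a quadword: unless a single store is known to be safe, load the
   enclosing 16 bytes, merge in the new value using a cpat-generated
   shuffle mask, and store the quadword back.  Return 1 when insns were
   emitted, 0 when no splitting is needed.  */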
9d98604b 4908int
644459d0 4909spu_split_store (rtx * ops)
4910{
4911 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4912 rtx reg;
644459d0 4913 rtx addr, p0, p1, p1_lo, smem;
4914 int aform;
4915 int scalar;
4916
9d98604b 4917 if (GET_MODE_SIZE (mode) >= 16)
4918 return 0;
4919
644459d0 4920 addr = XEXP (ops[0], 0);
9d98604b 4921 gcc_assert (GET_CODE (addr) != AND);
4922
4923 if (!address_needs_split (ops[0]))
4924 {
4925 reg = gen_reg_rtx (TImode);
4926 emit_insn (gen_spu_convert (reg, ops[1]));
4927 ops[0] = change_address (ops[0], TImode, addr);
4928 emit_move_insn (ops[0], reg);
4929 return 1;
4930 }
644459d0 4931
4932 if (GET_CODE (addr) == PLUS)
4933 {
4934 /* 8 cases:
4935 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4936 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4937 aligned reg + aligned const => lqd, c?d, shuf, stqx
4938 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4939 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4940 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4941 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4942	 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4943 */
4944 aform = 0;
4945 p0 = XEXP (addr, 0);
4946 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4947 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4948 {
4949 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4950 if (reg_aligned_for_addr (p0))
4951 {
4952 p1 = GEN_INT (INTVAL (p1) & -16);
4953 if (p1 == const0_rtx)
4954 addr = p0;
4955 else
4956 addr = gen_rtx_PLUS (SImode, p0, p1);
4957 }
4958 else
4959 {
4960 rtx x = gen_reg_rtx (SImode);
4961 emit_move_insn (x, p1);
4962 addr = gen_rtx_PLUS (SImode, p0, x);
4963 }
644459d0 4964 }
4965 }
9d98604b 4966 else if (REG_P (addr))
644459d0 4967 {
4968 aform = 0;
4969 p0 = addr;
4970 p1 = p1_lo = const0_rtx;
4971 }
4972 else
4973 {
4974 aform = 1;
4975 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4976 p1 = 0; /* aform doesn't use p1 */
4977 p1_lo = addr;
4978 if (ALIGNED_SYMBOL_REF_P (addr))
4979 p1_lo = const0_rtx;
9d98604b 4980 else if (GET_CODE (addr) == CONST
4981 && GET_CODE (XEXP (addr, 0)) == PLUS
4982 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4983 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4984 {
9d98604b 4985 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4986 if ((v & -16) != 0)
4987 addr = gen_rtx_CONST (Pmode,
4988 gen_rtx_PLUS (Pmode,
4989 XEXP (XEXP (addr, 0), 0),
4990 GEN_INT (v & -16)));
4991 else
4992 addr = XEXP (XEXP (addr, 0), 0);
4993 p1_lo = GEN_INT (v & 15);
644459d0 4994 }
4995 else if (GET_CODE (addr) == CONST_INT)
4996 {
4997 p1_lo = GEN_INT (INTVAL (addr) & 15);
4998 addr = GEN_INT (INTVAL (addr) & -16);
4999 }
9d98604b 5000 else
5001 {
5002 p1_lo = gen_reg_rtx (SImode);
5003 emit_move_insn (p1_lo, addr);
5004 }
644459d0 5005 }
5006
4cbad5bb 5007 gcc_assert (aform == 0 || aform == 1);
9d98604b 5008 reg = gen_reg_rtx (TImode);
e04cf423 5009
644459d0 5010 scalar = store_with_one_insn_p (ops[0]);
5011 if (!scalar)
5012 {
 5013      /* We could copy the flags from the ops[0] MEM to lmem here.
 5014         We don't because we want this load to be optimized away if
5015 possible, and copying the flags will prevent that in certain
5016 cases, e.g. consider the volatile flag. */
5017
9d98604b 5018 rtx pat = gen_reg_rtx (TImode);
e04cf423 5019 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5020 set_mem_alias_set (lmem, 0);
5021 emit_insn (gen_movti (reg, lmem));
644459d0 5022
9d98604b 5023 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5024 p0 = stack_pointer_rtx;
5025 if (!p1_lo)
5026 p1_lo = const0_rtx;
5027
5028 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5029 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5030 }
644459d0 5031 else
5032 {
5033 if (GET_CODE (ops[1]) == REG)
5034 emit_insn (gen_spu_convert (reg, ops[1]));
5035 else if (GET_CODE (ops[1]) == SUBREG)
5036 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5037 else
5038 abort ();
5039 }
5040
5041 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5042 emit_insn (gen_ashlti3
5043 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5044
9d98604b 5045 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5046 /* We can't use the previous alias set because the memory has changed
5047 size and can potentially overlap objects of other types. */
5048 set_mem_alias_set (smem, 0);
5049
e04cf423 5050 emit_insn (gen_movti (smem, reg));
9d98604b 5051 return 1;
644459d0 5052}
5053
5054/* Return TRUE if X is MEM which is a struct member reference
5055 and the member can safely be loaded and stored with a single
5056 instruction because it is padded. */
5057static int
5058mem_is_padded_component_ref (rtx x)
5059{
5060 tree t = MEM_EXPR (x);
5061 tree r;
5062 if (!t || TREE_CODE (t) != COMPONENT_REF)
5063 return 0;
5064 t = TREE_OPERAND (t, 1);
5065 if (!t || TREE_CODE (t) != FIELD_DECL
5066 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5067 return 0;
5068 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5069 r = DECL_FIELD_CONTEXT (t);
5070 if (!r || TREE_CODE (r) != RECORD_TYPE)
5071 return 0;
5072 /* Make sure they are the same mode */
5073 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5074 return 0;
 5075  /* If there are no following fields then the field alignment ensures
fa7637bd 5076     the structure is padded to that alignment, which means this field is
 5077     padded too. */
644459d0 5078 if (TREE_CHAIN (t) == 0)
5079 return 1;
5080 /* If the following field is also aligned then this field will be
5081 padded. */
5082 t = TREE_CHAIN (t);
5083 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5084 return 1;
5085 return 0;
5086}
5087
c7b91b14 5088/* Parse the -mfixed-range= option string. */
5089static void
5090fix_range (const char *const_str)
5091{
5092 int i, first, last;
5093 char *str, *dash, *comma;
5094
 5095  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5096 REG2 are either register names or register numbers. The effect
5097 of this option is to mark the registers in the range from REG1 to
5098 REG2 as ``fixed'' so they won't be used by the compiler. */
5099
5100 i = strlen (const_str);
5101 str = (char *) alloca (i + 1);
5102 memcpy (str, const_str, i + 1);
5103
5104 while (1)
5105 {
5106 dash = strchr (str, '-');
5107 if (!dash)
5108 {
5109 warning (0, "value of -mfixed-range must have form REG1-REG2");
5110 return;
5111 }
5112 *dash = '\0';
5113 comma = strchr (dash + 1, ',');
5114 if (comma)
5115 *comma = '\0';
5116
5117 first = decode_reg_name (str);
5118 if (first < 0)
5119 {
5120 warning (0, "unknown register name: %s", str);
5121 return;
5122 }
5123
5124 last = decode_reg_name (dash + 1);
5125 if (last < 0)
5126 {
5127 warning (0, "unknown register name: %s", dash + 1);
5128 return;
5129 }
5130
5131 *dash = '-';
5132
5133 if (first > last)
5134 {
5135 warning (0, "%s-%s is an empty range", str, dash + 1);
5136 return;
5137 }
5138
5139 for (i = first; i <= last; ++i)
5140 fixed_regs[i] = call_used_regs[i] = 1;
5141
5142 if (!comma)
5143 break;
5144
5145 *comma = ',';
5146 str = comma + 1;
5147 }
5148}
5149
644459d0 5150/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5151 can be generated using the fsmbi instruction. */
5152int
5153fsmbi_const_p (rtx x)
5154{
dea01258 5155 if (CONSTANT_P (x))
5156 {
5df189be 5157 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5158 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5159 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5160 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5161 }
5162 return 0;
5163}
5164
5165/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5166 can be generated using the cbd, chd, cwd or cdd instruction. */
5167int
5168cpat_const_p (rtx x, enum machine_mode mode)
5169{
5170 if (CONSTANT_P (x))
5171 {
5172 enum immediate_class c = classify_immediate (x, mode);
5173 return c == IC_CPAT;
5174 }
5175 return 0;
5176}
644459d0 5177
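/* Compute at compile time the TImode shuffle mask that a cbd, chd, cwd
   or cdd instruction would generate for the given operands: OPS[1] is
   the base address (a constant or a pointer register known to be
   16-byte aligned), OPS[2] the constant offset and OPS[3] the element
   size in bytes.  Return 0 if the mask cannot be determined at compile
   time.  */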
dea01258 5178rtx
5179gen_cpat_const (rtx * ops)
5180{
5181 unsigned char dst[16];
5182 int i, offset, shift, isize;
5183 if (GET_CODE (ops[3]) != CONST_INT
5184 || GET_CODE (ops[2]) != CONST_INT
5185 || (GET_CODE (ops[1]) != CONST_INT
5186 && GET_CODE (ops[1]) != REG))
5187 return 0;
5188 if (GET_CODE (ops[1]) == REG
5189 && (!REG_POINTER (ops[1])
5190 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5191 return 0;
644459d0 5192
5193 for (i = 0; i < 16; i++)
dea01258 5194 dst[i] = i + 16;
5195 isize = INTVAL (ops[3]);
5196 if (isize == 1)
5197 shift = 3;
5198 else if (isize == 2)
5199 shift = 2;
5200 else
5201 shift = 0;
5202 offset = (INTVAL (ops[2]) +
5203 (GET_CODE (ops[1]) ==
5204 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5205 for (i = 0; i < isize; i++)
5206 dst[offset + i] = i + shift;
5207 return array_to_constant (TImode, dst);
644459d0 5208}
5209
5210/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5211 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5212 than 16 bytes, the value is repeated across the rest of the array. */
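/* For example (illustrative only), (const_int 0x12345678) with MODE ==
   SImode fills ARR with the bytes 12 34 56 78 repeated four times.  */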
5213void
5214constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5215{
5216 HOST_WIDE_INT val;
5217 int i, j, first;
5218
5219 memset (arr, 0, 16);
5220 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5221 if (GET_CODE (x) == CONST_INT
5222 || (GET_CODE (x) == CONST_DOUBLE
5223 && (mode == SFmode || mode == DFmode)))
5224 {
5225 gcc_assert (mode != VOIDmode && mode != BLKmode);
5226
5227 if (GET_CODE (x) == CONST_DOUBLE)
5228 val = const_double_to_hwint (x);
5229 else
5230 val = INTVAL (x);
5231 first = GET_MODE_SIZE (mode) - 1;
5232 for (i = first; i >= 0; i--)
5233 {
5234 arr[i] = val & 0xff;
5235 val >>= 8;
5236 }
5237 /* Splat the constant across the whole array. */
5238 for (j = 0, i = first + 1; i < 16; i++)
5239 {
5240 arr[i] = arr[j];
5241 j = (j == first) ? 0 : j + 1;
5242 }
5243 }
5244 else if (GET_CODE (x) == CONST_DOUBLE)
5245 {
5246 val = CONST_DOUBLE_LOW (x);
5247 for (i = 15; i >= 8; i--)
5248 {
5249 arr[i] = val & 0xff;
5250 val >>= 8;
5251 }
5252 val = CONST_DOUBLE_HIGH (x);
5253 for (i = 7; i >= 0; i--)
5254 {
5255 arr[i] = val & 0xff;
5256 val >>= 8;
5257 }
5258 }
5259 else if (GET_CODE (x) == CONST_VECTOR)
5260 {
5261 int units;
5262 rtx elt;
5263 mode = GET_MODE_INNER (mode);
5264 units = CONST_VECTOR_NUNITS (x);
5265 for (i = 0; i < units; i++)
5266 {
5267 elt = CONST_VECTOR_ELT (x, i);
5268 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5269 {
5270 if (GET_CODE (elt) == CONST_DOUBLE)
5271 val = const_double_to_hwint (elt);
5272 else
5273 val = INTVAL (elt);
5274 first = GET_MODE_SIZE (mode) - 1;
5275 if (first + i * GET_MODE_SIZE (mode) > 16)
5276 abort ();
5277 for (j = first; j >= 0; j--)
5278 {
5279 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5280 val >>= 8;
5281 }
5282 }
5283 }
5284 }
5285 else
5286 gcc_unreachable();
5287}
5288
5289/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5290 smaller than 16 bytes, use the bytes that would represent that value
5291 in a register, e.g., for QImode return the value of arr[3]. */
5292rtx
e96f2783 5293array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5294{
5295 enum machine_mode inner_mode;
5296 rtvec v;
5297 int units, size, i, j, k;
5298 HOST_WIDE_INT val;
5299
5300 if (GET_MODE_CLASS (mode) == MODE_INT
5301 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5302 {
5303 j = GET_MODE_SIZE (mode);
5304 i = j < 4 ? 4 - j : 0;
5305 for (val = 0; i < j; i++)
5306 val = (val << 8) | arr[i];
5307 val = trunc_int_for_mode (val, mode);
5308 return GEN_INT (val);
5309 }
5310
5311 if (mode == TImode)
5312 {
5313 HOST_WIDE_INT high;
5314 for (i = high = 0; i < 8; i++)
5315 high = (high << 8) | arr[i];
5316 for (i = 8, val = 0; i < 16; i++)
5317 val = (val << 8) | arr[i];
5318 return immed_double_const (val, high, TImode);
5319 }
5320 if (mode == SFmode)
5321 {
5322 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5323 val = trunc_int_for_mode (val, SImode);
171b6d22 5324 return hwint_to_const_double (SFmode, val);
644459d0 5325 }
5326 if (mode == DFmode)
5327 {
1f915911 5328 for (i = 0, val = 0; i < 8; i++)
5329 val = (val << 8) | arr[i];
171b6d22 5330 return hwint_to_const_double (DFmode, val);
644459d0 5331 }
5332
5333 if (!VECTOR_MODE_P (mode))
5334 abort ();
5335
5336 units = GET_MODE_NUNITS (mode);
5337 size = GET_MODE_UNIT_SIZE (mode);
5338 inner_mode = GET_MODE_INNER (mode);
5339 v = rtvec_alloc (units);
5340
5341 for (k = i = 0; i < units; ++i)
5342 {
5343 val = 0;
5344 for (j = 0; j < size; j++, k++)
5345 val = (val << 8) | arr[k];
5346
5347 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5348 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5349 else
5350 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5351 }
5352 if (k > 16)
5353 abort ();
5354
5355 return gen_rtx_CONST_VECTOR (mode, v);
5356}
5357
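/* When compiling PIC code with TARGET_WARN_RELOC or TARGET_ERROR_RELOC,
   diagnose an attempt to emit a run-time relocation against X, pointing
   at the offending declaration when it can be identified.  */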
5358static void
5359reloc_diagnostic (rtx x)
5360{
712d2297 5361 tree decl = 0;
644459d0 5362 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5363 return;
5364
5365 if (GET_CODE (x) == SYMBOL_REF)
5366 decl = SYMBOL_REF_DECL (x);
5367 else if (GET_CODE (x) == CONST
5368 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5369 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5370
5371 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5372 if (decl && !DECL_P (decl))
5373 decl = 0;
5374
644459d0 5375 /* The decl could be a string constant. */
5376 if (decl && DECL_P (decl))
712d2297 5377 {
5378 location_t loc;
5379 /* We use last_assemble_variable_decl to get line information. It's
5380 not always going to be right and might not even be close, but will
5381 be right for the more common cases. */
5382 if (!last_assemble_variable_decl || in_section == ctors_section)
5383 loc = DECL_SOURCE_LOCATION (decl);
5384 else
5385 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5386
712d2297 5387 if (TARGET_WARN_RELOC)
5388 warning_at (loc, 0,
5389 "creating run-time relocation for %qD", decl);
5390 else
5391 error_at (loc,
5392 "creating run-time relocation for %qD", decl);
5393 }
5394 else
5395 {
5396 if (TARGET_WARN_RELOC)
5397 warning_at (input_location, 0, "creating run-time relocation");
5398 else
5399 error_at (input_location, "creating run-time relocation");
5400 }
644459d0 5401}
5402
5403/* Hook into assemble_integer so we can generate an error for run-time
5404 relocations. The SPU ABI disallows them. */
5405static bool
5406spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5407{
5408 /* By default run-time relocations aren't supported, but we allow them
5409 in case users support it in their own run-time loader. And we provide
5410 a warning for those users that don't. */
5411 if ((GET_CODE (x) == SYMBOL_REF)
5412 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5413 reloc_diagnostic (x);
5414
5415 return default_assemble_integer (x, size, aligned_p);
5416}
5417
5418static void
5419spu_asm_globalize_label (FILE * file, const char *name)
5420{
5421 fputs ("\t.global\t", file);
5422 assemble_name (file, name);
5423 fputs ("\n", file);
5424}
5425
5426static bool
f529eb25 5427spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5428 bool speed ATTRIBUTE_UNUSED)
644459d0 5429{
5430 enum machine_mode mode = GET_MODE (x);
5431 int cost = COSTS_N_INSNS (2);
5432
5433 /* Folding to a CONST_VECTOR will use extra space but there might
5434 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5435 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5436 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5437 because this cost will only be compared against a single insn.
5438 if (code == CONST_VECTOR)
ca316360 5439 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5440 */
5441
5442 /* Use defaults for float operations. Not accurate but good enough. */
5443 if (mode == DFmode)
5444 {
5445 *total = COSTS_N_INSNS (13);
5446 return true;
5447 }
5448 if (mode == SFmode)
5449 {
5450 *total = COSTS_N_INSNS (6);
5451 return true;
5452 }
5453 switch (code)
5454 {
5455 case CONST_INT:
5456 if (satisfies_constraint_K (x))
5457 *total = 0;
5458 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5459 *total = COSTS_N_INSNS (1);
5460 else
5461 *total = COSTS_N_INSNS (3);
5462 return true;
5463
5464 case CONST:
5465 *total = COSTS_N_INSNS (3);
5466 return true;
5467
5468 case LABEL_REF:
5469 case SYMBOL_REF:
5470 *total = COSTS_N_INSNS (0);
5471 return true;
5472
5473 case CONST_DOUBLE:
5474 *total = COSTS_N_INSNS (5);
5475 return true;
5476
5477 case FLOAT_EXTEND:
5478 case FLOAT_TRUNCATE:
5479 case FLOAT:
5480 case UNSIGNED_FLOAT:
5481 case FIX:
5482 case UNSIGNED_FIX:
5483 *total = COSTS_N_INSNS (7);
5484 return true;
5485
5486 case PLUS:
5487 if (mode == TImode)
5488 {
5489 *total = COSTS_N_INSNS (9);
5490 return true;
5491 }
5492 break;
5493
5494 case MULT:
5495 cost =
5496 GET_CODE (XEXP (x, 0)) ==
5497 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5498 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5499 {
5500 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5501 {
5502 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5503 cost = COSTS_N_INSNS (14);
5504 if ((val & 0xffff) == 0)
5505 cost = COSTS_N_INSNS (9);
5506 else if (val > 0 && val < 0x10000)
5507 cost = COSTS_N_INSNS (11);
5508 }
5509 }
5510 *total = cost;
5511 return true;
5512 case DIV:
5513 case UDIV:
5514 case MOD:
5515 case UMOD:
5516 *total = COSTS_N_INSNS (20);
5517 return true;
5518 case ROTATE:
5519 case ROTATERT:
5520 case ASHIFT:
5521 case ASHIFTRT:
5522 case LSHIFTRT:
5523 *total = COSTS_N_INSNS (4);
5524 return true;
5525 case UNSPEC:
5526 if (XINT (x, 1) == UNSPEC_CONVERT)
5527 *total = COSTS_N_INSNS (0);
5528 else
5529 *total = COSTS_N_INSNS (4);
5530 return true;
5531 }
5532 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5533 if (GET_MODE_CLASS (mode) == MODE_INT
5534 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5535 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5536 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5537 *total = cost;
5538 return true;
5539}
5540
1bd43494 5541static enum machine_mode
5542spu_unwind_word_mode (void)
644459d0 5543{
1bd43494 5544 return SImode;
644459d0 5545}
5546
5547/* Decide whether we can make a sibling call to a function. DECL is the
5548 declaration of the function being targeted by the call and EXP is the
5549 CALL_EXPR representing the call. */
5550static bool
5551spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5552{
5553 return decl && !TARGET_LARGE_MEM;
5554}
5555
5556/* We need to correctly update the back chain pointer and the Available
 5557	   Stack Size (which is in the second slot of the sp register). */
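/* Added note: the shuffle pattern 00 01 02 03 repeated in every word slot
   splats word 0 of op1 (the requested byte count) into all four slots of
   SPLATTED, so the single vector subtract below decrements both the stack
   pointer in slot 0 and the Available Stack Size in slot 1 at once.  */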
5558void
5559spu_allocate_stack (rtx op0, rtx op1)
5560{
5561 HOST_WIDE_INT v;
5562 rtx chain = gen_reg_rtx (V4SImode);
5563 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5564 rtx sp = gen_reg_rtx (V4SImode);
5565 rtx splatted = gen_reg_rtx (V4SImode);
5566 rtx pat = gen_reg_rtx (TImode);
5567
5568 /* copy the back chain so we can save it back again. */
5569 emit_move_insn (chain, stack_bot);
5570
5571 op1 = force_reg (SImode, op1);
5572
5573 v = 0x1020300010203ll;
5574 emit_move_insn (pat, immed_double_const (v, v, TImode));
5575 emit_insn (gen_shufb (splatted, op1, op1, pat));
5576
5577 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5578 emit_insn (gen_subv4si3 (sp, sp, splatted));
5579
5580 if (flag_stack_check)
5581 {
5582 rtx avail = gen_reg_rtx(SImode);
5583 rtx result = gen_reg_rtx(SImode);
5584 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5585 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5586 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5587 }
5588
5589 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5590
5591 emit_move_insn (stack_bot, chain);
5592
5593 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5594}
5595
5596void
5597spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5598{
5599 static unsigned char arr[16] =
5600 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5601 rtx temp = gen_reg_rtx (SImode);
5602 rtx temp2 = gen_reg_rtx (SImode);
5603 rtx temp3 = gen_reg_rtx (V4SImode);
5604 rtx temp4 = gen_reg_rtx (V4SImode);
5605 rtx pat = gen_reg_rtx (TImode);
5606 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5607
5608 /* Restore the backchain from the first word, sp from the second. */
5609 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5610 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5611
5612 emit_move_insn (pat, array_to_constant (TImode, arr));
5613
5614 /* Compute Available Stack Size for sp */
5615 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5616 emit_insn (gen_shufb (temp3, temp, temp, pat));
5617
5618 /* Compute Available Stack Size for back chain */
5619 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5620 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5621 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5622
5623 emit_insn (gen_addv4si3 (sp, sp, temp3));
5624 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5625}
5626
5627static void
5628spu_init_libfuncs (void)
5629{
5630 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5631 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5632 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5633 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5634 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5635 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5636 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5637 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5638 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5639 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5640 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5641
5642 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5643 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5644
5825ec3f 5645 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5646 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5647 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5648 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5649 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5650 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5651 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5652 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5653 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5654 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5655 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5656 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5657
19a53068 5658 set_optab_libfunc (smul_optab, TImode, "__multi3");
5659 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5660 set_optab_libfunc (smod_optab, TImode, "__modti3");
5661 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5662 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5663 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5664}
5665
5666/* Make a subreg, stripping any existing subreg. We could possibly just
5667 call simplify_subreg, but in this case we know what we want. */
5668rtx
5669spu_gen_subreg (enum machine_mode mode, rtx x)
5670{
5671 if (GET_CODE (x) == SUBREG)
5672 x = SUBREG_REG (x);
5673 if (GET_MODE (x) == mode)
5674 return x;
5675 return gen_rtx_SUBREG (mode, x, 0);
5676}
5677
5678static bool
fb80456a 5679spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5680{
5681 return (TYPE_MODE (type) == BLKmode
5682 && ((type) == 0
5683 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5684 || int_size_in_bytes (type) >
5685 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5686}
5687\f
5688/* Create the built-in types and functions */
5689
c2233b46 5690enum spu_function_code
5691{
5692#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5693#include "spu-builtins.def"
5694#undef DEF_BUILTIN
5695 NUM_SPU_BUILTINS
5696};
5697
5698extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5699
644459d0 5700struct spu_builtin_description spu_builtins[] = {
5701#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5702 {fcode, icode, name, type, params},
644459d0 5703#include "spu-builtins.def"
5704#undef DEF_BUILTIN
5705};
5706
0c5c4d59 5707static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5708
5709/* Returns the spu builtin decl for CODE. */
e6925042 5710
5711static tree
5712spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5713{
5714 if (code >= NUM_SPU_BUILTINS)
5715 return error_mark_node;
5716
0c5c4d59 5717 return spu_builtin_decls[code];
e6925042 5718}
5719
5720
644459d0 5721static void
5722spu_init_builtins (void)
5723{
5724 struct spu_builtin_description *d;
5725 unsigned int i;
5726
5727 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5728 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5729 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5730 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5731 V4SF_type_node = build_vector_type (float_type_node, 4);
5732 V2DF_type_node = build_vector_type (double_type_node, 2);
5733
5734 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5735 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5736 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5737 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5738
c4ecce0c 5739 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5740
5741 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5742 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5744 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5748 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5750 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5751 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5753
5754 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5755 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5756 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5757 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5758 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5759 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5760 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5761 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5762
5763 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5764 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5765
5766 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5767
5768 spu_builtin_types[SPU_BTI_PTR] =
5769 build_pointer_type (build_qualified_type
5770 (void_type_node,
5771 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5772
5773 /* For each builtin we build a new prototype. The tree code will make
5774 sure nodes are shared. */
5775 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5776 {
5777 tree p;
5778 char name[64]; /* build_function will make a copy. */
5779 int parm;
5780
5781 if (d->name == 0)
5782 continue;
5783
5dfbd18f 5784 /* Find last parm. */
644459d0 5785 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5786 ;
644459d0 5787
5788 p = void_list_node;
5789 while (parm > 1)
5790 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5791
5792 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5793
5794 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5795 spu_builtin_decls[i] =
3726fe5e 5796 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5797 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5798 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5799
5800 /* These builtins don't throw. */
0c5c4d59 5801 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5802 }
5803}
5804
cf31d486 5805void
5806spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5807{
5808 static unsigned char arr[16] =
5809 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5810
5811 rtx temp = gen_reg_rtx (Pmode);
5812 rtx temp2 = gen_reg_rtx (V4SImode);
5813 rtx temp3 = gen_reg_rtx (V4SImode);
5814 rtx pat = gen_reg_rtx (TImode);
5815 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5816
5817 emit_move_insn (pat, array_to_constant (TImode, arr));
5818
5819 /* Restore the sp. */
5820 emit_move_insn (temp, op1);
5821 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5822
5823 /* Compute available stack size for sp. */
5824 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5825 emit_insn (gen_shufb (temp3, temp, temp, pat));
5826
5827 emit_insn (gen_addv4si3 (sp, sp, temp3));
5828 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5829}
5830
644459d0 5831int
5832spu_safe_dma (HOST_WIDE_INT channel)
5833{
006e4b96 5834 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5835}
5836
5837void
5838spu_builtin_splats (rtx ops[])
5839{
5840 enum machine_mode mode = GET_MODE (ops[0]);
5841 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5842 {
5843 unsigned char arr[16];
5844 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5845 emit_move_insn (ops[0], array_to_constant (mode, arr));
5846 }
644459d0 5847 else
5848 {
5849 rtx reg = gen_reg_rtx (TImode);
5850 rtx shuf;
5851 if (GET_CODE (ops[1]) != REG
5852 && GET_CODE (ops[1]) != SUBREG)
5853 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
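      /* Added note, based on the shufb semantics: control bytes 0x00-0x0f
	 select the corresponding byte of the first source operand, so e.g.
	 the V4SImode pattern 00 01 02 03 repeated four times copies the
	 preferred-slot word of ops[1] into every word of the result.  */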
5854 switch (mode)
5855 {
5856 case V2DImode:
5857 case V2DFmode:
5858 shuf =
5859 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5860 TImode);
5861 break;
5862 case V4SImode:
5863 case V4SFmode:
5864 shuf =
5865 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5866 TImode);
5867 break;
5868 case V8HImode:
5869 shuf =
5870 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5871 TImode);
5872 break;
5873 case V16QImode:
5874 shuf =
5875 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5876 TImode);
5877 break;
5878 default:
5879 abort ();
5880 }
5881 emit_move_insn (reg, shuf);
5882 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5883 }
5884}
5885
5886void
5887spu_builtin_extract (rtx ops[])
5888{
5889 enum machine_mode mode;
5890 rtx rot, from, tmp;
5891
5892 mode = GET_MODE (ops[1]);
5893
5894 if (GET_CODE (ops[2]) == CONST_INT)
5895 {
5896 switch (mode)
5897 {
5898 case V16QImode:
5899 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5900 break;
5901 case V8HImode:
5902 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5903 break;
5904 case V4SFmode:
5905 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5906 break;
5907 case V4SImode:
5908 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5909 break;
5910 case V2DImode:
5911 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5912 break;
5913 case V2DFmode:
5914 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5915 break;
5916 default:
5917 abort ();
5918 }
5919 return;
5920 }
5921
5922 from = spu_gen_subreg (TImode, ops[1]);
5923 rot = gen_reg_rtx (TImode);
5924 tmp = gen_reg_rtx (SImode);
5925
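  /* Added note: TMP becomes the byte rotate count that brings element
     ops[2] into its mode's preferred slot -- the element's byte offset
     minus the preferred-slot offset (3 for byte, 2 for halfword, 0 for
     word and doubleword elements).  */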
5926 switch (mode)
5927 {
5928 case V16QImode:
5929 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5930 break;
5931 case V8HImode:
5932 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5933 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5934 break;
5935 case V4SFmode:
5936 case V4SImode:
5937 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5938 break;
5939 case V2DImode:
5940 case V2DFmode:
5941 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5942 break;
5943 default:
5944 abort ();
5945 }
5946 emit_insn (gen_rotqby_ti (rot, from, tmp));
5947
5948 emit_insn (gen_spu_convert (ops[0], rot));
5949}
5950
5951void
5952spu_builtin_insert (rtx ops[])
5953{
5954 enum machine_mode mode = GET_MODE (ops[0]);
5955 enum machine_mode imode = GET_MODE_INNER (mode);
5956 rtx mask = gen_reg_rtx (TImode);
5957 rtx offset;
5958
5959 if (GET_CODE (ops[3]) == CONST_INT)
5960 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5961 else
5962 {
5963 offset = gen_reg_rtx (SImode);
5964 emit_insn (gen_mulsi3
5965 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5966 }
5967 emit_insn (gen_cpat
5968 (mask, stack_pointer_rtx, offset,
5969 GEN_INT (GET_MODE_SIZE (imode))));
5970 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5971}
5972
5973void
5974spu_builtin_promote (rtx ops[])
5975{
5976 enum machine_mode mode, imode;
5977 rtx rot, from, offset;
5978 HOST_WIDE_INT pos;
5979
5980 mode = GET_MODE (ops[0]);
5981 imode = GET_MODE_INNER (mode);
5982
5983 from = gen_reg_rtx (TImode);
5984 rot = spu_gen_subreg (TImode, ops[0]);
5985
5986 emit_insn (gen_spu_convert (from, ops[1]));
5987
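  /* Added note: OFFSET is the left-rotate byte count, (preferred-slot
     offset - element byte offset) mod 16, which moves the scalar from its
     preferred slot into element ops[2] of the result.  */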
5988 if (GET_CODE (ops[2]) == CONST_INT)
5989 {
5990 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5991 if (GET_MODE_SIZE (imode) < 4)
5992 pos += 4 - GET_MODE_SIZE (imode);
5993 offset = GEN_INT (pos & 15);
5994 }
5995 else
5996 {
5997 offset = gen_reg_rtx (SImode);
5998 switch (mode)
5999 {
6000 case V16QImode:
6001 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6002 break;
6003 case V8HImode:
6004 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6005 emit_insn (gen_addsi3 (offset, offset, offset));
6006 break;
6007 case V4SFmode:
6008 case V4SImode:
6009 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6010 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6011 break;
6012 case V2DImode:
6013 case V2DFmode:
6014 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6015 break;
6016 default:
6017 abort ();
6018 }
6019 }
6020 emit_insn (gen_rotqby_ti (rot, from, offset));
6021}
6022
e96f2783 6023static void
6024spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6025{
e96f2783 6026 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6027 rtx shuf = gen_reg_rtx (V4SImode);
6028 rtx insn = gen_reg_rtx (V4SImode);
6029 rtx shufc;
6030 rtx insnc;
6031 rtx mem;
6032
6033 fnaddr = force_reg (SImode, fnaddr);
6034 cxt = force_reg (SImode, cxt);
6035
6036 if (TARGET_LARGE_MEM)
6037 {
6038 rtx rotl = gen_reg_rtx (V4SImode);
6039 rtx mask = gen_reg_rtx (V4SImode);
6040 rtx bi = gen_reg_rtx (SImode);
e96f2783 6041 static unsigned char const shufa[16] = {
644459d0 6042 2, 3, 0, 1, 18, 19, 16, 17,
6043 0, 1, 2, 3, 16, 17, 18, 19
6044 };
e96f2783 6045 static unsigned char const insna[16] = {
644459d0 6046 0x41, 0, 0, 79,
6047 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6048 0x60, 0x80, 0, 79,
6049 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6050 };
6051
6052 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6053 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6054
6055 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6056 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6057 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6058 emit_insn (gen_selb (insn, insnc, rotl, mask));
6059
e96f2783 6060 mem = adjust_address (m_tramp, V4SImode, 0);
6061 emit_move_insn (mem, insn);
644459d0 6062
6063 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6064 mem = adjust_address (m_tramp, Pmode, 16);
6065 emit_move_insn (mem, bi);
644459d0 6066 }
6067 else
6068 {
6069 rtx scxt = gen_reg_rtx (SImode);
6070 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6071 static unsigned char const insna[16] = {
644459d0 6072 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6073 0x30, 0, 0, 0,
6074 0, 0, 0, 0,
6075 0, 0, 0, 0
6076 };
6077
6078 shufc = gen_reg_rtx (TImode);
6079 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6080
 6081	      /* By or'ing all of cxt with the ila opcode we are assuming cxt
 6082		 fits in 18 bits and the last 4 are zeros.  This will be true if
 6083		 the stack pointer is initialized to 0x3fff0 at program start;
 6084		 otherwise the ila instruction will be garbage. */
6085
6086 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6087 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6088 emit_insn (gen_cpat
6089 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6090 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6091 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6092
e96f2783 6093 mem = adjust_address (m_tramp, V4SImode, 0);
6094 emit_move_insn (mem, insn);
644459d0 6095 }
6096 emit_insn (gen_sync ());
6097}
6098
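/* Added summary, not original source text: expand a sign extension from
   the mode of ops[1] to the DImode or TImode of ops[0] with a single
   shufb.  Control bytes 0x10-0x1f select bytes of the SIGN operand, so a
   pattern filled with 0x10 (0x12 in the QImode case, where SIGN is HImode)
   replicates the sign byte, while the trailing entries select the bytes
   that hold the original value.  */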
6099void
6100spu_expand_sign_extend (rtx ops[])
6101{
6102 unsigned char arr[16];
6103 rtx pat = gen_reg_rtx (TImode);
6104 rtx sign, c;
6105 int i, last;
6106 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6107 if (GET_MODE (ops[1]) == QImode)
6108 {
6109 sign = gen_reg_rtx (HImode);
6110 emit_insn (gen_extendqihi2 (sign, ops[1]));
6111 for (i = 0; i < 16; i++)
6112 arr[i] = 0x12;
6113 arr[last] = 0x13;
6114 }
6115 else
6116 {
6117 for (i = 0; i < 16; i++)
6118 arr[i] = 0x10;
6119 switch (GET_MODE (ops[1]))
6120 {
6121 case HImode:
6122 sign = gen_reg_rtx (SImode);
6123 emit_insn (gen_extendhisi2 (sign, ops[1]));
6124 arr[last] = 0x03;
6125 arr[last - 1] = 0x02;
6126 break;
6127 case SImode:
6128 sign = gen_reg_rtx (SImode);
6129 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6130 for (i = 0; i < 4; i++)
6131 arr[last - i] = 3 - i;
6132 break;
6133 case DImode:
6134 sign = gen_reg_rtx (SImode);
6135 c = gen_reg_rtx (SImode);
6136 emit_insn (gen_spu_convert (c, ops[1]));
6137 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6138 for (i = 0; i < 8; i++)
6139 arr[last - i] = 7 - i;
6140 break;
6141 default:
6142 abort ();
6143 }
6144 }
6145 emit_move_insn (pat, array_to_constant (TImode, arr));
6146 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6147}
6148
6149/* Expand vector initialization.  If there are any constant parts,
 6150   load the constant parts first, then load any non-constant parts.  */
6151void
6152spu_expand_vector_init (rtx target, rtx vals)
6153{
6154 enum machine_mode mode = GET_MODE (target);
6155 int n_elts = GET_MODE_NUNITS (mode);
6156 int n_var = 0;
6157 bool all_same = true;
790c536c 6158 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6159 int i;
6160
6161 first = XVECEXP (vals, 0, 0);
6162 for (i = 0; i < n_elts; ++i)
6163 {
6164 x = XVECEXP (vals, 0, i);
e442af0b 6165 if (!(CONST_INT_P (x)
6166 || GET_CODE (x) == CONST_DOUBLE
6167 || GET_CODE (x) == CONST_FIXED))
644459d0 6168 ++n_var;
6169 else
6170 {
6171 if (first_constant == NULL_RTX)
6172 first_constant = x;
6173 }
6174 if (i > 0 && !rtx_equal_p (x, first))
6175 all_same = false;
6176 }
6177
6178 /* if all elements are the same, use splats to repeat elements */
6179 if (all_same)
6180 {
6181 if (!CONSTANT_P (first)
6182 && !register_operand (first, GET_MODE (x)))
6183 first = force_reg (GET_MODE (first), first);
6184 emit_insn (gen_spu_splats (target, first));
6185 return;
6186 }
6187
6188 /* load constant parts */
6189 if (n_var != n_elts)
6190 {
6191 if (n_var == 0)
6192 {
6193 emit_move_insn (target,
6194 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6195 }
6196 else
6197 {
6198 rtx constant_parts_rtx = copy_rtx (vals);
6199
6200 gcc_assert (first_constant != NULL_RTX);
 6201	  /* Fill empty slots with the first constant; this increases
 6202	     our chance of using splats in the recursive call below. */
6203 for (i = 0; i < n_elts; ++i)
e442af0b 6204 {
6205 x = XVECEXP (constant_parts_rtx, 0, i);
6206 if (!(CONST_INT_P (x)
6207 || GET_CODE (x) == CONST_DOUBLE
6208 || GET_CODE (x) == CONST_FIXED))
6209 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6210 }
644459d0 6211
6212 spu_expand_vector_init (target, constant_parts_rtx);
6213 }
6214 }
6215
6216 /* load variable parts */
6217 if (n_var != 0)
6218 {
6219 rtx insert_operands[4];
6220
6221 insert_operands[0] = target;
6222 insert_operands[2] = target;
6223 for (i = 0; i < n_elts; ++i)
6224 {
6225 x = XVECEXP (vals, 0, i);
e442af0b 6226 if (!(CONST_INT_P (x)
6227 || GET_CODE (x) == CONST_DOUBLE
6228 || GET_CODE (x) == CONST_FIXED))
644459d0 6229 {
6230 if (!register_operand (x, GET_MODE (x)))
6231 x = force_reg (GET_MODE (x), x);
6232 insert_operands[1] = x;
6233 insert_operands[3] = GEN_INT (i);
6234 spu_builtin_insert (insert_operands);
6235 }
6236 }
6237 }
6238}
6352eedf 6239
5474166e 6240/* Return the insn index of the vector compare instruction for the given
 6241   CODE, DEST_MODE and OP_MODE.  Return -1 if a valid insn is not available.  */
6242
6243static int
6244get_vec_cmp_insn (enum rtx_code code,
6245 enum machine_mode dest_mode,
6246 enum machine_mode op_mode)
6247
6248{
6249 switch (code)
6250 {
6251 case EQ:
6252 if (dest_mode == V16QImode && op_mode == V16QImode)
6253 return CODE_FOR_ceq_v16qi;
6254 if (dest_mode == V8HImode && op_mode == V8HImode)
6255 return CODE_FOR_ceq_v8hi;
6256 if (dest_mode == V4SImode && op_mode == V4SImode)
6257 return CODE_FOR_ceq_v4si;
6258 if (dest_mode == V4SImode && op_mode == V4SFmode)
6259 return CODE_FOR_ceq_v4sf;
6260 if (dest_mode == V2DImode && op_mode == V2DFmode)
6261 return CODE_FOR_ceq_v2df;
6262 break;
6263 case GT:
6264 if (dest_mode == V16QImode && op_mode == V16QImode)
6265 return CODE_FOR_cgt_v16qi;
6266 if (dest_mode == V8HImode && op_mode == V8HImode)
6267 return CODE_FOR_cgt_v8hi;
6268 if (dest_mode == V4SImode && op_mode == V4SImode)
6269 return CODE_FOR_cgt_v4si;
6270 if (dest_mode == V4SImode && op_mode == V4SFmode)
6271 return CODE_FOR_cgt_v4sf;
6272 if (dest_mode == V2DImode && op_mode == V2DFmode)
6273 return CODE_FOR_cgt_v2df;
6274 break;
6275 case GTU:
6276 if (dest_mode == V16QImode && op_mode == V16QImode)
6277 return CODE_FOR_clgt_v16qi;
6278 if (dest_mode == V8HImode && op_mode == V8HImode)
6279 return CODE_FOR_clgt_v8hi;
6280 if (dest_mode == V4SImode && op_mode == V4SImode)
6281 return CODE_FOR_clgt_v4si;
6282 break;
6283 default:
6284 break;
6285 }
6286 return -1;
6287}
6288
6289/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6290 DMODE is expected destination mode. This is a recursive function. */
6291
6292static rtx
6293spu_emit_vector_compare (enum rtx_code rcode,
6294 rtx op0, rtx op1,
6295 enum machine_mode dmode)
6296{
6297 int vec_cmp_insn;
6298 rtx mask;
6299 enum machine_mode dest_mode;
6300 enum machine_mode op_mode = GET_MODE (op1);
6301
6302 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6303
 6304	  /* Floating point vector compare instructions use V4SImode as the destination.
 6305	     Double floating point vector compare instructions use V2DImode as the destination.
 6306	     Move the destination to the appropriate mode later. */
6307 if (dmode == V4SFmode)
6308 dest_mode = V4SImode;
6309 else if (dmode == V2DFmode)
6310 dest_mode = V2DImode;
6311 else
6312 dest_mode = dmode;
6313
6314 mask = gen_reg_rtx (dest_mode);
6315 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6316
6317 if (vec_cmp_insn == -1)
6318 {
6319 bool swap_operands = false;
6320 bool try_again = false;
6321 switch (rcode)
6322 {
6323 case LT:
6324 rcode = GT;
6325 swap_operands = true;
6326 try_again = true;
6327 break;
6328 case LTU:
6329 rcode = GTU;
6330 swap_operands = true;
6331 try_again = true;
6332 break;
6333 case NE:
6334 /* Treat A != B as ~(A==B). */
6335 {
6336 enum insn_code nor_code;
6337 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6338 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6339 gcc_assert (nor_code != CODE_FOR_nothing);
6340 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6341 if (dmode != dest_mode)
6342 {
6343 rtx temp = gen_reg_rtx (dest_mode);
6344 convert_move (temp, mask, 0);
6345 return temp;
6346 }
6347 return mask;
6348 }
6349 break;
6350 case GE:
6351 case GEU:
6352 case LE:
6353 case LEU:
6354 /* Try GT/GTU/LT/LTU OR EQ */
6355 {
6356 rtx c_rtx, eq_rtx;
6357 enum insn_code ior_code;
6358 enum rtx_code new_code;
6359
6360 switch (rcode)
6361 {
6362 case GE: new_code = GT; break;
6363 case GEU: new_code = GTU; break;
6364 case LE: new_code = LT; break;
6365 case LEU: new_code = LTU; break;
6366 default:
6367 gcc_unreachable ();
6368 }
6369
6370 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6371 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6372
d6bf3b14 6373 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6374 gcc_assert (ior_code != CODE_FOR_nothing);
6375 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6376 if (dmode != dest_mode)
6377 {
6378 rtx temp = gen_reg_rtx (dest_mode);
6379 convert_move (temp, mask, 0);
6380 return temp;
6381 }
6382 return mask;
6383 }
6384 break;
6385 default:
6386 gcc_unreachable ();
6387 }
6388
6389 /* You only get two chances. */
6390 if (try_again)
6391 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6392
6393 gcc_assert (vec_cmp_insn != -1);
6394
6395 if (swap_operands)
6396 {
6397 rtx tmp;
6398 tmp = op0;
6399 op0 = op1;
6400 op1 = tmp;
6401 }
6402 }
6403
6404 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6405 if (dmode != dest_mode)
6406 {
6407 rtx temp = gen_reg_rtx (dest_mode);
6408 convert_move (temp, mask, 0);
6409 return temp;
6410 }
6411 return mask;
6412}
6413
6414
6415/* Emit vector conditional expression.
6416 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6417 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6418
6419int
6420spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6421 rtx cond, rtx cc_op0, rtx cc_op1)
6422{
6423 enum machine_mode dest_mode = GET_MODE (dest);
6424 enum rtx_code rcode = GET_CODE (cond);
6425 rtx mask;
6426
6427 /* Get the vector mask for the given relational operations. */
6428 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6429
6430 emit_insn(gen_selb (dest, op2, op1, mask));
6431
6432 return 1;
6433}
6434
6352eedf 6435static rtx
6436spu_force_reg (enum machine_mode mode, rtx op)
6437{
6438 rtx x, r;
6439 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6440 {
6441 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6442 || GET_MODE (op) == BLKmode)
6443 return force_reg (mode, convert_to_mode (mode, op, 0));
6444 abort ();
6445 }
6446
6447 r = force_reg (GET_MODE (op), op);
6448 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6449 {
6450 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6451 if (x)
6452 return x;
6453 }
6454
6455 x = gen_reg_rtx (mode);
6456 emit_insn (gen_spu_convert (x, r));
6457 return x;
6458}
6459
6460static void
6461spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6462{
6463 HOST_WIDE_INT v = 0;
6464 int lsbits;
6465 /* Check the range of immediate operands. */
6466 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6467 {
6468 int range = p - SPU_BTI_7;
5df189be 6469
6470 if (!CONSTANT_P (op))
bf776685 6471 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6472 d->name,
6473 spu_builtin_range[range].low, spu_builtin_range[range].high);
6474
6475 if (GET_CODE (op) == CONST
6476 && (GET_CODE (XEXP (op, 0)) == PLUS
6477 || GET_CODE (XEXP (op, 0)) == MINUS))
6478 {
6479 v = INTVAL (XEXP (XEXP (op, 0), 1));
6480 op = XEXP (XEXP (op, 0), 0);
6481 }
6482 else if (GET_CODE (op) == CONST_INT)
6483 v = INTVAL (op);
5df189be 6484 else if (GET_CODE (op) == CONST_VECTOR
6485 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6486 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6487
6488 /* The default for v is 0 which is valid in every range. */
6489 if (v < spu_builtin_range[range].low
6490 || v > spu_builtin_range[range].high)
bf776685 6491 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6492 d->name,
6493 spu_builtin_range[range].low, spu_builtin_range[range].high,
6494 v);
6352eedf 6495
6496 switch (p)
6497 {
6498 case SPU_BTI_S10_4:
6499 lsbits = 4;
6500 break;
6501 case SPU_BTI_U16_2:
 6502	  /* This is only used in lqa and stqa.  Even though the insns
 6503	     encode 16 bits of the address (all but the 2 least
 6504	     significant), only 14 bits are used because the address is
 6505	     masked to be 16-byte aligned. */
6506 lsbits = 4;
6507 break;
6508 case SPU_BTI_S16_2:
6509 /* This is used for lqr and stqr. */
6510 lsbits = 2;
6511 break;
6512 default:
6513 lsbits = 0;
6514 }
6515
6516 if (GET_CODE (op) == LABEL_REF
6517 || (GET_CODE (op) == SYMBOL_REF
6518 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6519 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6520 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6521 d->name);
6522 }
6523}
6524
6525
70ca06f8 6526static int
5df189be 6527expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6528 rtx target, rtx ops[])
6529{
bc620c5c 6530 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6531 int i = 0, a;
6352eedf 6532
6533 /* Expand the arguments into rtl. */
6534
6535 if (d->parm[0] != SPU_BTI_VOID)
6536 ops[i++] = target;
6537
70ca06f8 6538 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6539 {
5df189be 6540 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6541 if (arg == 0)
6542 abort ();
b9c74b4d 6543 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6544 }
70ca06f8 6545
32f79657 6546 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6547 return i;
6352eedf 6548}
6549
6550static rtx
6551spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6552 tree exp, rtx target)
6352eedf 6553{
6554 rtx pat;
6555 rtx ops[8];
bc620c5c 6556 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6557 enum machine_mode mode, tmode;
6558 int i, p;
70ca06f8 6559 int n_operands;
6352eedf 6560 tree return_type;
6561
6562 /* Set up ops[] with values from arglist. */
70ca06f8 6563 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6564
6565 /* Handle the target operand which must be operand 0. */
6566 i = 0;
6567 if (d->parm[0] != SPU_BTI_VOID)
6568 {
6569
 6570	  /* We prefer the mode specified for the match_operand; otherwise
 6571	     use the mode from the builtin function prototype. */
6572 tmode = insn_data[d->icode].operand[0].mode;
6573 if (tmode == VOIDmode)
6574 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6575
 6576	  /* Try to use target, because not using it can lead to extra copies,
 6577	     and when we are using all of the registers extra copies lead
 6578	     to extra spills. */
6579 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6580 ops[0] = target;
6581 else
6582 target = ops[0] = gen_reg_rtx (tmode);
6583
6584 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6585 abort ();
6586
6587 i++;
6588 }
6589
a76866d3 6590 if (d->fcode == SPU_MASK_FOR_LOAD)
6591 {
6592 enum machine_mode mode = insn_data[icode].operand[1].mode;
6593 tree arg;
6594 rtx addr, op, pat;
6595
6596 /* get addr */
5df189be 6597 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6598 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6599 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6600 addr = memory_address (mode, op);
6601
6602 /* negate addr */
6603 op = gen_reg_rtx (GET_MODE (addr));
6604 emit_insn (gen_rtx_SET (VOIDmode, op,
6605 gen_rtx_NEG (GET_MODE (addr), addr)));
6606 op = gen_rtx_MEM (mode, op);
6607
6608 pat = GEN_FCN (icode) (target, op);
6609 if (!pat)
6610 return 0;
6611 emit_insn (pat);
6612 return target;
6613 }
6614
6352eedf 6615	  /* Ignore align_hint, but still expand its args in case they have
6616 side effects. */
6617 if (icode == CODE_FOR_spu_align_hint)
6618 return 0;
6619
6620 /* Handle the rest of the operands. */
70ca06f8 6621 for (p = 1; i < n_operands; i++, p++)
6352eedf 6622 {
6623 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6624 mode = insn_data[d->icode].operand[i].mode;
6625 else
6626 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6627
6628 /* mode can be VOIDmode here for labels */
6629
6630 /* For specific intrinsics with an immediate operand, e.g.,
6631 si_ai(), we sometimes need to convert the scalar argument to a
6632 vector argument by splatting the scalar. */
6633 if (VECTOR_MODE_P (mode)
6634 && (GET_CODE (ops[i]) == CONST_INT
6635 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6636 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6637 {
6638 if (GET_CODE (ops[i]) == CONST_INT)
6639 ops[i] = spu_const (mode, INTVAL (ops[i]));
6640 else
6641 {
6642 rtx reg = gen_reg_rtx (mode);
6643 enum machine_mode imode = GET_MODE_INNER (mode);
6644 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6645 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6646 if (imode != GET_MODE (ops[i]))
6647 ops[i] = convert_to_mode (imode, ops[i],
6648 TYPE_UNSIGNED (spu_builtin_types
6649 [d->parm[i]]));
6650 emit_insn (gen_spu_splats (reg, ops[i]));
6651 ops[i] = reg;
6652 }
6653 }
6654
5df189be 6655 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6656
6352eedf 6657 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6658 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6659 }
6660
70ca06f8 6661 switch (n_operands)
6352eedf 6662 {
6663 case 0:
6664 pat = GEN_FCN (icode) (0);
6665 break;
6666 case 1:
6667 pat = GEN_FCN (icode) (ops[0]);
6668 break;
6669 case 2:
6670 pat = GEN_FCN (icode) (ops[0], ops[1]);
6671 break;
6672 case 3:
6673 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6674 break;
6675 case 4:
6676 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6677 break;
6678 case 5:
6679 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6680 break;
6681 case 6:
6682 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6683 break;
6684 default:
6685 abort ();
6686 }
6687
6688 if (!pat)
6689 abort ();
6690
6691 if (d->type == B_CALL || d->type == B_BISLED)
6692 emit_call_insn (pat);
6693 else if (d->type == B_JUMP)
6694 {
6695 emit_jump_insn (pat);
6696 emit_barrier ();
6697 }
6698 else
6699 emit_insn (pat);
6700
6701 return_type = spu_builtin_types[d->parm[0]];
6702 if (d->parm[0] != SPU_BTI_VOID
6703 && GET_MODE (target) != TYPE_MODE (return_type))
6704 {
 6705	      /* target is the return value.  It should always have the mode of
 6706	         the builtin function prototype. */
6707 target = spu_force_reg (TYPE_MODE (return_type), target);
6708 }
6709
6710 return target;
6711}
6712
6713rtx
6714spu_expand_builtin (tree exp,
6715 rtx target,
6716 rtx subtarget ATTRIBUTE_UNUSED,
6717 enum machine_mode mode ATTRIBUTE_UNUSED,
6718 int ignore ATTRIBUTE_UNUSED)
6719{
5df189be 6720 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6721 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6722 struct spu_builtin_description *d;
6723
6724 if (fcode < NUM_SPU_BUILTINS)
6725 {
6726 d = &spu_builtins[fcode];
6727
5df189be 6728 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6729 }
6730 abort ();
6731}
6732
e99f512d 6733/* Implement targetm.vectorize.builtin_mul_widen_even. */
6734static tree
6735spu_builtin_mul_widen_even (tree type)
6736{
e99f512d 6737 switch (TYPE_MODE (type))
6738 {
6739 case V8HImode:
6740 if (TYPE_UNSIGNED (type))
0c5c4d59 6741 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6742 else
0c5c4d59 6743 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6744 break;
6745 default:
6746 return NULL_TREE;
6747 }
6748}
6749
6750/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6751static tree
6752spu_builtin_mul_widen_odd (tree type)
6753{
6754 switch (TYPE_MODE (type))
6755 {
6756 case V8HImode:
6757 if (TYPE_UNSIGNED (type))
0c5c4d59 6758 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6759 else
0c5c4d59 6760 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6761 break;
6762 default:
6763 return NULL_TREE;
6764 }
6765}
6766
a76866d3 6767/* Implement targetm.vectorize.builtin_mask_for_load. */
6768static tree
6769spu_builtin_mask_for_load (void)
6770{
0c5c4d59 6771 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6772}
5df189be 6773
a28df51d 6774/* Implement targetm.vectorize.builtin_vectorization_cost. */
6775static int
0822b158 6776spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6777 tree vectype ATTRIBUTE_UNUSED,
6778 int misalign ATTRIBUTE_UNUSED)
559093aa 6779{
6780 switch (type_of_cost)
6781 {
6782 case scalar_stmt:
6783 case vector_stmt:
6784 case vector_load:
6785 case vector_store:
6786 case vec_to_scalar:
6787 case scalar_to_vec:
6788 case cond_branch_not_taken:
6789 case vec_perm:
6790 return 1;
6791
6792 case scalar_store:
6793 return 10;
6794
6795 case scalar_load:
6796 /* Load + rotate. */
6797 return 2;
6798
6799 case unaligned_load:
6800 return 2;
6801
6802 case cond_branch_taken:
6803 return 6;
6804
6805 default:
6806 gcc_unreachable ();
6807 }
a28df51d 6808}
6809
0e87db76 6810/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6811   after applying N iterations.  This routine does not determine
 6812   how many iterations are required to reach the desired alignment.  */
6813
6814static bool
a9f1838b 6815spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6816{
6817 if (is_packed)
6818 return false;
6819
6820 /* All other types are naturally aligned. */
6821 return true;
6822}
6823
a0515226 6824/* Implement targetm.vectorize.builtin_vec_perm. */
6825tree
6826spu_builtin_vec_perm (tree type, tree *mask_element_type)
6827{
a0515226 6828 *mask_element_type = unsigned_char_type_node;
6829
6830 switch (TYPE_MODE (type))
6831 {
6832 case V16QImode:
6833 if (TYPE_UNSIGNED (type))
0c5c4d59 6834 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6835 else
0c5c4d59 6836 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6837
6838 case V8HImode:
6839 if (TYPE_UNSIGNED (type))
0c5c4d59 6840 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6841 else
0c5c4d59 6842 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6843
6844 case V4SImode:
6845 if (TYPE_UNSIGNED (type))
0c5c4d59 6846 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6847 else
0c5c4d59 6848 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6849
6850 case V2DImode:
6851 if (TYPE_UNSIGNED (type))
0c5c4d59 6852 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6853 else
0c5c4d59 6854 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6855
6856 case V4SFmode:
0c5c4d59 6857 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6858
6859 case V2DFmode:
0c5c4d59 6860 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6861
6862 default:
6863 return NULL_TREE;
6864 }
a0515226 6865}
6866
6cf5579e 6867/* Return the appropriate mode for a named address pointer. */
6868static enum machine_mode
6869spu_addr_space_pointer_mode (addr_space_t addrspace)
6870{
6871 switch (addrspace)
6872 {
6873 case ADDR_SPACE_GENERIC:
6874 return ptr_mode;
6875 case ADDR_SPACE_EA:
6876 return EAmode;
6877 default:
6878 gcc_unreachable ();
6879 }
6880}
6881
6882/* Return the appropriate mode for a named address address. */
6883static enum machine_mode
6884spu_addr_space_address_mode (addr_space_t addrspace)
6885{
6886 switch (addrspace)
6887 {
6888 case ADDR_SPACE_GENERIC:
6889 return Pmode;
6890 case ADDR_SPACE_EA:
6891 return EAmode;
6892 default:
6893 gcc_unreachable ();
6894 }
6895}
6896
6897/* Determine if one named address space is a subset of another. */
6898
6899static bool
6900spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6901{
6902 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6903 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6904
6905 if (subset == superset)
6906 return true;
6907
6908 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6909 being subsets but instead as disjoint address spaces. */
6910 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6911 return false;
6912
6913 else
6914 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6915}
6916
6917/* Convert from one address space to another. */
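/* Added note: the conversion adds or subtracts the local-store base loaded
   from the external symbol __ea_local_store; the conditional move zeroes
   that base when OP is zero, so a NULL pointer stays NULL in either
   direction.  */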
6918static rtx
6919spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6920{
6921 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6922 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6923
6924 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6925 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6926
6927 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6928 {
6929 rtx result, ls;
6930
6931 ls = gen_const_mem (DImode,
6932 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6933 set_mem_align (ls, 128);
6934
6935 result = gen_reg_rtx (Pmode);
6936 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6937 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6938 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6939 ls, const0_rtx, Pmode, 1);
6940
6941 emit_insn (gen_subsi3 (result, op, ls));
6942
6943 return result;
6944 }
6945
6946 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6947 {
6948 rtx result, ls;
6949
6950 ls = gen_const_mem (DImode,
6951 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6952 set_mem_align (ls, 128);
6953
6954 result = gen_reg_rtx (EAmode);
6955 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6956 op = force_reg (Pmode, op);
6957 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6958 ls, const0_rtx, EAmode, 1);
6959 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6960
6961 if (EAmode == SImode)
6962 emit_insn (gen_addsi3 (result, op, ls));
6963 else
6964 emit_insn (gen_adddi3 (result, op, ls));
6965
6966 return result;
6967 }
6968
6969 else
6970 gcc_unreachable ();
6971}
6972
6973
d52fd16a 6974/* Count the total number of instructions in each pipe and return the
6975 maximum, which is used as the Minimum Iteration Interval (MII)
6976 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6977   A value of -2 means the insn can go in either pipe0 or pipe1.  */
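/* Worked example (added annotation): with 2 dual-pipe insns (t[0]),
   3 pipe0 insns (t[2]) and 1 pipe1 insn (t[3]), the value below is
   MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */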
6978static int
6979spu_sms_res_mii (struct ddg *g)
6980{
6981 int i;
6982 unsigned t[4] = {0, 0, 0, 0};
6983
6984 for (i = 0; i < g->num_nodes; i++)
6985 {
6986 rtx insn = g->nodes[i].insn;
6987 int p = get_pipe (insn) + 2;
6988
1e944a0b 6989 gcc_assert (p >= 0);
6990 gcc_assert (p < 4);
d52fd16a 6991
6992 t[p]++;
6993 if (dump_file && INSN_P (insn))
6994 fprintf (dump_file, "i%d %s %d %d\n",
6995 INSN_UID (insn),
6996 insn_data[INSN_CODE(insn)].name,
6997 p, t[p]);
6998 }
6999 if (dump_file)
7000 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7001
7002 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7003}
7004
7005
5df189be 7006void
7007spu_init_expanders (void)
9d98604b 7008{
5df189be 7009 if (cfun)
9d98604b 7010 {
7011 rtx r0, r1;
 7012	      /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit aligned when
7013 frame_pointer_needed is true. We don't know that until we're
7014 expanding the prologue. */
7015 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7016
7017 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7018 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7019 to be treated as aligned, so generate them here. */
7020 r0 = gen_reg_rtx (SImode);
7021 r1 = gen_reg_rtx (SImode);
7022 mark_reg_pointer (r0, 128);
7023 mark_reg_pointer (r1, 128);
7024 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7025 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7026 }
ea32e033 7027}
7028
7029static enum machine_mode
7030spu_libgcc_cmp_return_mode (void)
7031{
7032
7033/* For SPU, word mode is TImode, so it is better to use SImode
 7034   for compare returns.  */
7035 return SImode;
7036}
7037
7038static enum machine_mode
7039spu_libgcc_shift_count_mode (void)
7040{
7041/* For SPU, word mode is TImode, so it is better to use SImode
 7042   for shift counts.  */
7043 return SImode;
7044}
5a976006 7045
7046/* An early place to adjust some flags after GCC has finished processing
 7047   them.  */
7048static void
7049asm_file_start (void)
7050{
5a976006 7051 default_file_start ();
7052}
7053
a08dfd55 7054/* Implement targetm.section_type_flags. */
7055static unsigned int
7056spu_section_type_flags (tree decl, const char *name, int reloc)
7057{
7058 /* .toe needs to have type @nobits. */
7059 if (strcmp (name, ".toe") == 0)
7060 return SECTION_BSS;
6cf5579e 7061 /* Don't load _ea into the current address space. */
7062 if (strcmp (name, "._ea") == 0)
7063 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7064 return default_section_type_flags (decl, name, reloc);
7065}
c2233b46 7066
6cf5579e 7067/* Implement targetm.select_section. */
7068static section *
7069spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7070{
7071 /* Variables and constants defined in the __ea address space
7072 go into a special section named "._ea". */
7073 if (TREE_TYPE (decl) != error_mark_node
7074 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7075 {
7076 /* We might get called with string constants, but get_named_section
7077 doesn't like them as they are not DECLs. Also, we need to set
7078 flags in that case. */
7079 if (!DECL_P (decl))
7080 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7081
7082 return get_named_section (decl, "._ea", reloc);
7083 }
7084
7085 return default_elf_select_section (decl, reloc, align);
7086}
7087
7088/* Implement targetm.unique_section. */
7089static void
7090spu_unique_section (tree decl, int reloc)
7091{
7092 /* We don't support unique section names in the __ea address
7093 space for now. */
7094 if (TREE_TYPE (decl) != error_mark_node
7095 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7096 return;
7097
7098 default_unique_section (decl, reloc);
7099}
7100
56c7bfc2 7101/* Generate a constant or register which contains 2^SCALE. We assume
7102 the result is valid for MODE. Currently, MODE must be V4SFmode and
7103 SCALE must be SImode. */
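/* Worked example (added annotation): for SCALE == 3 the constant path
   computes exp = 127 + 3 = 0x82, so each 4-byte lane of the result is
   { 0x82 >> 1, (0x82 << 7) & 0xff, 0, 0 } = { 0x41, 0x00, 0x00, 0x00 },
   i.e. the big-endian IEEE-754 single 0x41000000 == 8.0f == 2^3.  */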
7104rtx
7105spu_gen_exp2 (enum machine_mode mode, rtx scale)
7106{
7107 gcc_assert (mode == V4SFmode);
7108 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7109 if (GET_CODE (scale) != CONST_INT)
7110 {
7111 /* unsigned int exp = (127 + scale) << 23;
7112 __vector float m = (__vector float) spu_splats (exp); */
7113 rtx reg = force_reg (SImode, scale);
7114 rtx exp = gen_reg_rtx (SImode);
7115 rtx mul = gen_reg_rtx (mode);
7116 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7117 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7118 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7119 return mul;
7120 }
7121 else
7122 {
7123 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7124 unsigned char arr[16];
7125 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7126 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7127 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7128 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7129 return array_to_constant (mode, arr);
7130 }
7131}
7132
9d98604b 7133/* After reload, just change the convert into a move instruction
7134 or a dead instruction. */
7135void
7136spu_split_convert (rtx ops[])
7137{
7138 if (REGNO (ops[0]) == REGNO (ops[1]))
7139 emit_note (NOTE_INSN_DELETED);
7140 else
7141 {
7142 /* Use TImode always as this might help hard reg copyprop. */
7143 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7144 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7145 emit_insn (gen_move_insn (op0, op1));
7146 }
7147}
7148
b3878a6c 7149void
4cbad5bb 7150spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7151{
7152 fprintf (file, "# profile\n");
7153 fprintf (file, "brsl $75, _mcount\n");
7154}
7155
329c1e4e 7156/* Implement targetm.ref_may_alias_errno. */
7157static bool
7158spu_ref_may_alias_errno (ao_ref *ref)
7159{
7160 tree base = ao_ref_base (ref);
7161
 7162	  /* With SPU newlib, errno is defined as something like
 7163	         _impure_data._errno
 7164	     The default implementation of this target macro does not
 7165	     recognize such expressions, so we special-case them here.  */
7166
7167 if (TREE_CODE (base) == VAR_DECL
7168 && !TREE_STATIC (base)
7169 && DECL_EXTERNAL (base)
7170 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7171 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7172 "_impure_data") == 0
7173 /* _errno is the first member of _impure_data. */
7174 && ref->offset == 0)
7175 return true;
7176
7177 return default_ref_may_alias_errno (ref);
7178}
7179
f17d2d13 7180/* Output thunk to FILE that implements a C++ virtual function call (with
7181 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7182 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7183 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7184 relative to the resulting this pointer. */
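/* Added illustration, assuming the this pointer arrives in the first
   argument register ($3 on SPU) and the value is not returned in memory:
   for DELTA == 4 and VCALL_OFFSET == 0 the emitted thunk reduces to

	ai	$3,$3,4
	br	<function>  */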
7185
7186static void
7187spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7188 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7189 tree function)
7190{
7191 rtx op[8];
7192
7193 /* Make sure unwind info is emitted for the thunk if needed. */
7194 final_start_function (emit_barrier (), file, 1);
7195
7196 /* Operand 0 is the target function. */
7197 op[0] = XEXP (DECL_RTL (function), 0);
7198
7199 /* Operand 1 is the 'this' pointer. */
7200 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7201 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7202 else
7203 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7204
7205 /* Operands 2/3 are the low/high halfwords of delta. */
7206 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7207 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7208
7209 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7210 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7211 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7212
7213 /* Operands 6/7 are temporary registers. */
7214 op[6] = gen_rtx_REG (Pmode, 79);
7215 op[7] = gen_rtx_REG (Pmode, 78);
7216
7217 /* Add DELTA to this pointer. */
7218 if (delta)
7219 {
7220 if (delta >= -0x200 && delta < 0x200)
7221 output_asm_insn ("ai\t%1,%1,%2", op);
7222 else if (delta >= -0x8000 && delta < 0x8000)
7223 {
7224 output_asm_insn ("il\t%6,%2", op);
7225 output_asm_insn ("a\t%1,%1,%6", op);
7226 }
7227 else
7228 {
7229 output_asm_insn ("ilhu\t%6,%3", op);
7230 output_asm_insn ("iohl\t%6,%2", op);
7231 output_asm_insn ("a\t%1,%1,%6", op);
7232 }
7233 }
7234
7235 /* Perform vcall adjustment. */
7236 if (vcall_offset)
7237 {
7238 output_asm_insn ("lqd\t%7,0(%1)", op);
7239 output_asm_insn ("rotqby\t%7,%7,%1", op);
7240
7241 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7242 output_asm_insn ("ai\t%7,%7,%4", op);
7243 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7244 {
7245 output_asm_insn ("il\t%6,%4", op);
7246 output_asm_insn ("a\t%7,%7,%6", op);
7247 }
7248 else
7249 {
7250 output_asm_insn ("ilhu\t%6,%5", op);
7251 output_asm_insn ("iohl\t%6,%4", op);
7252 output_asm_insn ("a\t%7,%7,%6", op);
7253 }
7254
7255 output_asm_insn ("lqd\t%6,0(%7)", op);
7256 output_asm_insn ("rotqby\t%6,%6,%7", op);
7257 output_asm_insn ("a\t%1,%1,%6", op);
7258 }
7259
7260 /* Jump to target. */
7261 output_asm_insn ("br\t%0", op);
7262
7263 final_end_function ();
7264}
7265
c2233b46 7266#include "gt-spu.h"