842ae815 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
2 Free Software Foundation, Inc.
644459d0 3
4 This file is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
038d1e19 6 Software Foundation; either version 3 of the License, or (at your option)
644459d0 7 any later version.
8
9 This file is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
038d1e19 15 along with GCC; see the file COPYING3. If not see
16 <http://www.gnu.org/licenses/>. */
644459d0 17
18#include "config.h"
19#include "system.h"
20#include "coretypes.h"
21#include "tm.h"
22#include "rtl.h"
23#include "regs.h"
24#include "hard-reg-set.h"
644459d0 25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
0b205f4c 39#include "diagnostic-core.h"
644459d0 40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
644459d0 50#include "machmode.h"
75a70cf9 51#include "gimple.h"
644459d0 52#include "tm-constrs.h"
d52fd16a 53#include "ddg.h"
5a976006 54#include "sbitmap.h"
55#include "timevar.h"
56#include "df.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
c2233b46 59
60enum spu_builtin_type_index
61{
62 SPU_BTI_END_OF_PARAMS,
63
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
75
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
78
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
92
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
98
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
103
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
106
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
109
110 SPU_BTI_MAX
111};
112
113#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
123
124static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
125
6352eedf 126struct spu_builtin_range
127{
128 int low, high;
129};
130
131static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
144};
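/* For illustration, reading the table above: SPU_BTI_U7 maps to
   {0, 0x7f}, so an argument of that builtin type must be a constant in
   0..127, while SPU_BTI_S10 maps to {-0x200, 0x1ff}, i.e. a signed
   10-bit immediate.  */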
145
644459d0 146\f
147/* Target specific attribute specifications. */
148char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
149
150/* Prototypes and external defs. */
4c834714 151static void spu_option_override (void);
644459d0 152static void spu_init_builtins (void);
e6925042 153static tree spu_builtin_decl (unsigned, bool);
b62e30b8 154static bool spu_scalar_mode_supported_p (enum machine_mode mode);
155static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 156static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 157static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158 bool, addr_space_t);
644459d0 159static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160static rtx get_pic_reg (void);
161static int need_to_save_reg (int regno, int saving);
162static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165 rtx scratch);
166static void emit_nop_for_insn (rtx insn);
167static bool insn_clobbers_hbr (rtx insn);
168static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 169 int distance, sbitmap blocks);
5474166e 170static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171 enum machine_mode dmode);
644459d0 172static rtx get_branch_target (rtx branch);
644459d0 173static void spu_machine_dependent_reorg (void);
174static int spu_sched_issue_rate (void);
175static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176 int can_issue_more);
177static int get_pipe (rtx insn);
644459d0 178static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 179static void spu_sched_init_global (FILE *, int, int);
180static void spu_sched_init (FILE *, int, int);
181static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 182static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183 int flags,
b62e30b8 184 bool *no_add_attrs);
644459d0 185static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186 int flags,
b62e30b8 187 bool *no_add_attrs);
644459d0 188static int spu_naked_function_p (tree func);
39cba157 189static bool spu_pass_by_reference (cumulative_args_t cum,
190 enum machine_mode mode,
b62e30b8 191 const_tree type, bool named);
39cba157 192static rtx spu_function_arg (cumulative_args_t cum, enum machine_mode mode,
ee9034d4 193 const_tree type, bool named);
39cba157 194static void spu_function_arg_advance (cumulative_args_t cum,
195 enum machine_mode mode,
ee9034d4 196 const_tree type, bool named);
644459d0 197static tree spu_build_builtin_va_list (void);
8a58ed0a 198static void spu_va_start (tree, rtx);
75a70cf9 199static tree spu_gimplify_va_arg_expr (tree valist, tree type,
200 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 201static int store_with_one_insn_p (rtx mem);
644459d0 202static int mem_is_padded_component_ref (rtx x);
9d98604b 203static int reg_aligned_for_addr (rtx x);
644459d0 204static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
205static void spu_asm_globalize_label (FILE * file, const char *name);
20d892d1 206static bool spu_rtx_costs (rtx x, int code, int outer_code, int opno,
b62e30b8 207 int *total, bool speed);
208static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 209static void spu_init_libfuncs (void);
fb80456a 210static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 211static void fix_range (const char *);
69ced2d6 212static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 213static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 214static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
215 addr_space_t);
e99f512d 216static tree spu_builtin_mul_widen_even (tree);
217static tree spu_builtin_mul_widen_odd (tree);
a76866d3 218static tree spu_builtin_mask_for_load (void);
0822b158 219static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 220static bool spu_vector_alignment_reachable (const_tree, bool);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
a08dfd55 226static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 227static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
228static void spu_unique_section (tree, int);
9d98604b 229static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 230static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 231static void spu_conditional_register_usage (void);
329c1e4e 232static bool spu_ref_may_alias_errno (ao_ref *);
f17d2d13 233static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
234 HOST_WIDE_INT, tree);
644459d0 235
5474166e 236/* Which instruction set architecture to use. */
237int spu_arch;
 238/* Which CPU we are tuning for. */
239int spu_tune;
240
5a976006 241/* The hardware requires 8 insns between a hint and the branch it
 242 affects. This variable describes how many rtl instructions the
243 compiler needs to see before inserting a hint, and then the compiler
244 will insert enough nops to make it at least 8 insns. The default is
 245 for the compiler to allow up to 2 nops to be emitted. The nops are
246 inserted in pairs, so we round down. */
247int spu_hint_dist = (8*4) - (2*4);
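/* For example, with the default of 2 nops: 8 insns * 4 bytes minus
   2 nops * 4 bytes gives a hint distance of 24 bytes.
   spu_option_override recomputes this below from spu_max_nops.  */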
248
644459d0 249enum spu_immediate {
250 SPU_NONE,
251 SPU_IL,
252 SPU_ILA,
253 SPU_ILH,
254 SPU_ILHU,
255 SPU_ORI,
256 SPU_ORHI,
257 SPU_ORBI,
99369027 258 SPU_IOHL
644459d0 259};
dea01258 260enum immediate_class
261{
262 IC_POOL, /* constant pool */
263 IC_IL1, /* one il* instruction */
264 IC_IL2, /* both ilhu and iohl instructions */
265 IC_IL1s, /* one il* instruction */
266 IC_IL2s, /* both ilhu and iohl instructions */
267 IC_FSMBI, /* the fsmbi instruction */
268 IC_CPAT, /* one of the c*d instructions */
5df189be 269 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 270};
644459d0 271
272static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
273static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 274static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
275static enum immediate_class classify_immediate (rtx op,
276 enum machine_mode mode);
644459d0 277
1bd43494 278static enum machine_mode spu_unwind_word_mode (void);
279
ea32e033 280static enum machine_mode
281spu_libgcc_cmp_return_mode (void);
282
283static enum machine_mode
284spu_libgcc_shift_count_mode (void);
6cf5579e 285
286/* Pointer mode for __ea references. */
287#define EAmode (spu_ea_model != 32 ? DImode : SImode)
288
ef51d1e3 289\f
290/* Table of machine attributes. */
291static const struct attribute_spec spu_attribute_table[] =
292{
ac86af5d 293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294 affects_type_identity } */
295 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
296 false },
297 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
298 false },
299 { NULL, 0, 0, false, false, false, NULL, false }
ef51d1e3 300};
644459d0 301\f
302/* TARGET overrides. */
303
6cf5579e 304#undef TARGET_ADDR_SPACE_POINTER_MODE
305#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
306
307#undef TARGET_ADDR_SPACE_ADDRESS_MODE
308#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
309
310#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
313
314#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
316
317#undef TARGET_ADDR_SPACE_SUBSET_P
318#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
319
320#undef TARGET_ADDR_SPACE_CONVERT
321#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
322
644459d0 323#undef TARGET_INIT_BUILTINS
324#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 325#undef TARGET_BUILTIN_DECL
326#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 327
644459d0 328#undef TARGET_EXPAND_BUILTIN
329#define TARGET_EXPAND_BUILTIN spu_expand_builtin
330
1bd43494 331#undef TARGET_UNWIND_WORD_MODE
332#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 333
41e3a0c7 334#undef TARGET_LEGITIMIZE_ADDRESS
335#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
336
6cf5579e 337/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340#undef TARGET_ASM_UNALIGNED_SI_OP
341#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
342
343#undef TARGET_ASM_ALIGNED_DI_OP
344#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
345
644459d0 346/* The .8byte directive doesn't seem to work well for a 32 bit
347 architecture. */
348#undef TARGET_ASM_UNALIGNED_DI_OP
349#define TARGET_ASM_UNALIGNED_DI_OP NULL
350
351#undef TARGET_RTX_COSTS
352#define TARGET_RTX_COSTS spu_rtx_costs
353
354#undef TARGET_ADDRESS_COST
f529eb25 355#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 356
357#undef TARGET_SCHED_ISSUE_RATE
358#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
359
5a976006 360#undef TARGET_SCHED_INIT_GLOBAL
361#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
362
363#undef TARGET_SCHED_INIT
364#define TARGET_SCHED_INIT spu_sched_init
365
644459d0 366#undef TARGET_SCHED_VARIABLE_ISSUE
367#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
368
5a976006 369#undef TARGET_SCHED_REORDER
370#define TARGET_SCHED_REORDER spu_sched_reorder
371
372#undef TARGET_SCHED_REORDER2
373#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 374
375#undef TARGET_SCHED_ADJUST_COST
376#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
377
644459d0 378#undef TARGET_ATTRIBUTE_TABLE
379#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
380
381#undef TARGET_ASM_INTEGER
382#define TARGET_ASM_INTEGER spu_assemble_integer
383
384#undef TARGET_SCALAR_MODE_SUPPORTED_P
385#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
386
387#undef TARGET_VECTOR_MODE_SUPPORTED_P
388#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
389
390#undef TARGET_FUNCTION_OK_FOR_SIBCALL
391#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
392
393#undef TARGET_ASM_GLOBALIZE_LABEL
394#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
395
396#undef TARGET_PASS_BY_REFERENCE
397#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
398
ee9034d4 399#undef TARGET_FUNCTION_ARG
400#define TARGET_FUNCTION_ARG spu_function_arg
401
402#undef TARGET_FUNCTION_ARG_ADVANCE
403#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
404
644459d0 405#undef TARGET_MUST_PASS_IN_STACK
406#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
408#undef TARGET_BUILD_BUILTIN_VA_LIST
409#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
410
8a58ed0a 411#undef TARGET_EXPAND_BUILTIN_VA_START
412#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
413
39cba157 414static void spu_setup_incoming_varargs (cumulative_args_t cum,
415 enum machine_mode mode,
416 tree type, int *pretend_size,
417 int no_rtl);
644459d0 418#undef TARGET_SETUP_INCOMING_VARARGS
419#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
420
421#undef TARGET_MACHINE_DEPENDENT_REORG
422#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
423
424#undef TARGET_GIMPLIFY_VA_ARG_EXPR
425#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
426
644459d0 427#undef TARGET_INIT_LIBFUNCS
428#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
429
430#undef TARGET_RETURN_IN_MEMORY
431#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
432
69ced2d6 433#undef TARGET_ENCODE_SECTION_INFO
434#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
435
e99f512d 436#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
437#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
438
439#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
440#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
441
a76866d3 442#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
443#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
444
a28df51d 445#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
446#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
447
202d6e5f 448#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
449#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 450
ea32e033 451#undef TARGET_LIBGCC_CMP_RETURN_MODE
452#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
453
454#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
455#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
456
d52fd16a 457#undef TARGET_SCHED_SMS_RES_MII
458#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
459
a08dfd55 460#undef TARGET_SECTION_TYPE_FLAGS
461#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
462
6cf5579e 463#undef TARGET_ASM_SELECT_SECTION
464#define TARGET_ASM_SELECT_SECTION spu_select_section
465
466#undef TARGET_ASM_UNIQUE_SECTION
467#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
468
fd50b071 469#undef TARGET_LEGITIMATE_ADDRESS_P
470#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
471
ca316360 472#undef TARGET_LEGITIMATE_CONSTANT_P
473#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
474
e96f2783 475#undef TARGET_TRAMPOLINE_INIT
476#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
477
4c834714 478#undef TARGET_OPTION_OVERRIDE
479#define TARGET_OPTION_OVERRIDE spu_option_override
480
b2d7ede1 481#undef TARGET_CONDITIONAL_REGISTER_USAGE
482#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
483
329c1e4e 484#undef TARGET_REF_MAY_ALIAS_ERRNO
485#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
486
f17d2d13 487#undef TARGET_ASM_OUTPUT_MI_THUNK
488#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
489#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
490#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
491
8a42230a 492/* Variable tracking should be run after all optimizations which
493 change order of insns. It also needs a valid CFG. */
494#undef TARGET_DELAY_VARTRACK
495#define TARGET_DELAY_VARTRACK true
496
644459d0 497struct gcc_target targetm = TARGET_INITIALIZER;
498
5eb28709 499/* Define the structure for the machine field in struct function. */
500struct GTY(()) machine_function
501{
502 /* Register to use for PIC accesses. */
503 rtx pic_reg;
504};
505
506/* How to allocate a 'struct machine_function'. */
507static struct machine_function *
508spu_init_machine_status (void)
509{
510 return ggc_alloc_cleared_machine_function ();
511}
512
4c834714 513/* Implement TARGET_OPTION_OVERRIDE. */
514static void
515spu_option_override (void)
644459d0 516{
5eb28709 517 /* Set up function hooks. */
518 init_machine_status = spu_init_machine_status;
519
14d408d9 520 /* Small loops will be unpeeled at -O3. For SPU it is more important
521 to keep code small by default. */
686e2769 522 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 523 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 524 global_options.x_param_values,
525 global_options_set.x_param_values);
14d408d9 526
644459d0 527 flag_omit_frame_pointer = 1;
528
5a976006 529 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 530 if (align_functions < 8)
531 align_functions = 8;
c7b91b14 532
5a976006 533 spu_hint_dist = 8*4 - spu_max_nops*4;
534 if (spu_hint_dist < 0)
535 spu_hint_dist = 0;
536
c7b91b14 537 if (spu_fixed_range_string)
538 fix_range (spu_fixed_range_string);
5474166e 539
540 /* Determine processor architectural level. */
541 if (spu_arch_string)
542 {
543 if (strcmp (&spu_arch_string[0], "cell") == 0)
544 spu_arch = PROCESSOR_CELL;
545 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
546 spu_arch = PROCESSOR_CELLEDP;
547 else
8e181c9d 548 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 549 }
550
551 /* Determine processor to tune for. */
552 if (spu_tune_string)
553 {
554 if (strcmp (&spu_tune_string[0], "cell") == 0)
555 spu_tune = PROCESSOR_CELL;
556 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
557 spu_tune = PROCESSOR_CELLEDP;
558 else
8e181c9d 559 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 560 }
98bbec1e 561
13684256 562 /* Change defaults according to the processor architecture. */
563 if (spu_arch == PROCESSOR_CELLEDP)
564 {
565 /* If no command line option has been otherwise specified, change
566 the default to -mno-safe-hints on celledp -- only the original
567 Cell/B.E. processors require this workaround. */
568 if (!(target_flags_explicit & MASK_SAFE_HINTS))
569 target_flags &= ~MASK_SAFE_HINTS;
570 }
571
98bbec1e 572 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 573}
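/* For example, under the override above, compiling with -march=celledp
   and no explicit safe-hints option clears MASK_SAFE_HINTS, while
   -march=cell keeps the safe-hints workaround enabled by default.  */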
574\f
575/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
576 struct attribute_spec.handler. */
577
644459d0 578/* True if MODE is valid for the target. By "valid", we mean able to
579 be manipulated in non-trivial ways. In particular, this means all
580 the arithmetic is supported. */
581static bool
582spu_scalar_mode_supported_p (enum machine_mode mode)
583{
584 switch (mode)
585 {
586 case QImode:
587 case HImode:
588 case SImode:
589 case SFmode:
590 case DImode:
591 case TImode:
592 case DFmode:
593 return true;
594
595 default:
596 return false;
597 }
598}
599
600/* Similarly for vector modes. "Supported" here is less strict. At
601 least some operations are supported; need to check optabs or builtins
602 for further details. */
603static bool
604spu_vector_mode_supported_p (enum machine_mode mode)
605{
606 switch (mode)
607 {
608 case V16QImode:
609 case V8HImode:
610 case V4SImode:
611 case V2DImode:
612 case V4SFmode:
613 case V2DFmode:
614 return true;
615
616 default:
617 return false;
618 }
619}
620
621/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
622 least significant bytes of the outer mode. This function returns
 623 TRUE for the SUBREGs where this is correct. */
624int
625valid_subreg (rtx op)
626{
627 enum machine_mode om = GET_MODE (op);
628 enum machine_mode im = GET_MODE (SUBREG_REG (op));
629 return om != VOIDmode && im != VOIDmode
630 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 631 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
632 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 633}
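/* For example: (subreg:SI (reg:HI)) passes this check because both
   modes fit in 4 bytes, and (subreg:V4SI (reg:TI)) passes because both
   are 16 bytes, but a paradoxical (subreg:DI (reg:SI)) does not.  */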
634
 635/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 636 and adjust the start offset. */
644459d0 637static rtx
638adjust_operand (rtx op, HOST_WIDE_INT * start)
639{
640 enum machine_mode mode;
641 int op_size;
38aca5eb 642 /* Strip any paradoxical SUBREG. */
643 if (GET_CODE (op) == SUBREG
644 && (GET_MODE_BITSIZE (GET_MODE (op))
645 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 646 {
647 if (start)
648 *start -=
649 GET_MODE_BITSIZE (GET_MODE (op)) -
650 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
651 op = SUBREG_REG (op);
652 }
 653 /* If it is smaller than SI, ensure an SImode SUBREG is used. */
654 op_size = GET_MODE_BITSIZE (GET_MODE (op));
655 if (op_size < 32)
656 {
657 if (start)
658 *start += 32 - op_size;
659 op_size = 32;
660 }
661 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
662 mode = mode_for_size (op_size, MODE_INT, 0);
663 if (mode != GET_MODE (op))
664 op = gen_rtx_SUBREG (mode, op, 0);
665 return op;
666}
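/* For example: given (subreg:TI (reg:SI)) with *start == 100, the
   paradoxical SUBREG is stripped and *start becomes 100 - (128 - 32)
   == 4, the same bit position within the SImode register.  */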
667
668void
669spu_expand_extv (rtx ops[], int unsignedp)
670{
9d98604b 671 rtx dst = ops[0], src = ops[1];
644459d0 672 HOST_WIDE_INT width = INTVAL (ops[2]);
673 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 674 HOST_WIDE_INT align_mask;
675 rtx s0, s1, mask, r0;
644459d0 676
9d98604b 677 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 678
9d98604b 679 if (MEM_P (src))
644459d0 680 {
9d98604b 681 /* First, determine if we need 1 TImode load or 2. We need only 1
682 if the bits being extracted do not cross the alignment boundary
683 as determined by the MEM and its address. */
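      /* For example, with MEM_ALIGN == 128 a single load suffices for
         start = 8, width = 32 (bits 8..39 lie within one aligned
         quadword), but start = 112, width = 32 needs two loads because
         bits 112..143 straddle the 16-byte boundary.  */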
684
685 align_mask = -MEM_ALIGN (src);
686 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 687 {
9d98604b 688 /* Alignment is sufficient for 1 load. */
689 s0 = gen_reg_rtx (TImode);
690 r0 = spu_expand_load (s0, 0, src, start / 8);
691 start &= 7;
692 if (r0)
693 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 694 }
9d98604b 695 else
696 {
697 /* Need 2 loads. */
698 s0 = gen_reg_rtx (TImode);
699 s1 = gen_reg_rtx (TImode);
700 r0 = spu_expand_load (s0, s1, src, start / 8);
701 start &= 7;
702
703 gcc_assert (start + width <= 128);
704 if (r0)
705 {
706 rtx r1 = gen_reg_rtx (SImode);
707 mask = gen_reg_rtx (TImode);
708 emit_move_insn (mask, GEN_INT (-1));
709 emit_insn (gen_rotqby_ti (s0, s0, r0));
710 emit_insn (gen_rotqby_ti (s1, s1, r0));
711 if (GET_CODE (r0) == CONST_INT)
712 r1 = GEN_INT (INTVAL (r0) & 15);
713 else
714 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
715 emit_insn (gen_shlqby_ti (mask, mask, r1));
716 emit_insn (gen_selb (s0, s1, s0, mask));
717 }
718 }
719
720 }
721 else if (GET_CODE (src) == SUBREG)
722 {
723 rtx r = SUBREG_REG (src);
724 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
725 s0 = gen_reg_rtx (TImode);
726 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
727 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
728 else
729 emit_move_insn (s0, src);
730 }
731 else
732 {
733 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
734 s0 = gen_reg_rtx (TImode);
735 emit_move_insn (s0, src);
644459d0 736 }
737
9d98604b 738 /* Now s0 is TImode and contains the bits to extract at start. */
739
740 if (start)
741 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
742
743 if (128 - width)
f5ff0b21 744 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
644459d0 745
9d98604b 746 emit_move_insn (dst, s0);
644459d0 747}
748
749void
750spu_expand_insv (rtx ops[])
751{
752 HOST_WIDE_INT width = INTVAL (ops[1]);
753 HOST_WIDE_INT start = INTVAL (ops[2]);
754 HOST_WIDE_INT maskbits;
4cbad5bb 755 enum machine_mode dst_mode;
644459d0 756 rtx dst = ops[0], src = ops[3];
4cbad5bb 757 int dst_size;
644459d0 758 rtx mask;
759 rtx shift_reg;
760 int shift;
761
762
763 if (GET_CODE (ops[0]) == MEM)
764 dst = gen_reg_rtx (TImode);
765 else
766 dst = adjust_operand (dst, &start);
767 dst_mode = GET_MODE (dst);
768 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
769
770 if (CONSTANT_P (src))
771 {
772 enum machine_mode m =
773 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
774 src = force_reg (m, convert_to_mode (m, src, 0));
775 }
776 src = adjust_operand (src, 0);
644459d0 777
778 mask = gen_reg_rtx (dst_mode);
779 shift_reg = gen_reg_rtx (dst_mode);
780 shift = dst_size - start - width;
781
782 /* It's not safe to use subreg here because the compiler assumes
783 that the SUBREG_REG is right justified in the SUBREG. */
784 convert_move (shift_reg, src, 1);
785
786 if (shift > 0)
787 {
788 switch (dst_mode)
789 {
790 case SImode:
791 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
792 break;
793 case DImode:
794 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
795 break;
796 case TImode:
797 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
798 break;
799 default:
800 abort ();
801 }
802 }
803 else if (shift < 0)
804 abort ();
805
806 switch (dst_size)
807 {
808 case 32:
809 maskbits = (-1ll << (32 - width - start));
810 if (start)
811 maskbits += (1ll << (32 - start));
812 emit_move_insn (mask, GEN_INT (maskbits));
813 break;
814 case 64:
815 maskbits = (-1ll << (64 - width - start));
816 if (start)
817 maskbits += (1ll << (64 - start));
818 emit_move_insn (mask, GEN_INT (maskbits));
819 break;
820 case 128:
821 {
822 unsigned char arr[16];
823 int i = start / 8;
824 memset (arr, 0, sizeof (arr));
825 arr[i] = 0xff >> (start & 7);
826 for (i++; i <= (start + width - 1) / 8; i++)
827 arr[i] = 0xff;
828 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
829 emit_move_insn (mask, array_to_constant (TImode, arr));
830 }
831 break;
832 default:
833 abort ();
834 }
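  /* Worked example for the 32-bit case above: inserting width 8 at
     start 4 gives maskbits = (-1 << 20) + (1 << 28) = 0x0ff00000,
     i.e. ones exactly in bit positions 4..11 counting from the most
     significant bit.  */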
835 if (GET_CODE (ops[0]) == MEM)
836 {
644459d0 837 rtx low = gen_reg_rtx (SImode);
644459d0 838 rtx rotl = gen_reg_rtx (SImode);
839 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 840 rtx addr;
841 rtx addr0;
842 rtx addr1;
644459d0 843 rtx mem;
844
9d98604b 845 addr = force_reg (Pmode, XEXP (ops[0], 0));
846 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 847 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
848 emit_insn (gen_negsi2 (rotl, low));
849 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
850 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 851 mem = change_address (ops[0], TImode, addr0);
644459d0 852 set_mem_alias_set (mem, 0);
853 emit_move_insn (dst, mem);
854 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 855 if (start + width > MEM_ALIGN (ops[0]))
856 {
857 rtx shl = gen_reg_rtx (SImode);
858 rtx mask1 = gen_reg_rtx (TImode);
859 rtx dst1 = gen_reg_rtx (TImode);
860 rtx mem1;
9d98604b 861 addr1 = plus_constant (addr, 16);
862 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 863 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
864 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 865 mem1 = change_address (ops[0], TImode, addr1);
644459d0 866 set_mem_alias_set (mem1, 0);
867 emit_move_insn (dst1, mem1);
868 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
869 emit_move_insn (mem1, dst1);
870 }
9d98604b 871 emit_move_insn (mem, dst);
644459d0 872 }
873 else
71cd778d 874 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 875}
876
877
878int
879spu_expand_block_move (rtx ops[])
880{
881 HOST_WIDE_INT bytes, align, offset;
882 rtx src, dst, sreg, dreg, target;
883 int i;
884 if (GET_CODE (ops[2]) != CONST_INT
885 || GET_CODE (ops[3]) != CONST_INT
48eb4342 886 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 887 return 0;
888
889 bytes = INTVAL (ops[2]);
890 align = INTVAL (ops[3]);
891
892 if (bytes <= 0)
893 return 1;
894
895 dst = ops[0];
896 src = ops[1];
897
898 if (align == 16)
899 {
900 for (offset = 0; offset + 16 <= bytes; offset += 16)
901 {
902 dst = adjust_address (ops[0], V16QImode, offset);
903 src = adjust_address (ops[1], V16QImode, offset);
904 emit_move_insn (dst, src);
905 }
906 if (offset < bytes)
907 {
908 rtx mask;
909 unsigned char arr[16] = { 0 };
910 for (i = 0; i < bytes - offset; i++)
911 arr[i] = 0xff;
912 dst = adjust_address (ops[0], V16QImode, offset);
913 src = adjust_address (ops[1], V16QImode, offset);
914 mask = gen_reg_rtx (V16QImode);
915 sreg = gen_reg_rtx (V16QImode);
916 dreg = gen_reg_rtx (V16QImode);
917 target = gen_reg_rtx (V16QImode);
918 emit_move_insn (mask, array_to_constant (V16QImode, arr));
919 emit_move_insn (dreg, dst);
920 emit_move_insn (sreg, src);
921 emit_insn (gen_selb (target, dreg, sreg, mask));
922 emit_move_insn (dst, target);
923 }
924 return 1;
925 }
926 return 0;
927}
928
929enum spu_comp_code
930{ SPU_EQ, SPU_GT, SPU_GTU };
931
5474166e 932int spu_comp_icode[12][3] = {
933 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
934 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
935 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
936 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
937 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
938 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
939 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
940 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
941 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
942 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
943 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
944 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 945};
946
947/* Generate a compare for CODE. Return a brand-new rtx that represents
948 the result of the compare. GCC can figure this out too if we don't
 949 provide all variations of compares, but since GCC always wants to
 950 use WORD_MODE, we can generate better code in most cases if we do
 951 it ourselves. */
952void
74f4459c 953spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 954{
955 int reverse_compare = 0;
956 int reverse_test = 0;
5d70b918 957 rtx compare_result, eq_result;
958 rtx comp_rtx, eq_rtx;
644459d0 959 enum machine_mode comp_mode;
960 enum machine_mode op_mode;
b9c74b4d 961 enum spu_comp_code scode, eq_code;
962 enum insn_code ior_code;
74f4459c 963 enum rtx_code code = GET_CODE (cmp);
964 rtx op0 = XEXP (cmp, 0);
965 rtx op1 = XEXP (cmp, 1);
644459d0 966 int index;
5d70b918 967 int eq_test = 0;
644459d0 968
74f4459c 969 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 970 and so on, to keep the constant in operand 1. */
74f4459c 971 if (GET_CODE (op1) == CONST_INT)
644459d0 972 {
74f4459c 973 HOST_WIDE_INT val = INTVAL (op1) - 1;
974 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 975 switch (code)
976 {
977 case GE:
74f4459c 978 op1 = GEN_INT (val);
644459d0 979 code = GT;
980 break;
981 case LT:
74f4459c 982 op1 = GEN_INT (val);
644459d0 983 code = LE;
984 break;
985 case GEU:
74f4459c 986 op1 = GEN_INT (val);
644459d0 987 code = GTU;
988 break;
989 case LTU:
74f4459c 990 op1 = GEN_INT (val);
644459d0 991 code = LEU;
992 break;
993 default:
994 break;
995 }
996 }
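  /* For example, (x >= 10) is rewritten here as (x > 9) and
     (x ltu 10) as (x leu 9), so the constant stays in operand 1 and a
     single cgt/clgt-style comparison can still be used.  */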
997
686195ea 998 /* However, if we generate an integer result, performing a reverse test
999 would require an extra negation, so avoid that where possible. */
1000 if (GET_CODE (op1) == CONST_INT && is_set == 1)
1001 {
1002 HOST_WIDE_INT val = INTVAL (op1) + 1;
1003 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
1004 switch (code)
1005 {
1006 case LE:
1007 op1 = GEN_INT (val);
1008 code = LT;
1009 break;
1010 case LEU:
1011 op1 = GEN_INT (val);
1012 code = LTU;
1013 break;
1014 default:
1015 break;
1016 }
1017 }
1018
5d70b918 1019 comp_mode = SImode;
74f4459c 1020 op_mode = GET_MODE (op0);
5d70b918 1021
644459d0 1022 switch (code)
1023 {
1024 case GE:
644459d0 1025 scode = SPU_GT;
07027691 1026 if (HONOR_NANS (op_mode))
5d70b918 1027 {
1028 reverse_compare = 0;
1029 reverse_test = 0;
1030 eq_test = 1;
1031 eq_code = SPU_EQ;
1032 }
1033 else
1034 {
1035 reverse_compare = 1;
1036 reverse_test = 1;
1037 }
644459d0 1038 break;
1039 case LE:
644459d0 1040 scode = SPU_GT;
07027691 1041 if (HONOR_NANS (op_mode))
5d70b918 1042 {
1043 reverse_compare = 1;
1044 reverse_test = 0;
1045 eq_test = 1;
1046 eq_code = SPU_EQ;
1047 }
1048 else
1049 {
1050 reverse_compare = 0;
1051 reverse_test = 1;
1052 }
644459d0 1053 break;
1054 case LT:
1055 reverse_compare = 1;
1056 reverse_test = 0;
1057 scode = SPU_GT;
1058 break;
1059 case GEU:
1060 reverse_compare = 1;
1061 reverse_test = 1;
1062 scode = SPU_GTU;
1063 break;
1064 case LEU:
1065 reverse_compare = 0;
1066 reverse_test = 1;
1067 scode = SPU_GTU;
1068 break;
1069 case LTU:
1070 reverse_compare = 1;
1071 reverse_test = 0;
1072 scode = SPU_GTU;
1073 break;
1074 case NE:
1075 reverse_compare = 0;
1076 reverse_test = 1;
1077 scode = SPU_EQ;
1078 break;
1079
1080 case EQ:
1081 scode = SPU_EQ;
1082 break;
1083 case GT:
1084 scode = SPU_GT;
1085 break;
1086 case GTU:
1087 scode = SPU_GTU;
1088 break;
1089 default:
1090 scode = SPU_EQ;
1091 break;
1092 }
1093
644459d0 1094 switch (op_mode)
1095 {
1096 case QImode:
1097 index = 0;
1098 comp_mode = QImode;
1099 break;
1100 case HImode:
1101 index = 1;
1102 comp_mode = HImode;
1103 break;
1104 case SImode:
1105 index = 2;
1106 break;
1107 case DImode:
1108 index = 3;
1109 break;
1110 case TImode:
1111 index = 4;
1112 break;
1113 case SFmode:
1114 index = 5;
1115 break;
1116 case DFmode:
1117 index = 6;
1118 break;
1119 case V16QImode:
5474166e 1120 index = 7;
1121 comp_mode = op_mode;
1122 break;
644459d0 1123 case V8HImode:
5474166e 1124 index = 8;
1125 comp_mode = op_mode;
1126 break;
644459d0 1127 case V4SImode:
5474166e 1128 index = 9;
1129 comp_mode = op_mode;
1130 break;
644459d0 1131 case V4SFmode:
5474166e 1132 index = 10;
1133 comp_mode = V4SImode;
1134 break;
644459d0 1135 case V2DFmode:
5474166e 1136 index = 11;
1137 comp_mode = V2DImode;
644459d0 1138 break;
5474166e 1139 case V2DImode:
644459d0 1140 default:
1141 abort ();
1142 }
1143
74f4459c 1144 if (GET_MODE (op1) == DFmode
07027691 1145 && (scode != SPU_GT && scode != SPU_EQ))
1146 abort ();
644459d0 1147
74f4459c 1148 if (is_set == 0 && op1 == const0_rtx
1149 && (GET_MODE (op0) == SImode
686195ea 1150 || GET_MODE (op0) == HImode
1151 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
644459d0 1152 {
1153 /* Don't need to set a register with the result when we are
1154 comparing against zero and branching. */
1155 reverse_test = !reverse_test;
74f4459c 1156 compare_result = op0;
644459d0 1157 }
1158 else
1159 {
1160 compare_result = gen_reg_rtx (comp_mode);
1161
1162 if (reverse_compare)
1163 {
74f4459c 1164 rtx t = op1;
1165 op1 = op0;
1166 op0 = t;
644459d0 1167 }
1168
1169 if (spu_comp_icode[index][scode] == 0)
1170 abort ();
1171
1172 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1173 (op0, op_mode))
1174 op0 = force_reg (op_mode, op0);
644459d0 1175 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1176 (op1, op_mode))
1177 op1 = force_reg (op_mode, op1);
644459d0 1178 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1179 op0, op1);
644459d0 1180 if (comp_rtx == 0)
1181 abort ();
1182 emit_insn (comp_rtx);
1183
5d70b918 1184 if (eq_test)
1185 {
1186 eq_result = gen_reg_rtx (comp_mode);
1187 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1188 op0, op1);
5d70b918 1189 if (eq_rtx == 0)
1190 abort ();
1191 emit_insn (eq_rtx);
d6bf3b14 1192 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1193 gcc_assert (ior_code != CODE_FOR_nothing);
1194 emit_insn (GEN_FCN (ior_code)
1195 (compare_result, compare_result, eq_result));
1196 }
644459d0 1197 }
1198
1199 if (is_set == 0)
1200 {
1201 rtx bcomp;
1202 rtx loc_ref;
1203
1204 /* We don't have branch on QI compare insns, so we convert the
1205 QI compare result to a HI result. */
1206 if (comp_mode == QImode)
1207 {
1208 rtx old_res = compare_result;
1209 compare_result = gen_reg_rtx (HImode);
1210 comp_mode = HImode;
1211 emit_insn (gen_extendqihi2 (compare_result, old_res));
1212 }
1213
1214 if (reverse_test)
1215 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1216 else
1217 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1218
74f4459c 1219 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1220 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1221 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1222 loc_ref, pc_rtx)));
1223 }
1224 else if (is_set == 2)
1225 {
74f4459c 1226 rtx target = operands[0];
644459d0 1227 int compare_size = GET_MODE_BITSIZE (comp_mode);
1228 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1229 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1230 rtx select_mask;
1231 rtx op_t = operands[2];
1232 rtx op_f = operands[3];
1233
1234 /* The result of the comparison can be SI, HI or QI mode. Create a
1235 mask based on that result. */
1236 if (target_size > compare_size)
1237 {
1238 select_mask = gen_reg_rtx (mode);
1239 emit_insn (gen_extend_compare (select_mask, compare_result));
1240 }
1241 else if (target_size < compare_size)
1242 select_mask =
1243 gen_rtx_SUBREG (mode, compare_result,
1244 (compare_size - target_size) / BITS_PER_UNIT);
1245 else if (comp_mode != mode)
1246 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1247 else
1248 select_mask = compare_result;
1249
1250 if (GET_MODE (target) != GET_MODE (op_t)
1251 || GET_MODE (target) != GET_MODE (op_f))
1252 abort ();
1253
1254 if (reverse_test)
1255 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1256 else
1257 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1258 }
1259 else
1260 {
74f4459c 1261 rtx target = operands[0];
644459d0 1262 if (reverse_test)
1263 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1264 gen_rtx_NOT (comp_mode, compare_result)));
1265 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1266 emit_insn (gen_extendhisi2 (target, compare_result));
1267 else if (GET_MODE (target) == SImode
1268 && GET_MODE (compare_result) == QImode)
1269 emit_insn (gen_extend_compare (target, compare_result));
1270 else
1271 emit_move_insn (target, compare_result);
1272 }
1273}
1274
1275HOST_WIDE_INT
1276const_double_to_hwint (rtx x)
1277{
1278 HOST_WIDE_INT val;
1279 REAL_VALUE_TYPE rv;
1280 if (GET_MODE (x) == SFmode)
1281 {
1282 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1283 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1284 }
1285 else if (GET_MODE (x) == DFmode)
1286 {
1287 long l[2];
1288 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1289 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1290 val = l[0];
1291 val = (val << 32) | (l[1] & 0xffffffff);
1292 }
1293 else
1294 abort ();
1295 return val;
1296}
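/* For example, 1.0 in DFmode has target words l[0] = 0x3ff00000 and
   l[1] = 0x00000000 on this big-endian target, so the value returned
   is 0x3ff0000000000000.  */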
1297
1298rtx
1299hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1300{
1301 long tv[2];
1302 REAL_VALUE_TYPE rv;
1303 gcc_assert (mode == SFmode || mode == DFmode);
1304
1305 if (mode == SFmode)
1306 tv[0] = (v << 32) >> 32;
1307 else if (mode == DFmode)
1308 {
1309 tv[1] = (v << 32) >> 32;
1310 tv[0] = v >> 32;
1311 }
1312 real_from_target (&rv, tv, mode);
1313 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1314}
1315
1316void
1317print_operand_address (FILE * file, register rtx addr)
1318{
1319 rtx reg;
1320 rtx offset;
1321
e04cf423 1322 if (GET_CODE (addr) == AND
1323 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1324 && INTVAL (XEXP (addr, 1)) == -16)
1325 addr = XEXP (addr, 0);
1326
644459d0 1327 switch (GET_CODE (addr))
1328 {
1329 case REG:
1330 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1331 break;
1332
1333 case PLUS:
1334 reg = XEXP (addr, 0);
1335 offset = XEXP (addr, 1);
1336 if (GET_CODE (offset) == REG)
1337 {
1338 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1339 reg_names[REGNO (offset)]);
1340 }
1341 else if (GET_CODE (offset) == CONST_INT)
1342 {
1343 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1344 INTVAL (offset), reg_names[REGNO (reg)]);
1345 }
1346 else
1347 abort ();
1348 break;
1349
1350 case CONST:
1351 case LABEL_REF:
1352 case SYMBOL_REF:
1353 case CONST_INT:
1354 output_addr_const (file, addr);
1355 break;
1356
1357 default:
1358 debug_rtx (addr);
1359 abort ();
1360 }
1361}
1362
1363void
1364print_operand (FILE * file, rtx x, int code)
1365{
1366 enum machine_mode mode = GET_MODE (x);
1367 HOST_WIDE_INT val;
1368 unsigned char arr[16];
1369 int xcode = GET_CODE (x);
dea01258 1370 int i, info;
644459d0 1371 if (GET_MODE (x) == VOIDmode)
1372 switch (code)
1373 {
644459d0 1374 case 'L': /* 128 bits, signed */
1375 case 'm': /* 128 bits, signed */
1376 case 'T': /* 128 bits, signed */
1377 case 't': /* 128 bits, signed */
1378 mode = TImode;
1379 break;
644459d0 1380 case 'K': /* 64 bits, signed */
1381 case 'k': /* 64 bits, signed */
1382 case 'D': /* 64 bits, signed */
1383 case 'd': /* 64 bits, signed */
1384 mode = DImode;
1385 break;
644459d0 1386 case 'J': /* 32 bits, signed */
1387 case 'j': /* 32 bits, signed */
1388 case 's': /* 32 bits, signed */
1389 case 'S': /* 32 bits, signed */
1390 mode = SImode;
1391 break;
1392 }
1393 switch (code)
1394 {
1395
1396 case 'j': /* 32 bits, signed */
1397 case 'k': /* 64 bits, signed */
1398 case 'm': /* 128 bits, signed */
1399 if (xcode == CONST_INT
1400 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1401 {
1402 gcc_assert (logical_immediate_p (x, mode));
1403 constant_to_array (mode, x, arr);
1404 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1405 val = trunc_int_for_mode (val, SImode);
1406 switch (which_logical_immediate (val))
1407 {
1408 case SPU_ORI:
1409 break;
1410 case SPU_ORHI:
1411 fprintf (file, "h");
1412 break;
1413 case SPU_ORBI:
1414 fprintf (file, "b");
1415 break;
1416 default:
1417 gcc_unreachable();
1418 }
1419 }
1420 else
1421 gcc_unreachable();
1422 return;
1423
1424 case 'J': /* 32 bits, signed */
1425 case 'K': /* 64 bits, signed */
1426 case 'L': /* 128 bits, signed */
1427 if (xcode == CONST_INT
1428 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1429 {
1430 gcc_assert (logical_immediate_p (x, mode)
1431 || iohl_immediate_p (x, mode));
1432 constant_to_array (mode, x, arr);
1433 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1434 val = trunc_int_for_mode (val, SImode);
1435 switch (which_logical_immediate (val))
1436 {
1437 case SPU_ORI:
1438 case SPU_IOHL:
1439 break;
1440 case SPU_ORHI:
1441 val = trunc_int_for_mode (val, HImode);
1442 break;
1443 case SPU_ORBI:
1444 val = trunc_int_for_mode (val, QImode);
1445 break;
1446 default:
1447 gcc_unreachable();
1448 }
1449 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1450 }
1451 else
1452 gcc_unreachable();
1453 return;
1454
1455 case 't': /* 128 bits, signed */
1456 case 'd': /* 64 bits, signed */
1457 case 's': /* 32 bits, signed */
dea01258 1458 if (CONSTANT_P (x))
644459d0 1459 {
dea01258 1460 enum immediate_class c = classify_immediate (x, mode);
1461 switch (c)
1462 {
1463 case IC_IL1:
1464 constant_to_array (mode, x, arr);
1465 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1466 val = trunc_int_for_mode (val, SImode);
1467 switch (which_immediate_load (val))
1468 {
1469 case SPU_IL:
1470 break;
1471 case SPU_ILA:
1472 fprintf (file, "a");
1473 break;
1474 case SPU_ILH:
1475 fprintf (file, "h");
1476 break;
1477 case SPU_ILHU:
1478 fprintf (file, "hu");
1479 break;
1480 default:
1481 gcc_unreachable ();
1482 }
1483 break;
1484 case IC_CPAT:
1485 constant_to_array (mode, x, arr);
1486 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1487 if (info == 1)
1488 fprintf (file, "b");
1489 else if (info == 2)
1490 fprintf (file, "h");
1491 else if (info == 4)
1492 fprintf (file, "w");
1493 else if (info == 8)
1494 fprintf (file, "d");
1495 break;
1496 case IC_IL1s:
1497 if (xcode == CONST_VECTOR)
1498 {
1499 x = CONST_VECTOR_ELT (x, 0);
1500 xcode = GET_CODE (x);
1501 }
1502 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1503 fprintf (file, "a");
1504 else if (xcode == HIGH)
1505 fprintf (file, "hu");
1506 break;
1507 case IC_FSMBI:
5df189be 1508 case IC_FSMBI2:
dea01258 1509 case IC_IL2:
1510 case IC_IL2s:
1511 case IC_POOL:
1512 abort ();
1513 }
644459d0 1514 }
644459d0 1515 else
1516 gcc_unreachable ();
1517 return;
1518
1519 case 'T': /* 128 bits, signed */
1520 case 'D': /* 64 bits, signed */
1521 case 'S': /* 32 bits, signed */
dea01258 1522 if (CONSTANT_P (x))
644459d0 1523 {
dea01258 1524 enum immediate_class c = classify_immediate (x, mode);
1525 switch (c)
644459d0 1526 {
dea01258 1527 case IC_IL1:
1528 constant_to_array (mode, x, arr);
1529 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1530 val = trunc_int_for_mode (val, SImode);
1531 switch (which_immediate_load (val))
1532 {
1533 case SPU_IL:
1534 case SPU_ILA:
1535 break;
1536 case SPU_ILH:
1537 case SPU_ILHU:
1538 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1539 break;
1540 default:
1541 gcc_unreachable ();
1542 }
1543 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1544 break;
1545 case IC_FSMBI:
1546 constant_to_array (mode, x, arr);
1547 val = 0;
1548 for (i = 0; i < 16; i++)
1549 {
1550 val <<= 1;
1551 val |= arr[i] & 1;
1552 }
1553 print_operand (file, GEN_INT (val), 0);
1554 break;
1555 case IC_CPAT:
1556 constant_to_array (mode, x, arr);
1557 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1558 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1559 break;
dea01258 1560 case IC_IL1s:
dea01258 1561 if (xcode == HIGH)
5df189be 1562 x = XEXP (x, 0);
1563 if (GET_CODE (x) == CONST_VECTOR)
1564 x = CONST_VECTOR_ELT (x, 0);
1565 output_addr_const (file, x);
1566 if (xcode == HIGH)
1567 fprintf (file, "@h");
644459d0 1568 break;
dea01258 1569 case IC_IL2:
1570 case IC_IL2s:
5df189be 1571 case IC_FSMBI2:
dea01258 1572 case IC_POOL:
1573 abort ();
644459d0 1574 }
c8befdb9 1575 }
644459d0 1576 else
1577 gcc_unreachable ();
1578 return;
1579
644459d0 1580 case 'C':
1581 if (xcode == CONST_INT)
1582 {
 1583 /* Only the 4 least significant bits are relevant for generating
 1584 control word instructions. */
1585 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1586 return;
1587 }
1588 break;
1589
1590 case 'M': /* print code for c*d */
1591 if (GET_CODE (x) == CONST_INT)
1592 switch (INTVAL (x))
1593 {
1594 case 1:
1595 fprintf (file, "b");
1596 break;
1597 case 2:
1598 fprintf (file, "h");
1599 break;
1600 case 4:
1601 fprintf (file, "w");
1602 break;
1603 case 8:
1604 fprintf (file, "d");
1605 break;
1606 default:
1607 gcc_unreachable();
1608 }
1609 else
1610 gcc_unreachable();
1611 return;
1612
1613 case 'N': /* Negate the operand */
1614 if (xcode == CONST_INT)
1615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1616 else if (xcode == CONST_VECTOR)
1617 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1618 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1619 return;
1620
1621 case 'I': /* enable/disable interrupts */
1622 if (xcode == CONST_INT)
1623 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1624 return;
1625
1626 case 'b': /* branch modifiers */
1627 if (xcode == REG)
1628 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1629 else if (COMPARISON_P (x))
1630 fprintf (file, "%s", xcode == NE ? "n" : "");
1631 return;
1632
1633 case 'i': /* indirect call */
1634 if (xcode == MEM)
1635 {
1636 if (GET_CODE (XEXP (x, 0)) == REG)
1637 /* Used in indirect function calls. */
1638 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1639 else
1640 output_address (XEXP (x, 0));
1641 }
1642 return;
1643
1644 case 'p': /* load/store */
1645 if (xcode == MEM)
1646 {
1647 x = XEXP (x, 0);
1648 xcode = GET_CODE (x);
1649 }
e04cf423 1650 if (xcode == AND)
1651 {
1652 x = XEXP (x, 0);
1653 xcode = GET_CODE (x);
1654 }
644459d0 1655 if (xcode == REG)
1656 fprintf (file, "d");
1657 else if (xcode == CONST_INT)
1658 fprintf (file, "a");
1659 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1660 fprintf (file, "r");
1661 else if (xcode == PLUS || xcode == LO_SUM)
1662 {
1663 if (GET_CODE (XEXP (x, 1)) == REG)
1664 fprintf (file, "x");
1665 else
1666 fprintf (file, "d");
1667 }
1668 return;
1669
5df189be 1670 case 'e':
1671 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1672 val &= 0x7;
1673 output_addr_const (file, GEN_INT (val));
1674 return;
1675
1676 case 'f':
1677 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1678 val &= 0x1f;
1679 output_addr_const (file, GEN_INT (val));
1680 return;
1681
1682 case 'g':
1683 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1684 val &= 0x3f;
1685 output_addr_const (file, GEN_INT (val));
1686 return;
1687
1688 case 'h':
1689 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1690 val = (val >> 3) & 0x1f;
1691 output_addr_const (file, GEN_INT (val));
1692 return;
1693
1694 case 'E':
1695 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1696 val = -val;
1697 val &= 0x7;
1698 output_addr_const (file, GEN_INT (val));
1699 return;
1700
1701 case 'F':
1702 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1703 val = -val;
1704 val &= 0x1f;
1705 output_addr_const (file, GEN_INT (val));
1706 return;
1707
1708 case 'G':
1709 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1710 val = -val;
1711 val &= 0x3f;
1712 output_addr_const (file, GEN_INT (val));
1713 return;
1714
1715 case 'H':
1716 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1717 val = -(val & -8ll);
1718 val = (val >> 3) & 0x1f;
1719 output_addr_const (file, GEN_INT (val));
1720 return;
1721
56c7bfc2 1722 case 'v':
1723 case 'w':
1724 constant_to_array (mode, x, arr);
1725 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1726 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1727 return;
1728
644459d0 1729 case 0:
1730 if (xcode == REG)
1731 fprintf (file, "%s", reg_names[REGNO (x)]);
1732 else if (xcode == MEM)
1733 output_address (XEXP (x, 0));
1734 else if (xcode == CONST_VECTOR)
dea01258 1735 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1736 else
1737 output_addr_const (file, x);
1738 return;
1739
f6a0d06f 1740 /* unused letters
56c7bfc2 1741 o qr u yz
5df189be 1742 AB OPQR UVWXYZ */
644459d0 1743 default:
1744 output_operand_lossage ("invalid %%xn code");
1745 }
1746 gcc_unreachable ();
1747}
1748
644459d0 1749/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1750 caller saved register. For leaf functions it is more efficient to
1751 use a volatile register because we won't need to save and restore the
1752 pic register. This routine is only valid after register allocation
1753 is completed, so we can pick an unused register. */
1754static rtx
1755get_pic_reg (void)
1756{
644459d0 1757 if (!reload_completed && !reload_in_progress)
1758 abort ();
5eb28709 1759
1760 /* If we've already made the decision, we need to keep with it. Once we've
1761 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1762 return true since the register is now live; this should not cause us to
1763 "switch back" to using pic_offset_table_rtx. */
1764 if (!cfun->machine->pic_reg)
1765 {
1766 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1767 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1768 else
1769 cfun->machine->pic_reg = pic_offset_table_rtx;
1770 }
1771
1772 return cfun->machine->pic_reg;
644459d0 1773}
1774
5df189be 1775/* Split constant addresses to handle cases that are too large.
1776 Add in the pic register when in PIC mode.
1777 Split immediates that require more than 1 instruction. */
dea01258 1778int
1779spu_split_immediate (rtx * ops)
c8befdb9 1780{
dea01258 1781 enum machine_mode mode = GET_MODE (ops[0]);
1782 enum immediate_class c = classify_immediate (ops[1], mode);
1783
1784 switch (c)
c8befdb9 1785 {
dea01258 1786 case IC_IL2:
1787 {
1788 unsigned char arrhi[16];
1789 unsigned char arrlo[16];
98bbec1e 1790 rtx to, temp, hi, lo;
dea01258 1791 int i;
98bbec1e 1792 enum machine_mode imode = mode;
1793 /* We need to do reals as ints because the constant used in the
1794 IOR might not be a legitimate real constant. */
1795 imode = int_mode_for_mode (mode);
dea01258 1796 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1797 if (imode != mode)
1798 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1799 else
1800 to = ops[0];
1801 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1802 for (i = 0; i < 16; i += 4)
1803 {
1804 arrlo[i + 2] = arrhi[i + 2];
1805 arrlo[i + 3] = arrhi[i + 3];
1806 arrlo[i + 0] = arrlo[i + 1] = 0;
1807 arrhi[i + 2] = arrhi[i + 3] = 0;
1808 }
98bbec1e 1809 hi = array_to_constant (imode, arrhi);
1810 lo = array_to_constant (imode, arrlo);
1811 emit_move_insn (temp, hi);
dea01258 1812 emit_insn (gen_rtx_SET
98bbec1e 1813 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1814 return 1;
1815 }
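      /* For example (SImode): 0x12345678 is split into hi = 0x12340000,
         loaded with an ilhu, and lo = 0x00005678, merged in by the IOR
         above, which matches the iohl instruction.  */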
5df189be 1816 case IC_FSMBI2:
1817 {
1818 unsigned char arr_fsmbi[16];
1819 unsigned char arr_andbi[16];
1820 rtx to, reg_fsmbi, reg_and;
1821 int i;
1822 enum machine_mode imode = mode;
1823 /* We need to do reals as ints because the constant used in the
1824 * AND might not be a legitimate real constant. */
1825 imode = int_mode_for_mode (mode);
1826 constant_to_array (mode, ops[1], arr_fsmbi);
1827 if (imode != mode)
1828 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1829 else
1830 to = ops[0];
1831 for (i = 0; i < 16; i++)
1832 if (arr_fsmbi[i] != 0)
1833 {
1834 arr_andbi[0] = arr_fsmbi[i];
1835 arr_fsmbi[i] = 0xff;
1836 }
1837 for (i = 1; i < 16; i++)
1838 arr_andbi[i] = arr_andbi[0];
1839 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1840 reg_and = array_to_constant (imode, arr_andbi);
1841 emit_move_insn (to, reg_fsmbi);
1842 emit_insn (gen_rtx_SET
1843 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1844 return 1;
1845 }
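      /* For example: a V16QI constant whose nonzero bytes all equal 0x55
         becomes an fsmbi pattern with 0xff in those byte positions,
         followed by the AND above with a vector of 0x55 bytes, giving
         the "fsmbi plus 1 other instruction" sequence of IC_FSMBI2.  */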
dea01258 1846 case IC_POOL:
1847 if (reload_in_progress || reload_completed)
1848 {
1849 rtx mem = force_const_mem (mode, ops[1]);
1850 if (TARGET_LARGE_MEM)
1851 {
1852 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1853 emit_move_insn (addr, XEXP (mem, 0));
1854 mem = replace_equiv_address (mem, addr);
1855 }
1856 emit_move_insn (ops[0], mem);
1857 return 1;
1858 }
1859 break;
1860 case IC_IL1s:
1861 case IC_IL2s:
1862 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1863 {
1864 if (c == IC_IL2s)
1865 {
5df189be 1866 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1867 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1868 }
1869 else if (flag_pic)
1870 emit_insn (gen_pic (ops[0], ops[1]));
1871 if (flag_pic)
1872 {
1873 rtx pic_reg = get_pic_reg ();
1874 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
dea01258 1875 }
1876 return flag_pic || c == IC_IL2s;
1877 }
1878 break;
1879 case IC_IL1:
1880 case IC_FSMBI:
1881 case IC_CPAT:
1882 break;
c8befdb9 1883 }
dea01258 1884 return 0;
c8befdb9 1885}
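
/* Illustrative sketch (not from the original sources) of the IC_IL2 case
   above: splitting the SImode constant 0x12345678 first moves the high
   halfwords into a temporary and then ORs in the low halfwords, roughly

     temp = 0x12340000;          -- high halfwords, an ilhu-class move
     to   = temp | 0x00005678;   -- low halfwords via IOR, i.e. iohl

   and the same per-word split is applied to every 32-bit element of a
   vector constant.  */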
1886
644459d0 1887/* SAVING is TRUE when we are generating the actual load and store
1888 instructions for REGNO. When determining the size of the stack
1889   needed for saving registers, we must allocate enough space for the
1890 worst case, because we don't always have the information early enough
1891 to not allocate it. But we can at least eliminate the actual loads
1892 and stores during the prologue/epilogue. */
1893static int
1894need_to_save_reg (int regno, int saving)
1895{
3072d30e 1896 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1897 return 1;
1898 if (flag_pic
1899 && regno == PIC_OFFSET_TABLE_REGNUM
5eb28709 1900 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
644459d0 1901 return 1;
1902 return 0;
1903}
1904
1905/* This function is only correct starting with local register
1906 allocation */
1907int
1908spu_saved_regs_size (void)
1909{
1910 int reg_save_size = 0;
1911 int regno;
1912
1913 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1914 if (need_to_save_reg (regno, 0))
1915 reg_save_size += 0x10;
1916 return reg_save_size;
1917}
1918
1919static rtx
1920frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1921{
1922 rtx reg = gen_rtx_REG (V4SImode, regno);
1923 rtx mem =
1924 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1925 return emit_insn (gen_movv4si (mem, reg));
1926}
1927
1928static rtx
1929frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1930{
1931 rtx reg = gen_rtx_REG (V4SImode, regno);
1932 rtx mem =
1933 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1934 return emit_insn (gen_movv4si (reg, mem));
1935}
1936
1937/* This happens after reload, so we need to expand it. */
1938static rtx
1939frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1940{
1941 rtx insn;
1942 if (satisfies_constraint_K (GEN_INT (imm)))
1943 {
1944 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1945 }
1946 else
1947 {
3072d30e 1948 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1949 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1950 if (REGNO (src) == REGNO (scratch))
1951 abort ();
1952 }
644459d0 1953 return insn;
1954}
1955
1956/* Return nonzero if this function is known to have a null epilogue. */
1957
1958int
1959direct_return (void)
1960{
1961 if (reload_completed)
1962 {
1963 if (cfun->static_chain_decl == 0
1964 && (spu_saved_regs_size ()
1965 + get_frame_size ()
abe32cce 1966 + crtl->outgoing_args_size
1967 + crtl->args.pretend_args_size == 0)
644459d0 1968 && current_function_is_leaf)
1969 return 1;
1970 }
1971 return 0;
1972}
1973
1974/*
1975 The stack frame looks like this:
1976 +-------------+
1977 | incoming |
a8e019fa 1978 | args |
1979 AP -> +-------------+
644459d0 1980 | $lr save |
1981 +-------------+
1982 prev SP | back chain |
1983 +-------------+
1984 | var args |
abe32cce 1985 | reg save | crtl->args.pretend_args_size bytes
644459d0 1986 +-------------+
1987 | ... |
1988 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1989 FP -> +-------------+
644459d0 1990 | ... |
a8e019fa 1991 | vars | get_frame_size() bytes
1992 HFP -> +-------------+
644459d0 1993 | ... |
1994 | outgoing |
abe32cce 1995 | args | crtl->outgoing_args_size bytes
644459d0 1996 +-------------+
1997 | $lr of next |
1998 | frame |
1999 +-------------+
a8e019fa 2000 | back chain |
2001 SP -> +-------------+
644459d0 2002
2003*/
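/* Worked example (illustrative, not part of the original comment): for a
   non-leaf function with 48 bytes of locals, two call-saved registers,
   32 bytes of outgoing arguments and no pretend args, the prologue below
   computes

     total_size = 48 + 2*16 + 32 + 0 + STACK_POINTER_OFFSET

   and, because total_size <= 2000, it first stores the back chain (the
   old $sp) at what will become the new stack pointer and then lowers $sp
   by total_size, here with a single add since the immediate fits the 'K'
   constraint.  */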
2004void
2005spu_expand_prologue (void)
2006{
2007 HOST_WIDE_INT size = get_frame_size (), offset, regno;
2008 HOST_WIDE_INT total_size;
2009 HOST_WIDE_INT saved_regs_size;
2010 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2011 rtx scratch_reg_0, scratch_reg_1;
2012 rtx insn, real;
2013
5eb28709 2014 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
2015 cfun->machine->pic_reg = pic_offset_table_rtx;
644459d0 2016
2017 if (spu_naked_function_p (current_function_decl))
2018 return;
2019
2020 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2021 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
2022
2023 saved_regs_size = spu_saved_regs_size ();
2024 total_size = size + saved_regs_size
abe32cce 2025 + crtl->outgoing_args_size
2026 + crtl->args.pretend_args_size;
644459d0 2027
2028 if (!current_function_is_leaf
18d50ae6 2029 || cfun->calls_alloca || total_size > 0)
644459d0 2030 total_size += STACK_POINTER_OFFSET;
2031
2032 /* Save this first because code after this might use the link
2033 register as a scratch register. */
2034 if (!current_function_is_leaf)
2035 {
2036 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2037 RTX_FRAME_RELATED_P (insn) = 1;
2038 }
2039
2040 if (total_size > 0)
2041 {
abe32cce 2042 offset = -crtl->args.pretend_args_size;
644459d0 2043 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2044 if (need_to_save_reg (regno, 1))
2045 {
2046 offset -= 16;
2047 insn = frame_emit_store (regno, sp_reg, offset);
2048 RTX_FRAME_RELATED_P (insn) = 1;
2049 }
2050 }
2051
5eb28709 2052 if (flag_pic && cfun->machine->pic_reg)
644459d0 2053 {
5eb28709 2054 rtx pic_reg = cfun->machine->pic_reg;
644459d0 2055 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2056 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2057 }
2058
2059 if (total_size > 0)
2060 {
2061 if (flag_stack_check)
2062 {
d819917f 2063 /* We compare against total_size-1 because
644459d0 2064 ($sp >= total_size) <=> ($sp > total_size-1) */
2065 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2066 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2067 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2068 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2069 {
2070 emit_move_insn (scratch_v4si, size_v4si);
2071 size_v4si = scratch_v4si;
2072 }
2073 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2074 emit_insn (gen_vec_extractv4si
2075 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2076 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2077 }
2078
2079 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2080 the value of the previous $sp because we save it as the back
2081 chain. */
2082 if (total_size <= 2000)
2083 {
2084 /* In this case we save the back chain first. */
2085 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2086 insn =
2087 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2088 }
644459d0 2089 else
2090 {
2091 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2092 insn =
2093 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2094 }
2095 RTX_FRAME_RELATED_P (insn) = 1;
2096 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2097 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2098
2099 if (total_size > 2000)
2100 {
2101 /* Save the back chain ptr */
2102 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2103 }
2104
2105 if (frame_pointer_needed)
2106 {
2107 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2108 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2109 + crtl->outgoing_args_size;
644459d0 2110 /* Set the new frame_pointer */
d8dfeb55 2111 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2112 RTX_FRAME_RELATED_P (insn) = 1;
2113 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2114 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2115 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2116 }
2117 }
2118
8c0dd614 2119 if (flag_stack_usage_info)
a512540d 2120 current_function_static_stack_size = total_size;
644459d0 2121}
2122
2123void
2124spu_expand_epilogue (bool sibcall_p)
2125{
2126 int size = get_frame_size (), offset, regno;
2127 HOST_WIDE_INT saved_regs_size, total_size;
2128 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
431ad7e0 2129 rtx scratch_reg_0;
644459d0 2130
644459d0 2131 if (spu_naked_function_p (current_function_decl))
2132 return;
2133
2134 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2135
2136 saved_regs_size = spu_saved_regs_size ();
2137 total_size = size + saved_regs_size
abe32cce 2138 + crtl->outgoing_args_size
2139 + crtl->args.pretend_args_size;
644459d0 2140
2141 if (!current_function_is_leaf
18d50ae6 2142 || cfun->calls_alloca || total_size > 0)
644459d0 2143 total_size += STACK_POINTER_OFFSET;
2144
2145 if (total_size > 0)
2146 {
18d50ae6 2147 if (cfun->calls_alloca)
644459d0 2148 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2149 else
2150 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2151
2152
2153 if (saved_regs_size > 0)
2154 {
abe32cce 2155 offset = -crtl->args.pretend_args_size;
644459d0 2156 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2157 if (need_to_save_reg (regno, 1))
2158 {
2159 offset -= 0x10;
2160 frame_emit_load (regno, sp_reg, offset);
2161 }
2162 }
2163 }
2164
2165 if (!current_function_is_leaf)
2166 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2167
2168 if (!sibcall_p)
2169 {
18b42941 2170 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
431ad7e0 2171 emit_jump_insn (gen__return ());
644459d0 2172 }
644459d0 2173}
2174
2175rtx
2176spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2177{
2178 if (count != 0)
2179 return 0;
2180 /* This is inefficient because it ends up copying to a save-register
2181 which then gets saved even though $lr has already been saved. But
2182 it does generate better code for leaf functions and we don't need
2183 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2184 used for __builtin_return_address anyway, so maybe we don't care if
2185 it's inefficient. */
2186 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2187}
2188\f
2189
2190/* Given VAL, generate a constant appropriate for MODE.
2191 If MODE is a vector mode, every element will be VAL.
2192 For TImode, VAL will be zero extended to 128 bits. */
2193rtx
2194spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2195{
2196 rtx inner;
2197 rtvec v;
2198 int units, i;
2199
2200 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2201 || GET_MODE_CLASS (mode) == MODE_FLOAT
2202 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2203 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2204
2205 if (GET_MODE_CLASS (mode) == MODE_INT)
2206 return immed_double_const (val, 0, mode);
2207
2208 /* val is the bit representation of the float */
2209 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2210 return hwint_to_const_double (mode, val);
2211
2212 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2213 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2214 else
2215 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2216
2217 units = GET_MODE_NUNITS (mode);
2218
2219 v = rtvec_alloc (units);
2220
2221 for (i = 0; i < units; ++i)
2222 RTVEC_ELT (v, i) = inner;
2223
2224 return gen_rtx_CONST_VECTOR (mode, v);
2225}
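
/* For example (illustrative): spu_const (V4SImode, 1) yields the
   CONST_VECTOR {1, 1, 1, 1}, while spu_const (SFmode, 0x3f800000) yields
   the CONST_DOUBLE for 1.0f, since VAL is interpreted as the bit pattern
   for floating-point modes.  */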
644459d0 2226
5474166e 2227/* Create a MODE vector constant from 4 ints. */
2228rtx
2229spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2230{
2231 unsigned char arr[16];
2232 arr[0] = (a >> 24) & 0xff;
2233 arr[1] = (a >> 16) & 0xff;
2234 arr[2] = (a >> 8) & 0xff;
2235 arr[3] = (a >> 0) & 0xff;
2236 arr[4] = (b >> 24) & 0xff;
2237 arr[5] = (b >> 16) & 0xff;
2238 arr[6] = (b >> 8) & 0xff;
2239 arr[7] = (b >> 0) & 0xff;
2240 arr[8] = (c >> 24) & 0xff;
2241 arr[9] = (c >> 16) & 0xff;
2242 arr[10] = (c >> 8) & 0xff;
2243 arr[11] = (c >> 0) & 0xff;
2244 arr[12] = (d >> 24) & 0xff;
2245 arr[13] = (d >> 16) & 0xff;
2246 arr[14] = (d >> 8) & 0xff;
2247 arr[15] = (d >> 0) & 0xff;
2248 return array_to_constant(mode, arr);
2249}
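
/* For example (illustrative):

     spu_const_from_ints (V4SImode, 0x10203, 0, 0, -1)

   fills the byte array with

     00 01 02 03  00 00 00 00  00 00 00 00  ff ff ff ff

   i.e. each argument supplies one big-endian 32-bit element of the
   returned CONST_VECTOR.  */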
5a976006 2250\f
2251/* Branch hint support.  */
5474166e 2252
644459d0 2253/* An array of these is used to propagate hints to predecessor blocks. */
2254struct spu_bb_info
2255{
5a976006 2256 rtx prop_jump; /* propagated from another block */
2257 int bb_index; /* the original block. */
644459d0 2258};
5a976006 2259static struct spu_bb_info *spu_bb_info;
644459d0 2260
5a976006 2261#define STOP_HINT_P(INSN) \
2262 (GET_CODE(INSN) == CALL_INSN \
2263 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2264 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2265
2266/* 1 when RTX is a hinted branch or its target. We keep track of
2267 what has been hinted so the safe-hint code can test it easily. */
2268#define HINTED_P(RTX) \
2269 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2270
2271/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2272#define SCHED_ON_EVEN_P(RTX) \
2273 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2274
2275/* Emit a nop for INSN such that the two will dual issue. This assumes
2276 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2277 We check for TImode to handle a MULTI1 insn which has dual issued its
2278 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2279 ADDR_VEC insns. */
2280static void
2281emit_nop_for_insn (rtx insn)
644459d0 2282{
5a976006 2283 int p;
2284 rtx new_insn;
2285 p = get_pipe (insn);
2286 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2287 new_insn = emit_insn_after (gen_lnop (), insn);
2288 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2289 {
5a976006 2290 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2291 PUT_MODE (new_insn, TImode);
2292 PUT_MODE (insn, VOIDmode);
2293 }
2294 else
2295 new_insn = emit_insn_after (gen_lnop (), insn);
2296 recog_memoized (new_insn);
2fbdf9ef 2297 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2298}
2299
2300/* Insert nops in basic blocks to meet dual issue alignment
2301 requirements. Also make sure hbrp and hint instructions are at least
2302 one cycle apart, possibly inserting a nop. */
2303static void
2304pad_bb(void)
2305{
2306 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2307 int length;
2308 int addr;
2309
2310 /* This sets up INSN_ADDRESSES. */
2311 shorten_branches (get_insns ());
2312
2313 /* Keep track of length added by nops. */
2314 length = 0;
2315
2316 prev_insn = 0;
2317 insn = get_insns ();
2318 if (!active_insn_p (insn))
2319 insn = next_active_insn (insn);
2320 for (; insn; insn = next_insn)
2321 {
2322 next_insn = next_active_insn (insn);
2323 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2324 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2325 {
5a976006 2326 if (hbr_insn)
2327 {
2328 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2329 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2330 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2331 || (a1 - a0 == 4))
2332 {
2333 prev_insn = emit_insn_before (gen_lnop (), insn);
2334 PUT_MODE (prev_insn, GET_MODE (insn));
2335 PUT_MODE (insn, TImode);
2fbdf9ef 2336 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2337 length += 4;
2338 }
2339 }
2340 hbr_insn = insn;
2341 }
2342 if (INSN_CODE (insn) == CODE_FOR_blockage)
2343 {
2344 if (GET_MODE (insn) == TImode)
2345 PUT_MODE (next_insn, TImode);
2346 insn = next_insn;
2347 next_insn = next_active_insn (insn);
2348 }
2349 addr = INSN_ADDRESSES (INSN_UID (insn));
2350 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2351 {
2352 if (((addr + length) & 7) != 0)
2353 {
2354 emit_nop_for_insn (prev_insn);
2355 length += 4;
2356 }
644459d0 2357 }
5a976006 2358 else if (GET_MODE (insn) == TImode
2359 && ((next_insn && GET_MODE (next_insn) != TImode)
2360 || get_attr_type (insn) == TYPE_MULTI0)
2361 && ((addr + length) & 7) != 0)
2362 {
2363 /* prev_insn will always be set because the first insn is
2364 always 8-byte aligned. */
2365 emit_nop_for_insn (prev_insn);
2366 length += 4;
2367 }
2368 prev_insn = insn;
644459d0 2369 }
644459d0 2370}
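
/* Illustrative example of the padding above: if a call or jump marked
   SCHED_ON_EVEN_P would start at an address that is not a multiple of 8
   (after accounting for nops already added), emit_nop_for_insn is called
   on the previous insn, adding 4 bytes so the branch lands on an 8-byte
   boundary.  */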
2371
5a976006 2372\f
2373/* Routines for branch hints. */
2374
644459d0 2375static void
5a976006 2376spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2377 int distance, sbitmap blocks)
644459d0 2378{
5a976006 2379 rtx branch_label = 0;
2380 rtx hint;
2381 rtx insn;
2382 rtx table;
644459d0 2383
2384 if (before == 0 || branch == 0 || target == 0)
2385 return;
2386
5a976006 2387  /* While scheduling we require hints to be no further than 600 bytes
2388     from the branch, so we need to enforce that here too.  */
644459d0 2389 if (distance > 600)
2390 return;
2391
5a976006 2392  /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2393 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2394 before = NEXT_INSN (before);
644459d0 2395
2396 branch_label = gen_label_rtx ();
2397 LABEL_NUSES (branch_label)++;
2398 LABEL_PRESERVE_P (branch_label) = 1;
2399 insn = emit_label_before (branch_label, branch);
2400 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2401 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2402
2403 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2404 recog_memoized (hint);
2fbdf9ef 2405 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2406 HINTED_P (branch) = 1;
644459d0 2407
5a976006 2408 if (GET_CODE (target) == LABEL_REF)
2409 HINTED_P (XEXP (target, 0)) = 1;
2410 else if (tablejump_p (branch, 0, &table))
644459d0 2411 {
5a976006 2412 rtvec vec;
2413 int j;
2414 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2415 vec = XVEC (PATTERN (table), 0);
2416 else
2417 vec = XVEC (PATTERN (table), 1);
2418 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2419 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2420 }
5a976006 2421
2422 if (distance >= 588)
644459d0 2423 {
5a976006 2424 /* Make sure the hint isn't scheduled any earlier than this point,
2425	 which could make it too far for the branch offset to fit.  */
2fbdf9ef 2426 insn = emit_insn_before (gen_blockage (), hint);
2427 recog_memoized (insn);
2428 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2429 }
2430 else if (distance <= 8 * 4)
2431 {
2432 /* To guarantee at least 8 insns between the hint and branch we
2433 insert nops. */
2434 int d;
2435 for (d = distance; d < 8 * 4; d += 4)
2436 {
2437 insn =
2438 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2439 recog_memoized (insn);
2fbdf9ef 2440 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2441 }
2442
2443 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2444 insn = emit_insn_after (gen_blockage (), hint);
2445 recog_memoized (insn);
2446 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2447
2448 /* Make sure any nops inserted aren't scheduled after the call. */
2449 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2450 {
2451 insn = emit_insn_before (gen_blockage (), branch);
2452 recog_memoized (insn);
2453 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2454 }
644459d0 2455 }
644459d0 2456}
2457
2458/* Returns 0 if we don't want a hint for this branch. Otherwise return
2459 the rtx for the branch target. */
2460static rtx
2461get_branch_target (rtx branch)
2462{
2463 if (GET_CODE (branch) == JUMP_INSN)
2464 {
2465 rtx set, src;
2466
2467 /* Return statements */
2468 if (GET_CODE (PATTERN (branch)) == RETURN)
2469 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2470
2471 /* jump table */
2472 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2473 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2474 return 0;
2475
fcc31b99 2476 /* ASM GOTOs. */
604157f6 2477 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2478 return NULL;
2479
644459d0 2480 set = single_set (branch);
2481 src = SET_SRC (set);
2482 if (GET_CODE (SET_DEST (set)) != PC)
2483 abort ();
2484
2485 if (GET_CODE (src) == IF_THEN_ELSE)
2486 {
2487 rtx lab = 0;
2488 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2489 if (note)
2490 {
2491 /* If the more probable case is not a fall through, then
2492 try a branch hint. */
2493 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2494 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2495 && GET_CODE (XEXP (src, 1)) != PC)
2496 lab = XEXP (src, 1);
2497 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2498 && GET_CODE (XEXP (src, 2)) != PC)
2499 lab = XEXP (src, 2);
2500 }
2501 if (lab)
2502 {
2503 if (GET_CODE (lab) == RETURN)
2504 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2505 return lab;
2506 }
2507 return 0;
2508 }
2509
2510 return src;
2511 }
2512 else if (GET_CODE (branch) == CALL_INSN)
2513 {
2514 rtx call;
2515 /* All of our call patterns are in a PARALLEL and the CALL is
2516 the first pattern in the PARALLEL. */
2517 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2518 abort ();
2519 call = XVECEXP (PATTERN (branch), 0, 0);
2520 if (GET_CODE (call) == SET)
2521 call = SET_SRC (call);
2522 if (GET_CODE (call) != CALL)
2523 abort ();
2524 return XEXP (XEXP (call, 0), 0);
2525 }
2526 return 0;
2527}
2528
5a976006 2529/* The special $hbr register is used to prevent the insn scheduler from
2530 moving hbr insns across instructions which invalidate them. It
2531 should only be used in a clobber, and this function searches for
2532 insns which clobber it. */
2533static bool
2534insn_clobbers_hbr (rtx insn)
2535{
2536 if (INSN_P (insn)
2537 && GET_CODE (PATTERN (insn)) == PARALLEL)
2538 {
2539 rtx parallel = PATTERN (insn);
2540 rtx clobber;
2541 int j;
2542 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2543 {
2544 clobber = XVECEXP (parallel, 0, j);
2545 if (GET_CODE (clobber) == CLOBBER
2546 && GET_CODE (XEXP (clobber, 0)) == REG
2547 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2548 return 1;
2549 }
2550 }
2551 return 0;
2552}
2553
2554/* Search up to 32 insns starting at FIRST:
2555 - at any kind of hinted branch, just return
2556 - at any unconditional branch in the first 15 insns, just return
2557 - at a call or indirect branch, after the first 15 insns, force it to
2558 an even address and return
2559 - at any unconditional branch, after the first 15 insns, force it to
2560 an even address.
2561   At the end of the search, insert an hbrp within 4 insns of FIRST,
2562 and an hbrp within 16 instructions of FIRST.
2563 */
644459d0 2564static void
5a976006 2565insert_hbrp_for_ilb_runout (rtx first)
644459d0 2566{
5a976006 2567 rtx insn, before_4 = 0, before_16 = 0;
2568 int addr = 0, length, first_addr = -1;
2569 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2570 int insert_lnop_after = 0;
2571 for (insn = first; insn; insn = NEXT_INSN (insn))
2572 if (INSN_P (insn))
2573 {
2574 if (first_addr == -1)
2575 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2576 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2577 length = get_attr_length (insn);
2578
2579 if (before_4 == 0 && addr + length >= 4 * 4)
2580 before_4 = insn;
2581 /* We test for 14 instructions because the first hbrp will add
2582 up to 2 instructions. */
2583 if (before_16 == 0 && addr + length >= 14 * 4)
2584 before_16 = insn;
2585
2586 if (INSN_CODE (insn) == CODE_FOR_hbr)
2587 {
2588 /* Make sure an hbrp is at least 2 cycles away from a hint.
2589 Insert an lnop after the hbrp when necessary. */
2590 if (before_4 == 0 && addr > 0)
2591 {
2592 before_4 = insn;
2593 insert_lnop_after |= 1;
2594 }
2595 else if (before_4 && addr <= 4 * 4)
2596 insert_lnop_after |= 1;
2597 if (before_16 == 0 && addr > 10 * 4)
2598 {
2599 before_16 = insn;
2600 insert_lnop_after |= 2;
2601 }
2602 else if (before_16 && addr <= 14 * 4)
2603 insert_lnop_after |= 2;
2604 }
644459d0 2605
5a976006 2606 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2607 {
2608 if (addr < hbrp_addr0)
2609 hbrp_addr0 = addr;
2610 else if (addr < hbrp_addr1)
2611 hbrp_addr1 = addr;
2612 }
644459d0 2613
5a976006 2614 if (CALL_P (insn) || JUMP_P (insn))
2615 {
2616 if (HINTED_P (insn))
2617 return;
2618
2619 /* Any branch after the first 15 insns should be on an even
2620 address to avoid a special case branch. There might be
2621 some nops and/or hbrps inserted, so we test after 10
2622 insns. */
2623 if (addr > 10 * 4)
2624 SCHED_ON_EVEN_P (insn) = 1;
2625 }
644459d0 2626
5a976006 2627 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2628 return;
2629
2630
2631 if (addr + length >= 32 * 4)
644459d0 2632 {
5a976006 2633 gcc_assert (before_4 && before_16);
2634 if (hbrp_addr0 > 4 * 4)
644459d0 2635 {
5a976006 2636 insn =
2637 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2638 recog_memoized (insn);
2fbdf9ef 2639 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2640 INSN_ADDRESSES_NEW (insn,
2641 INSN_ADDRESSES (INSN_UID (before_4)));
2642 PUT_MODE (insn, GET_MODE (before_4));
2643 PUT_MODE (before_4, TImode);
2644 if (insert_lnop_after & 1)
644459d0 2645 {
5a976006 2646 insn = emit_insn_before (gen_lnop (), before_4);
2647 recog_memoized (insn);
2fbdf9ef 2648 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2649 INSN_ADDRESSES_NEW (insn,
2650 INSN_ADDRESSES (INSN_UID (before_4)));
2651 PUT_MODE (insn, TImode);
644459d0 2652 }
644459d0 2653 }
5a976006 2654 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2655 && hbrp_addr1 > 16 * 4)
644459d0 2656 {
5a976006 2657 insn =
2658 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2659 recog_memoized (insn);
2fbdf9ef 2660 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2661 INSN_ADDRESSES_NEW (insn,
2662 INSN_ADDRESSES (INSN_UID (before_16)));
2663 PUT_MODE (insn, GET_MODE (before_16));
2664 PUT_MODE (before_16, TImode);
2665 if (insert_lnop_after & 2)
644459d0 2666 {
5a976006 2667 insn = emit_insn_before (gen_lnop (), before_16);
2668 recog_memoized (insn);
2fbdf9ef 2669 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2670 INSN_ADDRESSES_NEW (insn,
2671 INSN_ADDRESSES (INSN_UID
2672 (before_16)));
2673 PUT_MODE (insn, TImode);
644459d0 2674 }
2675 }
5a976006 2676 return;
644459d0 2677 }
644459d0 2678 }
5a976006 2679 else if (BARRIER_P (insn))
2680 return;
644459d0 2681
644459d0 2682}
5a976006 2683
2684/* The SPU might hang when it executes 48 inline instructions after a
2685 hinted branch jumps to its hinted target. The beginning of a
851d9296 2686 function and the return from a call might have been hinted, and
2687 must be handled as well. To prevent a hang we insert 2 hbrps. The
2688 first should be within 6 insns of the branch target. The second
2689 should be within 22 insns of the branch target. When determining
2690 if hbrps are necessary, we look for only 32 inline instructions,
2691   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2692 when inserting new hbrps, we insert them within 4 and 16 insns of
2693 the target. */
644459d0 2694static void
5a976006 2695insert_hbrp (void)
644459d0 2696{
5a976006 2697 rtx insn;
2698 if (TARGET_SAFE_HINTS)
644459d0 2699 {
5a976006 2700 shorten_branches (get_insns ());
2701 /* Insert hbrp at beginning of function */
2702 insn = next_active_insn (get_insns ());
2703 if (insn)
2704 insert_hbrp_for_ilb_runout (insn);
2705 /* Insert hbrp after hinted targets. */
2706 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2707 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2708 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2709 }
644459d0 2710}
2711
5a976006 2712static int in_spu_reorg;
2713
8a42230a 2714static void
2715spu_var_tracking (void)
2716{
2717 if (flag_var_tracking)
2718 {
2719 df_analyze ();
2720 timevar_push (TV_VAR_TRACKING);
2721 variable_tracking_main ();
2722 timevar_pop (TV_VAR_TRACKING);
2723 df_finish_pass (false);
2724 }
2725}
2726
5a976006 2727/* Insert branch hints. There are no branch optimizations after this
2728 pass, so it's safe to set our branch hints now. */
644459d0 2729static void
5a976006 2730spu_machine_dependent_reorg (void)
644459d0 2731{
5a976006 2732 sbitmap blocks;
2733 basic_block bb;
2734 rtx branch, insn;
2735 rtx branch_target = 0;
2736 int branch_addr = 0, insn_addr, required_dist = 0;
2737 int i;
2738 unsigned int j;
644459d0 2739
5a976006 2740 if (!TARGET_BRANCH_HINTS || optimize == 0)
2741 {
2742 /* We still do it for unoptimized code because an external
2743 function might have hinted a call or return. */
a54ca889 2744 compute_bb_for_insn ();
5a976006 2745 insert_hbrp ();
2746 pad_bb ();
8a42230a 2747 spu_var_tracking ();
a54ca889 2748 free_bb_for_insn ();
5a976006 2749 return;
2750 }
644459d0 2751
5a976006 2752 blocks = sbitmap_alloc (last_basic_block);
2753 sbitmap_zero (blocks);
644459d0 2754
5a976006 2755 in_spu_reorg = 1;
2756 compute_bb_for_insn ();
2757
2758 compact_blocks ();
2759
2760 spu_bb_info =
2761 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2762 sizeof (struct spu_bb_info));
2763
2764 /* We need exact insn addresses and lengths. */
2765 shorten_branches (get_insns ());
2766
2767 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2768 {
5a976006 2769 bb = BASIC_BLOCK (i);
2770 branch = 0;
2771 if (spu_bb_info[i].prop_jump)
644459d0 2772 {
5a976006 2773 branch = spu_bb_info[i].prop_jump;
2774 branch_target = get_branch_target (branch);
2775 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2776 required_dist = spu_hint_dist;
2777 }
2778 /* Search from end of a block to beginning. In this loop, find
2779	 jumps which need a branch hint and emit the hint only when:
2780 - it's an indirect branch and we're at the insn which sets
2781 the register
2782 - we're at an insn that will invalidate the hint. e.g., a
2783 call, another hint insn, inline asm that clobbers $hbr, and
2784 some inlined operations (divmodsi4). Don't consider jumps
2785 because they are only at the end of a block and are
2786 considered when we are deciding whether to propagate
2787 - we're getting too far away from the branch. The hbr insns
2788 only have a signed 10 bit offset
2789 We go back as far as possible so the branch will be considered
2790 for propagation when we get to the beginning of the block. */
2791 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2792 {
2793 if (INSN_P (insn))
2794 {
2795 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2796 if (branch
2797 && ((GET_CODE (branch_target) == REG
2798 && set_of (branch_target, insn) != NULL_RTX)
2799 || insn_clobbers_hbr (insn)
2800 || branch_addr - insn_addr > 600))
2801 {
2802 rtx next = NEXT_INSN (insn);
2803 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2804 if (insn != BB_END (bb)
2805 && branch_addr - next_addr >= required_dist)
2806 {
2807 if (dump_file)
2808 fprintf (dump_file,
2809 "hint for %i in block %i before %i\n",
2810 INSN_UID (branch), bb->index,
2811 INSN_UID (next));
2812 spu_emit_branch_hint (next, branch, branch_target,
2813 branch_addr - next_addr, blocks);
2814 }
2815 branch = 0;
2816 }
2817
2818 /* JUMP_P will only be true at the end of a block. When
2819 branch is already set it means we've previously decided
2820 to propagate a hint for that branch into this block. */
2821 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2822 {
2823 branch = 0;
2824 if ((branch_target = get_branch_target (insn)))
2825 {
2826 branch = insn;
2827 branch_addr = insn_addr;
2828 required_dist = spu_hint_dist;
2829 }
2830 }
2831 }
2832 if (insn == BB_HEAD (bb))
2833 break;
2834 }
2835
2836 if (branch)
2837 {
2838 /* If we haven't emitted a hint for this branch yet, it might
2839 be profitable to emit it in one of the predecessor blocks,
2840 especially for loops. */
2841 rtx bbend;
2842 basic_block prev = 0, prop = 0, prev2 = 0;
2843 int loop_exit = 0, simple_loop = 0;
2844 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2845
2846 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2847 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2848 prev = EDGE_PRED (bb, j)->src;
2849 else
2850 prev2 = EDGE_PRED (bb, j)->src;
2851
2852 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2853 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2854 loop_exit = 1;
2855 else if (EDGE_SUCC (bb, j)->dest == bb)
2856 simple_loop = 1;
2857
2858 /* If this branch is a loop exit then propagate to previous
2859 fallthru block. This catches the cases when it is a simple
2860 loop or when there is an initial branch into the loop. */
2861 if (prev && (loop_exit || simple_loop)
2862 && prev->loop_depth <= bb->loop_depth)
2863 prop = prev;
2864
2865	  /* If there is only one adjacent predecessor, don't propagate
2866 outside this loop. This loop_depth test isn't perfect, but
2867 I'm not sure the loop_father member is valid at this point. */
2868 else if (prev && single_pred_p (bb)
2869 && prev->loop_depth == bb->loop_depth)
2870 prop = prev;
2871
2872 /* If this is the JOIN block of a simple IF-THEN then
2873	     propagate the hint to the HEADER block.  */
2874 else if (prev && prev2
2875 && EDGE_COUNT (bb->preds) == 2
2876 && EDGE_COUNT (prev->preds) == 1
2877 && EDGE_PRED (prev, 0)->src == prev2
2878 && prev2->loop_depth == bb->loop_depth
2879 && GET_CODE (branch_target) != REG)
2880 prop = prev;
2881
2882 /* Don't propagate when:
2883 - this is a simple loop and the hint would be too far
2884 - this is not a simple loop and there are 16 insns in
2885 this block already
2886 - the predecessor block ends in a branch that will be
2887 hinted
2888 - the predecessor block ends in an insn that invalidates
2889 the hint */
2890 if (prop
2891 && prop->index >= 0
2892 && (bbend = BB_END (prop))
2893 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2894 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2895 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2896 {
2897 if (dump_file)
2898 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2899 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2900 bb->index, prop->index, bb->loop_depth,
2901 INSN_UID (branch), loop_exit, simple_loop,
2902 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2903
2904 spu_bb_info[prop->index].prop_jump = branch;
2905 spu_bb_info[prop->index].bb_index = i;
2906 }
2907 else if (branch_addr - next_addr >= required_dist)
2908 {
2909 if (dump_file)
2910 fprintf (dump_file, "hint for %i in block %i before %i\n",
2911 INSN_UID (branch), bb->index,
2912 INSN_UID (NEXT_INSN (insn)));
2913 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2914 branch_addr - next_addr, blocks);
2915 }
2916 branch = 0;
644459d0 2917 }
644459d0 2918 }
5a976006 2919 free (spu_bb_info);
644459d0 2920
5a976006 2921 if (!sbitmap_empty_p (blocks))
2922 find_many_sub_basic_blocks (blocks);
2923
2924 /* We have to schedule to make sure alignment is ok. */
2925 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2926
2927 /* The hints need to be scheduled, so call it again. */
2928 schedule_insns ();
2fbdf9ef 2929 df_finish_pass (true);
5a976006 2930
2931 insert_hbrp ();
2932
2933 pad_bb ();
2934
8f1d58ad 2935 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2936 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2937 {
2938 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2939	 between its branch label and the branch.  We don't move the
2940 label because GCC expects it at the beginning of the block. */
2941 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2942 rtx label_ref = XVECEXP (unspec, 0, 0);
2943 rtx label = XEXP (label_ref, 0);
2944 rtx branch;
2945 int offset = 0;
2946 for (branch = NEXT_INSN (label);
2947 !JUMP_P (branch) && !CALL_P (branch);
2948 branch = NEXT_INSN (branch))
2949 if (NONJUMP_INSN_P (branch))
2950 offset += get_attr_length (branch);
2951 if (offset > 0)
2952 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2953 }
5a976006 2954
8a42230a 2955 spu_var_tracking ();
5a976006 2956
2957 free_bb_for_insn ();
2958
2959 in_spu_reorg = 0;
644459d0 2960}
2961\f
2962
2963/* Insn scheduling routines, primarily for dual issue. */
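/* Background note (added for clarity): in get_pipe below, pipe 0
   corresponds to the SPU's even (arithmetic) pipeline and pipe 1 to the
   odd pipeline (loads/stores, shuffles, branches, hints); a cycle can
   dual issue only when it gets one insn of each kind, with the even-pipe
   insn at the 8-byte-aligned address of the pair.  */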
2964static int
2965spu_sched_issue_rate (void)
2966{
2967 return 2;
2968}
2969
2970static int
5a976006 2971uses_ls_unit(rtx insn)
644459d0 2972{
5a976006 2973 rtx set = single_set (insn);
2974 if (set != 0
2975 && (GET_CODE (SET_DEST (set)) == MEM
2976 || GET_CODE (SET_SRC (set)) == MEM))
2977 return 1;
2978 return 0;
644459d0 2979}
2980
2981static int
2982get_pipe (rtx insn)
2983{
2984 enum attr_type t;
2985 /* Handle inline asm */
2986 if (INSN_CODE (insn) == -1)
2987 return -1;
2988 t = get_attr_type (insn);
2989 switch (t)
2990 {
2991 case TYPE_CONVERT:
2992 return -2;
2993 case TYPE_MULTI0:
2994 return -1;
2995
2996 case TYPE_FX2:
2997 case TYPE_FX3:
2998 case TYPE_SPR:
2999 case TYPE_NOP:
3000 case TYPE_FXB:
3001 case TYPE_FPD:
3002 case TYPE_FP6:
3003 case TYPE_FP7:
644459d0 3004 return 0;
3005
3006 case TYPE_LNOP:
3007 case TYPE_SHUF:
3008 case TYPE_LOAD:
3009 case TYPE_STORE:
3010 case TYPE_BR:
3011 case TYPE_MULTI1:
3012 case TYPE_HBR:
5a976006 3013 case TYPE_IPREFETCH:
644459d0 3014 return 1;
3015 default:
3016 abort ();
3017 }
3018}
3019
5a976006 3020
3021/* haifa-sched.c has a static variable that keeps track of the current
3022 cycle. It is passed to spu_sched_reorder, and we record it here for
3023 use by spu_sched_variable_issue. It won't be accurate if the
3024   scheduler updates its clock_var between the two calls.  */
3025static int clock_var;
3026
3027/* This is used to keep track of insn alignment. Set to 0 at the
3028 beginning of each block and increased by the "length" attr of each
3029 insn scheduled. */
3030static int spu_sched_length;
3031
3032/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3033 ready list appropriately in spu_sched_reorder(). */
3034static int pipe0_clock;
3035static int pipe1_clock;
3036
3037static int prev_clock_var;
3038
3039static int prev_priority;
3040
3041/* The SPU needs to load the next ilb sometime during the execution of
3042 the previous ilb. There is a potential conflict if every cycle has a
3043 load or store. To avoid the conflict we make sure the load/store
3044 unit is free for at least one cycle during the execution of insns in
3045 the previous ilb. */
3046static int spu_ls_first;
3047static int prev_ls_clock;
3048
3049static void
3050spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3051 int max_ready ATTRIBUTE_UNUSED)
3052{
3053 spu_sched_length = 0;
3054}
3055
3056static void
3057spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3058 int max_ready ATTRIBUTE_UNUSED)
3059{
3060 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3061 {
3062 /* When any block might be at least 8-byte aligned, assume they
3063 will all be at least 8-byte aligned to make sure dual issue
3064 works out correctly. */
3065 spu_sched_length = 0;
3066 }
3067 spu_ls_first = INT_MAX;
3068 clock_var = -1;
3069 prev_ls_clock = -1;
3070 pipe0_clock = -1;
3071 pipe1_clock = -1;
3072 prev_clock_var = -1;
3073 prev_priority = -1;
3074}
3075
644459d0 3076static int
5a976006 3077spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3078 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3079{
5a976006 3080 int len;
3081 int p;
644459d0 3082 if (GET_CODE (PATTERN (insn)) == USE
3083 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3084 || (len = get_attr_length (insn)) == 0)
3085 return more;
3086
3087 spu_sched_length += len;
3088
3089 /* Reset on inline asm */
3090 if (INSN_CODE (insn) == -1)
3091 {
3092 spu_ls_first = INT_MAX;
3093 pipe0_clock = -1;
3094 pipe1_clock = -1;
3095 return 0;
3096 }
3097 p = get_pipe (insn);
3098 if (p == 0)
3099 pipe0_clock = clock_var;
3100 else
3101 pipe1_clock = clock_var;
3102
3103 if (in_spu_reorg)
3104 {
3105 if (clock_var - prev_ls_clock > 1
3106 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3107 spu_ls_first = INT_MAX;
3108 if (uses_ls_unit (insn))
3109 {
3110 if (spu_ls_first == INT_MAX)
3111 spu_ls_first = spu_sched_length;
3112 prev_ls_clock = clock_var;
3113 }
3114
3115 /* The scheduler hasn't inserted the nop, but we will later on.
3116 Include those nops in spu_sched_length. */
3117 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3118 spu_sched_length += 4;
3119 prev_clock_var = clock_var;
3120
3121 /* more is -1 when called from spu_sched_reorder for new insns
3122 that don't have INSN_PRIORITY */
3123 if (more >= 0)
3124 prev_priority = INSN_PRIORITY (insn);
3125 }
3126
3127  /* Always try issuing more insns.  spu_sched_reorder will decide
3128 when the cycle should be advanced. */
3129 return 1;
3130}
3131
3132/* This function is called for both TARGET_SCHED_REORDER and
3133 TARGET_SCHED_REORDER2. */
3134static int
3135spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3136 rtx *ready, int *nreadyp, int clock)
3137{
3138 int i, nready = *nreadyp;
3139 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3140 rtx insn;
3141
3142 clock_var = clock;
3143
3144 if (nready <= 0 || pipe1_clock >= clock)
3145 return 0;
3146
3147 /* Find any rtl insns that don't generate assembly insns and schedule
3148 them first. */
3149 for (i = nready - 1; i >= 0; i--)
3150 {
3151 insn = ready[i];
3152 if (INSN_CODE (insn) == -1
3153 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3154 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3155 {
3156 ready[i] = ready[nready - 1];
3157 ready[nready - 1] = insn;
3158 return 1;
3159 }
3160 }
3161
3162 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3163 for (i = 0; i < nready; i++)
3164 if (INSN_CODE (ready[i]) != -1)
3165 {
3166 insn = ready[i];
3167 switch (get_attr_type (insn))
3168 {
3169 default:
3170 case TYPE_MULTI0:
3171 case TYPE_CONVERT:
3172 case TYPE_FX2:
3173 case TYPE_FX3:
3174 case TYPE_SPR:
3175 case TYPE_NOP:
3176 case TYPE_FXB:
3177 case TYPE_FPD:
3178 case TYPE_FP6:
3179 case TYPE_FP7:
3180 pipe_0 = i;
3181 break;
3182 case TYPE_LOAD:
3183 case TYPE_STORE:
3184 pipe_ls = i;
3185 case TYPE_LNOP:
3186 case TYPE_SHUF:
3187 case TYPE_BR:
3188 case TYPE_MULTI1:
3189 case TYPE_HBR:
3190 pipe_1 = i;
3191 break;
3192 case TYPE_IPREFETCH:
3193 pipe_hbrp = i;
3194 break;
3195 }
3196 }
3197
3198 /* In the first scheduling phase, schedule loads and stores together
3199 to increase the chance they will get merged during postreload CSE. */
3200 if (!reload_completed && pipe_ls >= 0)
3201 {
3202 insn = ready[pipe_ls];
3203 ready[pipe_ls] = ready[nready - 1];
3204 ready[nready - 1] = insn;
3205 return 1;
3206 }
3207
3208 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3209 if (pipe_hbrp >= 0)
3210 pipe_1 = pipe_hbrp;
3211
3212 /* When we have loads/stores in every cycle of the last 15 insns and
3213 we are about to schedule another load/store, emit an hbrp insn
3214 instead. */
3215 if (in_spu_reorg
3216 && spu_sched_length - spu_ls_first >= 4 * 15
3217 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3218 {
3219 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3220 recog_memoized (insn);
3221 if (pipe0_clock < clock)
3222 PUT_MODE (insn, TImode);
3223 spu_sched_variable_issue (file, verbose, insn, -1);
3224 return 0;
3225 }
3226
3227 /* In general, we want to emit nops to increase dual issue, but dual
3228 issue isn't faster when one of the insns could be scheduled later
3229     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
3230 make a good guess, but it isn't perfect so -mdual-nops=n can be
3231     used to affect it.  */
3232 if (in_spu_reorg && spu_dual_nops < 10)
3233 {
3234      /* When we are at an even address and we are not issuing nops to
3235 improve scheduling then we need to advance the cycle. */
3236 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3237 && (spu_dual_nops == 0
3238 || (pipe_1 != -1
3239 && prev_priority >
3240 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3241 return 0;
3242
3243 /* When at an odd address, schedule the highest priority insn
3244 without considering pipeline. */
3245 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3246 && (spu_dual_nops == 0
3247 || (prev_priority >
3248 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3249 return 1;
3250 }
3251
3252
3253 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3254 pipe0 insn in the ready list, schedule it. */
3255 if (pipe0_clock < clock && pipe_0 >= 0)
3256 schedule_i = pipe_0;
3257
3258 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3259 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3260 else
3261 schedule_i = pipe_1;
3262
3263 if (schedule_i > -1)
3264 {
3265 insn = ready[schedule_i];
3266 ready[schedule_i] = ready[nready - 1];
3267 ready[nready - 1] = insn;
3268 return 1;
3269 }
3270 return 0;
644459d0 3271}
3272
3273/* INSN is dependent on DEP_INSN. */
3274static int
5a976006 3275spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3276{
5a976006 3277 rtx set;
3278
3279 /* The blockage pattern is used to prevent instructions from being
3280 moved across it and has no cost. */
3281 if (INSN_CODE (insn) == CODE_FOR_blockage
3282 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3283 return 0;
3284
9d98604b 3285 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3286 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3287 return 0;
3288
3289 /* Make sure hbrps are spread out. */
3290 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3291 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3292 return 8;
3293
3294 /* Make sure hints and hbrps are 2 cycles apart. */
3295 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3296 || INSN_CODE (insn) == CODE_FOR_hbr)
3297 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3298 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3299 return 2;
3300
3301 /* An hbrp has no real dependency on other insns. */
3302 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3303 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3304 return 0;
3305
3306 /* Assuming that it is unlikely an argument register will be used in
3307 the first cycle of the called function, we reduce the cost for
3308 slightly better scheduling of dep_insn. When not hinted, the
3309 mispredicted branch would hide the cost as well. */
3310 if (CALL_P (insn))
3311 {
3312 rtx target = get_branch_target (insn);
3313 if (GET_CODE (target) != REG || !set_of (target, insn))
3314 return cost - 2;
3315 return cost;
3316 }
3317
3318 /* And when returning from a function, let's assume the return values
3319 are completed sooner too. */
3320 if (CALL_P (dep_insn))
644459d0 3321 return cost - 2;
5a976006 3322
3323  /* Make sure an instruction that loads from the back chain is scheduled
3324 away from the return instruction so a hint is more likely to get
3325 issued. */
3326 if (INSN_CODE (insn) == CODE_FOR__return
3327 && (set = single_set (dep_insn))
3328 && GET_CODE (SET_DEST (set)) == REG
3329 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3330 return 20;
3331
644459d0 3332 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3333 scheduler makes every insn in a block anti-dependent on the final
3334 jump_insn. We adjust here so higher cost insns will get scheduled
3335 earlier. */
5a976006 3336 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3337 return insn_cost (dep_insn) - 3;
5a976006 3338
644459d0 3339 return cost;
3340}
3341\f
3342/* Create a CONST_DOUBLE from a string. */
842ae815 3343rtx
644459d0 3344spu_float_const (const char *string, enum machine_mode mode)
3345{
3346 REAL_VALUE_TYPE value;
3347 value = REAL_VALUE_ATOF (string, mode);
3348 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3349}
3350
644459d0 3351int
3352spu_constant_address_p (rtx x)
3353{
3354 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3355 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3356 || GET_CODE (x) == HIGH);
3357}
3358
3359static enum spu_immediate
3360which_immediate_load (HOST_WIDE_INT val)
3361{
3362 gcc_assert (val == trunc_int_for_mode (val, SImode));
3363
3364 if (val >= -0x8000 && val <= 0x7fff)
3365 return SPU_IL;
3366 if (val >= 0 && val <= 0x3ffff)
3367 return SPU_ILA;
3368 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3369 return SPU_ILH;
3370 if ((val & 0xffff) == 0)
3371 return SPU_ILHU;
3372
3373 return SPU_NONE;
3374}
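
/* Illustrative examples: 0x1234 (and anything in [-0x8000, 0x7fff]) is
   SPU_IL; 0x20000 is SPU_ILA; 0x00050005 (identical halfwords) is
   SPU_ILH; 0x12340000 (low halfword zero) is SPU_ILHU; 0x12345678 needs
   two instructions and yields SPU_NONE.  */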
3375
dea01258 3376/* Return true when OP can be loaded by one of the il instructions, or
3377 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3378int
3379immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3380{
3381 if (CONSTANT_P (op))
3382 {
3383 enum immediate_class c = classify_immediate (op, mode);
5df189be 3384 return c == IC_IL1 || c == IC_IL1s
3072d30e 3385 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3386 }
3387 return 0;
3388}
3389
3390/* Return true if the first SIZE bytes of ARR are a constant that can be
3391 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3392 represent the size and offset of the instruction to use. */
3393static int
3394cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3395{
3396 int cpat, run, i, start;
3397 cpat = 1;
3398 run = 0;
3399 start = -1;
3400 for (i = 0; i < size && cpat; i++)
3401 if (arr[i] != i+16)
3402 {
3403 if (!run)
3404 {
3405 start = i;
3406 if (arr[i] == 3)
3407 run = 1;
3408 else if (arr[i] == 2 && arr[i+1] == 3)
3409 run = 2;
3410 else if (arr[i] == 0)
3411 {
3412 while (arr[i+run] == run && i+run < 16)
3413 run++;
3414 if (run != 4 && run != 8)
3415 cpat = 0;
3416 }
3417 else
3418 cpat = 0;
3419 if ((i & (run-1)) != 0)
3420 cpat = 0;
3421 i += run;
3422 }
3423 else
3424 cpat = 0;
3425 }
b01a6dc3 3426 if (cpat && (run || size < 16))
dea01258 3427 {
3428 if (run == 0)
3429 run = 1;
3430 if (prun)
3431 *prun = run;
3432 if (pstart)
3433 *pstart = start == -1 ? 16-run : start;
3434 return 1;
3435 }
3436 return 0;
3437}
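
/* Illustrative example for SIZE == 16: the byte array

     10 11 12 13 14 15 16 17 00 01 02 03 1c 1d 1e 1f

   matches the identity pattern 0x10..0x1f except for the 4-byte run
   00 01 02 03 at offset 8, so cpat_info returns 1 with *prun == 4 and
   *pstart == 8 -- the kind of control word a cwd instruction produces
   for a word insert at byte 8.  */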
3438
3439/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3440 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3441static enum immediate_class
3442classify_immediate (rtx op, enum machine_mode mode)
644459d0 3443{
3444 HOST_WIDE_INT val;
3445 unsigned char arr[16];
5df189be 3446 int i, j, repeated, fsmbi, repeat;
dea01258 3447
3448 gcc_assert (CONSTANT_P (op));
3449
644459d0 3450 if (GET_MODE (op) != VOIDmode)
3451 mode = GET_MODE (op);
3452
dea01258 3453 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3454 if (!flag_pic
3455 && mode == V4SImode
dea01258 3456 && GET_CODE (op) == CONST_VECTOR
3457 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3458 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3459 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3460 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3461 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3462 op = CONST_VECTOR_ELT (op, 0);
644459d0 3463
dea01258 3464 switch (GET_CODE (op))
3465 {
3466 case SYMBOL_REF:
3467 case LABEL_REF:
3468 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3469
dea01258 3470 case CONST:
0cfc65d4 3471 /* We can never know if the resulting address fits in 18 bits and can be
3472 loaded with ila. For now, assume the address will not overflow if
3473 the displacement is "small" (fits 'K' constraint). */
3474 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3475 {
3476 rtx sym = XEXP (XEXP (op, 0), 0);
3477 rtx cst = XEXP (XEXP (op, 0), 1);
3478
3479 if (GET_CODE (sym) == SYMBOL_REF
3480 && GET_CODE (cst) == CONST_INT
3481 && satisfies_constraint_K (cst))
3482 return IC_IL1s;
3483 }
3484 return IC_IL2s;
644459d0 3485
dea01258 3486 case HIGH:
3487 return IC_IL1s;
3488
3489 case CONST_VECTOR:
3490 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3491 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3492 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3493 return IC_POOL;
3494 /* Fall through. */
3495
3496 case CONST_INT:
3497 case CONST_DOUBLE:
3498 constant_to_array (mode, op, arr);
644459d0 3499
dea01258 3500 /* Check that each 4-byte slot is identical. */
3501 repeated = 1;
3502 for (i = 4; i < 16; i += 4)
3503 for (j = 0; j < 4; j++)
3504 if (arr[j] != arr[i + j])
3505 repeated = 0;
3506
3507 if (repeated)
3508 {
3509 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3510 val = trunc_int_for_mode (val, SImode);
3511
3512 if (which_immediate_load (val) != SPU_NONE)
3513 return IC_IL1;
3514 }
3515
3516 /* Any mode of 2 bytes or smaller can be loaded with an il
3517 instruction. */
3518 gcc_assert (GET_MODE_SIZE (mode) > 2);
3519
3520 fsmbi = 1;
5df189be 3521 repeat = 0;
dea01258 3522 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3523 if (arr[i] != 0 && repeat == 0)
3524 repeat = arr[i];
3525 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3526 fsmbi = 0;
3527 if (fsmbi)
5df189be 3528 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3529
3530 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3531 return IC_CPAT;
3532
3533 if (repeated)
3534 return IC_IL2;
3535
3536 return IC_POOL;
3537 default:
3538 break;
3539 }
3540 gcc_unreachable ();
644459d0 3541}
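
/* Some illustrative classifications: a V16QI constant with a single 0xff
   byte and the rest zero is IC_FSMBI; the same mask with 0x7f instead of
   0xff is IC_FSMBI2 (fsmbi plus an AND, see spu_split_immediate);
   0x00050005 repeated in every word is IC_IL1 via SPU_ILH; an arbitrary
   128-bit pattern ends up as IC_POOL.  */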
3542
3543static enum spu_immediate
3544which_logical_immediate (HOST_WIDE_INT val)
3545{
3546 gcc_assert (val == trunc_int_for_mode (val, SImode));
3547
3548 if (val >= -0x200 && val <= 0x1ff)
3549 return SPU_ORI;
3550 if (val >= 0 && val <= 0xffff)
3551 return SPU_IOHL;
3552 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3553 {
3554 val = trunc_int_for_mode (val, HImode);
3555 if (val >= -0x200 && val <= 0x1ff)
3556 return SPU_ORHI;
3557 if ((val & 0xff) == ((val >> 8) & 0xff))
3558 {
3559 val = trunc_int_for_mode (val, QImode);
3560 if (val >= -0x200 && val <= 0x1ff)
3561 return SPU_ORBI;
3562 }
3563 }
3564 return SPU_NONE;
3565}
3566
5df189be 3567/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3568 CONST_DOUBLEs. */
3569static int
3570const_vector_immediate_p (rtx x)
3571{
3572 int i;
3573 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3574 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3575 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3576 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3577 return 0;
3578 return 1;
3579}
3580
644459d0 3581int
3582logical_immediate_p (rtx op, enum machine_mode mode)
3583{
3584 HOST_WIDE_INT val;
3585 unsigned char arr[16];
3586 int i, j;
3587
3588 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3589 || GET_CODE (op) == CONST_VECTOR);
3590
5df189be 3591 if (GET_CODE (op) == CONST_VECTOR
3592 && !const_vector_immediate_p (op))
3593 return 0;
3594
644459d0 3595 if (GET_MODE (op) != VOIDmode)
3596 mode = GET_MODE (op);
3597
3598 constant_to_array (mode, op, arr);
3599
3600 /* Check that bytes are repeated. */
3601 for (i = 4; i < 16; i += 4)
3602 for (j = 0; j < 4; j++)
3603 if (arr[j] != arr[i + j])
3604 return 0;
3605
3606 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3607 val = trunc_int_for_mode (val, SImode);
3608
3609 i = which_logical_immediate (val);
3610 return i != SPU_NONE && i != SPU_IOHL;
3611}
3612
3613int
3614iohl_immediate_p (rtx op, enum machine_mode mode)
3615{
3616 HOST_WIDE_INT val;
3617 unsigned char arr[16];
3618 int i, j;
3619
3620 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3621 || GET_CODE (op) == CONST_VECTOR);
3622
5df189be 3623 if (GET_CODE (op) == CONST_VECTOR
3624 && !const_vector_immediate_p (op))
3625 return 0;
3626
644459d0 3627 if (GET_MODE (op) != VOIDmode)
3628 mode = GET_MODE (op);
3629
3630 constant_to_array (mode, op, arr);
3631
3632 /* Check that bytes are repeated. */
3633 for (i = 4; i < 16; i += 4)
3634 for (j = 0; j < 4; j++)
3635 if (arr[j] != arr[i + j])
3636 return 0;
3637
3638 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3639 val = trunc_int_for_mode (val, SImode);
3640
3641 return val >= 0 && val <= 0xffff;
3642}
3643
3644int
3645arith_immediate_p (rtx op, enum machine_mode mode,
3646 HOST_WIDE_INT low, HOST_WIDE_INT high)
3647{
3648 HOST_WIDE_INT val;
3649 unsigned char arr[16];
3650 int bytes, i, j;
3651
3652 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3653 || GET_CODE (op) == CONST_VECTOR);
3654
5df189be 3655 if (GET_CODE (op) == CONST_VECTOR
3656 && !const_vector_immediate_p (op))
3657 return 0;
3658
644459d0 3659 if (GET_MODE (op) != VOIDmode)
3660 mode = GET_MODE (op);
3661
3662 constant_to_array (mode, op, arr);
3663
3664 if (VECTOR_MODE_P (mode))
3665 mode = GET_MODE_INNER (mode);
3666
3667 bytes = GET_MODE_SIZE (mode);
3668 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3669
3670 /* Check that bytes are repeated. */
3671 for (i = bytes; i < 16; i += bytes)
3672 for (j = 0; j < bytes; j++)
3673 if (arr[j] != arr[i + j])
3674 return 0;
3675
3676 val = arr[0];
3677 for (j = 1; j < bytes; j++)
3678 val = (val << 8) | arr[j];
3679
3680 val = trunc_int_for_mode (val, mode);
3681
3682 return val >= low && val <= high;
3683}
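
/* Illustrative example: a V8HImode vector with every element equal to 12
   produces the repeating byte pattern 00 0c 00 0c ..., so the repetition
   check passes, VAL becomes 12 after the HImode truncation, and the
   function returns nonzero exactly when LOW <= 12 <= HIGH.  */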
3684
56c7bfc2 3685/* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
 3686   OP == 2^SCALE with LOW <= SCALE <= HIGH.  When OP is a vector,
 3687   all entries must be the same.  */
3688bool
3689exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3690{
3691 enum machine_mode int_mode;
3692 HOST_WIDE_INT val;
3693 unsigned char arr[16];
3694 int bytes, i, j;
3695
3696 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3697 || GET_CODE (op) == CONST_VECTOR);
3698
3699 if (GET_CODE (op) == CONST_VECTOR
3700 && !const_vector_immediate_p (op))
3701 return 0;
3702
3703 if (GET_MODE (op) != VOIDmode)
3704 mode = GET_MODE (op);
3705
3706 constant_to_array (mode, op, arr);
3707
3708 if (VECTOR_MODE_P (mode))
3709 mode = GET_MODE_INNER (mode);
3710
3711 bytes = GET_MODE_SIZE (mode);
3712 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3713
3714 /* Check that bytes are repeated. */
3715 for (i = bytes; i < 16; i += bytes)
3716 for (j = 0; j < bytes; j++)
3717 if (arr[j] != arr[i + j])
3718 return 0;
3719
3720 val = arr[0];
3721 for (j = 1; j < bytes; j++)
3722 val = (val << 8) | arr[j];
3723
3724 val = trunc_int_for_mode (val, int_mode);
3725
 3726  /* Currently, we only handle SFmode.  */
3727 gcc_assert (mode == SFmode);
3728 if (mode == SFmode)
3729 {
3730 int exp = (val >> 23) - 127;
3731 return val > 0 && (val & 0x007fffff) == 0
3732 && exp >= low && exp <= high;
3733 }
3734 return FALSE;
3735}
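
/* Illustrative example: the SFmode constant 2.0f has the bit pattern
   0x40000000, so the mantissa bits are zero and the biased exponent is
   128, giving a scale of 1; the function returns true whenever
   LOW <= 1 <= HIGH.  A constant such as 3.0f (0x40400000) has nonzero
   mantissa bits and is rejected.  */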
3736
6cf5579e 3737/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3738
3739static int
3740ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3741{
3742 rtx x = *px;
3743 tree decl;
3744
3745 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3746 {
3747 rtx plus = XEXP (x, 0);
3748 rtx op0 = XEXP (plus, 0);
3749 rtx op1 = XEXP (plus, 1);
3750 if (GET_CODE (op1) == CONST_INT)
3751 x = op0;
3752 }
3753
3754 return (GET_CODE (x) == SYMBOL_REF
3755 && (decl = SYMBOL_REF_DECL (x)) != 0
3756 && TREE_CODE (decl) == VAR_DECL
3757 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3758}
3759
644459d0 3760/* We accept:
5b865faf 3761 - any 32-bit constant (SImode, SFmode)
644459d0 3762 - any constant that can be generated with fsmbi (any mode)
5b865faf 3763 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3764 (DImode, DFmode)
5b865faf 3765 - a 128-bit constant where the four 32-bit words match. */
ca316360 3766bool
3767spu_legitimate_constant_p (enum machine_mode mode, rtx x)
644459d0 3768{
5df189be 3769 if (GET_CODE (x) == HIGH)
3770 x = XEXP (x, 0);
6cf5579e 3771
3772 /* Reject any __ea qualified reference. These can't appear in
3773 instructions but must be forced to the constant pool. */
3774 if (for_each_rtx (&x, ea_symbol_ref, 0))
3775 return 0;
3776
644459d0 3777 /* V4SI with all identical symbols is valid. */
5df189be 3778 if (!flag_pic
ca316360 3779 && mode == V4SImode
644459d0 3780 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3781 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3782 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3783 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3784 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3785 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3786
5df189be 3787 if (GET_CODE (x) == CONST_VECTOR
3788 && !const_vector_immediate_p (x))
3789 return 0;
644459d0 3790 return 1;
3791}
3792
3793/* Valid addresses are:
3794 - symbol_ref, label_ref, const
3795 - reg
9d98604b 3796 - reg + const_int, where const_int is 16 byte aligned
644459d0 3797 - reg + reg, alignment doesn't matter
3798 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3799 ignore the 4 least significant bits of the const. We only care about
3800 16 byte modes because the expand phase will change all smaller MEM
3801 references to TImode. */
3802static bool
3803spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3804 rtx x, bool reg_ok_strict)
644459d0 3805{
9d98604b 3806 int aligned = GET_MODE_SIZE (mode) >= 16;
3807 if (aligned
3808 && GET_CODE (x) == AND
644459d0 3809 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3810 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3811 x = XEXP (x, 0);
3812 switch (GET_CODE (x))
3813 {
644459d0 3814 case LABEL_REF:
6cf5579e 3815 return !TARGET_LARGE_MEM;
3816
9d98604b 3817 case SYMBOL_REF:
644459d0 3818 case CONST:
6cf5579e 3819 /* Keep __ea references until reload so that spu_expand_mov can see them
3820 in MEMs. */
3821 if (ea_symbol_ref (&x, 0))
3822 return !reload_in_progress && !reload_completed;
9d98604b 3823 return !TARGET_LARGE_MEM;
644459d0 3824
3825 case CONST_INT:
3826 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3827
3828 case SUBREG:
3829 x = XEXP (x, 0);
9d98604b 3830      if (!REG_P (x))
 3831	return 0;
644459d0 3832
3833 case REG:
3834 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3835
3836 case PLUS:
3837 case LO_SUM:
3838 {
3839 rtx op0 = XEXP (x, 0);
3840 rtx op1 = XEXP (x, 1);
3841 if (GET_CODE (op0) == SUBREG)
3842 op0 = XEXP (op0, 0);
3843 if (GET_CODE (op1) == SUBREG)
3844 op1 = XEXP (op1, 0);
644459d0 3845 if (GET_CODE (op0) == REG
3846 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3847 && GET_CODE (op1) == CONST_INT
fa695424 3848 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3849 /* If virtual registers are involved, the displacement will
3850 change later on anyway, so checking would be premature.
3851 Reload will make sure the final displacement after
3852 register elimination is OK. */
3853 || op0 == arg_pointer_rtx
3854 || op0 == frame_pointer_rtx
3855 || op0 == virtual_stack_vars_rtx)
9d98604b 3856 && (!aligned || (INTVAL (op1) & 15) == 0))
3857 return TRUE;
644459d0 3858 if (GET_CODE (op0) == REG
3859 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3860 && GET_CODE (op1) == REG
3861 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3862 return TRUE;
644459d0 3863 }
3864 break;
3865
3866 default:
3867 break;
3868 }
9d98604b 3869 return FALSE;
644459d0 3870}
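
/* Illustrative examples of the rules above: (reg) and (plus reg reg) are
   always accepted; (plus reg (const_int 32)) is accepted even for a
   16-byte mode because the offset is a multiple of 16; for a 16-byte mode
   (plus reg (const_int 4)) is normally rejected (unless the base is one of
   the virtual registers handled specially), since lqd/stqd would silently
   drop the low 4 bits of the displacement.  */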
3871
6cf5579e 3872/* Like spu_legitimate_address_p, except with named addresses. */
3873static bool
3874spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3875 bool reg_ok_strict, addr_space_t as)
3876{
3877 if (as == ADDR_SPACE_EA)
3878 return (REG_P (x) && (GET_MODE (x) == EAmode));
3879
3880 else if (as != ADDR_SPACE_GENERIC)
3881 gcc_unreachable ();
3882
3883 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3884}
3885
644459d0 3886/* When the address is reg + const_int, force the const_int into a
fa7637bd 3887 register. */
644459d0 3888rtx
3889spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3890 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3891{
3892 rtx op0, op1;
3893 /* Make sure both operands are registers. */
3894 if (GET_CODE (x) == PLUS)
3895 {
3896 op0 = XEXP (x, 0);
3897 op1 = XEXP (x, 1);
3898 if (ALIGNED_SYMBOL_REF_P (op0))
3899 {
3900 op0 = force_reg (Pmode, op0);
3901 mark_reg_pointer (op0, 128);
3902 }
3903 else if (GET_CODE (op0) != REG)
3904 op0 = force_reg (Pmode, op0);
3905 if (ALIGNED_SYMBOL_REF_P (op1))
3906 {
3907 op1 = force_reg (Pmode, op1);
3908 mark_reg_pointer (op1, 128);
3909 }
3910 else if (GET_CODE (op1) != REG)
3911 op1 = force_reg (Pmode, op1);
3912 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3913 }
41e3a0c7 3914 return x;
644459d0 3915}
3916
6cf5579e 3917/* Like spu_legitimate_address, except with named address support. */
3918static rtx
3919spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3920 addr_space_t as)
3921{
3922 if (as != ADDR_SPACE_GENERIC)
3923 return x;
3924
3925 return spu_legitimize_address (x, oldx, mode);
3926}
3927
fa695424 3928/* Reload reg + const_int for out-of-range displacements. */
3929rtx
3930spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3931 int opnum, int type)
3932{
3933 bool removed_and = false;
3934
3935 if (GET_CODE (ad) == AND
3936 && CONST_INT_P (XEXP (ad, 1))
3937 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3938 {
3939 ad = XEXP (ad, 0);
3940 removed_and = true;
3941 }
3942
3943 if (GET_CODE (ad) == PLUS
3944 && REG_P (XEXP (ad, 0))
3945 && CONST_INT_P (XEXP (ad, 1))
3946 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3947 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3948 {
3949 /* Unshare the sum. */
3950 ad = copy_rtx (ad);
3951
3952 /* Reload the displacement. */
3953 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3954 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3955 opnum, (enum reload_type) type);
3956
3957 /* Add back AND for alignment if we stripped it. */
3958 if (removed_and)
3959 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3960
3961 return ad;
3962 }
3963
3964 return NULL_RTX;
3965}
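
/* Illustrative example: for an address like (plus (reg) (const_int 0x3000))
   the displacement lies outside the legal -0x2000 .. 0x1fff range, so the
   constant is pushed into its own reload and the access becomes a
   register + register (x-form) address, which has no displacement
   restriction.  */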
3966
644459d0 3967/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3968 struct attribute_spec.handler. */
3969static tree
3970spu_handle_fndecl_attribute (tree * node,
3971 tree name,
3972 tree args ATTRIBUTE_UNUSED,
3973 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3974{
3975 if (TREE_CODE (*node) != FUNCTION_DECL)
3976 {
67a779df 3977 warning (0, "%qE attribute only applies to functions",
3978 name);
644459d0 3979 *no_add_attrs = true;
3980 }
3981
3982 return NULL_TREE;
3983}
3984
3985/* Handle the "vector" attribute. */
3986static tree
3987spu_handle_vector_attribute (tree * node, tree name,
3988 tree args ATTRIBUTE_UNUSED,
3989 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3990{
3991 tree type = *node, result = NULL_TREE;
3992 enum machine_mode mode;
3993 int unsigned_p;
3994
3995 while (POINTER_TYPE_P (type)
3996 || TREE_CODE (type) == FUNCTION_TYPE
3997 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3998 type = TREE_TYPE (type);
3999
4000 mode = TYPE_MODE (type);
4001
4002 unsigned_p = TYPE_UNSIGNED (type);
4003 switch (mode)
4004 {
4005 case DImode:
4006 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
4007 break;
4008 case SImode:
4009 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
4010 break;
4011 case HImode:
4012 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
4013 break;
4014 case QImode:
4015 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
4016 break;
4017 case SFmode:
4018 result = V4SF_type_node;
4019 break;
4020 case DFmode:
4021 result = V2DF_type_node;
4022 break;
4023 default:
4024 break;
4025 }
4026
4027 /* Propagate qualifiers attached to the element type
4028 onto the vector type. */
4029 if (result && result != type && TYPE_QUALS (type))
4030 result = build_qualified_type (result, TYPE_QUALS (type));
4031
4032 *no_add_attrs = true; /* No need to hang on to the attribute. */
4033
4034 if (!result)
67a779df 4035 warning (0, "%qE attribute ignored", name);
644459d0 4036 else
d991e6e8 4037 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 4038
4039 return NULL_TREE;
4040}
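
/* Illustrative usage (assuming the attribute is spelled "vector", as the
   handler name suggests): applied to an "unsigned short" element type the
   resulting type is unsigned_V8HI_type_node, and applied to "float" it is
   V4SF_type_node, i.e. one 128-bit register's worth of elements.  */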
4041
f2b32076 4042/* Return nonzero if FUNC is a naked function. */
644459d0 4043static int
4044spu_naked_function_p (tree func)
4045{
4046 tree a;
4047
4048 if (TREE_CODE (func) != FUNCTION_DECL)
4049 abort ();
4050
4051 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
4052 return a != NULL_TREE;
4053}
4054
4055int
4056spu_initial_elimination_offset (int from, int to)
4057{
4058 int saved_regs_size = spu_saved_regs_size ();
4059 int sp_offset = 0;
abe32cce 4060 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 4061 || get_frame_size () || saved_regs_size)
4062 sp_offset = STACK_POINTER_OFFSET;
4063 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 4064 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 4065 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 4066 return get_frame_size ();
644459d0 4067 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 4068 return sp_offset + crtl->outgoing_args_size
644459d0 4069 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
4070 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4071 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 4072 else
4073 gcc_unreachable ();
644459d0 4074}
4075
4076rtx
fb80456a 4077spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 4078{
4079 enum machine_mode mode = TYPE_MODE (type);
4080 int byte_size = ((mode == BLKmode)
4081 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4082
4083 /* Make sure small structs are left justified in a register. */
4084 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4085 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4086 {
4087 enum machine_mode smode;
4088 rtvec v;
4089 int i;
4090 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4091 int n = byte_size / UNITS_PER_WORD;
4092 v = rtvec_alloc (nregs);
4093 for (i = 0; i < n; i++)
4094 {
4095 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4096 gen_rtx_REG (TImode,
4097 FIRST_RETURN_REGNUM
4098 + i),
4099 GEN_INT (UNITS_PER_WORD * i));
4100 byte_size -= UNITS_PER_WORD;
4101 }
4102
4103 if (n < nregs)
4104 {
4105 if (byte_size < 4)
4106 byte_size = 4;
4107 smode =
4108 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4109 RTVEC_ELT (v, n) =
4110 gen_rtx_EXPR_LIST (VOIDmode,
4111 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4112 GEN_INT (UNITS_PER_WORD * n));
4113 }
4114 return gen_rtx_PARALLEL (mode, v);
4115 }
4116 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4117}
4118
ee9034d4 4119static rtx
39cba157 4120spu_function_arg (cumulative_args_t cum_v,
644459d0 4121 enum machine_mode mode,
ee9034d4 4122 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4123{
39cba157 4124 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
644459d0 4125 int byte_size;
4126
a08c5dd0 4127 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4128 return 0;
4129
4130 byte_size = ((mode == BLKmode)
4131 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4132
4133 /* The ABI does not allow parameters to be passed partially in
 4134     a register and partially on the stack.  */
a08c5dd0 4135 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4136 return 0;
4137
4138 /* Make sure small structs are left justified in a register. */
4139 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4140 && byte_size < UNITS_PER_WORD && byte_size > 0)
4141 {
4142 enum machine_mode smode;
4143 rtx gr_reg;
4144 if (byte_size < 4)
4145 byte_size = 4;
4146 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4147 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4148 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4149 const0_rtx);
4150 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4151 }
4152 else
a08c5dd0 4153 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4154}
4155
ee9034d4 4156static void
39cba157 4157spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
ee9034d4 4158 const_tree type, bool named ATTRIBUTE_UNUSED)
4159{
39cba157 4160 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4161
ee9034d4 4162 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4163 ? 1
4164 : mode == BLKmode
4165 ? ((int_size_in_bytes (type) + 15) / 16)
4166 : mode == VOIDmode
4167 ? 1
4168 : HARD_REGNO_NREGS (cum, mode));
4169}
4170
644459d0 4171/* Variable sized types are passed by reference. */
4172static bool
39cba157 4173spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
644459d0 4174 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4175 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4176{
4177 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4178}
4179\f
4180
4181/* Var args. */
4182
4183/* Create and return the va_list datatype.
4184
4185 On SPU, va_list is an array type equivalent to
4186
4187 typedef struct __va_list_tag
4188 {
4189 void *__args __attribute__((__aligned(16)));
4190 void *__skip __attribute__((__aligned(16)));
4191
4192 } va_list[1];
4193
fa7637bd 4194 where __args points to the arg that will be returned by the next
644459d0 4195 va_arg(), and __skip points to the previous stack frame such that
4196 when __args == __skip we should advance __args by 32 bytes. */
4197static tree
4198spu_build_builtin_va_list (void)
4199{
4200 tree f_args, f_skip, record, type_decl;
4201 bool owp;
4202
4203 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4204
4205 type_decl =
54e46243 4206 build_decl (BUILTINS_LOCATION,
4207 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4208
54e46243 4209 f_args = build_decl (BUILTINS_LOCATION,
4210 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4211 f_skip = build_decl (BUILTINS_LOCATION,
4212 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4213
4214 DECL_FIELD_CONTEXT (f_args) = record;
4215 DECL_ALIGN (f_args) = 128;
4216 DECL_USER_ALIGN (f_args) = 1;
4217
4218 DECL_FIELD_CONTEXT (f_skip) = record;
4219 DECL_ALIGN (f_skip) = 128;
4220 DECL_USER_ALIGN (f_skip) = 1;
4221
bc907808 4222 TYPE_STUB_DECL (record) = type_decl;
644459d0 4223 TYPE_NAME (record) = type_decl;
4224 TYPE_FIELDS (record) = f_args;
1767a056 4225 DECL_CHAIN (f_args) = f_skip;
644459d0 4226
 4227  /* We know this is being padded and we want it that way.  It is an internal
4228 type so hide the warnings from the user. */
4229 owp = warn_padded;
4230 warn_padded = false;
4231
4232 layout_type (record);
4233
4234 warn_padded = owp;
4235
4236 /* The correct type is an array type of one element. */
4237 return build_array_type (record, build_index_type (size_zero_node));
4238}
4239
4240/* Implement va_start by filling the va_list structure VALIST.
4241 NEXTARG points to the first anonymous stack argument.
4242
4243 The following global variables are used to initialize
4244 the va_list structure:
4245
abe32cce 4246 crtl->args.info;
644459d0 4247 the CUMULATIVE_ARGS for this function
4248
abe32cce 4249 crtl->args.arg_offset_rtx:
644459d0 4250 holds the offset of the first anonymous stack argument
4251 (relative to the virtual arg pointer). */
4252
8a58ed0a 4253static void
644459d0 4254spu_va_start (tree valist, rtx nextarg)
4255{
4256 tree f_args, f_skip;
4257 tree args, skip, t;
4258
4259 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4260 f_skip = DECL_CHAIN (f_args);
644459d0 4261
170efcd4 4262 valist = build_simple_mem_ref (valist);
644459d0 4263 args =
4264 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4265 skip =
4266 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4267
4268 /* Find the __args area. */
4269 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4270 if (crtl->args.pretend_args_size > 0)
2cc66f2a 4271 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
75a70cf9 4272 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4273 TREE_SIDE_EFFECTS (t) = 1;
4274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4275
4276 /* Find the __skip area. */
4277 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
2cc66f2a 4278 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4279 - STACK_POINTER_OFFSET));
75a70cf9 4280 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4281 TREE_SIDE_EFFECTS (t) = 1;
4282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4283}
4284
4285/* Gimplify va_arg by updating the va_list structure
4286 VALIST as required to retrieve an argument of type
4287 TYPE, and returning that argument.
4288
4289 ret = va_arg(VALIST, TYPE);
4290
4291 generates code equivalent to:
4292
4293 paddedsize = (sizeof(TYPE) + 15) & -16;
4294 if (VALIST.__args + paddedsize > VALIST.__skip
4295 && VALIST.__args <= VALIST.__skip)
4296 addr = VALIST.__skip + 32;
4297 else
4298 addr = VALIST.__args;
4299 VALIST.__args = addr + paddedsize;
4300 ret = *(TYPE *)addr;
4301 */
4302static tree
75a70cf9 4303spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4304 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4305{
4306 tree f_args, f_skip;
4307 tree args, skip;
4308 HOST_WIDE_INT size, rsize;
2cc66f2a 4309 tree addr, tmp;
644459d0 4310 bool pass_by_reference_p;
4311
4312 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4313 f_skip = DECL_CHAIN (f_args);
644459d0 4314
182cf5a9 4315 valist = build_simple_mem_ref (valist);
644459d0 4316 args =
4317 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4318 skip =
4319 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4320
4321 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4322
4323 /* if an object is dynamically sized, a pointer to it is passed
4324 instead of the object itself. */
27a82950 4325 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4326 false);
644459d0 4327 if (pass_by_reference_p)
4328 type = build_pointer_type (type);
4329 size = int_size_in_bytes (type);
4330 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4331
4332 /* build conditional expression to calculate addr. The expression
4333 will be gimplified later. */
2cc66f2a 4334 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
644459d0 4335 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4336 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4337 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4338 unshare_expr (skip)));
644459d0 4339
4340 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
2cc66f2a 4341 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4342 unshare_expr (args));
644459d0 4343
75a70cf9 4344 gimplify_assign (addr, tmp, pre_p);
644459d0 4345
4346 /* update VALIST.__args */
2cc66f2a 4347 tmp = fold_build_pointer_plus_hwi (addr, rsize);
75a70cf9 4348 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4349
8115f0af 4350 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4351 addr);
644459d0 4352
4353 if (pass_by_reference_p)
4354 addr = build_va_arg_indirect_ref (addr);
4355
4356 return build_va_arg_indirect_ref (addr);
4357}
4358
4359/* Save parameter registers starting with the register that corresponds
 4360   to the first unnamed parameter.  If the first unnamed parameter is
4361 in the stack then save no registers. Set pretend_args_size to the
4362 amount of space needed to save the registers. */
39cba157 4363static void
4364spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
644459d0 4365 tree type, int *pretend_size, int no_rtl)
4366{
4367 if (!no_rtl)
4368 {
4369 rtx tmp;
4370 int regno;
4371 int offset;
39cba157 4372 int ncum = *get_cumulative_args (cum);
644459d0 4373
 4374      /* cum currently points to the last named argument; we want to
4375 start at the next argument. */
39cba157 4376 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
644459d0 4377
4378 offset = -STACK_POINTER_OFFSET;
4379 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4380 {
4381 tmp = gen_frame_mem (V4SImode,
4382 plus_constant (virtual_incoming_args_rtx,
4383 offset));
4384 emit_move_insn (tmp,
4385 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4386 offset += 16;
4387 }
4388 *pretend_size = offset + STACK_POINTER_OFFSET;
4389 }
4390}
4391\f
b2d7ede1 4392static void
644459d0 4393spu_conditional_register_usage (void)
4394{
4395 if (flag_pic)
4396 {
4397 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4398 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4399 }
644459d0 4400}
4401
9d98604b 4402/* This is called any time we inspect the alignment of a register used
 4403   in an address.  */
644459d0 4404static int
9d98604b 4405reg_aligned_for_addr (rtx x)
644459d0 4406{
9d98604b 4407 int regno =
4408 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4409 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4410}
4411
69ced2d6 4412/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4413 into its SYMBOL_REF_FLAGS. */
4414static void
4415spu_encode_section_info (tree decl, rtx rtl, int first)
4416{
4417 default_encode_section_info (decl, rtl, first);
4418
4419 /* If a variable has a forced alignment to < 16 bytes, mark it with
4420 SYMBOL_FLAG_ALIGN1. */
4421 if (TREE_CODE (decl) == VAR_DECL
4422 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4423 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4424}
4425
644459d0 4426/* Return TRUE if we are certain the mem refers to a complete object
4427 which is both 16-byte aligned and padded to a 16-byte boundary. This
4428 would make it safe to store with a single instruction.
4429 We guarantee the alignment and padding for static objects by aligning
4430 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4431 FIXME: We currently cannot guarantee this for objects on the stack
4432 because assign_parm_setup_stack calls assign_stack_local with the
4433 alignment of the parameter mode and in that case the alignment never
4434 gets adjusted by LOCAL_ALIGNMENT. */
4435static int
4436store_with_one_insn_p (rtx mem)
4437{
9d98604b 4438 enum machine_mode mode = GET_MODE (mem);
644459d0 4439 rtx addr = XEXP (mem, 0);
9d98604b 4440 if (mode == BLKmode)
644459d0 4441 return 0;
9d98604b 4442 if (GET_MODE_SIZE (mode) >= 16)
4443 return 1;
644459d0 4444 /* Only static objects. */
4445 if (GET_CODE (addr) == SYMBOL_REF)
4446 {
4447 /* We use the associated declaration to make sure the access is
fa7637bd 4448 referring to the whole object.
851d9296 4449 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4450 if it is necessary. Will there be cases where one exists, and
4451 the other does not? Will there be cases where both exist, but
4452 have different types? */
4453 tree decl = MEM_EXPR (mem);
4454 if (decl
4455 && TREE_CODE (decl) == VAR_DECL
4456 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4457 return 1;
4458 decl = SYMBOL_REF_DECL (addr);
4459 if (decl
4460 && TREE_CODE (decl) == VAR_DECL
4461 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4462 return 1;
4463 }
4464 return 0;
4465}
4466
9d98604b 4467/* Return 1 when the address is not valid for a simple load and store as
4468 required by the '_mov*' patterns. We could make this less strict
4469 for loads, but we prefer mem's to look the same so they are more
4470 likely to be merged. */
4471static int
4472address_needs_split (rtx mem)
4473{
4474 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4475 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4476 || !(store_with_one_insn_p (mem)
4477 || mem_is_padded_component_ref (mem))))
4478 return 1;
4479
4480 return 0;
4481}
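
/* Illustrative example: a 4-byte store to a 16-byte-aligned, padded global
   (store_with_one_insn_p) needs no split, whereas a 1-byte access through
   an arbitrary pointer returns 1 here and is expanded by spu_split_load /
   spu_split_store below into load/rotate or load-modify-store sequences.  */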
4482
6cf5579e 4483static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4484static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4485static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4486
4487/* MEM is known to be an __ea qualified memory access. Emit a call to
4488 fetch the ppu memory to local store, and return its address in local
4489 store. */
4490
4491static void
4492ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4493{
4494 if (is_store)
4495 {
4496 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4497 if (!cache_fetch_dirty)
4498 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4499 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4500 2, ea_addr, EAmode, ndirty, SImode);
4501 }
4502 else
4503 {
4504 if (!cache_fetch)
4505 cache_fetch = init_one_libfunc ("__cache_fetch");
4506 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4507 1, ea_addr, EAmode);
4508 }
4509}
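
/* In C terms the calls emitted above behave roughly like (names taken from
   the init_one_libfunc strings above):

     data_addr = __cache_fetch (ea_addr);                   for loads
     data_addr = __cache_fetch_dirty (ea_addr, n_dirty);    for stores

   where n_dirty is the number of bytes the store will mark dirty.  */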
4510
4511/* Like ea_load_store, but do the cache tag comparison and, for stores,
4512 dirty bit marking, inline.
4513
4514 The cache control data structure is an array of
4515
4516 struct __cache_tag_array
4517 {
4518 unsigned int tag_lo[4];
4519 unsigned int tag_hi[4];
4520 void *data_pointer[4];
4521 int reserved[4];
4522 vector unsigned short dirty_bits[4];
4523 } */
4524
4525static void
4526ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4527{
4528 rtx ea_addr_si;
4529 HOST_WIDE_INT v;
4530 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4531 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4532 rtx index_mask = gen_reg_rtx (SImode);
4533 rtx tag_arr = gen_reg_rtx (Pmode);
4534 rtx splat_mask = gen_reg_rtx (TImode);
4535 rtx splat = gen_reg_rtx (V4SImode);
4536 rtx splat_hi = NULL_RTX;
4537 rtx tag_index = gen_reg_rtx (Pmode);
4538 rtx block_off = gen_reg_rtx (SImode);
4539 rtx tag_addr = gen_reg_rtx (Pmode);
4540 rtx tag = gen_reg_rtx (V4SImode);
4541 rtx cache_tag = gen_reg_rtx (V4SImode);
4542 rtx cache_tag_hi = NULL_RTX;
4543 rtx cache_ptrs = gen_reg_rtx (TImode);
4544 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4545 rtx tag_equal = gen_reg_rtx (V4SImode);
4546 rtx tag_equal_hi = NULL_RTX;
4547 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4548 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4549 rtx eq_index = gen_reg_rtx (SImode);
4550 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4551
4552 if (spu_ea_model != 32)
4553 {
4554 splat_hi = gen_reg_rtx (V4SImode);
4555 cache_tag_hi = gen_reg_rtx (V4SImode);
4556 tag_equal_hi = gen_reg_rtx (V4SImode);
4557 }
4558
4559 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4560 emit_move_insn (tag_arr, tag_arr_sym);
4561 v = 0x0001020300010203LL;
4562 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4563 ea_addr_si = ea_addr;
4564 if (spu_ea_model != 32)
4565 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4566
4567 /* tag_index = ea_addr & (tag_array_size - 128) */
4568 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4569
4570 /* splat ea_addr to all 4 slots. */
4571 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4572 /* Similarly for high 32 bits of ea_addr. */
4573 if (spu_ea_model != 32)
4574 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4575
4576 /* block_off = ea_addr & 127 */
4577 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4578
4579 /* tag_addr = tag_arr + tag_index */
4580 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4581
4582 /* Read cache tags. */
4583 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4584 if (spu_ea_model != 32)
4585 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4586 plus_constant (tag_addr, 16)));
4587
4588 /* tag = ea_addr & -128 */
4589 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4590
4591 /* Read all four cache data pointers. */
4592 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4593 plus_constant (tag_addr, 32)));
4594
4595 /* Compare tags. */
4596 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4597 if (spu_ea_model != 32)
4598 {
4599 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4600 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4601 }
4602
4603 /* At most one of the tags compare equal, so tag_equal has one
4604 32-bit slot set to all 1's, with the other slots all zero.
4605 gbb picks off low bit from each byte in the 128-bit registers,
4606 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4607 we have a hit. */
4608 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4609 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4610
4611 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4612 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4613
 4614  /* This allows us to rotate the corresponding cache data pointer into
 4615     slot 0 (rotating by eq_index mod 16 bytes).  */
4616 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4617 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4618
4619 /* Add block offset to form final data address. */
4620 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4621
4622 /* Check that we did hit. */
4623 hit_label = gen_label_rtx ();
4624 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4625 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4626 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4627 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4628 hit_ref, pc_rtx)));
4629 /* Say that this branch is very likely to happen. */
4630 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4631 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4632
4633 ea_load_store (mem, is_store, ea_addr, data_addr);
4634 cont_label = gen_label_rtx ();
4635 emit_jump_insn (gen_jump (cont_label));
4636 emit_barrier ();
4637
4638 emit_label (hit_label);
4639
4640 if (is_store)
4641 {
4642 HOST_WIDE_INT v_hi;
4643 rtx dirty_bits = gen_reg_rtx (TImode);
4644 rtx dirty_off = gen_reg_rtx (SImode);
4645 rtx dirty_128 = gen_reg_rtx (TImode);
4646 rtx neg_block_off = gen_reg_rtx (SImode);
4647
4648 /* Set up mask with one dirty bit per byte of the mem we are
4649 writing, starting from top bit. */
4650 v_hi = v = -1;
4651 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4652 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4653 {
4654 v_hi = v;
4655 v = 0;
4656 }
4657 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4658
4659 /* Form index into cache dirty_bits. eq_index is one of
4660 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4661 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4662 offset to each of the four dirty_bits elements. */
4663 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4664
4665 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4666
4667 /* Rotate bit mask to proper bit. */
4668 emit_insn (gen_negsi2 (neg_block_off, block_off));
4669 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4670 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4671
4672 /* Or in the new dirty bits. */
4673 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4674
4675 /* Store. */
4676 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4677 }
4678
4679 emit_label (cont_label);
4680}
4681
4682static rtx
4683expand_ea_mem (rtx mem, bool is_store)
4684{
4685 rtx ea_addr;
4686 rtx data_addr = gen_reg_rtx (Pmode);
4687 rtx new_mem;
4688
4689 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4690 if (optimize_size || optimize == 0)
4691 ea_load_store (mem, is_store, ea_addr, data_addr);
4692 else
4693 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4694
4695 if (ea_alias_set == -1)
4696 ea_alias_set = new_alias_set ();
4697
4698 /* We generate a new MEM RTX to refer to the copy of the data
4699 in the cache. We do not copy memory attributes (except the
4700 alignment) from the original MEM, as they may no longer apply
4701 to the cache copy. */
4702 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4703 set_mem_alias_set (new_mem, ea_alias_set);
4704 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4705
4706 return new_mem;
4707}
4708
644459d0 4709int
4710spu_expand_mov (rtx * ops, enum machine_mode mode)
4711{
4712 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4713 {
4714 /* Perform the move in the destination SUBREG's inner mode. */
4715 ops[0] = SUBREG_REG (ops[0]);
4716 mode = GET_MODE (ops[0]);
4717 ops[1] = gen_lowpart_common (mode, ops[1]);
4718 gcc_assert (ops[1]);
4719 }
644459d0 4720
4721 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4722 {
4723 rtx from = SUBREG_REG (ops[1]);
8d72495d 4724 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4725
4726 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4727 && GET_MODE_CLASS (imode) == MODE_INT
4728 && subreg_lowpart_p (ops[1]));
4729
4730 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4731 imode = SImode;
4732 if (imode != GET_MODE (from))
4733 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4734
4735 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4736 {
d6bf3b14 4737 enum insn_code icode = convert_optab_handler (trunc_optab,
4738 mode, imode);
644459d0 4739 emit_insn (GEN_FCN (icode) (ops[0], from));
4740 }
4741 else
4742 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4743 return 1;
4744 }
4745
4746 /* At least one of the operands needs to be a register. */
4747 if ((reload_in_progress | reload_completed) == 0
4748 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4749 {
4750 rtx temp = force_reg (mode, ops[1]);
4751 emit_move_insn (ops[0], temp);
4752 return 1;
4753 }
4754 if (reload_in_progress || reload_completed)
4755 {
dea01258 4756 if (CONSTANT_P (ops[1]))
4757 return spu_split_immediate (ops);
644459d0 4758 return 0;
4759 }
9d98604b 4760
4761 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4762 extend them. */
4763 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4764 {
9d98604b 4765 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4766 if (val != INTVAL (ops[1]))
644459d0 4767 {
9d98604b 4768 emit_move_insn (ops[0], GEN_INT (val));
4769 return 1;
644459d0 4770 }
4771 }
9d98604b 4772 if (MEM_P (ops[0]))
6cf5579e 4773 {
4774 if (MEM_ADDR_SPACE (ops[0]))
4775 ops[0] = expand_ea_mem (ops[0], true);
4776 return spu_split_store (ops);
4777 }
9d98604b 4778 if (MEM_P (ops[1]))
6cf5579e 4779 {
4780 if (MEM_ADDR_SPACE (ops[1]))
4781 ops[1] = expand_ea_mem (ops[1], false);
4782 return spu_split_load (ops);
4783 }
9d98604b 4784
644459d0 4785 return 0;
4786}
4787
9d98604b 4788static void
4789spu_convert_move (rtx dst, rtx src)
644459d0 4790{
9d98604b 4791 enum machine_mode mode = GET_MODE (dst);
4792 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4793 rtx reg;
4794 gcc_assert (GET_MODE (src) == TImode);
4795 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4796 emit_insn (gen_rtx_SET (VOIDmode, reg,
4797 gen_rtx_TRUNCATE (int_mode,
4798 gen_rtx_LSHIFTRT (TImode, src,
4799 GEN_INT (int_mode == DImode ? 64 : 96)))));
4800 if (int_mode != mode)
4801 {
4802 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4803 emit_move_insn (dst, reg);
4804 }
4805}
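
/* Illustrative example: for an SImode destination the preferred slot is
   bytes 0-3 of the quadword, so the TImode source is shifted right by 96
   bits and truncated, leaving exactly those four bytes in the result; a
   DImode destination uses a 64-bit shift instead.  */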
644459d0 4806
9d98604b 4807/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4808 the address from SRC and SRC+16. Return a REG or CONST_INT that
4809 specifies how many bytes to rotate the loaded registers, plus any
4810 extra from EXTRA_ROTQBY. The address and rotate amounts are
4811 normalized to improve merging of loads and rotate computations. */
4812static rtx
4813spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4814{
4815 rtx addr = XEXP (src, 0);
4816 rtx p0, p1, rot, addr0, addr1;
4817 int rot_amt;
644459d0 4818
4819 rot = 0;
4820 rot_amt = 0;
9d98604b 4821
4822 if (MEM_ALIGN (src) >= 128)
4823 /* Address is already aligned; simply perform a TImode load. */ ;
4824 else if (GET_CODE (addr) == PLUS)
644459d0 4825 {
4826 /* 8 cases:
4827 aligned reg + aligned reg => lqx
4828 aligned reg + unaligned reg => lqx, rotqby
4829 aligned reg + aligned const => lqd
4830 aligned reg + unaligned const => lqd, rotqbyi
4831 unaligned reg + aligned reg => lqx, rotqby
4832 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4833 unaligned reg + aligned const => lqd, rotqby
4834 unaligned reg + unaligned const -> not allowed by legitimate address
4835 */
4836 p0 = XEXP (addr, 0);
4837 p1 = XEXP (addr, 1);
9d98604b 4838 if (!reg_aligned_for_addr (p0))
644459d0 4839 {
9d98604b 4840 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4841 {
9d98604b 4842 rot = gen_reg_rtx (SImode);
4843 emit_insn (gen_addsi3 (rot, p0, p1));
4844 }
4845 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4846 {
4847 if (INTVAL (p1) > 0
4848 && REG_POINTER (p0)
4849 && INTVAL (p1) * BITS_PER_UNIT
4850 < REGNO_POINTER_ALIGN (REGNO (p0)))
4851 {
4852 rot = gen_reg_rtx (SImode);
4853 emit_insn (gen_addsi3 (rot, p0, p1));
4854 addr = p0;
4855 }
4856 else
4857 {
4858 rtx x = gen_reg_rtx (SImode);
4859 emit_move_insn (x, p1);
4860 if (!spu_arith_operand (p1, SImode))
4861 p1 = x;
4862 rot = gen_reg_rtx (SImode);
4863 emit_insn (gen_addsi3 (rot, p0, p1));
4864 addr = gen_rtx_PLUS (Pmode, p0, x);
4865 }
644459d0 4866 }
4867 else
4868 rot = p0;
4869 }
4870 else
4871 {
4872 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4873 {
4874 rot_amt = INTVAL (p1) & 15;
9d98604b 4875 if (INTVAL (p1) & -16)
4876 {
4877 p1 = GEN_INT (INTVAL (p1) & -16);
4878 addr = gen_rtx_PLUS (SImode, p0, p1);
4879 }
4880 else
4881 addr = p0;
644459d0 4882 }
9d98604b 4883 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4884 rot = p1;
4885 }
4886 }
9d98604b 4887 else if (REG_P (addr))
644459d0 4888 {
9d98604b 4889 if (!reg_aligned_for_addr (addr))
644459d0 4890 rot = addr;
4891 }
4892 else if (GET_CODE (addr) == CONST)
4893 {
4894 if (GET_CODE (XEXP (addr, 0)) == PLUS
4895 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4896 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4897 {
4898 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4899 if (rot_amt & -16)
4900 addr = gen_rtx_CONST (Pmode,
4901 gen_rtx_PLUS (Pmode,
4902 XEXP (XEXP (addr, 0), 0),
4903 GEN_INT (rot_amt & -16)));
4904 else
4905 addr = XEXP (XEXP (addr, 0), 0);
4906 }
4907 else
9d98604b 4908 {
4909 rot = gen_reg_rtx (Pmode);
4910 emit_move_insn (rot, addr);
4911 }
644459d0 4912 }
4913 else if (GET_CODE (addr) == CONST_INT)
4914 {
4915 rot_amt = INTVAL (addr);
4916 addr = GEN_INT (rot_amt & -16);
4917 }
4918 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4919 {
4920 rot = gen_reg_rtx (Pmode);
4921 emit_move_insn (rot, addr);
4922 }
644459d0 4923
9d98604b 4924 rot_amt += extra_rotby;
644459d0 4925
4926 rot_amt &= 15;
4927
4928 if (rot && rot_amt)
4929 {
9d98604b 4930 rtx x = gen_reg_rtx (SImode);
4931 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4932 rot = x;
644459d0 4933 rot_amt = 0;
4934 }
9d98604b 4935 if (!rot && rot_amt)
4936 rot = GEN_INT (rot_amt);
4937
4938 addr0 = copy_rtx (addr);
4939 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4940 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4941
4942 if (dst1)
4943 {
4944 addr1 = plus_constant (copy_rtx (addr), 16);
4945 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4946 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4947 }
644459d0 4948
9d98604b 4949 return rot;
4950}
4951
4952int
4953spu_split_load (rtx * ops)
4954{
4955 enum machine_mode mode = GET_MODE (ops[0]);
4956 rtx addr, load, rot;
4957 int rot_amt;
644459d0 4958
9d98604b 4959 if (GET_MODE_SIZE (mode) >= 16)
4960 return 0;
644459d0 4961
9d98604b 4962 addr = XEXP (ops[1], 0);
4963 gcc_assert (GET_CODE (addr) != AND);
4964
4965 if (!address_needs_split (ops[1]))
4966 {
4967 ops[1] = change_address (ops[1], TImode, addr);
4968 load = gen_reg_rtx (TImode);
4969 emit_insn (gen__movti (load, ops[1]));
4970 spu_convert_move (ops[0], load);
4971 return 1;
4972 }
4973
4974 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4975
4976 load = gen_reg_rtx (TImode);
4977 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4978
4979 if (rot)
4980 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4981
9d98604b 4982 spu_convert_move (ops[0], load);
4983 return 1;
644459d0 4984}
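
/* Illustrative example (register names hypothetical): an SImode load from
   (plus reg 4) with a 16-byte-aligned base expands to roughly

     lqd      $quad, 0($base)       load the containing quadword
     rotqbyi  $quad, $quad, 4       rotate the word into the preferred slot

   followed by the spu_convert_move extraction above.  */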
4985
9d98604b 4986int
644459d0 4987spu_split_store (rtx * ops)
4988{
4989 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4990 rtx reg;
644459d0 4991 rtx addr, p0, p1, p1_lo, smem;
4992 int aform;
4993 int scalar;
4994
9d98604b 4995 if (GET_MODE_SIZE (mode) >= 16)
4996 return 0;
4997
644459d0 4998 addr = XEXP (ops[0], 0);
9d98604b 4999 gcc_assert (GET_CODE (addr) != AND);
5000
5001 if (!address_needs_split (ops[0]))
5002 {
5003 reg = gen_reg_rtx (TImode);
5004 emit_insn (gen_spu_convert (reg, ops[1]));
5005 ops[0] = change_address (ops[0], TImode, addr);
5006 emit_move_insn (ops[0], reg);
5007 return 1;
5008 }
644459d0 5009
5010 if (GET_CODE (addr) == PLUS)
5011 {
5012 /* 8 cases:
5013 aligned reg + aligned reg => lqx, c?x, shuf, stqx
5014 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
5015 aligned reg + aligned const => lqd, c?d, shuf, stqx
5016 aligned reg + unaligned const => lqd, c?d, shuf, stqx
5017 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
5018 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
5019 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 5020         unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 5021 */
5022 aform = 0;
5023 p0 = XEXP (addr, 0);
5024 p1 = p1_lo = XEXP (addr, 1);
9d98604b 5025 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 5026 {
5027 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 5028 if (reg_aligned_for_addr (p0))
5029 {
5030 p1 = GEN_INT (INTVAL (p1) & -16);
5031 if (p1 == const0_rtx)
5032 addr = p0;
5033 else
5034 addr = gen_rtx_PLUS (SImode, p0, p1);
5035 }
5036 else
5037 {
5038 rtx x = gen_reg_rtx (SImode);
5039 emit_move_insn (x, p1);
5040 addr = gen_rtx_PLUS (SImode, p0, x);
5041 }
644459d0 5042 }
5043 }
9d98604b 5044 else if (REG_P (addr))
644459d0 5045 {
5046 aform = 0;
5047 p0 = addr;
5048 p1 = p1_lo = const0_rtx;
5049 }
5050 else
5051 {
5052 aform = 1;
5053 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
5054 p1 = 0; /* aform doesn't use p1 */
5055 p1_lo = addr;
5056 if (ALIGNED_SYMBOL_REF_P (addr))
5057 p1_lo = const0_rtx;
9d98604b 5058 else if (GET_CODE (addr) == CONST
5059 && GET_CODE (XEXP (addr, 0)) == PLUS
5060 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
5061 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 5062 {
9d98604b 5063 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
5064 if ((v & -16) != 0)
5065 addr = gen_rtx_CONST (Pmode,
5066 gen_rtx_PLUS (Pmode,
5067 XEXP (XEXP (addr, 0), 0),
5068 GEN_INT (v & -16)));
5069 else
5070 addr = XEXP (XEXP (addr, 0), 0);
5071 p1_lo = GEN_INT (v & 15);
644459d0 5072 }
5073 else if (GET_CODE (addr) == CONST_INT)
5074 {
5075 p1_lo = GEN_INT (INTVAL (addr) & 15);
5076 addr = GEN_INT (INTVAL (addr) & -16);
5077 }
9d98604b 5078 else
5079 {
5080 p1_lo = gen_reg_rtx (SImode);
5081 emit_move_insn (p1_lo, addr);
5082 }
644459d0 5083 }
5084
4cbad5bb 5085 gcc_assert (aform == 0 || aform == 1);
9d98604b 5086 reg = gen_reg_rtx (TImode);
e04cf423 5087
644459d0 5088 scalar = store_with_one_insn_p (ops[0]);
5089 if (!scalar)
5090 {
5091 /* We could copy the flags from the ops[0] MEM to mem here,
5092 We don't because we want this load to be optimized away if
5093 possible, and copying the flags will prevent that in certain
5094 cases, e.g. consider the volatile flag. */
5095
9d98604b 5096 rtx pat = gen_reg_rtx (TImode);
e04cf423 5097 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5098 set_mem_alias_set (lmem, 0);
5099 emit_insn (gen_movti (reg, lmem));
644459d0 5100
9d98604b 5101 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5102 p0 = stack_pointer_rtx;
5103 if (!p1_lo)
5104 p1_lo = const0_rtx;
5105
5106 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5107 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5108 }
644459d0 5109 else
5110 {
5111 if (GET_CODE (ops[1]) == REG)
5112 emit_insn (gen_spu_convert (reg, ops[1]));
5113 else if (GET_CODE (ops[1]) == SUBREG)
5114 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5115 else
5116 abort ();
5117 }
5118
5119 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5120 emit_insn (gen_ashlti3
5121 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5122
9d98604b 5123 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5124 /* We can't use the previous alias set because the memory has changed
5125 size and can potentially overlap objects of other types. */
5126 set_mem_alias_set (smem, 0);
5127
e04cf423 5128 emit_insn (gen_movti (smem, reg));
9d98604b 5129 return 1;
644459d0 5130}
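
/* Illustrative example of the non-scalar path above: a 4-byte store to an
   unaligned or non-padded location expands to roughly

     lqd/lqx    quad, <addr & -16>       load the containing quadword
     cwd        pat, <byte offset>       build the insertion control pattern
     shufb      quad, value, quad, pat   splice the 4 bytes into the quadword
     stqd/stqx  quad, <addr & -16>       store the whole quadword back

   which is why store_with_one_insn_p is worth the trouble for aligned,
   padded objects.  */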
5131
5132/* Return TRUE if X is MEM which is a struct member reference
5133 and the member can safely be loaded and stored with a single
5134 instruction because it is padded. */
5135static int
5136mem_is_padded_component_ref (rtx x)
5137{
5138 tree t = MEM_EXPR (x);
5139 tree r;
5140 if (!t || TREE_CODE (t) != COMPONENT_REF)
5141 return 0;
5142 t = TREE_OPERAND (t, 1);
5143 if (!t || TREE_CODE (t) != FIELD_DECL
5144 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5145 return 0;
5146 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5147 r = DECL_FIELD_CONTEXT (t);
5148 if (!r || TREE_CODE (r) != RECORD_TYPE)
5149 return 0;
5150 /* Make sure they are the same mode */
5151 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5152 return 0;
 5153  /* If there are no following fields, then the field's alignment ensures
fa7637bd 5154     the structure is padded to that alignment, which means this field is
 5155     padded too.  */
644459d0 5156 if (TREE_CHAIN (t) == 0)
5157 return 1;
5158 /* If the following field is also aligned then this field will be
5159 padded. */
5160 t = TREE_CHAIN (t);
5161 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5162 return 1;
5163 return 0;
5164}
5165
c7b91b14 5166/* Parse the -mfixed-range= option string. */
5167static void
5168fix_range (const char *const_str)
5169{
5170 int i, first, last;
5171 char *str, *dash, *comma;
5172
 5173  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5174 REG2 are either register names or register numbers. The effect
5175 of this option is to mark the registers in the range from REG1 to
5176 REG2 as ``fixed'' so they won't be used by the compiler. */
5177
5178 i = strlen (const_str);
5179 str = (char *) alloca (i + 1);
5180 memcpy (str, const_str, i + 1);
5181
5182 while (1)
5183 {
5184 dash = strchr (str, '-');
5185 if (!dash)
5186 {
5187 warning (0, "value of -mfixed-range must have form REG1-REG2");
5188 return;
5189 }
5190 *dash = '\0';
5191 comma = strchr (dash + 1, ',');
5192 if (comma)
5193 *comma = '\0';
5194
5195 first = decode_reg_name (str);
5196 if (first < 0)
5197 {
5198 warning (0, "unknown register name: %s", str);
5199 return;
5200 }
5201
5202 last = decode_reg_name (dash + 1);
5203 if (last < 0)
5204 {
5205 warning (0, "unknown register name: %s", dash + 1);
5206 return;
5207 }
5208
5209 *dash = '-';
5210
5211 if (first > last)
5212 {
5213 warning (0, "%s-%s is an empty range", str, dash + 1);
5214 return;
5215 }
5216
5217 for (i = first; i <= last; ++i)
5218 fixed_regs[i] = call_used_regs[i] = 1;
5219
5220 if (!comma)
5221 break;
5222
5223 *comma = ',';
5224 str = comma + 1;
5225 }
5226}
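
/* For example (illustrative), -mfixed-range=80-90,100-110 would mark
   registers 80-90 and 100-110 as fixed and call-used, assuming those
   names are accepted by decode_reg_name for this target.  */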
5227
644459d0 5228/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5229 can be generated using the fsmbi instruction. */
5230int
5231fsmbi_const_p (rtx x)
5232{
dea01258 5233 if (CONSTANT_P (x))
5234 {
5df189be 5235 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5236 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5237 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5238 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5239 }
5240 return 0;
5241}
5242
5243/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5244 can be generated using the cbd, chd, cwd or cdd instruction. */
5245int
5246cpat_const_p (rtx x, enum machine_mode mode)
5247{
5248 if (CONSTANT_P (x))
5249 {
5250 enum immediate_class c = classify_immediate (x, mode);
5251 return c == IC_CPAT;
5252 }
5253 return 0;
5254}
644459d0 5255
dea01258 5256rtx
5257gen_cpat_const (rtx * ops)
5258{
5259 unsigned char dst[16];
5260 int i, offset, shift, isize;
5261 if (GET_CODE (ops[3]) != CONST_INT
5262 || GET_CODE (ops[2]) != CONST_INT
5263 || (GET_CODE (ops[1]) != CONST_INT
5264 && GET_CODE (ops[1]) != REG))
5265 return 0;
5266 if (GET_CODE (ops[1]) == REG
5267 && (!REG_POINTER (ops[1])
5268 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5269 return 0;
644459d0 5270
5271 for (i = 0; i < 16; i++)
dea01258 5272 dst[i] = i + 16;
5273 isize = INTVAL (ops[3]);
5274 if (isize == 1)
5275 shift = 3;
5276 else if (isize == 2)
5277 shift = 2;
5278 else
5279 shift = 0;
5280 offset = (INTVAL (ops[2]) +
5281 (GET_CODE (ops[1]) ==
5282 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5283 for (i = 0; i < isize; i++)
5284 dst[offset + i] = i + shift;
5285 return array_to_constant (TImode, dst);
644459d0 5286}
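
/* Worked example: for a 4-byte insertion at offset 4 (ops[1] == 0,
   ops[2] == 4, ops[3] == 4), dst starts as { 16, 17, ..., 31 } and the
   loop overwrites dst[4..7] with { 0, 1, 2, 3 }.  As a shufb pattern this
   takes bytes 4-7 of the result from the new value's preferred slot and
   every other byte from the old quadword -- essentially the mask that cwd
   would build at run time.  */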
5287
5288/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5289 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5290 than 16 bytes, the value is repeated across the rest of the array. */
5291void
5292constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5293{
5294 HOST_WIDE_INT val;
5295 int i, j, first;
5296
5297 memset (arr, 0, 16);
5298 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5299 if (GET_CODE (x) == CONST_INT
5300 || (GET_CODE (x) == CONST_DOUBLE
5301 && (mode == SFmode || mode == DFmode)))
5302 {
5303 gcc_assert (mode != VOIDmode && mode != BLKmode);
5304
5305 if (GET_CODE (x) == CONST_DOUBLE)
5306 val = const_double_to_hwint (x);
5307 else
5308 val = INTVAL (x);
5309 first = GET_MODE_SIZE (mode) - 1;
5310 for (i = first; i >= 0; i--)
5311 {
5312 arr[i] = val & 0xff;
5313 val >>= 8;
5314 }
5315 /* Splat the constant across the whole array. */
5316 for (j = 0, i = first + 1; i < 16; i++)
5317 {
5318 arr[i] = arr[j];
5319 j = (j == first) ? 0 : j + 1;
5320 }
5321 }
5322 else if (GET_CODE (x) == CONST_DOUBLE)
5323 {
5324 val = CONST_DOUBLE_LOW (x);
5325 for (i = 15; i >= 8; i--)
5326 {
5327 arr[i] = val & 0xff;
5328 val >>= 8;
5329 }
5330 val = CONST_DOUBLE_HIGH (x);
5331 for (i = 7; i >= 0; i--)
5332 {
5333 arr[i] = val & 0xff;
5334 val >>= 8;
5335 }
5336 }
5337 else if (GET_CODE (x) == CONST_VECTOR)
5338 {
5339 int units;
5340 rtx elt;
5341 mode = GET_MODE_INNER (mode);
5342 units = CONST_VECTOR_NUNITS (x);
5343 for (i = 0; i < units; i++)
5344 {
5345 elt = CONST_VECTOR_ELT (x, i);
5346 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5347 {
5348 if (GET_CODE (elt) == CONST_DOUBLE)
5349 val = const_double_to_hwint (elt);
5350 else
5351 val = INTVAL (elt);
5352 first = GET_MODE_SIZE (mode) - 1;
5353 if (first + i * GET_MODE_SIZE (mode) > 16)
5354 abort ();
5355 for (j = first; j >= 0; j--)
5356 {
5357 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5358 val >>= 8;
5359 }
5360 }
5361 }
5362 }
5363 else
5364 gcc_unreachable();
5365}
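
/* Worked example: for (const_int 0x01020304) in SImode the array becomes
   { 01 02 03 04 01 02 03 04 ... } -- the four big-endian bytes of the
   value, splatted across all 16 bytes.  */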
5366
5367/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5368 smaller than 16 bytes, use the bytes that would represent that value
5369 in a register, e.g., for QImode return the value of arr[3]. */
5370rtx
e96f2783 5371array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5372{
5373 enum machine_mode inner_mode;
5374 rtvec v;
5375 int units, size, i, j, k;
5376 HOST_WIDE_INT val;
5377
5378 if (GET_MODE_CLASS (mode) == MODE_INT
5379 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5380 {
5381 j = GET_MODE_SIZE (mode);
5382 i = j < 4 ? 4 - j : 0;
5383 for (val = 0; i < j; i++)
5384 val = (val << 8) | arr[i];
5385 val = trunc_int_for_mode (val, mode);
5386 return GEN_INT (val);
5387 }
5388
5389 if (mode == TImode)
5390 {
5391 HOST_WIDE_INT high;
5392 for (i = high = 0; i < 8; i++)
5393 high = (high << 8) | arr[i];
5394 for (i = 8, val = 0; i < 16; i++)
5395 val = (val << 8) | arr[i];
5396 return immed_double_const (val, high, TImode);
5397 }
5398 if (mode == SFmode)
5399 {
5400 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5401 val = trunc_int_for_mode (val, SImode);
171b6d22 5402 return hwint_to_const_double (SFmode, val);
644459d0 5403 }
5404 if (mode == DFmode)
5405 {
1f915911 5406 for (i = 0, val = 0; i < 8; i++)
5407 val = (val << 8) | arr[i];
171b6d22 5408 return hwint_to_const_double (DFmode, val);
644459d0 5409 }
5410
5411 if (!VECTOR_MODE_P (mode))
5412 abort ();
5413
5414 units = GET_MODE_NUNITS (mode);
5415 size = GET_MODE_UNIT_SIZE (mode);
5416 inner_mode = GET_MODE_INNER (mode);
5417 v = rtvec_alloc (units);
5418
5419 for (k = i = 0; i < units; ++i)
5420 {
5421 val = 0;
5422 for (j = 0; j < size; j++, k++)
5423 val = (val << 8) | arr[k];
5424
5425 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5426 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5427 else
5428 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5429 }
5430 if (k > 16)
5431 abort ();
5432
5433 return gen_rtx_CONST_VECTOR (mode, v);
5434}
5435
5436static void
5437reloc_diagnostic (rtx x)
5438{
712d2297 5439 tree decl = 0;
644459d0 5440 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5441 return;
5442
5443 if (GET_CODE (x) == SYMBOL_REF)
5444 decl = SYMBOL_REF_DECL (x);
5445 else if (GET_CODE (x) == CONST
5446 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5447 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5448
5449 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5450 if (decl && !DECL_P (decl))
5451 decl = 0;
5452
644459d0 5453 /* The decl could be a string constant. */
5454 if (decl && DECL_P (decl))
712d2297 5455 {
5456 location_t loc;
5457 /* We use last_assemble_variable_decl to get line information. It's
5458 not always going to be right and might not even be close, but will
5459 be right for the more common cases. */
5460 if (!last_assemble_variable_decl || in_section == ctors_section)
5461 loc = DECL_SOURCE_LOCATION (decl);
5462 else
5463 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5464
712d2297 5465 if (TARGET_WARN_RELOC)
5466 warning_at (loc, 0,
5467 "creating run-time relocation for %qD", decl);
5468 else
5469 error_at (loc,
5470 "creating run-time relocation for %qD", decl);
5471 }
5472 else
5473 {
5474 if (TARGET_WARN_RELOC)
5475 warning_at (input_location, 0, "creating run-time relocation");
5476 else
5477 error_at (input_location, "creating run-time relocation");
5478 }
644459d0 5479}
5480
5481/* Hook into assemble_integer so we can generate an error for run-time
5482 relocations. The SPU ABI disallows them. */
5483static bool
5484spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5485{
5486 /* By default run-time relocations aren't supported, but we allow them
 5487 in case users support them in their own run-time loader. And we provide
5488 a warning for those users that don't. */
5489 if ((GET_CODE (x) == SYMBOL_REF)
5490 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5491 reloc_diagnostic (x);
5492
5493 return default_assemble_integer (x, size, aligned_p);
5494}
5495
5496static void
5497spu_asm_globalize_label (FILE * file, const char *name)
5498{
5499 fputs ("\t.global\t", file);
5500 assemble_name (file, name);
5501 fputs ("\n", file);
5502}
5503
5504static bool
20d892d1 5505spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5506 int opno ATTRIBUTE_UNUSED, int *total,
f529eb25 5507 bool speed ATTRIBUTE_UNUSED)
644459d0 5508{
5509 enum machine_mode mode = GET_MODE (x);
5510 int cost = COSTS_N_INSNS (2);
5511
5512 /* Folding to a CONST_VECTOR will use extra space but there might
5513 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5514 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5515 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5516 because this cost will only be compared against a single insn.
5517 if (code == CONST_VECTOR)
ca316360 5518 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
644459d0 5519 */
5520
5521 /* Use defaults for float operations. Not accurate but good enough. */
5522 if (mode == DFmode)
5523 {
5524 *total = COSTS_N_INSNS (13);
5525 return true;
5526 }
5527 if (mode == SFmode)
5528 {
5529 *total = COSTS_N_INSNS (6);
5530 return true;
5531 }
5532 switch (code)
5533 {
5534 case CONST_INT:
5535 if (satisfies_constraint_K (x))
5536 *total = 0;
5537 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5538 *total = COSTS_N_INSNS (1);
5539 else
5540 *total = COSTS_N_INSNS (3);
5541 return true;
5542
5543 case CONST:
5544 *total = COSTS_N_INSNS (3);
5545 return true;
5546
5547 case LABEL_REF:
5548 case SYMBOL_REF:
5549 *total = COSTS_N_INSNS (0);
5550 return true;
5551
5552 case CONST_DOUBLE:
5553 *total = COSTS_N_INSNS (5);
5554 return true;
5555
5556 case FLOAT_EXTEND:
5557 case FLOAT_TRUNCATE:
5558 case FLOAT:
5559 case UNSIGNED_FLOAT:
5560 case FIX:
5561 case UNSIGNED_FIX:
5562 *total = COSTS_N_INSNS (7);
5563 return true;
5564
5565 case PLUS:
5566 if (mode == TImode)
5567 {
5568 *total = COSTS_N_INSNS (9);
5569 return true;
5570 }
5571 break;
5572
5573 case MULT:
 5574 cost = (GET_CODE (XEXP (x, 0)) == REG
 5575 ? COSTS_N_INSNS (12)
 5576 : COSTS_N_INSNS (7));
5577 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5578 {
5579 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5580 {
5581 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5582 cost = COSTS_N_INSNS (14);
5583 if ((val & 0xffff) == 0)
5584 cost = COSTS_N_INSNS (9);
5585 else if (val > 0 && val < 0x10000)
5586 cost = COSTS_N_INSNS (11);
5587 }
5588 }
5589 *total = cost;
5590 return true;
5591 case DIV:
5592 case UDIV:
5593 case MOD:
5594 case UMOD:
5595 *total = COSTS_N_INSNS (20);
5596 return true;
5597 case ROTATE:
5598 case ROTATERT:
5599 case ASHIFT:
5600 case ASHIFTRT:
5601 case LSHIFTRT:
5602 *total = COSTS_N_INSNS (4);
5603 return true;
5604 case UNSPEC:
5605 if (XINT (x, 1) == UNSPEC_CONVERT)
5606 *total = COSTS_N_INSNS (0);
5607 else
5608 *total = COSTS_N_INSNS (4);
5609 return true;
5610 }
5611 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5612 if (GET_MODE_CLASS (mode) == MODE_INT
5613 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5614 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5615 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5616 *total = cost;
5617 return true;
5618}
5619
1bd43494 5620static enum machine_mode
5621spu_unwind_word_mode (void)
644459d0 5622{
1bd43494 5623 return SImode;
644459d0 5624}
5625
5626/* Decide whether we can make a sibling call to a function. DECL is the
5627 declaration of the function being targeted by the call and EXP is the
5628 CALL_EXPR representing the call. */
5629static bool
5630spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5631{
5632 return decl && !TARGET_LARGE_MEM;
5633}
5634
5635/* We need to correctly update the back chain pointer and the Available
 5636 Stack Size (which is in the second slot of the sp register). */
5637void
5638spu_allocate_stack (rtx op0, rtx op1)
5639{
5640 HOST_WIDE_INT v;
5641 rtx chain = gen_reg_rtx (V4SImode);
5642 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5643 rtx sp = gen_reg_rtx (V4SImode);
5644 rtx splatted = gen_reg_rtx (V4SImode);
5645 rtx pat = gen_reg_rtx (TImode);
5646
5647 /* copy the back chain so we can save it back again. */
5648 emit_move_insn (chain, stack_bot);
5649
5650 op1 = force_reg (SImode, op1);
5651
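  /* Splat the SImode adjustment in op1 into all four word slots of a
     V4SImode value so it can be subtracted from the whole sp register.  */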
5652 v = 0x1020300010203ll;
5653 emit_move_insn (pat, immed_double_const (v, v, TImode));
5654 emit_insn (gen_shufb (splatted, op1, op1, pat));
5655
5656 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5657 emit_insn (gen_subv4si3 (sp, sp, splatted));
5658
5659 if (flag_stack_check)
5660 {
5661 rtx avail = gen_reg_rtx(SImode);
5662 rtx result = gen_reg_rtx(SImode);
5663 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5664 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5665 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5666 }
5667
5668 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5669
5670 emit_move_insn (stack_bot, chain);
5671
5672 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5673}
5674
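/* Restore the stack pointer and back chain from the save area OP1 after a
   nonlocal goto, recomputing the Available Stack Size entries relative to
   the current stack pointer.  */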
5675void
5676spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5677{
5678 static unsigned char arr[16] =
5679 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5680 rtx temp = gen_reg_rtx (SImode);
5681 rtx temp2 = gen_reg_rtx (SImode);
5682 rtx temp3 = gen_reg_rtx (V4SImode);
5683 rtx temp4 = gen_reg_rtx (V4SImode);
5684 rtx pat = gen_reg_rtx (TImode);
5685 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5686
5687 /* Restore the backchain from the first word, sp from the second. */
5688 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5689 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5690
5691 emit_move_insn (pat, array_to_constant (TImode, arr));
5692
5693 /* Compute Available Stack Size for sp */
5694 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5695 emit_insn (gen_shufb (temp3, temp, temp, pat));
5696
5697 /* Compute Available Stack Size for back chain */
5698 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5699 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5700 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5701
5702 emit_insn (gen_addv4si3 (sp, sp, temp3));
5703 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5704}
5705
5706static void
5707spu_init_libfuncs (void)
5708{
5709 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5710 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5711 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5712 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5713 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5714 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5715 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5716 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5717 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4d3aeb29 5718 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
644459d0 5719 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5720 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5721
5722 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5723 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5724
5825ec3f 5725 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5726 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5727 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5728 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5729 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5730 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5731 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5732 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5733 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5734 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5735 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5736 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5737
19a53068 5738 set_optab_libfunc (smul_optab, TImode, "__multi3");
5739 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5740 set_optab_libfunc (smod_optab, TImode, "__modti3");
5741 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5742 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5743 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5744}
5745
5746/* Make a subreg, stripping any existing subreg. We could possibly just
5747 call simplify_subreg, but in this case we know what we want. */
5748rtx
5749spu_gen_subreg (enum machine_mode mode, rtx x)
5750{
5751 if (GET_CODE (x) == SUBREG)
5752 x = SUBREG_REG (x);
5753 if (GET_MODE (x) == mode)
5754 return x;
5755 return gen_rtx_SUBREG (mode, x, 0);
5756}
5757
5758static bool
fb80456a 5759spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5760{
5761 return (TYPE_MODE (type) == BLKmode
5762 && ((type) == 0
5763 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5764 || int_size_in_bytes (type) >
5765 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5766}
5767\f
5768/* Create the built-in types and functions */
5769
c2233b46 5770enum spu_function_code
5771{
5772#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5773#include "spu-builtins.def"
5774#undef DEF_BUILTIN
5775 NUM_SPU_BUILTINS
5776};
5777
5778extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5779
644459d0 5780struct spu_builtin_description spu_builtins[] = {
5781#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5782 {fcode, icode, name, type, params},
644459d0 5783#include "spu-builtins.def"
5784#undef DEF_BUILTIN
5785};
5786
0c5c4d59 5787static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5788
5789/* Returns the spu builtin decl for CODE. */
e6925042 5790
5791static tree
5792spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5793{
5794 if (code >= NUM_SPU_BUILTINS)
5795 return error_mark_node;
5796
0c5c4d59 5797 return spu_builtin_decls[code];
e6925042 5798}
5799
5800
644459d0 5801static void
5802spu_init_builtins (void)
5803{
5804 struct spu_builtin_description *d;
5805 unsigned int i;
5806
5807 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5808 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5809 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5810 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5811 V4SF_type_node = build_vector_type (float_type_node, 4);
5812 V2DF_type_node = build_vector_type (double_type_node, 2);
5813
5814 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5815 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5816 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5817 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5818
c4ecce0c 5819 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5820
5821 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5822 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5823 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5824 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5825 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5826 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5827 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5828 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5829 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5830 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5831 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5832 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5833
5834 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5835 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5836 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5837 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5838 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5839 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5840 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5841 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5842
5843 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5844 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5845
5846 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5847
5848 spu_builtin_types[SPU_BTI_PTR] =
5849 build_pointer_type (build_qualified_type
5850 (void_type_node,
5851 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5852
5853 /* For each builtin we build a new prototype. The tree code will make
5854 sure nodes are shared. */
5855 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5856 {
5857 tree p;
5858 char name[64]; /* build_function will make a copy. */
5859 int parm;
5860
5861 if (d->name == 0)
5862 continue;
5863
5dfbd18f 5864 /* Find last parm. */
644459d0 5865 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5866 ;
644459d0 5867
5868 p = void_list_node;
5869 while (parm > 1)
5870 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5871
5872 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5873
5874 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5875 spu_builtin_decls[i] =
3726fe5e 5876 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5877 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5878 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5879
5880 /* These builtins don't throw. */
0c5c4d59 5881 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5882 }
5883}
5884
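/* Restore the stack pointer from OP1, adjusting the Available Stack Size by
   the same amount and preserving the current back chain quadword.  */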
cf31d486 5885void
5886spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5887{
5888 static unsigned char arr[16] =
5889 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5890
5891 rtx temp = gen_reg_rtx (Pmode);
5892 rtx temp2 = gen_reg_rtx (V4SImode);
5893 rtx temp3 = gen_reg_rtx (V4SImode);
5894 rtx pat = gen_reg_rtx (TImode);
5895 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5896
5897 emit_move_insn (pat, array_to_constant (TImode, arr));
5898
5899 /* Restore the sp. */
5900 emit_move_insn (temp, op1);
5901 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5902
5903 /* Compute available stack size for sp. */
5904 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5905 emit_insn (gen_shufb (temp3, temp, temp, pat));
5906
5907 emit_insn (gen_addv4si3 (sp, sp, temp3));
5908 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5909}
5910
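/* Return nonzero when TARGET_SAFE_DMA is in effect and CHANNEL is in the
   range 21 through 27.  */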
644459d0 5911int
5912spu_safe_dma (HOST_WIDE_INT channel)
5913{
006e4b96 5914 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5915}
5916
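/* Expand a splat, replicating the scalar ops[1] into every element of the
   vector ops[0].  */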
5917void
5918spu_builtin_splats (rtx ops[])
5919{
5920 enum machine_mode mode = GET_MODE (ops[0]);
5921 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5922 {
5923 unsigned char arr[16];
5924 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5925 emit_move_insn (ops[0], array_to_constant (mode, arr));
5926 }
644459d0 5927 else
5928 {
5929 rtx reg = gen_reg_rtx (TImode);
5930 rtx shuf;
5931 if (GET_CODE (ops[1]) != REG
5932 && GET_CODE (ops[1]) != SUBREG)
5933 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
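      /* Select a shufb pattern that copies the preferred-slot element of
	 ops[1] into every element of the result.  */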
5934 switch (mode)
5935 {
5936 case V2DImode:
5937 case V2DFmode:
5938 shuf =
5939 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5940 TImode);
5941 break;
5942 case V4SImode:
5943 case V4SFmode:
5944 shuf =
5945 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5946 TImode);
5947 break;
5948 case V8HImode:
5949 shuf =
5950 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5951 TImode);
5952 break;
5953 case V16QImode:
5954 shuf =
5955 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5956 TImode);
5957 break;
5958 default:
5959 abort ();
5960 }
5961 emit_move_insn (reg, shuf);
5962 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5963 }
5964}
5965
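/* Expand extraction of the element at index ops[2] of vector ops[1] into
   the scalar ops[0].  */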
5966void
5967spu_builtin_extract (rtx ops[])
5968{
5969 enum machine_mode mode;
5970 rtx rot, from, tmp;
5971
5972 mode = GET_MODE (ops[1]);
5973
5974 if (GET_CODE (ops[2]) == CONST_INT)
5975 {
5976 switch (mode)
5977 {
5978 case V16QImode:
5979 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5980 break;
5981 case V8HImode:
5982 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5983 break;
5984 case V4SFmode:
5985 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5986 break;
5987 case V4SImode:
5988 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5989 break;
5990 case V2DImode:
5991 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5992 break;
5993 case V2DFmode:
5994 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5995 break;
5996 default:
5997 abort ();
5998 }
5999 return;
6000 }
6001
6002 from = spu_gen_subreg (TImode, ops[1]);
6003 rot = gen_reg_rtx (TImode);
6004 tmp = gen_reg_rtx (SImode);
6005
6006 switch (mode)
6007 {
6008 case V16QImode:
6009 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
6010 break;
6011 case V8HImode:
6012 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
6013 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
6014 break;
6015 case V4SFmode:
6016 case V4SImode:
6017 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
6018 break;
6019 case V2DImode:
6020 case V2DFmode:
6021 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
6022 break;
6023 default:
6024 abort ();
6025 }
6026 emit_insn (gen_rotqby_ti (rot, from, tmp));
6027
6028 emit_insn (gen_spu_convert (ops[0], rot));
6029}
6030
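/* Expand insertion of the scalar ops[1] into element ops[3] of the vector
   ops[2], leaving the result in ops[0].  */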
6031void
6032spu_builtin_insert (rtx ops[])
6033{
6034 enum machine_mode mode = GET_MODE (ops[0]);
6035 enum machine_mode imode = GET_MODE_INNER (mode);
6036 rtx mask = gen_reg_rtx (TImode);
6037 rtx offset;
6038
6039 if (GET_CODE (ops[3]) == CONST_INT)
6040 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
6041 else
6042 {
6043 offset = gen_reg_rtx (SImode);
6044 emit_insn (gen_mulsi3
6045 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
6046 }
6047 emit_insn (gen_cpat
6048 (mask, stack_pointer_rtx, offset,
6049 GEN_INT (GET_MODE_SIZE (imode))));
6050 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
6051}
6052
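/* Expand a promote, rotating element ops[2] of ops[1] into the preferred
   scalar slot of ops[0].  */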
6053void
6054spu_builtin_promote (rtx ops[])
6055{
6056 enum machine_mode mode, imode;
6057 rtx rot, from, offset;
6058 HOST_WIDE_INT pos;
6059
6060 mode = GET_MODE (ops[0]);
6061 imode = GET_MODE_INNER (mode);
6062
6063 from = gen_reg_rtx (TImode);
6064 rot = spu_gen_subreg (TImode, ops[0]);
6065
6066 emit_insn (gen_spu_convert (from, ops[1]));
6067
6068 if (GET_CODE (ops[2]) == CONST_INT)
6069 {
6070 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
6071 if (GET_MODE_SIZE (imode) < 4)
6072 pos += 4 - GET_MODE_SIZE (imode);
6073 offset = GEN_INT (pos & 15);
6074 }
6075 else
6076 {
6077 offset = gen_reg_rtx (SImode);
6078 switch (mode)
6079 {
6080 case V16QImode:
6081 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6082 break;
6083 case V8HImode:
6084 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6085 emit_insn (gen_addsi3 (offset, offset, offset));
6086 break;
6087 case V4SFmode:
6088 case V4SImode:
6089 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6090 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6091 break;
6092 case V2DImode:
6093 case V2DFmode:
6094 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6095 break;
6096 default:
6097 abort ();
6098 }
6099 }
6100 emit_insn (gen_rotqby_ti (rot, from, offset));
6101}
6102
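/* Implement TARGET_TRAMPOLINE_INIT.  Emit instructions into the trampoline
   block M_TRAMP which load the static chain register with CXT and branch to
   FNDECL.  */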
e96f2783 6103static void
6104spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6105{
e96f2783 6106 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6107 rtx shuf = gen_reg_rtx (V4SImode);
6108 rtx insn = gen_reg_rtx (V4SImode);
6109 rtx shufc;
6110 rtx insnc;
6111 rtx mem;
6112
6113 fnaddr = force_reg (SImode, fnaddr);
6114 cxt = force_reg (SImode, cxt);
6115
6116 if (TARGET_LARGE_MEM)
6117 {
6118 rtx rotl = gen_reg_rtx (V4SImode);
6119 rtx mask = gen_reg_rtx (V4SImode);
6120 rtx bi = gen_reg_rtx (SImode);
e96f2783 6121 static unsigned char const shufa[16] = {
644459d0 6122 2, 3, 0, 1, 18, 19, 16, 17,
6123 0, 1, 2, 3, 16, 17, 18, 19
6124 };
e96f2783 6125 static unsigned char const insna[16] = {
644459d0 6126 0x41, 0, 0, 79,
6127 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6128 0x60, 0x80, 0, 79,
6129 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6130 };
6131
6132 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6133 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6134
6135 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6136 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6137 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6138 emit_insn (gen_selb (insn, insnc, rotl, mask));
6139
e96f2783 6140 mem = adjust_address (m_tramp, V4SImode, 0);
6141 emit_move_insn (mem, insn);
644459d0 6142
6143 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6144 mem = adjust_address (m_tramp, Pmode, 16);
6145 emit_move_insn (mem, bi);
644459d0 6146 }
6147 else
6148 {
6149 rtx scxt = gen_reg_rtx (SImode);
6150 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6151 static unsigned char const insna[16] = {
644459d0 6152 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6153 0x30, 0, 0, 0,
6154 0, 0, 0, 0,
6155 0, 0, 0, 0
6156 };
6157
6158 shufc = gen_reg_rtx (TImode);
6159 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6160
6161 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6162 fits 18 bits and the last 4 are zeros. This will be true if
6163 the stack pointer is initialized to 0x3fff0 at program start,
6164 otherwise the ila instruction will be garbage. */
6165
6166 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6167 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6168 emit_insn (gen_cpat
6169 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6170 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6171 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6172
e96f2783 6173 mem = adjust_address (m_tramp, V4SImode, 0);
6174 emit_move_insn (mem, insn);
644459d0 6175 }
6176 emit_insn (gen_sync ());
6177}
6178
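/* Expand a sign extension of ops[1] into the wider mode of ops[0]: compute
   the sign in a scratch register and use shufb to combine the sign bytes
   with the value bytes.  */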
6179void
6180spu_expand_sign_extend (rtx ops[])
6181{
6182 unsigned char arr[16];
6183 rtx pat = gen_reg_rtx (TImode);
6184 rtx sign, c;
6185 int i, last;
6186 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6187 if (GET_MODE (ops[1]) == QImode)
6188 {
6189 sign = gen_reg_rtx (HImode);
6190 emit_insn (gen_extendqihi2 (sign, ops[1]));
6191 for (i = 0; i < 16; i++)
6192 arr[i] = 0x12;
6193 arr[last] = 0x13;
6194 }
6195 else
6196 {
6197 for (i = 0; i < 16; i++)
6198 arr[i] = 0x10;
6199 switch (GET_MODE (ops[1]))
6200 {
6201 case HImode:
6202 sign = gen_reg_rtx (SImode);
6203 emit_insn (gen_extendhisi2 (sign, ops[1]));
6204 arr[last] = 0x03;
6205 arr[last - 1] = 0x02;
6206 break;
6207 case SImode:
6208 sign = gen_reg_rtx (SImode);
6209 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6210 for (i = 0; i < 4; i++)
6211 arr[last - i] = 3 - i;
6212 break;
6213 case DImode:
6214 sign = gen_reg_rtx (SImode);
6215 c = gen_reg_rtx (SImode);
6216 emit_insn (gen_spu_convert (c, ops[1]));
6217 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6218 for (i = 0; i < 8; i++)
6219 arr[last - i] = 7 - i;
6220 break;
6221 default:
6222 abort ();
6223 }
6224 }
6225 emit_move_insn (pat, array_to_constant (TImode, arr));
6226 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6227}
6228
 6229/* Expand vector initialization of TARGET from VALS. If there are any
 6230 constant parts, load them first, then load any non-constant parts. */
6231void
6232spu_expand_vector_init (rtx target, rtx vals)
6233{
6234 enum machine_mode mode = GET_MODE (target);
6235 int n_elts = GET_MODE_NUNITS (mode);
6236 int n_var = 0;
6237 bool all_same = true;
790c536c 6238 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6239 int i;
6240
6241 first = XVECEXP (vals, 0, 0);
6242 for (i = 0; i < n_elts; ++i)
6243 {
6244 x = XVECEXP (vals, 0, i);
e442af0b 6245 if (!(CONST_INT_P (x)
6246 || GET_CODE (x) == CONST_DOUBLE
6247 || GET_CODE (x) == CONST_FIXED))
644459d0 6248 ++n_var;
6249 else
6250 {
6251 if (first_constant == NULL_RTX)
6252 first_constant = x;
6253 }
6254 if (i > 0 && !rtx_equal_p (x, first))
6255 all_same = false;
6256 }
6257
6258 /* if all elements are the same, use splats to repeat elements */
6259 if (all_same)
6260 {
6261 if (!CONSTANT_P (first)
6262 && !register_operand (first, GET_MODE (x)))
6263 first = force_reg (GET_MODE (first), first);
6264 emit_insn (gen_spu_splats (target, first));
6265 return;
6266 }
6267
6268 /* load constant parts */
6269 if (n_var != n_elts)
6270 {
6271 if (n_var == 0)
6272 {
6273 emit_move_insn (target,
6274 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6275 }
6276 else
6277 {
6278 rtx constant_parts_rtx = copy_rtx (vals);
6279
6280 gcc_assert (first_constant != NULL_RTX);
 6281 /* Fill empty slots with the first constant; this increases
6282 our chance of using splats in the recursive call below. */
6283 for (i = 0; i < n_elts; ++i)
e442af0b 6284 {
6285 x = XVECEXP (constant_parts_rtx, 0, i);
6286 if (!(CONST_INT_P (x)
6287 || GET_CODE (x) == CONST_DOUBLE
6288 || GET_CODE (x) == CONST_FIXED))
6289 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6290 }
644459d0 6291
6292 spu_expand_vector_init (target, constant_parts_rtx);
6293 }
6294 }
6295
6296 /* load variable parts */
6297 if (n_var != 0)
6298 {
6299 rtx insert_operands[4];
6300
6301 insert_operands[0] = target;
6302 insert_operands[2] = target;
6303 for (i = 0; i < n_elts; ++i)
6304 {
6305 x = XVECEXP (vals, 0, i);
e442af0b 6306 if (!(CONST_INT_P (x)
6307 || GET_CODE (x) == CONST_DOUBLE
6308 || GET_CODE (x) == CONST_FIXED))
644459d0 6309 {
6310 if (!register_operand (x, GET_MODE (x)))
6311 x = force_reg (GET_MODE (x), x);
6312 insert_operands[1] = x;
6313 insert_operands[3] = GEN_INT (i);
6314 spu_builtin_insert (insert_operands);
6315 }
6316 }
6317 }
6318}
6352eedf 6319
5474166e 6320/* Return insn index for the vector compare instruction for given CODE,
6321 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6322
6323static int
6324get_vec_cmp_insn (enum rtx_code code,
6325 enum machine_mode dest_mode,
6326 enum machine_mode op_mode)
6327
6328{
6329 switch (code)
6330 {
6331 case EQ:
6332 if (dest_mode == V16QImode && op_mode == V16QImode)
6333 return CODE_FOR_ceq_v16qi;
6334 if (dest_mode == V8HImode && op_mode == V8HImode)
6335 return CODE_FOR_ceq_v8hi;
6336 if (dest_mode == V4SImode && op_mode == V4SImode)
6337 return CODE_FOR_ceq_v4si;
6338 if (dest_mode == V4SImode && op_mode == V4SFmode)
6339 return CODE_FOR_ceq_v4sf;
6340 if (dest_mode == V2DImode && op_mode == V2DFmode)
6341 return CODE_FOR_ceq_v2df;
6342 break;
6343 case GT:
6344 if (dest_mode == V16QImode && op_mode == V16QImode)
6345 return CODE_FOR_cgt_v16qi;
6346 if (dest_mode == V8HImode && op_mode == V8HImode)
6347 return CODE_FOR_cgt_v8hi;
6348 if (dest_mode == V4SImode && op_mode == V4SImode)
6349 return CODE_FOR_cgt_v4si;
6350 if (dest_mode == V4SImode && op_mode == V4SFmode)
6351 return CODE_FOR_cgt_v4sf;
6352 if (dest_mode == V2DImode && op_mode == V2DFmode)
6353 return CODE_FOR_cgt_v2df;
6354 break;
6355 case GTU:
6356 if (dest_mode == V16QImode && op_mode == V16QImode)
6357 return CODE_FOR_clgt_v16qi;
6358 if (dest_mode == V8HImode && op_mode == V8HImode)
6359 return CODE_FOR_clgt_v8hi;
6360 if (dest_mode == V4SImode && op_mode == V4SImode)
6361 return CODE_FOR_clgt_v4si;
6362 break;
6363 default:
6364 break;
6365 }
6366 return -1;
6367}
6368
6369/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6370 DMODE is expected destination mode. This is a recursive function. */
6371
6372static rtx
6373spu_emit_vector_compare (enum rtx_code rcode,
6374 rtx op0, rtx op1,
6375 enum machine_mode dmode)
6376{
6377 int vec_cmp_insn;
6378 rtx mask;
6379 enum machine_mode dest_mode;
6380 enum machine_mode op_mode = GET_MODE (op1);
6381
6382 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6383
 6384 /* Floating point vector compare instructions use destination V4SImode.
 6385 Double floating point vector compare instructions use destination V2DImode.
6386 Move destination to appropriate mode later. */
6387 if (dmode == V4SFmode)
6388 dest_mode = V4SImode;
6389 else if (dmode == V2DFmode)
6390 dest_mode = V2DImode;
6391 else
6392 dest_mode = dmode;
6393
6394 mask = gen_reg_rtx (dest_mode);
6395 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6396
6397 if (vec_cmp_insn == -1)
6398 {
6399 bool swap_operands = false;
6400 bool try_again = false;
6401 switch (rcode)
6402 {
6403 case LT:
6404 rcode = GT;
6405 swap_operands = true;
6406 try_again = true;
6407 break;
6408 case LTU:
6409 rcode = GTU;
6410 swap_operands = true;
6411 try_again = true;
6412 break;
6413 case NE:
e20943d4 6414 case UNEQ:
6415 case UNLE:
6416 case UNLT:
6417 case UNGE:
6418 case UNGT:
6419 case UNORDERED:
5474166e 6420 /* Treat these as the complement of the reversed comparison, e.g. A != B as ~(A==B). */
6421 {
e20943d4 6422 enum rtx_code rev_code;
5474166e 6423 enum insn_code nor_code;
e20943d4 6424 rtx rev_mask;
6425
6426 rev_code = reverse_condition_maybe_unordered (rcode);
6427 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6428
d6bf3b14 6429 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6430 gcc_assert (nor_code != CODE_FOR_nothing);
e20943d4 6431 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
5474166e 6432 if (dmode != dest_mode)
6433 {
6434 rtx temp = gen_reg_rtx (dest_mode);
6435 convert_move (temp, mask, 0);
6436 return temp;
6437 }
6438 return mask;
6439 }
6440 break;
6441 case GE:
6442 case GEU:
6443 case LE:
6444 case LEU:
6445 /* Try GT/GTU/LT/LTU OR EQ */
6446 {
6447 rtx c_rtx, eq_rtx;
6448 enum insn_code ior_code;
6449 enum rtx_code new_code;
6450
6451 switch (rcode)
6452 {
6453 case GE: new_code = GT; break;
6454 case GEU: new_code = GTU; break;
6455 case LE: new_code = LT; break;
6456 case LEU: new_code = LTU; break;
6457 default:
6458 gcc_unreachable ();
6459 }
6460
6461 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6462 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6463
d6bf3b14 6464 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6465 gcc_assert (ior_code != CODE_FOR_nothing);
6466 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6467 if (dmode != dest_mode)
6468 {
6469 rtx temp = gen_reg_rtx (dest_mode);
6470 convert_move (temp, mask, 0);
6471 return temp;
6472 }
6473 return mask;
6474 }
6475 break;
e20943d4 6476 case LTGT:
6477 /* Try LT OR GT */
6478 {
6479 rtx lt_rtx, gt_rtx;
6480 enum insn_code ior_code;
6481
6482 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6483 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6484
6485 ior_code = optab_handler (ior_optab, dest_mode);
6486 gcc_assert (ior_code != CODE_FOR_nothing);
6487 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6488 if (dmode != dest_mode)
6489 {
6490 rtx temp = gen_reg_rtx (dest_mode);
6491 convert_move (temp, mask, 0);
6492 return temp;
6493 }
6494 return mask;
6495 }
6496 break;
6497 case ORDERED:
6498 /* Implement as (A==A) & (B==B) */
6499 {
6500 rtx a_rtx, b_rtx;
6501 enum insn_code and_code;
6502
6503 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6504 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6505
6506 and_code = optab_handler (and_optab, dest_mode);
6507 gcc_assert (and_code != CODE_FOR_nothing);
6508 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6509 if (dmode != dest_mode)
6510 {
6511 rtx temp = gen_reg_rtx (dest_mode);
6512 convert_move (temp, mask, 0);
6513 return temp;
6514 }
6515 return mask;
6516 }
6517 break;
5474166e 6518 default:
6519 gcc_unreachable ();
6520 }
6521
6522 /* You only get two chances. */
6523 if (try_again)
6524 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6525
6526 gcc_assert (vec_cmp_insn != -1);
6527
6528 if (swap_operands)
6529 {
6530 rtx tmp;
6531 tmp = op0;
6532 op0 = op1;
6533 op1 = tmp;
6534 }
6535 }
6536
6537 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6538 if (dmode != dest_mode)
6539 {
6540 rtx temp = gen_reg_rtx (dest_mode);
6541 convert_move (temp, mask, 0);
6542 return temp;
6543 }
6544 return mask;
6545}
6546
6547
6548/* Emit vector conditional expression.
6549 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6550 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6551
6552int
6553spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6554 rtx cond, rtx cc_op0, rtx cc_op1)
6555{
6556 enum machine_mode dest_mode = GET_MODE (dest);
6557 enum rtx_code rcode = GET_CODE (cond);
6558 rtx mask;
6559
6560 /* Get the vector mask for the given relational operations. */
6561 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6562
6563 emit_insn(gen_selb (dest, op2, op1, mask));
6564
6565 return 1;
6566}
6567
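/* Force OP into a register of mode MODE, using a subreg when the modes have
   the same size and a spu_convert otherwise.  Used when expanding
   builtins.  */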
6352eedf 6568static rtx
6569spu_force_reg (enum machine_mode mode, rtx op)
6570{
6571 rtx x, r;
6572 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6573 {
6574 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6575 || GET_MODE (op) == BLKmode)
6576 return force_reg (mode, convert_to_mode (mode, op, 0));
6577 abort ();
6578 }
6579
6580 r = force_reg (GET_MODE (op), op);
6581 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6582 {
6583 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6584 if (x)
6585 return x;
6586 }
6587
6588 x = gen_reg_rtx (mode);
6589 emit_insn (gen_spu_convert (x, r));
6590 return x;
6591}
6592
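/* Diagnose operand OP for parameter type P of builtin D: warn or error when
   an immediate operand is out of range or has ignored low bits.  */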
6593static void
6594spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6595{
6596 HOST_WIDE_INT v = 0;
6597 int lsbits;
6598 /* Check the range of immediate operands. */
6599 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6600 {
6601 int range = p - SPU_BTI_7;
5df189be 6602
6603 if (!CONSTANT_P (op))
bf776685 6604 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6605 d->name,
6606 spu_builtin_range[range].low, spu_builtin_range[range].high);
6607
6608 if (GET_CODE (op) == CONST
6609 && (GET_CODE (XEXP (op, 0)) == PLUS
6610 || GET_CODE (XEXP (op, 0)) == MINUS))
6611 {
6612 v = INTVAL (XEXP (XEXP (op, 0), 1));
6613 op = XEXP (XEXP (op, 0), 0);
6614 }
6615 else if (GET_CODE (op) == CONST_INT)
6616 v = INTVAL (op);
5df189be 6617 else if (GET_CODE (op) == CONST_VECTOR
6618 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6619 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6620
6621 /* The default for v is 0 which is valid in every range. */
6622 if (v < spu_builtin_range[range].low
6623 || v > spu_builtin_range[range].high)
bf776685 6624 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6625 d->name,
6626 spu_builtin_range[range].low, spu_builtin_range[range].high,
6627 v);
6352eedf 6628
6629 switch (p)
6630 {
6631 case SPU_BTI_S10_4:
6632 lsbits = 4;
6633 break;
6634 case SPU_BTI_U16_2:
6635 /* This is only used in lqa, and stqa. Even though the insns
6636 encode 16 bits of the address (all but the 2 least
6637 significant), only 14 bits are used because it is masked to
6638 be 16 byte aligned. */
6639 lsbits = 4;
6640 break;
6641 case SPU_BTI_S16_2:
6642 /* This is used for lqr and stqr. */
6643 lsbits = 2;
6644 break;
6645 default:
6646 lsbits = 0;
6647 }
6648
6649 if (GET_CODE (op) == LABEL_REF
6650 || (GET_CODE (op) == SYMBOL_REF
6651 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6652 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6653 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6654 d->name);
6655 }
6656}
6657
6658
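/* Expand the arguments of the call EXP to builtin D into OPS, with TARGET as
   operand 0 when the builtin returns a value.  Return the number of operands
   filled in.  */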
70ca06f8 6659static int
5df189be 6660expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6661 rtx target, rtx ops[])
6662{
bc620c5c 6663 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6664 int i = 0, a;
6352eedf 6665
6666 /* Expand the arguments into rtl. */
6667
6668 if (d->parm[0] != SPU_BTI_VOID)
6669 ops[i++] = target;
6670
70ca06f8 6671 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6672 {
5df189be 6673 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6674 if (arg == 0)
6675 abort ();
b9c74b4d 6676 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6677 }
70ca06f8 6678
32f79657 6679 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6680 return i;
6352eedf 6681}
6682
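/* Expand a call to the builtin described by D, taking the arguments from EXP
   and returning the result in TARGET when possible.  */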
6683static rtx
6684spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6685 tree exp, rtx target)
6352eedf 6686{
6687 rtx pat;
6688 rtx ops[8];
bc620c5c 6689 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6690 enum machine_mode mode, tmode;
6691 int i, p;
70ca06f8 6692 int n_operands;
6352eedf 6693 tree return_type;
6694
6695 /* Set up ops[] with values from arglist. */
70ca06f8 6696 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6697
6698 /* Handle the target operand which must be operand 0. */
6699 i = 0;
6700 if (d->parm[0] != SPU_BTI_VOID)
6701 {
6702
6703 /* We prefer the mode specified for the match_operand otherwise
6704 use the mode from the builtin function prototype. */
6705 tmode = insn_data[d->icode].operand[0].mode;
6706 if (tmode == VOIDmode)
6707 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6708
6709 /* Try to use target because not using it can lead to extra copies
 6710 and when we are using all of the registers extra copies lead
6711 to extra spills. */
6712 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6713 ops[0] = target;
6714 else
6715 target = ops[0] = gen_reg_rtx (tmode);
6716
6717 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6718 abort ();
6719
6720 i++;
6721 }
6722
a76866d3 6723 if (d->fcode == SPU_MASK_FOR_LOAD)
6724 {
6725 enum machine_mode mode = insn_data[icode].operand[1].mode;
6726 tree arg;
6727 rtx addr, op, pat;
6728
6729 /* get addr */
5df189be 6730 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6731 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6732 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6733 addr = memory_address (mode, op);
6734
6735 /* negate addr */
6736 op = gen_reg_rtx (GET_MODE (addr));
6737 emit_insn (gen_rtx_SET (VOIDmode, op,
6738 gen_rtx_NEG (GET_MODE (addr), addr)));
6739 op = gen_rtx_MEM (mode, op);
6740
6741 pat = GEN_FCN (icode) (target, op);
6742 if (!pat)
6743 return 0;
6744 emit_insn (pat);
6745 return target;
6746 }
6747
6352eedf 6748 /* Ignore align_hint, but still expand its args in case they have
6749 side effects. */
6750 if (icode == CODE_FOR_spu_align_hint)
6751 return 0;
6752
6753 /* Handle the rest of the operands. */
70ca06f8 6754 for (p = 1; i < n_operands; i++, p++)
6352eedf 6755 {
6756 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6757 mode = insn_data[d->icode].operand[i].mode;
6758 else
6759 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6760
6761 /* mode can be VOIDmode here for labels */
6762
6763 /* For specific intrinsics with an immediate operand, e.g.,
6764 si_ai(), we sometimes need to convert the scalar argument to a
6765 vector argument by splatting the scalar. */
6766 if (VECTOR_MODE_P (mode)
6767 && (GET_CODE (ops[i]) == CONST_INT
6768 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6769 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6770 {
6771 if (GET_CODE (ops[i]) == CONST_INT)
6772 ops[i] = spu_const (mode, INTVAL (ops[i]));
6773 else
6774 {
6775 rtx reg = gen_reg_rtx (mode);
6776 enum machine_mode imode = GET_MODE_INNER (mode);
6777 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6778 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6779 if (imode != GET_MODE (ops[i]))
6780 ops[i] = convert_to_mode (imode, ops[i],
6781 TYPE_UNSIGNED (spu_builtin_types
6782 [d->parm[i]]));
6783 emit_insn (gen_spu_splats (reg, ops[i]));
6784 ops[i] = reg;
6785 }
6786 }
6787
5df189be 6788 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6789
6352eedf 6790 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6791 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6792 }
6793
70ca06f8 6794 switch (n_operands)
6352eedf 6795 {
6796 case 0:
6797 pat = GEN_FCN (icode) (0);
6798 break;
6799 case 1:
6800 pat = GEN_FCN (icode) (ops[0]);
6801 break;
6802 case 2:
6803 pat = GEN_FCN (icode) (ops[0], ops[1]);
6804 break;
6805 case 3:
6806 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6807 break;
6808 case 4:
6809 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6810 break;
6811 case 5:
6812 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6813 break;
6814 case 6:
6815 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6816 break;
6817 default:
6818 abort ();
6819 }
6820
6821 if (!pat)
6822 abort ();
6823
6824 if (d->type == B_CALL || d->type == B_BISLED)
6825 emit_call_insn (pat);
6826 else if (d->type == B_JUMP)
6827 {
6828 emit_jump_insn (pat);
6829 emit_barrier ();
6830 }
6831 else
6832 emit_insn (pat);
6833
6834 return_type = spu_builtin_types[d->parm[0]];
6835 if (d->parm[0] != SPU_BTI_VOID
6836 && GET_MODE (target) != TYPE_MODE (return_type))
6837 {
 6838 /* target is the return value. It should always have the mode of
6839 the builtin function prototype. */
6840 target = spu_force_reg (TYPE_MODE (return_type), target);
6841 }
6842
6843 return target;
6844}
6845
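/* Expand a call to an SPU builtin: look up its descriptor by function code
   and hand it to spu_expand_builtin_1.  */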
6846rtx
6847spu_expand_builtin (tree exp,
6848 rtx target,
6849 rtx subtarget ATTRIBUTE_UNUSED,
6850 enum machine_mode mode ATTRIBUTE_UNUSED,
6851 int ignore ATTRIBUTE_UNUSED)
6852{
5df189be 6853 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6854 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6855 struct spu_builtin_description *d;
6856
6857 if (fcode < NUM_SPU_BUILTINS)
6858 {
6859 d = &spu_builtins[fcode];
6860
5df189be 6861 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6862 }
6863 abort ();
6864}
6865
e99f512d 6866/* Implement targetm.vectorize.builtin_mul_widen_even. */
6867static tree
6868spu_builtin_mul_widen_even (tree type)
6869{
e99f512d 6870 switch (TYPE_MODE (type))
6871 {
6872 case V8HImode:
6873 if (TYPE_UNSIGNED (type))
0c5c4d59 6874 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6875 else
0c5c4d59 6876 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6877 break;
6878 default:
6879 return NULL_TREE;
6880 }
6881}
6882
6883/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6884static tree
6885spu_builtin_mul_widen_odd (tree type)
6886{
6887 switch (TYPE_MODE (type))
6888 {
6889 case V8HImode:
6890 if (TYPE_UNSIGNED (type))
0c5c4d59 6891 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6892 else
0c5c4d59 6893 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6894 break;
6895 default:
6896 return NULL_TREE;
6897 }
6898}
6899
a76866d3 6900/* Implement targetm.vectorize.builtin_mask_for_load. */
6901static tree
6902spu_builtin_mask_for_load (void)
6903{
0c5c4d59 6904 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6905}
5df189be 6906
a28df51d 6907/* Implement targetm.vectorize.builtin_vectorization_cost. */
6908static int
0822b158 6909spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6910 tree vectype ATTRIBUTE_UNUSED,
6911 int misalign ATTRIBUTE_UNUSED)
559093aa 6912{
6913 switch (type_of_cost)
6914 {
6915 case scalar_stmt:
6916 case vector_stmt:
6917 case vector_load:
6918 case vector_store:
6919 case vec_to_scalar:
6920 case scalar_to_vec:
6921 case cond_branch_not_taken:
6922 case vec_perm:
6923 return 1;
6924
6925 case scalar_store:
6926 return 10;
6927
6928 case scalar_load:
6929 /* Load + rotate. */
6930 return 2;
6931
6932 case unaligned_load:
6933 return 2;
6934
6935 case cond_branch_taken:
6936 return 6;
6937
6938 default:
6939 gcc_unreachable ();
6940 }
a28df51d 6941}
6942
0e87db76 6943/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6944 after applying N iterations. This routine does not determine
 6945 how many iterations are required to reach the desired alignment. */
6946
6947static bool
a9f1838b 6948spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6949{
6950 if (is_packed)
6951 return false;
6952
6953 /* All other types are naturally aligned. */
6954 return true;
6955}
6956
6cf5579e 6957/* Return the appropriate mode for a named address pointer. */
6958static enum machine_mode
6959spu_addr_space_pointer_mode (addr_space_t addrspace)
6960{
6961 switch (addrspace)
6962 {
6963 case ADDR_SPACE_GENERIC:
6964 return ptr_mode;
6965 case ADDR_SPACE_EA:
6966 return EAmode;
6967 default:
6968 gcc_unreachable ();
6969 }
6970}
6971
6972/* Return the appropriate mode for a named address address. */
6973static enum machine_mode
6974spu_addr_space_address_mode (addr_space_t addrspace)
6975{
6976 switch (addrspace)
6977 {
6978 case ADDR_SPACE_GENERIC:
6979 return Pmode;
6980 case ADDR_SPACE_EA:
6981 return EAmode;
6982 default:
6983 gcc_unreachable ();
6984 }
6985}
6986
6987/* Determine if one named address space is a subset of another. */
6988
6989static bool
6990spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6991{
6992 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6993 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6994
6995 if (subset == superset)
6996 return true;
6997
6998 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6999 being subsets but instead as disjoint address spaces. */
7000 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
7001 return false;
7002
7003 else
7004 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
7005}
7006
7007/* Convert from one address space to another. */
7008static rtx
7009spu_addr_space_convert (rtx op, tree from_type, tree to_type)
7010{
7011 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
7012 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
7013
7014 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
7015 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
7016
7017 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
7018 {
7019 rtx result, ls;
7020
7021 ls = gen_const_mem (DImode,
7022 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7023 set_mem_align (ls, 128);
7024
7025 result = gen_reg_rtx (Pmode);
7026 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
7027 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
7028 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7029 ls, const0_rtx, Pmode, 1);
7030
7031 emit_insn (gen_subsi3 (result, op, ls));
7032
7033 return result;
7034 }
7035
7036 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
7037 {
7038 rtx result, ls;
7039
7040 ls = gen_const_mem (DImode,
7041 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
7042 set_mem_align (ls, 128);
7043
7044 result = gen_reg_rtx (EAmode);
7045 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
7046 op = force_reg (Pmode, op);
7047 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
7048 ls, const0_rtx, EAmode, 1);
7049 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
7050
7051 if (EAmode == SImode)
7052 emit_insn (gen_addsi3 (result, op, ls));
7053 else
7054 emit_insn (gen_adddi3 (result, op, ls));
7055
7056 return result;
7057 }
7058
7059 else
7060 gcc_unreachable ();
7061}
7062
7063
d52fd16a 7064/* Count the total number of instructions in each pipe and return the
7065 maximum, which is used as the Minimum Iteration Interval (MII)
7066 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 7067 -2 means an instruction can go in either pipe0 or pipe1. */
7068static int
7069spu_sms_res_mii (struct ddg *g)
7070{
7071 int i;
7072 unsigned t[4] = {0, 0, 0, 0};
7073
7074 for (i = 0; i < g->num_nodes; i++)
7075 {
7076 rtx insn = g->nodes[i].insn;
7077 int p = get_pipe (insn) + 2;
7078
1e944a0b 7079 gcc_assert (p >= 0);
7080 gcc_assert (p < 4);
d52fd16a 7081
7082 t[p]++;
7083 if (dump_file && INSN_P (insn))
7084 fprintf (dump_file, "i%d %s %d %d\n",
7085 INSN_UID (insn),
7086 insn_data[INSN_CODE(insn)].name,
7087 p, t[p]);
7088 }
7089 if (dump_file)
7090 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7091
7092 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7093}
7094
7095
5df189be 7096void
7097spu_init_expanders (void)
9d98604b 7098{
5df189be 7099 if (cfun)
9d98604b 7100 {
7101 rtx r0, r1;
 7102 /* The hard frame pointer register is only 128 bit aligned when
7103 frame_pointer_needed is true. We don't know that until we're
7104 expanding the prologue. */
7105 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7106
7107 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7108 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7109 to be treated as aligned, so generate them here. */
7110 r0 = gen_reg_rtx (SImode);
7111 r1 = gen_reg_rtx (SImode);
7112 mark_reg_pointer (r0, 128);
7113 mark_reg_pointer (r1, 128);
7114 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7115 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7116 }
ea32e033 7117}
7118
7119static enum machine_mode
7120spu_libgcc_cmp_return_mode (void)
7121{
7122
 7123/* For the SPU, word_mode is TImode, so it is better to use SImode
7124 for compare returns. */
7125 return SImode;
7126}
7127
7128static enum machine_mode
7129spu_libgcc_shift_count_mode (void)
7130{
 7131/* For the SPU, word_mode is TImode, so it is better to use SImode
7132 for shift counts. */
7133 return SImode;
7134}
5a976006 7135
a08dfd55 7136/* Implement targetm.section_type_flags. */
7137static unsigned int
7138spu_section_type_flags (tree decl, const char *name, int reloc)
7139{
7140 /* .toe needs to have type @nobits. */
7141 if (strcmp (name, ".toe") == 0)
7142 return SECTION_BSS;
6cf5579e 7143 /* Don't load _ea into the current address space. */
7144 if (strcmp (name, "._ea") == 0)
7145 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7146 return default_section_type_flags (decl, name, reloc);
7147}
c2233b46 7148
6cf5579e 7149/* Implement targetm.select_section. */
7150static section *
7151spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7152{
7153 /* Variables and constants defined in the __ea address space
7154 go into a special section named "._ea". */
7155 if (TREE_TYPE (decl) != error_mark_node
7156 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7157 {
7158 /* We might get called with string constants, but get_named_section
7159 doesn't like them as they are not DECLs. Also, we need to set
7160 flags in that case. */
7161 if (!DECL_P (decl))
7162 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7163
7164 return get_named_section (decl, "._ea", reloc);
7165 }
7166
7167 return default_elf_select_section (decl, reloc, align);
7168}
7169
7170/* Implement targetm.unique_section. */
7171static void
7172spu_unique_section (tree decl, int reloc)
7173{
7174 /* We don't support unique section names in the __ea address
7175 space for now. */
7176 if (TREE_TYPE (decl) != error_mark_node
7177 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7178 return;
7179
7180 default_unique_section (decl, reloc);
7181}
7182
56c7bfc2 7183/* Generate a constant or register which contains 2^SCALE. We assume
7184 the result is valid for MODE. Currently, MODE must be V4SFmode and
7185 SCALE must be SImode. */
7186rtx
7187spu_gen_exp2 (enum machine_mode mode, rtx scale)
7188{
7189 gcc_assert (mode == V4SFmode);
7190 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7191 if (GET_CODE (scale) != CONST_INT)
7192 {
7193 /* unsigned int exp = (127 + scale) << 23;
7194 __vector float m = (__vector float) spu_splats (exp); */
7195 rtx reg = force_reg (SImode, scale);
7196 rtx exp = gen_reg_rtx (SImode);
7197 rtx mul = gen_reg_rtx (mode);
7198 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7199 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7200 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7201 return mul;
7202 }
7203 else
7204 {
7205 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7206 unsigned char arr[16];
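      /* Build the single precision bit pattern (exp << 23) byte by byte and
	 splat it into all four word slots of the result.  */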
7207 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7208 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7209 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7210 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7211 return array_to_constant (mode, arr);
7212 }
7213}
7214
9d98604b 7215/* After reload, just change the convert into a move instruction
7216 or a dead instruction. */
7217void
7218spu_split_convert (rtx ops[])
7219{
7220 if (REGNO (ops[0]) == REGNO (ops[1]))
7221 emit_note (NOTE_INSN_DELETED);
7222 else
7223 {
7224 /* Use TImode always as this might help hard reg copyprop. */
7225 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7226 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7227 emit_insn (gen_move_insn (op0, op1));
7228 }
7229}
7230
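/* Output assembly to call the profiling routine _mcount at function
   entry.  */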
b3878a6c 7231void
4cbad5bb 7232spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7233{
7234 fprintf (file, "# profile\n");
7235 fprintf (file, "brsl $75, _mcount\n");
7236}
7237
329c1e4e 7238/* Implement targetm.ref_may_alias_errno. */
7239static bool
7240spu_ref_may_alias_errno (ao_ref *ref)
7241{
7242 tree base = ao_ref_base (ref);
7243
7244 /* With SPU newlib, errno is defined as something like
 7245 _impure_data._errno.
 7246 The default implementation of this target macro does not
 7247 recognize such expressions, so we special-case it here. */
7248
7249 if (TREE_CODE (base) == VAR_DECL
7250 && !TREE_STATIC (base)
7251 && DECL_EXTERNAL (base)
7252 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7253 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7254 "_impure_data") == 0
7255 /* _errno is the first member of _impure_data. */
7256 && ref->offset == 0)
7257 return true;
7258
7259 return default_ref_may_alias_errno (ref);
7260}
7261
f17d2d13 7262/* Output thunk to FILE that implements a C++ virtual function call (with
7263 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7264 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7265 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7266 relative to the resulting this pointer. */
7267
7268static void
7269spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7270 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7271 tree function)
7272{
7273 rtx op[8];
7274
7275 /* Make sure unwind info is emitted for the thunk if needed. */
7276 final_start_function (emit_barrier (), file, 1);
7277
7278 /* Operand 0 is the target function. */
7279 op[0] = XEXP (DECL_RTL (function), 0);
7280
7281 /* Operand 1 is the 'this' pointer. */
7282 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7283 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7284 else
7285 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7286
7287 /* Operands 2/3 are the low/high halfwords of delta. */
7288 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7289 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7290
7291 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7292 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7293 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7294
7295 /* Operands 6/7 are temporary registers. */
7296 op[6] = gen_rtx_REG (Pmode, 79);
7297 op[7] = gen_rtx_REG (Pmode, 78);
7298
7299 /* Add DELTA to this pointer. */
7300 if (delta)
7301 {
7302 if (delta >= -0x200 && delta < 0x200)
7303 output_asm_insn ("ai\t%1,%1,%2", op);
7304 else if (delta >= -0x8000 && delta < 0x8000)
7305 {
7306 output_asm_insn ("il\t%6,%2", op);
7307 output_asm_insn ("a\t%1,%1,%6", op);
7308 }
7309 else
7310 {
7311 output_asm_insn ("ilhu\t%6,%3", op);
7312 output_asm_insn ("iohl\t%6,%2", op);
7313 output_asm_insn ("a\t%1,%1,%6", op);
7314 }
7315 }
7316
7317 /* Perform vcall adjustment. */
7318 if (vcall_offset)
7319 {
7320 output_asm_insn ("lqd\t%7,0(%1)", op);
7321 output_asm_insn ("rotqby\t%7,%7,%1", op);
7322
7323 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7324 output_asm_insn ("ai\t%7,%7,%4", op);
7325 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7326 {
7327 output_asm_insn ("il\t%6,%4", op);
7328 output_asm_insn ("a\t%7,%7,%6", op);
7329 }
7330 else
7331 {
7332 output_asm_insn ("ilhu\t%6,%5", op);
7333 output_asm_insn ("iohl\t%6,%4", op);
7334 output_asm_insn ("a\t%7,%7,%6", op);
7335 }
7336
7337 output_asm_insn ("lqd\t%6,0(%7)", op);
7338 output_asm_insn ("rotqby\t%6,%6,%7", op);
7339 output_asm_insn ("a\t%1,%1,%6", op);
7340 }
7341
7342 /* Jump to target. */
7343 output_asm_insn ("br\t%0", op);
7344
7345 final_end_function ();
7346}
7347
c2233b46 7348#include "gt-spu.h"