[thirdparty/gcc.git] / gcc / config / spu / spu.c
7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "ggc.h"
40#include "hashtab.h"
41#include "tm_p.h"
42#include "target.h"
43#include "target-def.h"
44#include "langhooks.h"
45#include "reload.h"
46#include "cfglayout.h"
47#include "sched-int.h"
48#include "params.h"
644459d0 49#include "machmode.h"
75a70cf9 50#include "gimple.h"
644459d0 51#include "tm-constrs.h"
d52fd16a 52#include "ddg.h"
5a976006 53#include "sbitmap.h"
54#include "timevar.h"
55#include "df.h"
6352eedf 56
57/* Builtin types, data and prototypes. */
c2233b46 58
59enum spu_builtin_type_index
60{
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110};
111
112#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
6352eedf 125struct spu_builtin_range
126{
127 int low, high;
128};
129
130static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143};
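/* Illustrative reading of the table above (added note): each entry gives
   the inclusive bounds checked for the corresponding immediate operand.
   For example SPU_BTI_S10 is the signed 10-bit range -512..511, while
   asymmetric entries such as SPU_BTI_7 ({-0x40, 0x7f}) presumably accept
   either a signed or an unsigned reading of the 7-bit field.  */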
144
644459d0 145\f
146/* Target specific attribute specifications. */
147char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149/* Prototypes and external defs. */
4c834714 150static void spu_option_override (void);
cc07c468 151static void spu_option_init_struct (struct gcc_options *opts);
686e2769 152static void spu_option_default_params (void);
644459d0 153static void spu_init_builtins (void);
e6925042 154static tree spu_builtin_decl (unsigned, bool);
b62e30b8 155static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 157static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 158static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
644459d0 160static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161static rtx get_pic_reg (void);
162static int need_to_save_reg (int regno, int saving);
163static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167static void emit_nop_for_insn (rtx insn);
168static bool insn_clobbers_hbr (rtx insn);
169static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 170 int distance, sbitmap blocks);
5474166e 171static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
644459d0 173static rtx get_branch_target (rtx branch);
644459d0 174static void spu_machine_dependent_reorg (void);
175static int spu_sched_issue_rate (void);
176static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178static int get_pipe (rtx insn);
644459d0 179static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 180static void spu_sched_init_global (FILE *, int, int);
181static void spu_sched_init (FILE *, int, int);
182static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 183static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
b62e30b8 185 bool *no_add_attrs);
644459d0 186static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
b62e30b8 188 bool *no_add_attrs);
644459d0 189static int spu_naked_function_p (tree func);
b62e30b8 190static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
ee9034d4 192static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
644459d0 196static tree spu_build_builtin_va_list (void);
8a58ed0a 197static void spu_va_start (tree, rtx);
75a70cf9 198static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 200static int store_with_one_insn_p (rtx mem);
644459d0 201static int mem_is_padded_component_ref (rtx x);
9d98604b 202static int reg_aligned_for_addr (rtx x);
644459d0 203static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 205static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 208static void spu_init_libfuncs (void);
fb80456a 209static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 210static void fix_range (const char *);
69ced2d6 211static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 212static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 213static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
e99f512d 215static tree spu_builtin_mul_widen_even (tree);
216static tree spu_builtin_mul_widen_odd (tree);
a76866d3 217static tree spu_builtin_mask_for_load (void);
0822b158 218static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 219static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 220static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
5a976006 226static void asm_file_start (void);
a08dfd55 227static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 228static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229static void spu_unique_section (tree, int);
9d98604b 230static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 231static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 232static void spu_conditional_register_usage (void);
329c1e4e 233static bool spu_ref_may_alias_errno (ao_ref *);
644459d0 234
5474166e 235/* Which instruction set architecture to use. */
236int spu_arch;
237/* Which cpu are we tuning for. */
238int spu_tune;
239
5a976006 240/* The hardware requires 8 insns between a hint and the branch it
 241 affects. This variable describes how many rtl instructions the
242 compiler needs to see before inserting a hint, and then the compiler
243 will insert enough nops to make it at least 8 insns. The default is
 244 for the compiler to allow up to 2 nops to be emitted. The nops are
245 inserted in pairs, so we round down. */
246int spu_hint_dist = (8*4) - (2*4);
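/* Worked example (illustrative): with the default of 2 nops allowed,
   spu_hint_dist is 8*4 - 2*4 = 24 bytes, i.e. the hint must be seen at
   least 6 four-byte insns before the branch; the remaining 2 slots can
   then be filled with nops to reach the required 8.  */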
247
248/* Determines whether we run variable tracking in machine dependent
249 reorganization. */
250static int spu_flag_var_tracking;
251
644459d0 252enum spu_immediate {
253 SPU_NONE,
254 SPU_IL,
255 SPU_ILA,
256 SPU_ILH,
257 SPU_ILHU,
258 SPU_ORI,
259 SPU_ORHI,
260 SPU_ORBI,
99369027 261 SPU_IOHL
644459d0 262};
dea01258 263enum immediate_class
264{
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
5df189be 272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 273};
644459d0 274
275static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 277static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
644459d0 280
1bd43494 281static enum machine_mode spu_unwind_word_mode (void);
282
ea32e033 283static enum machine_mode
284spu_libgcc_cmp_return_mode (void);
285
286static enum machine_mode
287spu_libgcc_shift_count_mode (void);
6cf5579e 288
289/* Pointer mode for __ea references. */
290#define EAmode (spu_ea_model != 32 ? DImode : SImode)
291
ef51d1e3 292\f
293/* Table of machine attributes. */
294static const struct attribute_spec spu_attribute_table[] =
295{
ac86af5d 296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
297 affects_type_identity } */
298 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
299 false },
300 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
301 false },
302 { NULL, 0, 0, false, false, false, NULL, false }
ef51d1e3 303};
644459d0 304\f
305/* TARGET overrides. */
306
6cf5579e 307#undef TARGET_ADDR_SPACE_POINTER_MODE
308#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
309
310#undef TARGET_ADDR_SPACE_ADDRESS_MODE
311#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
312
313#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
314#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
315 spu_addr_space_legitimate_address_p
316
317#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
318#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
319
320#undef TARGET_ADDR_SPACE_SUBSET_P
321#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
322
323#undef TARGET_ADDR_SPACE_CONVERT
324#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
325
644459d0 326#undef TARGET_INIT_BUILTINS
327#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 328#undef TARGET_BUILTIN_DECL
329#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 330
644459d0 331#undef TARGET_EXPAND_BUILTIN
332#define TARGET_EXPAND_BUILTIN spu_expand_builtin
333
1bd43494 334#undef TARGET_UNWIND_WORD_MODE
335#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 336
41e3a0c7 337#undef TARGET_LEGITIMIZE_ADDRESS
338#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
339
6cf5579e 340/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
341 and .quad for the debugger. When it is known that the assembler is fixed,
342 these can be removed. */
343#undef TARGET_ASM_UNALIGNED_SI_OP
344#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
345
346#undef TARGET_ASM_ALIGNED_DI_OP
347#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
348
644459d0 349/* The .8byte directive doesn't seem to work well for a 32 bit
350 architecture. */
351#undef TARGET_ASM_UNALIGNED_DI_OP
352#define TARGET_ASM_UNALIGNED_DI_OP NULL
353
354#undef TARGET_RTX_COSTS
355#define TARGET_RTX_COSTS spu_rtx_costs
356
357#undef TARGET_ADDRESS_COST
f529eb25 358#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 359
360#undef TARGET_SCHED_ISSUE_RATE
361#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
362
5a976006 363#undef TARGET_SCHED_INIT_GLOBAL
364#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
365
366#undef TARGET_SCHED_INIT
367#define TARGET_SCHED_INIT spu_sched_init
368
644459d0 369#undef TARGET_SCHED_VARIABLE_ISSUE
370#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
371
5a976006 372#undef TARGET_SCHED_REORDER
373#define TARGET_SCHED_REORDER spu_sched_reorder
374
375#undef TARGET_SCHED_REORDER2
376#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 377
378#undef TARGET_SCHED_ADJUST_COST
379#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
380
644459d0 381#undef TARGET_ATTRIBUTE_TABLE
382#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
383
384#undef TARGET_ASM_INTEGER
385#define TARGET_ASM_INTEGER spu_assemble_integer
386
387#undef TARGET_SCALAR_MODE_SUPPORTED_P
388#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
389
390#undef TARGET_VECTOR_MODE_SUPPORTED_P
391#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
392
393#undef TARGET_FUNCTION_OK_FOR_SIBCALL
394#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
395
396#undef TARGET_ASM_GLOBALIZE_LABEL
397#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
398
399#undef TARGET_PASS_BY_REFERENCE
400#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
401
ee9034d4 402#undef TARGET_FUNCTION_ARG
403#define TARGET_FUNCTION_ARG spu_function_arg
404
405#undef TARGET_FUNCTION_ARG_ADVANCE
406#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
407
644459d0 408#undef TARGET_MUST_PASS_IN_STACK
409#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
410
411#undef TARGET_BUILD_BUILTIN_VA_LIST
412#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
413
8a58ed0a 414#undef TARGET_EXPAND_BUILTIN_VA_START
415#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
416
644459d0 417#undef TARGET_SETUP_INCOMING_VARARGS
418#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
419
420#undef TARGET_MACHINE_DEPENDENT_REORG
421#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
422
423#undef TARGET_GIMPLIFY_VA_ARG_EXPR
424#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
425
426#undef TARGET_DEFAULT_TARGET_FLAGS
427#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
428
429#undef TARGET_INIT_LIBFUNCS
430#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
431
432#undef TARGET_RETURN_IN_MEMORY
433#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
434
69ced2d6 435#undef TARGET_ENCODE_SECTION_INFO
436#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
437
e99f512d 438#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
439#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
440
441#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
442#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
443
a76866d3 444#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
445#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
446
a28df51d 447#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
448#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
449
202d6e5f 450#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
451#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 452
a0515226 453#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
454#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
455
ea32e033 456#undef TARGET_LIBGCC_CMP_RETURN_MODE
457#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
458
459#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
460#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
461
d52fd16a 462#undef TARGET_SCHED_SMS_RES_MII
463#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
464
5a976006 465#undef TARGET_ASM_FILE_START
466#define TARGET_ASM_FILE_START asm_file_start
467
a08dfd55 468#undef TARGET_SECTION_TYPE_FLAGS
469#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
470
6cf5579e 471#undef TARGET_ASM_SELECT_SECTION
472#define TARGET_ASM_SELECT_SECTION spu_select_section
473
474#undef TARGET_ASM_UNIQUE_SECTION
475#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
476
fd50b071 477#undef TARGET_LEGITIMATE_ADDRESS_P
478#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
479
e96f2783 480#undef TARGET_TRAMPOLINE_INIT
481#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
482
4c834714 483#undef TARGET_OPTION_OVERRIDE
484#define TARGET_OPTION_OVERRIDE spu_option_override
485
cc07c468 486#undef TARGET_OPTION_INIT_STRUCT
487#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
02e53c17 488
686e2769 489#undef TARGET_OPTION_DEFAULT_PARAMS
490#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
491
ed6befa5 492#undef TARGET_EXCEPT_UNWIND_INFO
493#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
494
b2d7ede1 495#undef TARGET_CONDITIONAL_REGISTER_USAGE
496#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
497
329c1e4e 498#undef TARGET_REF_MAY_ALIAS_ERRNO
499#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
500
644459d0 501struct gcc_target targetm = TARGET_INITIALIZER;
502
02e53c17 503static void
cc07c468 504spu_option_init_struct (struct gcc_options *opts)
5df189be 505{
5df189be 506 /* With so many registers this is better on by default. */
cc07c468 507 opts->x_flag_rename_registers = 1;
5df189be 508}
509
686e2769 510/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
511static void
512spu_option_default_params (void)
513{
514 /* Override some of the default param values. With so many registers
515 larger values are better for these params. */
516 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
517}
518
4c834714 519/* Implement TARGET_OPTION_OVERRIDE. */
520static void
521spu_option_override (void)
644459d0 522{
14d408d9 523 /* Small loops will be completely unrolled at -O3. For SPU it is more important
524 to keep code small by default. */
686e2769 525 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 526 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 527 global_options.x_param_values,
528 global_options_set.x_param_values);
14d408d9 529
644459d0 530 flag_omit_frame_pointer = 1;
531
5a976006 532 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 533 if (align_functions < 8)
534 align_functions = 8;
c7b91b14 535
5a976006 536 spu_hint_dist = 8*4 - spu_max_nops*4;
537 if (spu_hint_dist < 0)
538 spu_hint_dist = 0;
539
c7b91b14 540 if (spu_fixed_range_string)
541 fix_range (spu_fixed_range_string);
5474166e 542
543 /* Determine processor architectural level. */
544 if (spu_arch_string)
545 {
546 if (strcmp (&spu_arch_string[0], "cell") == 0)
547 spu_arch = PROCESSOR_CELL;
548 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
549 spu_arch = PROCESSOR_CELLEDP;
550 else
8e181c9d 551 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 552 }
553
554 /* Determine processor to tune for. */
555 if (spu_tune_string)
556 {
557 if (strcmp (&spu_tune_string[0], "cell") == 0)
558 spu_tune = PROCESSOR_CELL;
559 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
560 spu_tune = PROCESSOR_CELLEDP;
561 else
8e181c9d 562 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 563 }
98bbec1e 564
13684256 565 /* Change defaults according to the processor architecture. */
566 if (spu_arch == PROCESSOR_CELLEDP)
567 {
568 /* If no command line option has been otherwise specified, change
569 the default to -mno-safe-hints on celledp -- only the original
570 Cell/B.E. processors require this workaround. */
571 if (!(target_flags_explicit & MASK_SAFE_HINTS))
572 target_flags &= ~MASK_SAFE_HINTS;
573 }
574
98bbec1e 575 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 576}
577\f
578/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
579 struct attribute_spec.handler. */
580
644459d0 581/* True if MODE is valid for the target. By "valid", we mean able to
582 be manipulated in non-trivial ways. In particular, this means all
583 the arithmetic is supported. */
584static bool
585spu_scalar_mode_supported_p (enum machine_mode mode)
586{
587 switch (mode)
588 {
589 case QImode:
590 case HImode:
591 case SImode:
592 case SFmode:
593 case DImode:
594 case TImode:
595 case DFmode:
596 return true;
597
598 default:
599 return false;
600 }
601}
602
603/* Similarly for vector modes. "Supported" here is less strict. At
604 least some operations are supported; need to check optabs or builtins
605 for further details. */
606static bool
607spu_vector_mode_supported_p (enum machine_mode mode)
608{
609 switch (mode)
610 {
611 case V16QImode:
612 case V8HImode:
613 case V4SImode:
614 case V2DImode:
615 case V4SFmode:
616 case V2DFmode:
617 return true;
618
619 default:
620 return false;
621 }
622}
623
624/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
625 least significant bytes of the outer mode. This function returns
 626 TRUE for the SUBREGs where this is correct. */
627int
628valid_subreg (rtx op)
629{
630 enum machine_mode om = GET_MODE (op);
631 enum machine_mode im = GET_MODE (SUBREG_REG (op));
632 return om != VOIDmode && im != VOIDmode
633 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 634 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
635 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 636}
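/* Illustrative examples (added note, not from the original comments):
   (subreg:SI (reg:HI) 0) is accepted because both modes are at most
   4 bytes, and a 16-byte-to-16-byte pairing such as
   (subreg:V4SI (reg:TI) 0) is accepted because both sizes are at least
   16, but (subreg:DI (reg:SI) 0) is rejected since the sizes differ and
   fall in neither group.  */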
637
 638/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 639 and adjust the start offset. */
644459d0 640static rtx
641adjust_operand (rtx op, HOST_WIDE_INT * start)
642{
643 enum machine_mode mode;
644 int op_size;
38aca5eb 645 /* Strip any paradoxical SUBREG. */
646 if (GET_CODE (op) == SUBREG
647 && (GET_MODE_BITSIZE (GET_MODE (op))
648 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 649 {
650 if (start)
651 *start -=
652 GET_MODE_BITSIZE (GET_MODE (op)) -
653 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
654 op = SUBREG_REG (op);
655 }
 656 /* If it is smaller than SI, ensure a SUBREG is used to widen it. */
657 op_size = GET_MODE_BITSIZE (GET_MODE (op));
658 if (op_size < 32)
659 {
660 if (start)
661 *start += 32 - op_size;
662 op_size = 32;
663 }
664 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
665 mode = mode_for_size (op_size, MODE_INT, 0);
666 if (mode != GET_MODE (op))
667 op = gen_rtx_SUBREG (mode, op, 0);
668 return op;
669}
670
671void
672spu_expand_extv (rtx ops[], int unsignedp)
673{
9d98604b 674 rtx dst = ops[0], src = ops[1];
644459d0 675 HOST_WIDE_INT width = INTVAL (ops[2]);
676 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 677 HOST_WIDE_INT align_mask;
678 rtx s0, s1, mask, r0;
644459d0 679
9d98604b 680 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 681
9d98604b 682 if (MEM_P (src))
644459d0 683 {
9d98604b 684 /* First, determine if we need 1 TImode load or 2. We need only 1
685 if the bits being extracted do not cross the alignment boundary
686 as determined by the MEM and its address. */
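	 /* Illustrative example (added note): with MEM_ALIGN (src) == 128 the
	    mask is -128; extracting width 16 at start 120 crosses a quadword
	    boundary (120 & -128 == 0 but 135 & -128 == 128), so two loads are
	    needed, whereas start 32 stays within one quadword and needs only
	    one.  */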
687
688 align_mask = -MEM_ALIGN (src);
689 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 690 {
9d98604b 691 /* Alignment is sufficient for 1 load. */
692 s0 = gen_reg_rtx (TImode);
693 r0 = spu_expand_load (s0, 0, src, start / 8);
694 start &= 7;
695 if (r0)
696 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 697 }
9d98604b 698 else
699 {
700 /* Need 2 loads. */
701 s0 = gen_reg_rtx (TImode);
702 s1 = gen_reg_rtx (TImode);
703 r0 = spu_expand_load (s0, s1, src, start / 8);
704 start &= 7;
705
706 gcc_assert (start + width <= 128);
707 if (r0)
708 {
709 rtx r1 = gen_reg_rtx (SImode);
710 mask = gen_reg_rtx (TImode);
711 emit_move_insn (mask, GEN_INT (-1));
712 emit_insn (gen_rotqby_ti (s0, s0, r0));
713 emit_insn (gen_rotqby_ti (s1, s1, r0));
714 if (GET_CODE (r0) == CONST_INT)
715 r1 = GEN_INT (INTVAL (r0) & 15);
716 else
717 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
718 emit_insn (gen_shlqby_ti (mask, mask, r1));
719 emit_insn (gen_selb (s0, s1, s0, mask));
720 }
721 }
722
723 }
724 else if (GET_CODE (src) == SUBREG)
725 {
726 rtx r = SUBREG_REG (src);
727 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
728 s0 = gen_reg_rtx (TImode);
729 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
730 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
731 else
732 emit_move_insn (s0, src);
733 }
734 else
735 {
736 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
737 s0 = gen_reg_rtx (TImode);
738 emit_move_insn (s0, src);
644459d0 739 }
740
9d98604b 741 /* Now s0 is TImode and contains the bits to extract at start. */
742
743 if (start)
744 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
745
746 if (128 - width)
644459d0 747 {
9d98604b 748 tree c = build_int_cst (NULL_TREE, 128 - width);
749 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 750 }
751
9d98604b 752 emit_move_insn (dst, s0);
644459d0 753}
754
755void
756spu_expand_insv (rtx ops[])
757{
758 HOST_WIDE_INT width = INTVAL (ops[1]);
759 HOST_WIDE_INT start = INTVAL (ops[2]);
760 HOST_WIDE_INT maskbits;
4cbad5bb 761 enum machine_mode dst_mode;
644459d0 762 rtx dst = ops[0], src = ops[3];
4cbad5bb 763 int dst_size;
644459d0 764 rtx mask;
765 rtx shift_reg;
766 int shift;
767
768
769 if (GET_CODE (ops[0]) == MEM)
770 dst = gen_reg_rtx (TImode);
771 else
772 dst = adjust_operand (dst, &start);
773 dst_mode = GET_MODE (dst);
774 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
775
776 if (CONSTANT_P (src))
777 {
778 enum machine_mode m =
779 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
780 src = force_reg (m, convert_to_mode (m, src, 0));
781 }
782 src = adjust_operand (src, 0);
644459d0 783
784 mask = gen_reg_rtx (dst_mode);
785 shift_reg = gen_reg_rtx (dst_mode);
786 shift = dst_size - start - width;
787
788 /* It's not safe to use subreg here because the compiler assumes
789 that the SUBREG_REG is right justified in the SUBREG. */
790 convert_move (shift_reg, src, 1);
791
792 if (shift > 0)
793 {
794 switch (dst_mode)
795 {
796 case SImode:
797 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
798 break;
799 case DImode:
800 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
801 break;
802 case TImode:
803 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
804 break;
805 default:
806 abort ();
807 }
808 }
809 else if (shift < 0)
810 abort ();
811
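  /* The switch below builds a mask with ones in bit positions
     [start, start+width) counted from the most significant bit.
     Worked example (illustrative): dst_size 32, start 4, width 8 gives
     maskbits = (-1ll << 20) + (1ll << 28), whose low 32 bits are
     0x0ff00000.  */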
812 switch (dst_size)
813 {
814 case 32:
815 maskbits = (-1ll << (32 - width - start));
816 if (start)
817 maskbits += (1ll << (32 - start));
818 emit_move_insn (mask, GEN_INT (maskbits));
819 break;
820 case 64:
821 maskbits = (-1ll << (64 - width - start));
822 if (start)
823 maskbits += (1ll << (64 - start));
824 emit_move_insn (mask, GEN_INT (maskbits));
825 break;
826 case 128:
827 {
828 unsigned char arr[16];
829 int i = start / 8;
830 memset (arr, 0, sizeof (arr));
831 arr[i] = 0xff >> (start & 7);
832 for (i++; i <= (start + width - 1) / 8; i++)
833 arr[i] = 0xff;
834 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
835 emit_move_insn (mask, array_to_constant (TImode, arr));
836 }
837 break;
838 default:
839 abort ();
840 }
841 if (GET_CODE (ops[0]) == MEM)
842 {
644459d0 843 rtx low = gen_reg_rtx (SImode);
644459d0 844 rtx rotl = gen_reg_rtx (SImode);
845 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 846 rtx addr;
847 rtx addr0;
848 rtx addr1;
644459d0 849 rtx mem;
850
9d98604b 851 addr = force_reg (Pmode, XEXP (ops[0], 0));
852 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 853 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
854 emit_insn (gen_negsi2 (rotl, low));
855 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
856 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 857 mem = change_address (ops[0], TImode, addr0);
644459d0 858 set_mem_alias_set (mem, 0);
859 emit_move_insn (dst, mem);
860 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 861 if (start + width > MEM_ALIGN (ops[0]))
862 {
863 rtx shl = gen_reg_rtx (SImode);
864 rtx mask1 = gen_reg_rtx (TImode);
865 rtx dst1 = gen_reg_rtx (TImode);
866 rtx mem1;
9d98604b 867 addr1 = plus_constant (addr, 16);
868 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 869 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
870 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 871 mem1 = change_address (ops[0], TImode, addr1);
644459d0 872 set_mem_alias_set (mem1, 0);
873 emit_move_insn (dst1, mem1);
874 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
875 emit_move_insn (mem1, dst1);
876 }
9d98604b 877 emit_move_insn (mem, dst);
644459d0 878 }
879 else
71cd778d 880 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 881}
882
883
884int
885spu_expand_block_move (rtx ops[])
886{
887 HOST_WIDE_INT bytes, align, offset;
888 rtx src, dst, sreg, dreg, target;
889 int i;
890 if (GET_CODE (ops[2]) != CONST_INT
891 || GET_CODE (ops[3]) != CONST_INT
48eb4342 892 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 893 return 0;
894
895 bytes = INTVAL (ops[2]);
896 align = INTVAL (ops[3]);
897
898 if (bytes <= 0)
899 return 1;
900
901 dst = ops[0];
902 src = ops[1];
903
904 if (align == 16)
905 {
906 for (offset = 0; offset + 16 <= bytes; offset += 16)
907 {
908 dst = adjust_address (ops[0], V16QImode, offset);
909 src = adjust_address (ops[1], V16QImode, offset);
910 emit_move_insn (dst, src);
911 }
912 if (offset < bytes)
913 {
914 rtx mask;
915 unsigned char arr[16] = { 0 };
916 for (i = 0; i < bytes - offset; i++)
917 arr[i] = 0xff;
918 dst = adjust_address (ops[0], V16QImode, offset);
919 src = adjust_address (ops[1], V16QImode, offset);
920 mask = gen_reg_rtx (V16QImode);
921 sreg = gen_reg_rtx (V16QImode);
922 dreg = gen_reg_rtx (V16QImode);
923 target = gen_reg_rtx (V16QImode);
924 emit_move_insn (mask, array_to_constant (V16QImode, arr));
925 emit_move_insn (dreg, dst);
926 emit_move_insn (sreg, src);
927 emit_insn (gen_selb (target, dreg, sreg, mask));
928 emit_move_insn (dst, target);
929 }
930 return 1;
931 }
932 return 0;
933}
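/* Illustrative sketch of the aligned path above (added note): a 20-byte
   copy with 16-byte alignment is emitted as one V16QImode move for bytes
   0..15, followed by a selb whose byte mask is 0xff for the first 4
   bytes, merging those 4 source bytes into the final destination
   quadword while leaving its remaining 12 bytes unchanged.  */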
934
935enum spu_comp_code
936{ SPU_EQ, SPU_GT, SPU_GTU };
937
5474166e 938int spu_comp_icode[12][3] = {
939 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
940 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
941 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
942 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
943 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
944 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
945 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
946 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
947 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
948 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
949 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
950 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 951};
952
953/* Generate a compare for CODE. Return a brand-new rtx that represents
954 the result of the compare. GCC can figure this out too if we don't
 955 provide all variations of compares, but since GCC always wants to use
 956 WORD_MODE, we can generate better code in most cases if we do it
957 ourselves. */
958void
74f4459c 959spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 960{
961 int reverse_compare = 0;
962 int reverse_test = 0;
5d70b918 963 rtx compare_result, eq_result;
964 rtx comp_rtx, eq_rtx;
644459d0 965 enum machine_mode comp_mode;
966 enum machine_mode op_mode;
b9c74b4d 967 enum spu_comp_code scode, eq_code;
968 enum insn_code ior_code;
74f4459c 969 enum rtx_code code = GET_CODE (cmp);
970 rtx op0 = XEXP (cmp, 0);
971 rtx op1 = XEXP (cmp, 1);
644459d0 972 int index;
5d70b918 973 int eq_test = 0;
644459d0 974
74f4459c 975 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 976 and so on, to keep the constant in operand 1. */
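  /* For example (illustrative): (x >= 10) becomes (x > 9), and the
     unsigned (x LTU 10) becomes (x LEU 9).  */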
74f4459c 977 if (GET_CODE (op1) == CONST_INT)
644459d0 978 {
74f4459c 979 HOST_WIDE_INT val = INTVAL (op1) - 1;
980 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 981 switch (code)
982 {
983 case GE:
74f4459c 984 op1 = GEN_INT (val);
644459d0 985 code = GT;
986 break;
987 case LT:
74f4459c 988 op1 = GEN_INT (val);
644459d0 989 code = LE;
990 break;
991 case GEU:
74f4459c 992 op1 = GEN_INT (val);
644459d0 993 code = GTU;
994 break;
995 case LTU:
74f4459c 996 op1 = GEN_INT (val);
644459d0 997 code = LEU;
998 break;
999 default:
1000 break;
1001 }
1002 }
1003
5d70b918 1004 comp_mode = SImode;
74f4459c 1005 op_mode = GET_MODE (op0);
5d70b918 1006
644459d0 1007 switch (code)
1008 {
1009 case GE:
644459d0 1010 scode = SPU_GT;
07027691 1011 if (HONOR_NANS (op_mode))
5d70b918 1012 {
1013 reverse_compare = 0;
1014 reverse_test = 0;
1015 eq_test = 1;
1016 eq_code = SPU_EQ;
1017 }
1018 else
1019 {
1020 reverse_compare = 1;
1021 reverse_test = 1;
1022 }
644459d0 1023 break;
1024 case LE:
644459d0 1025 scode = SPU_GT;
07027691 1026 if (HONOR_NANS (op_mode))
5d70b918 1027 {
1028 reverse_compare = 1;
1029 reverse_test = 0;
1030 eq_test = 1;
1031 eq_code = SPU_EQ;
1032 }
1033 else
1034 {
1035 reverse_compare = 0;
1036 reverse_test = 1;
1037 }
644459d0 1038 break;
1039 case LT:
1040 reverse_compare = 1;
1041 reverse_test = 0;
1042 scode = SPU_GT;
1043 break;
1044 case GEU:
1045 reverse_compare = 1;
1046 reverse_test = 1;
1047 scode = SPU_GTU;
1048 break;
1049 case LEU:
1050 reverse_compare = 0;
1051 reverse_test = 1;
1052 scode = SPU_GTU;
1053 break;
1054 case LTU:
1055 reverse_compare = 1;
1056 reverse_test = 0;
1057 scode = SPU_GTU;
1058 break;
1059 case NE:
1060 reverse_compare = 0;
1061 reverse_test = 1;
1062 scode = SPU_EQ;
1063 break;
1064
1065 case EQ:
1066 scode = SPU_EQ;
1067 break;
1068 case GT:
1069 scode = SPU_GT;
1070 break;
1071 case GTU:
1072 scode = SPU_GTU;
1073 break;
1074 default:
1075 scode = SPU_EQ;
1076 break;
1077 }
1078
644459d0 1079 switch (op_mode)
1080 {
1081 case QImode:
1082 index = 0;
1083 comp_mode = QImode;
1084 break;
1085 case HImode:
1086 index = 1;
1087 comp_mode = HImode;
1088 break;
1089 case SImode:
1090 index = 2;
1091 break;
1092 case DImode:
1093 index = 3;
1094 break;
1095 case TImode:
1096 index = 4;
1097 break;
1098 case SFmode:
1099 index = 5;
1100 break;
1101 case DFmode:
1102 index = 6;
1103 break;
1104 case V16QImode:
5474166e 1105 index = 7;
1106 comp_mode = op_mode;
1107 break;
644459d0 1108 case V8HImode:
5474166e 1109 index = 8;
1110 comp_mode = op_mode;
1111 break;
644459d0 1112 case V4SImode:
5474166e 1113 index = 9;
1114 comp_mode = op_mode;
1115 break;
644459d0 1116 case V4SFmode:
5474166e 1117 index = 10;
1118 comp_mode = V4SImode;
1119 break;
644459d0 1120 case V2DFmode:
5474166e 1121 index = 11;
1122 comp_mode = V2DImode;
644459d0 1123 break;
5474166e 1124 case V2DImode:
644459d0 1125 default:
1126 abort ();
1127 }
1128
74f4459c 1129 if (GET_MODE (op1) == DFmode
07027691 1130 && (scode != SPU_GT && scode != SPU_EQ))
1131 abort ();
644459d0 1132
74f4459c 1133 if (is_set == 0 && op1 == const0_rtx
1134 && (GET_MODE (op0) == SImode
1135 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1136 {
1137 /* Don't need to set a register with the result when we are
1138 comparing against zero and branching. */
1139 reverse_test = !reverse_test;
74f4459c 1140 compare_result = op0;
644459d0 1141 }
1142 else
1143 {
1144 compare_result = gen_reg_rtx (comp_mode);
1145
1146 if (reverse_compare)
1147 {
74f4459c 1148 rtx t = op1;
1149 op1 = op0;
1150 op0 = t;
644459d0 1151 }
1152
1153 if (spu_comp_icode[index][scode] == 0)
1154 abort ();
1155
1156 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1157 (op0, op_mode))
1158 op0 = force_reg (op_mode, op0);
644459d0 1159 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1160 (op1, op_mode))
1161 op1 = force_reg (op_mode, op1);
644459d0 1162 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1163 op0, op1);
644459d0 1164 if (comp_rtx == 0)
1165 abort ();
1166 emit_insn (comp_rtx);
1167
5d70b918 1168 if (eq_test)
1169 {
1170 eq_result = gen_reg_rtx (comp_mode);
1171 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1172 op0, op1);
5d70b918 1173 if (eq_rtx == 0)
1174 abort ();
1175 emit_insn (eq_rtx);
d6bf3b14 1176 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1177 gcc_assert (ior_code != CODE_FOR_nothing);
1178 emit_insn (GEN_FCN (ior_code)
1179 (compare_result, compare_result, eq_result));
1180 }
644459d0 1181 }
1182
1183 if (is_set == 0)
1184 {
1185 rtx bcomp;
1186 rtx loc_ref;
1187
1188 /* We don't have branch on QI compare insns, so we convert the
1189 QI compare result to a HI result. */
1190 if (comp_mode == QImode)
1191 {
1192 rtx old_res = compare_result;
1193 compare_result = gen_reg_rtx (HImode);
1194 comp_mode = HImode;
1195 emit_insn (gen_extendqihi2 (compare_result, old_res));
1196 }
1197
1198 if (reverse_test)
1199 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1200 else
1201 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1202
74f4459c 1203 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1204 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1205 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1206 loc_ref, pc_rtx)));
1207 }
1208 else if (is_set == 2)
1209 {
74f4459c 1210 rtx target = operands[0];
644459d0 1211 int compare_size = GET_MODE_BITSIZE (comp_mode);
1212 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1213 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1214 rtx select_mask;
1215 rtx op_t = operands[2];
1216 rtx op_f = operands[3];
1217
1218 /* The result of the comparison can be SI, HI or QI mode. Create a
1219 mask based on that result. */
1220 if (target_size > compare_size)
1221 {
1222 select_mask = gen_reg_rtx (mode);
1223 emit_insn (gen_extend_compare (select_mask, compare_result));
1224 }
1225 else if (target_size < compare_size)
1226 select_mask =
1227 gen_rtx_SUBREG (mode, compare_result,
1228 (compare_size - target_size) / BITS_PER_UNIT);
1229 else if (comp_mode != mode)
1230 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1231 else
1232 select_mask = compare_result;
1233
1234 if (GET_MODE (target) != GET_MODE (op_t)
1235 || GET_MODE (target) != GET_MODE (op_f))
1236 abort ();
1237
1238 if (reverse_test)
1239 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1240 else
1241 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1242 }
1243 else
1244 {
74f4459c 1245 rtx target = operands[0];
644459d0 1246 if (reverse_test)
1247 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1248 gen_rtx_NOT (comp_mode, compare_result)));
1249 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1250 emit_insn (gen_extendhisi2 (target, compare_result));
1251 else if (GET_MODE (target) == SImode
1252 && GET_MODE (compare_result) == QImode)
1253 emit_insn (gen_extend_compare (target, compare_result));
1254 else
1255 emit_move_insn (target, compare_result);
1256 }
1257}
1258
1259HOST_WIDE_INT
1260const_double_to_hwint (rtx x)
1261{
1262 HOST_WIDE_INT val;
1263 REAL_VALUE_TYPE rv;
1264 if (GET_MODE (x) == SFmode)
1265 {
1266 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1267 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1268 }
1269 else if (GET_MODE (x) == DFmode)
1270 {
1271 long l[2];
1272 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1273 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1274 val = l[0];
1275 val = (val << 32) | (l[1] & 0xffffffff);
1276 }
1277 else
1278 abort ();
1279 return val;
1280}
1281
1282rtx
1283hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1284{
1285 long tv[2];
1286 REAL_VALUE_TYPE rv;
1287 gcc_assert (mode == SFmode || mode == DFmode);
1288
1289 if (mode == SFmode)
1290 tv[0] = (v << 32) >> 32;
1291 else if (mode == DFmode)
1292 {
1293 tv[1] = (v << 32) >> 32;
1294 tv[0] = v >> 32;
1295 }
1296 real_from_target (&rv, tv, mode);
1297 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1298}
1299
1300void
1301print_operand_address (FILE * file, register rtx addr)
1302{
1303 rtx reg;
1304 rtx offset;
1305
e04cf423 1306 if (GET_CODE (addr) == AND
1307 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1308 && INTVAL (XEXP (addr, 1)) == -16)
1309 addr = XEXP (addr, 0);
1310
644459d0 1311 switch (GET_CODE (addr))
1312 {
1313 case REG:
1314 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1315 break;
1316
1317 case PLUS:
1318 reg = XEXP (addr, 0);
1319 offset = XEXP (addr, 1);
1320 if (GET_CODE (offset) == REG)
1321 {
1322 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1323 reg_names[REGNO (offset)]);
1324 }
1325 else if (GET_CODE (offset) == CONST_INT)
1326 {
1327 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1328 INTVAL (offset), reg_names[REGNO (reg)]);
1329 }
1330 else
1331 abort ();
1332 break;
1333
1334 case CONST:
1335 case LABEL_REF:
1336 case SYMBOL_REF:
1337 case CONST_INT:
1338 output_addr_const (file, addr);
1339 break;
1340
1341 default:
1342 debug_rtx (addr);
1343 abort ();
1344 }
1345}
1346
1347void
1348print_operand (FILE * file, rtx x, int code)
1349{
1350 enum machine_mode mode = GET_MODE (x);
1351 HOST_WIDE_INT val;
1352 unsigned char arr[16];
1353 int xcode = GET_CODE (x);
dea01258 1354 int i, info;
644459d0 1355 if (GET_MODE (x) == VOIDmode)
1356 switch (code)
1357 {
644459d0 1358 case 'L': /* 128 bits, signed */
1359 case 'm': /* 128 bits, signed */
1360 case 'T': /* 128 bits, signed */
1361 case 't': /* 128 bits, signed */
1362 mode = TImode;
1363 break;
644459d0 1364 case 'K': /* 64 bits, signed */
1365 case 'k': /* 64 bits, signed */
1366 case 'D': /* 64 bits, signed */
1367 case 'd': /* 64 bits, signed */
1368 mode = DImode;
1369 break;
644459d0 1370 case 'J': /* 32 bits, signed */
1371 case 'j': /* 32 bits, signed */
1372 case 's': /* 32 bits, signed */
1373 case 'S': /* 32 bits, signed */
1374 mode = SImode;
1375 break;
1376 }
1377 switch (code)
1378 {
1379
1380 case 'j': /* 32 bits, signed */
1381 case 'k': /* 64 bits, signed */
1382 case 'm': /* 128 bits, signed */
1383 if (xcode == CONST_INT
1384 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1385 {
1386 gcc_assert (logical_immediate_p (x, mode));
1387 constant_to_array (mode, x, arr);
1388 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1389 val = trunc_int_for_mode (val, SImode);
1390 switch (which_logical_immediate (val))
1391 {
1392 case SPU_ORI:
1393 break;
1394 case SPU_ORHI:
1395 fprintf (file, "h");
1396 break;
1397 case SPU_ORBI:
1398 fprintf (file, "b");
1399 break;
1400 default:
1401 gcc_unreachable();
1402 }
1403 }
1404 else
1405 gcc_unreachable();
1406 return;
1407
1408 case 'J': /* 32 bits, signed */
1409 case 'K': /* 64 bits, signed */
1410 case 'L': /* 128 bits, signed */
1411 if (xcode == CONST_INT
1412 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1413 {
1414 gcc_assert (logical_immediate_p (x, mode)
1415 || iohl_immediate_p (x, mode));
1416 constant_to_array (mode, x, arr);
1417 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1418 val = trunc_int_for_mode (val, SImode);
1419 switch (which_logical_immediate (val))
1420 {
1421 case SPU_ORI:
1422 case SPU_IOHL:
1423 break;
1424 case SPU_ORHI:
1425 val = trunc_int_for_mode (val, HImode);
1426 break;
1427 case SPU_ORBI:
1428 val = trunc_int_for_mode (val, QImode);
1429 break;
1430 default:
1431 gcc_unreachable();
1432 }
1433 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1434 }
1435 else
1436 gcc_unreachable();
1437 return;
1438
1439 case 't': /* 128 bits, signed */
1440 case 'd': /* 64 bits, signed */
1441 case 's': /* 32 bits, signed */
dea01258 1442 if (CONSTANT_P (x))
644459d0 1443 {
dea01258 1444 enum immediate_class c = classify_immediate (x, mode);
1445 switch (c)
1446 {
1447 case IC_IL1:
1448 constant_to_array (mode, x, arr);
1449 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1450 val = trunc_int_for_mode (val, SImode);
1451 switch (which_immediate_load (val))
1452 {
1453 case SPU_IL:
1454 break;
1455 case SPU_ILA:
1456 fprintf (file, "a");
1457 break;
1458 case SPU_ILH:
1459 fprintf (file, "h");
1460 break;
1461 case SPU_ILHU:
1462 fprintf (file, "hu");
1463 break;
1464 default:
1465 gcc_unreachable ();
1466 }
1467 break;
1468 case IC_CPAT:
1469 constant_to_array (mode, x, arr);
1470 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1471 if (info == 1)
1472 fprintf (file, "b");
1473 else if (info == 2)
1474 fprintf (file, "h");
1475 else if (info == 4)
1476 fprintf (file, "w");
1477 else if (info == 8)
1478 fprintf (file, "d");
1479 break;
1480 case IC_IL1s:
1481 if (xcode == CONST_VECTOR)
1482 {
1483 x = CONST_VECTOR_ELT (x, 0);
1484 xcode = GET_CODE (x);
1485 }
1486 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1487 fprintf (file, "a");
1488 else if (xcode == HIGH)
1489 fprintf (file, "hu");
1490 break;
1491 case IC_FSMBI:
5df189be 1492 case IC_FSMBI2:
dea01258 1493 case IC_IL2:
1494 case IC_IL2s:
1495 case IC_POOL:
1496 abort ();
1497 }
644459d0 1498 }
644459d0 1499 else
1500 gcc_unreachable ();
1501 return;
1502
1503 case 'T': /* 128 bits, signed */
1504 case 'D': /* 64 bits, signed */
1505 case 'S': /* 32 bits, signed */
dea01258 1506 if (CONSTANT_P (x))
644459d0 1507 {
dea01258 1508 enum immediate_class c = classify_immediate (x, mode);
1509 switch (c)
644459d0 1510 {
dea01258 1511 case IC_IL1:
1512 constant_to_array (mode, x, arr);
1513 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1514 val = trunc_int_for_mode (val, SImode);
1515 switch (which_immediate_load (val))
1516 {
1517 case SPU_IL:
1518 case SPU_ILA:
1519 break;
1520 case SPU_ILH:
1521 case SPU_ILHU:
1522 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1523 break;
1524 default:
1525 gcc_unreachable ();
1526 }
1527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1528 break;
1529 case IC_FSMBI:
1530 constant_to_array (mode, x, arr);
1531 val = 0;
1532 for (i = 0; i < 16; i++)
1533 {
1534 val <<= 1;
1535 val |= arr[i] & 1;
1536 }
1537 print_operand (file, GEN_INT (val), 0);
1538 break;
1539 case IC_CPAT:
1540 constant_to_array (mode, x, arr);
1541 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1543 break;
dea01258 1544 case IC_IL1s:
dea01258 1545 if (xcode == HIGH)
5df189be 1546 x = XEXP (x, 0);
1547 if (GET_CODE (x) == CONST_VECTOR)
1548 x = CONST_VECTOR_ELT (x, 0);
1549 output_addr_const (file, x);
1550 if (xcode == HIGH)
1551 fprintf (file, "@h");
644459d0 1552 break;
dea01258 1553 case IC_IL2:
1554 case IC_IL2s:
5df189be 1555 case IC_FSMBI2:
dea01258 1556 case IC_POOL:
1557 abort ();
644459d0 1558 }
c8befdb9 1559 }
644459d0 1560 else
1561 gcc_unreachable ();
1562 return;
1563
644459d0 1564 case 'C':
1565 if (xcode == CONST_INT)
1566 {
 1567 /* Only the 4 least significant bits are relevant for generating
 1568 control word instructions. */
1569 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1570 return;
1571 }
1572 break;
1573
1574 case 'M': /* print code for c*d */
1575 if (GET_CODE (x) == CONST_INT)
1576 switch (INTVAL (x))
1577 {
1578 case 1:
1579 fprintf (file, "b");
1580 break;
1581 case 2:
1582 fprintf (file, "h");
1583 break;
1584 case 4:
1585 fprintf (file, "w");
1586 break;
1587 case 8:
1588 fprintf (file, "d");
1589 break;
1590 default:
1591 gcc_unreachable();
1592 }
1593 else
1594 gcc_unreachable();
1595 return;
1596
1597 case 'N': /* Negate the operand */
1598 if (xcode == CONST_INT)
1599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1600 else if (xcode == CONST_VECTOR)
1601 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1602 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1603 return;
1604
1605 case 'I': /* enable/disable interrupts */
1606 if (xcode == CONST_INT)
1607 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1608 return;
1609
1610 case 'b': /* branch modifiers */
1611 if (xcode == REG)
1612 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1613 else if (COMPARISON_P (x))
1614 fprintf (file, "%s", xcode == NE ? "n" : "");
1615 return;
1616
1617 case 'i': /* indirect call */
1618 if (xcode == MEM)
1619 {
1620 if (GET_CODE (XEXP (x, 0)) == REG)
1621 /* Used in indirect function calls. */
1622 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1623 else
1624 output_address (XEXP (x, 0));
1625 }
1626 return;
1627
1628 case 'p': /* load/store */
1629 if (xcode == MEM)
1630 {
1631 x = XEXP (x, 0);
1632 xcode = GET_CODE (x);
1633 }
e04cf423 1634 if (xcode == AND)
1635 {
1636 x = XEXP (x, 0);
1637 xcode = GET_CODE (x);
1638 }
644459d0 1639 if (xcode == REG)
1640 fprintf (file, "d");
1641 else if (xcode == CONST_INT)
1642 fprintf (file, "a");
1643 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1644 fprintf (file, "r");
1645 else if (xcode == PLUS || xcode == LO_SUM)
1646 {
1647 if (GET_CODE (XEXP (x, 1)) == REG)
1648 fprintf (file, "x");
1649 else
1650 fprintf (file, "d");
1651 }
1652 return;
1653
5df189be 1654 case 'e':
1655 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1656 val &= 0x7;
1657 output_addr_const (file, GEN_INT (val));
1658 return;
1659
1660 case 'f':
1661 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1662 val &= 0x1f;
1663 output_addr_const (file, GEN_INT (val));
1664 return;
1665
1666 case 'g':
1667 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1668 val &= 0x3f;
1669 output_addr_const (file, GEN_INT (val));
1670 return;
1671
1672 case 'h':
1673 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1674 val = (val >> 3) & 0x1f;
1675 output_addr_const (file, GEN_INT (val));
1676 return;
1677
1678 case 'E':
1679 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 val = -val;
1681 val &= 0x7;
1682 output_addr_const (file, GEN_INT (val));
1683 return;
1684
1685 case 'F':
1686 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 val = -val;
1688 val &= 0x1f;
1689 output_addr_const (file, GEN_INT (val));
1690 return;
1691
1692 case 'G':
1693 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1694 val = -val;
1695 val &= 0x3f;
1696 output_addr_const (file, GEN_INT (val));
1697 return;
1698
1699 case 'H':
1700 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1701 val = -(val & -8ll);
1702 val = (val >> 3) & 0x1f;
1703 output_addr_const (file, GEN_INT (val));
1704 return;
1705
56c7bfc2 1706 case 'v':
1707 case 'w':
1708 constant_to_array (mode, x, arr);
1709 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1710 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1711 return;
1712
644459d0 1713 case 0:
1714 if (xcode == REG)
1715 fprintf (file, "%s", reg_names[REGNO (x)]);
1716 else if (xcode == MEM)
1717 output_address (XEXP (x, 0));
1718 else if (xcode == CONST_VECTOR)
dea01258 1719 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1720 else
1721 output_addr_const (file, x);
1722 return;
1723
f6a0d06f 1724 /* unused letters
56c7bfc2 1725 o qr u yz
5df189be 1726 AB OPQR UVWXYZ */
644459d0 1727 default:
1728 output_operand_lossage ("invalid %%xn code");
1729 }
1730 gcc_unreachable ();
1731}
1732
644459d0 1733/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1734 caller saved register. For leaf functions it is more efficient to
1735 use a volatile register because we won't need to save and restore the
1736 pic register. This routine is only valid after register allocation
1737 is completed, so we can pick an unused register. */
1738static rtx
1739get_pic_reg (void)
1740{
1741 rtx pic_reg = pic_offset_table_rtx;
1742 if (!reload_completed && !reload_in_progress)
1743 abort ();
87a95921 1744 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1745 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1746 return pic_reg;
1747}
1748
5df189be 1749/* Split constant addresses to handle cases that are too large.
1750 Add in the pic register when in PIC mode.
1751 Split immediates that require more than 1 instruction. */
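/* Illustrative example (added note): an IC_IL2 constant such as
   0x12345678 in each word is split below into a high part 0x12340000,
   typically loaded with an "ilhu", and a low part 0x00005678 that is
   IORed in, which matches the "iohl" pattern; see the IC_IL2 case.  */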
dea01258 1752int
1753spu_split_immediate (rtx * ops)
c8befdb9 1754{
dea01258 1755 enum machine_mode mode = GET_MODE (ops[0]);
1756 enum immediate_class c = classify_immediate (ops[1], mode);
1757
1758 switch (c)
c8befdb9 1759 {
dea01258 1760 case IC_IL2:
1761 {
1762 unsigned char arrhi[16];
1763 unsigned char arrlo[16];
98bbec1e 1764 rtx to, temp, hi, lo;
dea01258 1765 int i;
98bbec1e 1766 enum machine_mode imode = mode;
1767 /* We need to do reals as ints because the constant used in the
1768 IOR might not be a legitimate real constant. */
1769 imode = int_mode_for_mode (mode);
dea01258 1770 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1771 if (imode != mode)
1772 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1773 else
1774 to = ops[0];
1775 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1776 for (i = 0; i < 16; i += 4)
1777 {
1778 arrlo[i + 2] = arrhi[i + 2];
1779 arrlo[i + 3] = arrhi[i + 3];
1780 arrlo[i + 0] = arrlo[i + 1] = 0;
1781 arrhi[i + 2] = arrhi[i + 3] = 0;
1782 }
98bbec1e 1783 hi = array_to_constant (imode, arrhi);
1784 lo = array_to_constant (imode, arrlo);
1785 emit_move_insn (temp, hi);
dea01258 1786 emit_insn (gen_rtx_SET
98bbec1e 1787 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1788 return 1;
1789 }
5df189be 1790 case IC_FSMBI2:
1791 {
1792 unsigned char arr_fsmbi[16];
1793 unsigned char arr_andbi[16];
1794 rtx to, reg_fsmbi, reg_and;
1795 int i;
1796 enum machine_mode imode = mode;
1797 /* We need to do reals as ints because the constant used in the
1798 * AND might not be a legitimate real constant. */
1799 imode = int_mode_for_mode (mode);
1800 constant_to_array (mode, ops[1], arr_fsmbi);
1801 if (imode != mode)
1802 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1803 else
1804 to = ops[0];
1805 for (i = 0; i < 16; i++)
1806 if (arr_fsmbi[i] != 0)
1807 {
1808 arr_andbi[0] = arr_fsmbi[i];
1809 arr_fsmbi[i] = 0xff;
1810 }
1811 for (i = 1; i < 16; i++)
1812 arr_andbi[i] = arr_andbi[0];
1813 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1814 reg_and = array_to_constant (imode, arr_andbi);
1815 emit_move_insn (to, reg_fsmbi);
1816 emit_insn (gen_rtx_SET
1817 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1818 return 1;
1819 }
dea01258 1820 case IC_POOL:
1821 if (reload_in_progress || reload_completed)
1822 {
1823 rtx mem = force_const_mem (mode, ops[1]);
1824 if (TARGET_LARGE_MEM)
1825 {
1826 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1827 emit_move_insn (addr, XEXP (mem, 0));
1828 mem = replace_equiv_address (mem, addr);
1829 }
1830 emit_move_insn (ops[0], mem);
1831 return 1;
1832 }
1833 break;
1834 case IC_IL1s:
1835 case IC_IL2s:
1836 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1837 {
1838 if (c == IC_IL2s)
1839 {
5df189be 1840 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1841 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1842 }
1843 else if (flag_pic)
1844 emit_insn (gen_pic (ops[0], ops[1]));
1845 if (flag_pic)
1846 {
1847 rtx pic_reg = get_pic_reg ();
1848 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1849 crtl->uses_pic_offset_table = 1;
dea01258 1850 }
1851 return flag_pic || c == IC_IL2s;
1852 }
1853 break;
1854 case IC_IL1:
1855 case IC_FSMBI:
1856 case IC_CPAT:
1857 break;
c8befdb9 1858 }
dea01258 1859 return 0;
c8befdb9 1860}
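
/* Rough illustration of the IC_IL2 case above (editorial sketch; the
   mnemonics are taken from the SPU ISA and are assumed, not derived from
   this file): splitting the SImode constant 0x12345678 corresponds to

       ilhu  rt, 0x1234      # 0x12340000 in each word slot
       iohl  rt, 0x5678      # OR in the low halfword -> 0x12345678

   which is the move of arrhi followed by the IOR with arrlo emitted above.  */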
1861
644459d0 1862/* SAVING is TRUE when we are generating the actual load and store
1863 instructions for REGNO. When determining the size of the stack
 1864   needed for saving registers we must allocate enough space for the
1865 worst case, because we don't always have the information early enough
1866 to not allocate it. But we can at least eliminate the actual loads
1867 and stores during the prologue/epilogue. */
1868static int
1869need_to_save_reg (int regno, int saving)
1870{
3072d30e 1871 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1872 return 1;
1873 if (flag_pic
1874 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1875 && (!saving || crtl->uses_pic_offset_table)
644459d0 1876 && (!saving
3072d30e 1877 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1878 return 1;
1879 return 0;
1880}
1881
1882/* This function is only correct starting with local register
 1883   allocation.  */
1884int
1885spu_saved_regs_size (void)
1886{
1887 int reg_save_size = 0;
1888 int regno;
1889
1890 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1891 if (need_to_save_reg (regno, 0))
1892 reg_save_size += 0x10;
1893 return reg_save_size;
1894}
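
/* For example (illustrative only): a function that has to preserve three
   call-saved registers reports 0x30 bytes here, one 16-byte quadword slot
   per register, matching the V4SImode stores emitted by frame_emit_store
   below.  */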
1895
1896static rtx
1897frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1898{
1899 rtx reg = gen_rtx_REG (V4SImode, regno);
1900 rtx mem =
1901 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1902 return emit_insn (gen_movv4si (mem, reg));
1903}
1904
1905static rtx
1906frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1907{
1908 rtx reg = gen_rtx_REG (V4SImode, regno);
1909 rtx mem =
1910 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1911 return emit_insn (gen_movv4si (reg, mem));
1912}
1913
1914/* This happens after reload, so we need to expand it. */
1915static rtx
1916frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1917{
1918 rtx insn;
1919 if (satisfies_constraint_K (GEN_INT (imm)))
1920 {
1921 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1922 }
1923 else
1924 {
3072d30e 1925 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1926 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1927 if (REGNO (src) == REGNO (scratch))
1928 abort ();
1929 }
644459d0 1930 return insn;
1931}
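
/* Sketch of the two cases above (illustrative; the exact instructions are
   assumed from the SPU ISA): an adjustment that satisfies the 'K'
   constraint, e.g. -16, becomes a single add-immediate, while a large one
   such as -80000 is first loaded into SCRATCH and then added with a
   register-register add.  */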
1932
1933/* Return nonzero if this function is known to have a null epilogue. */
1934
1935int
1936direct_return (void)
1937{
1938 if (reload_completed)
1939 {
1940 if (cfun->static_chain_decl == 0
1941 && (spu_saved_regs_size ()
1942 + get_frame_size ()
abe32cce 1943 + crtl->outgoing_args_size
1944 + crtl->args.pretend_args_size == 0)
644459d0 1945 && current_function_is_leaf)
1946 return 1;
1947 }
1948 return 0;
1949}
1950
1951/*
1952 The stack frame looks like this:
1953 +-------------+
1954 | incoming |
a8e019fa 1955 | args |
1956 AP -> +-------------+
644459d0 1957 | $lr save |
1958 +-------------+
1959 prev SP | back chain |
1960 +-------------+
1961 | var args |
abe32cce 1962 | reg save | crtl->args.pretend_args_size bytes
644459d0 1963 +-------------+
1964 | ... |
1965 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1966 FP -> +-------------+
644459d0 1967 | ... |
a8e019fa 1968 | vars | get_frame_size() bytes
1969 HFP -> +-------------+
644459d0 1970 | ... |
1971 | outgoing |
abe32cce 1972 | args | crtl->outgoing_args_size bytes
644459d0 1973 +-------------+
1974 | $lr of next |
1975 | frame |
1976 +-------------+
a8e019fa 1977 | back chain |
1978 SP -> +-------------+
644459d0 1979
1980*/
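
/* Worked example (illustrative numbers, not taken from this file): a
   non-leaf function with 32 bytes of locals, 48 bytes of saved registers,
   64 bytes of outgoing args and no pretend args allocates

       total_size = 32 + 48 + 64 + STACK_POINTER_OFFSET

   and stores the caller's $sp at offset 0 of the new frame as the back
   chain, as shown in the layout above.  */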
1981void
1982spu_expand_prologue (void)
1983{
1984 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1985 HOST_WIDE_INT total_size;
1986 HOST_WIDE_INT saved_regs_size;
1987 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1988 rtx scratch_reg_0, scratch_reg_1;
1989 rtx insn, real;
1990
644459d0 1991 if (flag_pic && optimize == 0)
18d50ae6 1992 crtl->uses_pic_offset_table = 1;
644459d0 1993
1994 if (spu_naked_function_p (current_function_decl))
1995 return;
1996
1997 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1998 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1999
2000 saved_regs_size = spu_saved_regs_size ();
2001 total_size = size + saved_regs_size
abe32cce 2002 + crtl->outgoing_args_size
2003 + crtl->args.pretend_args_size;
644459d0 2004
2005 if (!current_function_is_leaf
18d50ae6 2006 || cfun->calls_alloca || total_size > 0)
644459d0 2007 total_size += STACK_POINTER_OFFSET;
2008
2009 /* Save this first because code after this might use the link
2010 register as a scratch register. */
2011 if (!current_function_is_leaf)
2012 {
2013 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2014 RTX_FRAME_RELATED_P (insn) = 1;
2015 }
2016
2017 if (total_size > 0)
2018 {
abe32cce 2019 offset = -crtl->args.pretend_args_size;
644459d0 2020 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2021 if (need_to_save_reg (regno, 1))
2022 {
2023 offset -= 16;
2024 insn = frame_emit_store (regno, sp_reg, offset);
2025 RTX_FRAME_RELATED_P (insn) = 1;
2026 }
2027 }
2028
18d50ae6 2029 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2030 {
2031 rtx pic_reg = get_pic_reg ();
2032 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2033 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2034 }
2035
2036 if (total_size > 0)
2037 {
2038 if (flag_stack_check)
2039 {
d819917f 2040 /* We compare against total_size-1 because
644459d0 2041 ($sp >= total_size) <=> ($sp > total_size-1) */
2042 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2043 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2044 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2045 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2046 {
2047 emit_move_insn (scratch_v4si, size_v4si);
2048 size_v4si = scratch_v4si;
2049 }
2050 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2051 emit_insn (gen_vec_extractv4si
2052 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2053 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2054 }
2055
2056 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2057 the value of the previous $sp because we save it as the back
2058 chain. */
2059 if (total_size <= 2000)
2060 {
2061 /* In this case we save the back chain first. */
2062 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2063 insn =
2064 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2065 }
644459d0 2066 else
2067 {
2068 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2069 insn =
2070 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2071 }
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2074 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2075
2076 if (total_size > 2000)
2077 {
2078 /* Save the back chain ptr */
2079 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2080 }
2081
2082 if (frame_pointer_needed)
2083 {
2084 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2085 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2086 + crtl->outgoing_args_size;
644459d0 2087 /* Set the new frame_pointer */
d8dfeb55 2088 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2089 RTX_FRAME_RELATED_P (insn) = 1;
2090 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2091 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2092 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2093 }
2094 }
2095
a512540d 2096 if (flag_stack_usage)
2097 current_function_static_stack_size = total_size;
644459d0 2098}
2099
2100void
2101spu_expand_epilogue (bool sibcall_p)
2102{
2103 int size = get_frame_size (), offset, regno;
2104 HOST_WIDE_INT saved_regs_size, total_size;
2105 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2106 rtx jump, scratch_reg_0;
2107
644459d0 2108 if (spu_naked_function_p (current_function_decl))
2109 return;
2110
2111 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2112
2113 saved_regs_size = spu_saved_regs_size ();
2114 total_size = size + saved_regs_size
abe32cce 2115 + crtl->outgoing_args_size
2116 + crtl->args.pretend_args_size;
644459d0 2117
2118 if (!current_function_is_leaf
18d50ae6 2119 || cfun->calls_alloca || total_size > 0)
644459d0 2120 total_size += STACK_POINTER_OFFSET;
2121
2122 if (total_size > 0)
2123 {
18d50ae6 2124 if (cfun->calls_alloca)
644459d0 2125 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2126 else
2127 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2128
2129
2130 if (saved_regs_size > 0)
2131 {
abe32cce 2132 offset = -crtl->args.pretend_args_size;
644459d0 2133 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2134 if (need_to_save_reg (regno, 1))
2135 {
2136 offset -= 0x10;
2137 frame_emit_load (regno, sp_reg, offset);
2138 }
2139 }
2140 }
2141
2142 if (!current_function_is_leaf)
2143 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2144
2145 if (!sibcall_p)
2146 {
18b42941 2147 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2148 jump = emit_jump_insn (gen__return ());
2149 emit_barrier_after (jump);
2150 }
2151
644459d0 2152}
2153
2154rtx
2155spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2156{
2157 if (count != 0)
2158 return 0;
2159 /* This is inefficient because it ends up copying to a save-register
2160 which then gets saved even though $lr has already been saved. But
2161 it does generate better code for leaf functions and we don't need
2162 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2163 used for __builtin_return_address anyway, so maybe we don't care if
2164 it's inefficient. */
2165 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2166}
2167\f
2168
2169/* Given VAL, generate a constant appropriate for MODE.
2170 If MODE is a vector mode, every element will be VAL.
2171 For TImode, VAL will be zero extended to 128 bits. */
2172rtx
2173spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2174{
2175 rtx inner;
2176 rtvec v;
2177 int units, i;
2178
2179 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2180 || GET_MODE_CLASS (mode) == MODE_FLOAT
2181 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2182 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2183
2184 if (GET_MODE_CLASS (mode) == MODE_INT)
2185 return immed_double_const (val, 0, mode);
2186
2187 /* val is the bit representation of the float */
2188 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2189 return hwint_to_const_double (mode, val);
2190
2191 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2192 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2193 else
2194 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2195
2196 units = GET_MODE_NUNITS (mode);
2197
2198 v = rtvec_alloc (units);
2199
2200 for (i = 0; i < units; ++i)
2201 RTVEC_ELT (v, i) = inner;
2202
2203 return gen_rtx_CONST_VECTOR (mode, v);
2204}
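
/* Examples (illustrative): spu_const (V4SImode, 7) returns the
   CONST_VECTOR {7, 7, 7, 7}, and spu_const (SFmode, 0x3f800000) returns
   the CONST_DOUBLE whose bit pattern is 1.0f, since VAL is treated as the
   float's bit representation.  */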
644459d0 2205
5474166e 2206/* Create a MODE vector constant from 4 ints. */
2207rtx
2208spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2209{
2210 unsigned char arr[16];
2211 arr[0] = (a >> 24) & 0xff;
2212 arr[1] = (a >> 16) & 0xff;
2213 arr[2] = (a >> 8) & 0xff;
2214 arr[3] = (a >> 0) & 0xff;
2215 arr[4] = (b >> 24) & 0xff;
2216 arr[5] = (b >> 16) & 0xff;
2217 arr[6] = (b >> 8) & 0xff;
2218 arr[7] = (b >> 0) & 0xff;
2219 arr[8] = (c >> 24) & 0xff;
2220 arr[9] = (c >> 16) & 0xff;
2221 arr[10] = (c >> 8) & 0xff;
2222 arr[11] = (c >> 0) & 0xff;
2223 arr[12] = (d >> 24) & 0xff;
2224 arr[13] = (d >> 16) & 0xff;
2225 arr[14] = (d >> 8) & 0xff;
2226 arr[15] = (d >> 0) & 0xff;
2227 return array_to_constant(mode, arr);
2228}
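
/* Usage sketch (hypothetical values): spu_const_from_ints (V4SImode,
   0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f) yields the constant
   whose 16 bytes are 00 01 02 ... 0f in order; each int supplies one
   big-endian word.  */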
5a976006 2229\f
2230/* branch hint stuff */
5474166e 2231
644459d0 2232/* An array of these is used to propagate hints to predecessor blocks. */
2233struct spu_bb_info
2234{
5a976006 2235 rtx prop_jump; /* propagated from another block */
2236 int bb_index; /* the original block. */
644459d0 2237};
5a976006 2238static struct spu_bb_info *spu_bb_info;
644459d0 2239
5a976006 2240#define STOP_HINT_P(INSN) \
2241 (GET_CODE(INSN) == CALL_INSN \
2242 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2243 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2244
2245/* 1 when RTX is a hinted branch or its target. We keep track of
2246 what has been hinted so the safe-hint code can test it easily. */
2247#define HINTED_P(RTX) \
2248 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2249
2250/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2251#define SCHED_ON_EVEN_P(RTX) \
2252 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2253
2254/* Emit a nop for INSN such that the two will dual issue. This assumes
2255 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2256 We check for TImode to handle a MULTI1 insn which has dual issued its
2257 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2258 ADDR_VEC insns. */
2259static void
2260emit_nop_for_insn (rtx insn)
644459d0 2261{
5a976006 2262 int p;
2263 rtx new_insn;
2264 p = get_pipe (insn);
2265 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2266 new_insn = emit_insn_after (gen_lnop (), insn);
2267 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2268 {
5a976006 2269 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2270 PUT_MODE (new_insn, TImode);
2271 PUT_MODE (insn, VOIDmode);
2272 }
2273 else
2274 new_insn = emit_insn_after (gen_lnop (), insn);
2275 recog_memoized (new_insn);
2fbdf9ef 2276 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2277}
2278
2279/* Insert nops in basic blocks to meet dual issue alignment
2280 requirements. Also make sure hbrp and hint instructions are at least
2281 one cycle apart, possibly inserting a nop. */
2282static void
2283pad_bb(void)
2284{
2285 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2286 int length;
2287 int addr;
2288
2289 /* This sets up INSN_ADDRESSES. */
2290 shorten_branches (get_insns ());
2291
2292 /* Keep track of length added by nops. */
2293 length = 0;
2294
2295 prev_insn = 0;
2296 insn = get_insns ();
2297 if (!active_insn_p (insn))
2298 insn = next_active_insn (insn);
2299 for (; insn; insn = next_insn)
2300 {
2301 next_insn = next_active_insn (insn);
2302 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2303 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2304 {
5a976006 2305 if (hbr_insn)
2306 {
2307 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2308 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2309 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2310 || (a1 - a0 == 4))
2311 {
2312 prev_insn = emit_insn_before (gen_lnop (), insn);
2313 PUT_MODE (prev_insn, GET_MODE (insn));
2314 PUT_MODE (insn, TImode);
2fbdf9ef 2315 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2316 length += 4;
2317 }
2318 }
2319 hbr_insn = insn;
2320 }
2321 if (INSN_CODE (insn) == CODE_FOR_blockage)
2322 {
2323 if (GET_MODE (insn) == TImode)
2324 PUT_MODE (next_insn, TImode);
2325 insn = next_insn;
2326 next_insn = next_active_insn (insn);
2327 }
2328 addr = INSN_ADDRESSES (INSN_UID (insn));
2329 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2330 {
2331 if (((addr + length) & 7) != 0)
2332 {
2333 emit_nop_for_insn (prev_insn);
2334 length += 4;
2335 }
644459d0 2336 }
5a976006 2337 else if (GET_MODE (insn) == TImode
2338 && ((next_insn && GET_MODE (next_insn) != TImode)
2339 || get_attr_type (insn) == TYPE_MULTI0)
2340 && ((addr + length) & 7) != 0)
2341 {
2342 /* prev_insn will always be set because the first insn is
2343 always 8-byte aligned. */
2344 emit_nop_for_insn (prev_insn);
2345 length += 4;
2346 }
2347 prev_insn = insn;
644459d0 2348 }
644459d0 2349}
2350
5a976006 2351\f
2352/* Routines for branch hints. */
2353
644459d0 2354static void
5a976006 2355spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2356 int distance, sbitmap blocks)
644459d0 2357{
5a976006 2358 rtx branch_label = 0;
2359 rtx hint;
2360 rtx insn;
2361 rtx table;
644459d0 2362
2363 if (before == 0 || branch == 0 || target == 0)
2364 return;
2365
5a976006 2366 /* While scheduling we require hints to be no further than 600, so
2367 we need to enforce that here too */
644459d0 2368 if (distance > 600)
2369 return;
2370
5a976006 2371  /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2372 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2373 before = NEXT_INSN (before);
644459d0 2374
2375 branch_label = gen_label_rtx ();
2376 LABEL_NUSES (branch_label)++;
2377 LABEL_PRESERVE_P (branch_label) = 1;
2378 insn = emit_label_before (branch_label, branch);
2379 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2380 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2381
2382 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2383 recog_memoized (hint);
2fbdf9ef 2384 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2385 HINTED_P (branch) = 1;
644459d0 2386
5a976006 2387 if (GET_CODE (target) == LABEL_REF)
2388 HINTED_P (XEXP (target, 0)) = 1;
2389 else if (tablejump_p (branch, 0, &table))
644459d0 2390 {
5a976006 2391 rtvec vec;
2392 int j;
2393 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2394 vec = XVEC (PATTERN (table), 0);
2395 else
2396 vec = XVEC (PATTERN (table), 1);
2397 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2398 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2399 }
5a976006 2400
2401 if (distance >= 588)
644459d0 2402 {
5a976006 2403 /* Make sure the hint isn't scheduled any earlier than this point,
 2404         which could make it too far for the branch offset to fit */
2fbdf9ef 2405 insn = emit_insn_before (gen_blockage (), hint);
2406 recog_memoized (insn);
2407 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2408 }
2409 else if (distance <= 8 * 4)
2410 {
2411 /* To guarantee at least 8 insns between the hint and branch we
2412 insert nops. */
2413 int d;
2414 for (d = distance; d < 8 * 4; d += 4)
2415 {
2416 insn =
2417 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2418 recog_memoized (insn);
2fbdf9ef 2419 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2420 }
2421
2422 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2423 insn = emit_insn_after (gen_blockage (), hint);
2424 recog_memoized (insn);
2425 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2426
2427 /* Make sure any nops inserted aren't scheduled after the call. */
2428 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2429 {
2430 insn = emit_insn_before (gen_blockage (), branch);
2431 recog_memoized (insn);
2432 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2433 }
644459d0 2434 }
644459d0 2435}
2436
2437/* Returns 0 if we don't want a hint for this branch. Otherwise return
2438 the rtx for the branch target. */
2439static rtx
2440get_branch_target (rtx branch)
2441{
2442 if (GET_CODE (branch) == JUMP_INSN)
2443 {
2444 rtx set, src;
2445
2446 /* Return statements */
2447 if (GET_CODE (PATTERN (branch)) == RETURN)
2448 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2449
2450 /* jump table */
2451 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2452 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2453 return 0;
2454
fcc31b99 2455 /* ASM GOTOs. */
604157f6 2456 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2457 return NULL;
2458
644459d0 2459 set = single_set (branch);
2460 src = SET_SRC (set);
2461 if (GET_CODE (SET_DEST (set)) != PC)
2462 abort ();
2463
2464 if (GET_CODE (src) == IF_THEN_ELSE)
2465 {
2466 rtx lab = 0;
2467 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2468 if (note)
2469 {
2470 /* If the more probable case is not a fall through, then
2471 try a branch hint. */
2472 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2473 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2474 && GET_CODE (XEXP (src, 1)) != PC)
2475 lab = XEXP (src, 1);
2476 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2477 && GET_CODE (XEXP (src, 2)) != PC)
2478 lab = XEXP (src, 2);
2479 }
2480 if (lab)
2481 {
2482 if (GET_CODE (lab) == RETURN)
2483 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2484 return lab;
2485 }
2486 return 0;
2487 }
2488
2489 return src;
2490 }
2491 else if (GET_CODE (branch) == CALL_INSN)
2492 {
2493 rtx call;
2494 /* All of our call patterns are in a PARALLEL and the CALL is
2495 the first pattern in the PARALLEL. */
2496 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2497 abort ();
2498 call = XVECEXP (PATTERN (branch), 0, 0);
2499 if (GET_CODE (call) == SET)
2500 call = SET_SRC (call);
2501 if (GET_CODE (call) != CALL)
2502 abort ();
2503 return XEXP (XEXP (call, 0), 0);
2504 }
2505 return 0;
2506}
2507
5a976006 2508/* The special $hbr register is used to prevent the insn scheduler from
2509 moving hbr insns across instructions which invalidate them. It
2510 should only be used in a clobber, and this function searches for
2511 insns which clobber it. */
2512static bool
2513insn_clobbers_hbr (rtx insn)
2514{
2515 if (INSN_P (insn)
2516 && GET_CODE (PATTERN (insn)) == PARALLEL)
2517 {
2518 rtx parallel = PATTERN (insn);
2519 rtx clobber;
2520 int j;
2521 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2522 {
2523 clobber = XVECEXP (parallel, 0, j);
2524 if (GET_CODE (clobber) == CLOBBER
2525 && GET_CODE (XEXP (clobber, 0)) == REG
2526 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2527 return 1;
2528 }
2529 }
2530 return 0;
2531}
2532
2533/* Search up to 32 insns starting at FIRST:
2534 - at any kind of hinted branch, just return
2535 - at any unconditional branch in the first 15 insns, just return
2536 - at a call or indirect branch, after the first 15 insns, force it to
2537 an even address and return
2538 - at any unconditional branch, after the first 15 insns, force it to
2539 an even address.
 2540   At the end of the search, insert an hbrp within 4 insns of FIRST,
2541 and an hbrp within 16 instructions of FIRST.
2542 */
644459d0 2543static void
5a976006 2544insert_hbrp_for_ilb_runout (rtx first)
644459d0 2545{
5a976006 2546 rtx insn, before_4 = 0, before_16 = 0;
2547 int addr = 0, length, first_addr = -1;
2548 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2549 int insert_lnop_after = 0;
2550 for (insn = first; insn; insn = NEXT_INSN (insn))
2551 if (INSN_P (insn))
2552 {
2553 if (first_addr == -1)
2554 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2555 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2556 length = get_attr_length (insn);
2557
2558 if (before_4 == 0 && addr + length >= 4 * 4)
2559 before_4 = insn;
2560 /* We test for 14 instructions because the first hbrp will add
2561 up to 2 instructions. */
2562 if (before_16 == 0 && addr + length >= 14 * 4)
2563 before_16 = insn;
2564
2565 if (INSN_CODE (insn) == CODE_FOR_hbr)
2566 {
2567 /* Make sure an hbrp is at least 2 cycles away from a hint.
2568 Insert an lnop after the hbrp when necessary. */
2569 if (before_4 == 0 && addr > 0)
2570 {
2571 before_4 = insn;
2572 insert_lnop_after |= 1;
2573 }
2574 else if (before_4 && addr <= 4 * 4)
2575 insert_lnop_after |= 1;
2576 if (before_16 == 0 && addr > 10 * 4)
2577 {
2578 before_16 = insn;
2579 insert_lnop_after |= 2;
2580 }
2581 else if (before_16 && addr <= 14 * 4)
2582 insert_lnop_after |= 2;
2583 }
644459d0 2584
5a976006 2585 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2586 {
2587 if (addr < hbrp_addr0)
2588 hbrp_addr0 = addr;
2589 else if (addr < hbrp_addr1)
2590 hbrp_addr1 = addr;
2591 }
644459d0 2592
5a976006 2593 if (CALL_P (insn) || JUMP_P (insn))
2594 {
2595 if (HINTED_P (insn))
2596 return;
2597
2598 /* Any branch after the first 15 insns should be on an even
2599 address to avoid a special case branch. There might be
2600 some nops and/or hbrps inserted, so we test after 10
2601 insns. */
2602 if (addr > 10 * 4)
2603 SCHED_ON_EVEN_P (insn) = 1;
2604 }
644459d0 2605
5a976006 2606 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2607 return;
2608
2609
2610 if (addr + length >= 32 * 4)
644459d0 2611 {
5a976006 2612 gcc_assert (before_4 && before_16);
2613 if (hbrp_addr0 > 4 * 4)
644459d0 2614 {
5a976006 2615 insn =
2616 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2617 recog_memoized (insn);
2fbdf9ef 2618 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2619 INSN_ADDRESSES_NEW (insn,
2620 INSN_ADDRESSES (INSN_UID (before_4)));
2621 PUT_MODE (insn, GET_MODE (before_4));
2622 PUT_MODE (before_4, TImode);
2623 if (insert_lnop_after & 1)
644459d0 2624 {
5a976006 2625 insn = emit_insn_before (gen_lnop (), before_4);
2626 recog_memoized (insn);
2fbdf9ef 2627 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2628 INSN_ADDRESSES_NEW (insn,
2629 INSN_ADDRESSES (INSN_UID (before_4)));
2630 PUT_MODE (insn, TImode);
644459d0 2631 }
644459d0 2632 }
5a976006 2633 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2634 && hbrp_addr1 > 16 * 4)
644459d0 2635 {
5a976006 2636 insn =
2637 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2638 recog_memoized (insn);
2fbdf9ef 2639 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2640 INSN_ADDRESSES_NEW (insn,
2641 INSN_ADDRESSES (INSN_UID (before_16)));
2642 PUT_MODE (insn, GET_MODE (before_16));
2643 PUT_MODE (before_16, TImode);
2644 if (insert_lnop_after & 2)
644459d0 2645 {
5a976006 2646 insn = emit_insn_before (gen_lnop (), before_16);
2647 recog_memoized (insn);
2fbdf9ef 2648 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2649 INSN_ADDRESSES_NEW (insn,
2650 INSN_ADDRESSES (INSN_UID
2651 (before_16)));
2652 PUT_MODE (insn, TImode);
644459d0 2653 }
2654 }
5a976006 2655 return;
644459d0 2656 }
644459d0 2657 }
5a976006 2658 else if (BARRIER_P (insn))
2659 return;
644459d0 2660
644459d0 2661}
5a976006 2662
2663/* The SPU might hang when it executes 48 inline instructions after a
2664 hinted branch jumps to its hinted target. The beginning of a
851d9296 2665 function and the return from a call might have been hinted, and
2666 must be handled as well. To prevent a hang we insert 2 hbrps. The
2667 first should be within 6 insns of the branch target. The second
2668 should be within 22 insns of the branch target. When determining
2669 if hbrps are necessary, we look for only 32 inline instructions,
 2670   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2671 when inserting new hbrps, we insert them within 4 and 16 insns of
2672 the target. */
644459d0 2673static void
5a976006 2674insert_hbrp (void)
644459d0 2675{
5a976006 2676 rtx insn;
2677 if (TARGET_SAFE_HINTS)
644459d0 2678 {
5a976006 2679 shorten_branches (get_insns ());
2680 /* Insert hbrp at beginning of function */
2681 insn = next_active_insn (get_insns ());
2682 if (insn)
2683 insert_hbrp_for_ilb_runout (insn);
2684 /* Insert hbrp after hinted targets. */
2685 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2686 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2687 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2688 }
644459d0 2689}
2690
5a976006 2691static int in_spu_reorg;
2692
2693/* Insert branch hints. There are no branch optimizations after this
2694 pass, so it's safe to set our branch hints now. */
644459d0 2695static void
5a976006 2696spu_machine_dependent_reorg (void)
644459d0 2697{
5a976006 2698 sbitmap blocks;
2699 basic_block bb;
2700 rtx branch, insn;
2701 rtx branch_target = 0;
2702 int branch_addr = 0, insn_addr, required_dist = 0;
2703 int i;
2704 unsigned int j;
644459d0 2705
5a976006 2706 if (!TARGET_BRANCH_HINTS || optimize == 0)
2707 {
2708 /* We still do it for unoptimized code because an external
2709 function might have hinted a call or return. */
2710 insert_hbrp ();
2711 pad_bb ();
2712 return;
2713 }
644459d0 2714
5a976006 2715 blocks = sbitmap_alloc (last_basic_block);
2716 sbitmap_zero (blocks);
644459d0 2717
5a976006 2718 in_spu_reorg = 1;
2719 compute_bb_for_insn ();
2720
2721 compact_blocks ();
2722
2723 spu_bb_info =
2724 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2725 sizeof (struct spu_bb_info));
2726
2727 /* We need exact insn addresses and lengths. */
2728 shorten_branches (get_insns ());
2729
2730 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2731 {
5a976006 2732 bb = BASIC_BLOCK (i);
2733 branch = 0;
2734 if (spu_bb_info[i].prop_jump)
644459d0 2735 {
5a976006 2736 branch = spu_bb_info[i].prop_jump;
2737 branch_target = get_branch_target (branch);
2738 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2739 required_dist = spu_hint_dist;
2740 }
2741 /* Search from end of a block to beginning. In this loop, find
 2742         jumps which need a branch hint and emit the hints only when:
2743 - it's an indirect branch and we're at the insn which sets
2744 the register
2745 - we're at an insn that will invalidate the hint. e.g., a
2746 call, another hint insn, inline asm that clobbers $hbr, and
2747 some inlined operations (divmodsi4). Don't consider jumps
2748 because they are only at the end of a block and are
2749 considered when we are deciding whether to propagate
2750 - we're getting too far away from the branch. The hbr insns
2751 only have a signed 10 bit offset
2752 We go back as far as possible so the branch will be considered
2753 for propagation when we get to the beginning of the block. */
2754 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2755 {
2756 if (INSN_P (insn))
2757 {
2758 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2759 if (branch
2760 && ((GET_CODE (branch_target) == REG
2761 && set_of (branch_target, insn) != NULL_RTX)
2762 || insn_clobbers_hbr (insn)
2763 || branch_addr - insn_addr > 600))
2764 {
2765 rtx next = NEXT_INSN (insn);
2766 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2767 if (insn != BB_END (bb)
2768 && branch_addr - next_addr >= required_dist)
2769 {
2770 if (dump_file)
2771 fprintf (dump_file,
2772 "hint for %i in block %i before %i\n",
2773 INSN_UID (branch), bb->index,
2774 INSN_UID (next));
2775 spu_emit_branch_hint (next, branch, branch_target,
2776 branch_addr - next_addr, blocks);
2777 }
2778 branch = 0;
2779 }
2780
2781 /* JUMP_P will only be true at the end of a block. When
2782 branch is already set it means we've previously decided
2783 to propagate a hint for that branch into this block. */
2784 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2785 {
2786 branch = 0;
2787 if ((branch_target = get_branch_target (insn)))
2788 {
2789 branch = insn;
2790 branch_addr = insn_addr;
2791 required_dist = spu_hint_dist;
2792 }
2793 }
2794 }
2795 if (insn == BB_HEAD (bb))
2796 break;
2797 }
2798
2799 if (branch)
2800 {
2801 /* If we haven't emitted a hint for this branch yet, it might
2802 be profitable to emit it in one of the predecessor blocks,
2803 especially for loops. */
2804 rtx bbend;
2805 basic_block prev = 0, prop = 0, prev2 = 0;
2806 int loop_exit = 0, simple_loop = 0;
2807 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2808
2809 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2810 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2811 prev = EDGE_PRED (bb, j)->src;
2812 else
2813 prev2 = EDGE_PRED (bb, j)->src;
2814
2815 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2816 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2817 loop_exit = 1;
2818 else if (EDGE_SUCC (bb, j)->dest == bb)
2819 simple_loop = 1;
2820
2821 /* If this branch is a loop exit then propagate to previous
2822 fallthru block. This catches the cases when it is a simple
2823 loop or when there is an initial branch into the loop. */
2824 if (prev && (loop_exit || simple_loop)
2825 && prev->loop_depth <= bb->loop_depth)
2826 prop = prev;
2827
 2828	  /* If there is only one adjacent predecessor, don't propagate
2829 outside this loop. This loop_depth test isn't perfect, but
2830 I'm not sure the loop_father member is valid at this point. */
2831 else if (prev && single_pred_p (bb)
2832 && prev->loop_depth == bb->loop_depth)
2833 prop = prev;
2834
2835 /* If this is the JOIN block of a simple IF-THEN then
 2836	     propagate the hint to the HEADER block.  */
2837 else if (prev && prev2
2838 && EDGE_COUNT (bb->preds) == 2
2839 && EDGE_COUNT (prev->preds) == 1
2840 && EDGE_PRED (prev, 0)->src == prev2
2841 && prev2->loop_depth == bb->loop_depth
2842 && GET_CODE (branch_target) != REG)
2843 prop = prev;
2844
2845 /* Don't propagate when:
2846 - this is a simple loop and the hint would be too far
2847 - this is not a simple loop and there are 16 insns in
2848 this block already
2849 - the predecessor block ends in a branch that will be
2850 hinted
2851 - the predecessor block ends in an insn that invalidates
2852 the hint */
2853 if (prop
2854 && prop->index >= 0
2855 && (bbend = BB_END (prop))
2856 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2857 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2858 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2859 {
2860 if (dump_file)
2861 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2862 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2863 bb->index, prop->index, bb->loop_depth,
2864 INSN_UID (branch), loop_exit, simple_loop,
2865 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2866
2867 spu_bb_info[prop->index].prop_jump = branch;
2868 spu_bb_info[prop->index].bb_index = i;
2869 }
2870 else if (branch_addr - next_addr >= required_dist)
2871 {
2872 if (dump_file)
2873 fprintf (dump_file, "hint for %i in block %i before %i\n",
2874 INSN_UID (branch), bb->index,
2875 INSN_UID (NEXT_INSN (insn)));
2876 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2877 branch_addr - next_addr, blocks);
2878 }
2879 branch = 0;
644459d0 2880 }
644459d0 2881 }
5a976006 2882 free (spu_bb_info);
644459d0 2883
5a976006 2884 if (!sbitmap_empty_p (blocks))
2885 find_many_sub_basic_blocks (blocks);
2886
2887 /* We have to schedule to make sure alignment is ok. */
2888 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2889
2890 /* The hints need to be scheduled, so call it again. */
2891 schedule_insns ();
2fbdf9ef 2892 df_finish_pass (true);
5a976006 2893
2894 insert_hbrp ();
2895
2896 pad_bb ();
2897
8f1d58ad 2898 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2899 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2900 {
2901 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2902	 between its branch label and the branch.  We don't move the
2903 label because GCC expects it at the beginning of the block. */
2904 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2905 rtx label_ref = XVECEXP (unspec, 0, 0);
2906 rtx label = XEXP (label_ref, 0);
2907 rtx branch;
2908 int offset = 0;
2909 for (branch = NEXT_INSN (label);
2910 !JUMP_P (branch) && !CALL_P (branch);
2911 branch = NEXT_INSN (branch))
2912 if (NONJUMP_INSN_P (branch))
2913 offset += get_attr_length (branch);
2914 if (offset > 0)
2915 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2916 }
5a976006 2917
2918 if (spu_flag_var_tracking)
644459d0 2919 {
5a976006 2920 df_analyze ();
2921 timevar_push (TV_VAR_TRACKING);
2922 variable_tracking_main ();
2923 timevar_pop (TV_VAR_TRACKING);
2924 df_finish_pass (false);
644459d0 2925 }
5a976006 2926
2927 free_bb_for_insn ();
2928
2929 in_spu_reorg = 0;
644459d0 2930}
2931\f
2932
2933/* Insn scheduling routines, primarily for dual issue. */
2934static int
2935spu_sched_issue_rate (void)
2936{
2937 return 2;
2938}
2939
2940static int
5a976006 2941uses_ls_unit(rtx insn)
644459d0 2942{
5a976006 2943 rtx set = single_set (insn);
2944 if (set != 0
2945 && (GET_CODE (SET_DEST (set)) == MEM
2946 || GET_CODE (SET_SRC (set)) == MEM))
2947 return 1;
2948 return 0;
644459d0 2949}
2950
2951static int
2952get_pipe (rtx insn)
2953{
2954 enum attr_type t;
2955 /* Handle inline asm */
2956 if (INSN_CODE (insn) == -1)
2957 return -1;
2958 t = get_attr_type (insn);
2959 switch (t)
2960 {
2961 case TYPE_CONVERT:
2962 return -2;
2963 case TYPE_MULTI0:
2964 return -1;
2965
2966 case TYPE_FX2:
2967 case TYPE_FX3:
2968 case TYPE_SPR:
2969 case TYPE_NOP:
2970 case TYPE_FXB:
2971 case TYPE_FPD:
2972 case TYPE_FP6:
2973 case TYPE_FP7:
644459d0 2974 return 0;
2975
2976 case TYPE_LNOP:
2977 case TYPE_SHUF:
2978 case TYPE_LOAD:
2979 case TYPE_STORE:
2980 case TYPE_BR:
2981 case TYPE_MULTI1:
2982 case TYPE_HBR:
5a976006 2983 case TYPE_IPREFETCH:
644459d0 2984 return 1;
2985 default:
2986 abort ();
2987 }
2988}
2989
5a976006 2990
2991/* haifa-sched.c has a static variable that keeps track of the current
2992 cycle. It is passed to spu_sched_reorder, and we record it here for
2993 use by spu_sched_variable_issue. It won't be accurate if the
 2994   scheduler updates its clock_var between the two calls.  */
2995static int clock_var;
2996
2997/* This is used to keep track of insn alignment. Set to 0 at the
2998 beginning of each block and increased by the "length" attr of each
2999 insn scheduled. */
3000static int spu_sched_length;
3001
3002/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3003 ready list appropriately in spu_sched_reorder(). */
3004static int pipe0_clock;
3005static int pipe1_clock;
3006
3007static int prev_clock_var;
3008
3009static int prev_priority;
3010
3011/* The SPU needs to load the next ilb sometime during the execution of
3012 the previous ilb. There is a potential conflict if every cycle has a
3013 load or store. To avoid the conflict we make sure the load/store
3014 unit is free for at least one cycle during the execution of insns in
3015 the previous ilb. */
3016static int spu_ls_first;
3017static int prev_ls_clock;
3018
3019static void
3020spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3021 int max_ready ATTRIBUTE_UNUSED)
3022{
3023 spu_sched_length = 0;
3024}
3025
3026static void
3027spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3028 int max_ready ATTRIBUTE_UNUSED)
3029{
3030 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3031 {
3032 /* When any block might be at least 8-byte aligned, assume they
3033 will all be at least 8-byte aligned to make sure dual issue
3034 works out correctly. */
3035 spu_sched_length = 0;
3036 }
3037 spu_ls_first = INT_MAX;
3038 clock_var = -1;
3039 prev_ls_clock = -1;
3040 pipe0_clock = -1;
3041 pipe1_clock = -1;
3042 prev_clock_var = -1;
3043 prev_priority = -1;
3044}
3045
644459d0 3046static int
5a976006 3047spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3048 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3049{
5a976006 3050 int len;
3051 int p;
644459d0 3052 if (GET_CODE (PATTERN (insn)) == USE
3053 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3054 || (len = get_attr_length (insn)) == 0)
3055 return more;
3056
3057 spu_sched_length += len;
3058
3059 /* Reset on inline asm */
3060 if (INSN_CODE (insn) == -1)
3061 {
3062 spu_ls_first = INT_MAX;
3063 pipe0_clock = -1;
3064 pipe1_clock = -1;
3065 return 0;
3066 }
3067 p = get_pipe (insn);
3068 if (p == 0)
3069 pipe0_clock = clock_var;
3070 else
3071 pipe1_clock = clock_var;
3072
3073 if (in_spu_reorg)
3074 {
3075 if (clock_var - prev_ls_clock > 1
3076 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3077 spu_ls_first = INT_MAX;
3078 if (uses_ls_unit (insn))
3079 {
3080 if (spu_ls_first == INT_MAX)
3081 spu_ls_first = spu_sched_length;
3082 prev_ls_clock = clock_var;
3083 }
3084
3085 /* The scheduler hasn't inserted the nop, but we will later on.
3086 Include those nops in spu_sched_length. */
3087 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3088 spu_sched_length += 4;
3089 prev_clock_var = clock_var;
3090
3091 /* more is -1 when called from spu_sched_reorder for new insns
3092 that don't have INSN_PRIORITY */
3093 if (more >= 0)
3094 prev_priority = INSN_PRIORITY (insn);
3095 }
3096
 3097  /* Always try issuing more insns.  spu_sched_reorder will decide
3098 when the cycle should be advanced. */
3099 return 1;
3100}
3101
3102/* This function is called for both TARGET_SCHED_REORDER and
3103 TARGET_SCHED_REORDER2. */
3104static int
3105spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3106 rtx *ready, int *nreadyp, int clock)
3107{
3108 int i, nready = *nreadyp;
3109 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3110 rtx insn;
3111
3112 clock_var = clock;
3113
3114 if (nready <= 0 || pipe1_clock >= clock)
3115 return 0;
3116
3117 /* Find any rtl insns that don't generate assembly insns and schedule
3118 them first. */
3119 for (i = nready - 1; i >= 0; i--)
3120 {
3121 insn = ready[i];
3122 if (INSN_CODE (insn) == -1
3123 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3124 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3125 {
3126 ready[i] = ready[nready - 1];
3127 ready[nready - 1] = insn;
3128 return 1;
3129 }
3130 }
3131
3132 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3133 for (i = 0; i < nready; i++)
3134 if (INSN_CODE (ready[i]) != -1)
3135 {
3136 insn = ready[i];
3137 switch (get_attr_type (insn))
3138 {
3139 default:
3140 case TYPE_MULTI0:
3141 case TYPE_CONVERT:
3142 case TYPE_FX2:
3143 case TYPE_FX3:
3144 case TYPE_SPR:
3145 case TYPE_NOP:
3146 case TYPE_FXB:
3147 case TYPE_FPD:
3148 case TYPE_FP6:
3149 case TYPE_FP7:
3150 pipe_0 = i;
3151 break;
3152 case TYPE_LOAD:
3153 case TYPE_STORE:
3154 pipe_ls = i;
3155 case TYPE_LNOP:
3156 case TYPE_SHUF:
3157 case TYPE_BR:
3158 case TYPE_MULTI1:
3159 case TYPE_HBR:
3160 pipe_1 = i;
3161 break;
3162 case TYPE_IPREFETCH:
3163 pipe_hbrp = i;
3164 break;
3165 }
3166 }
3167
3168 /* In the first scheduling phase, schedule loads and stores together
3169 to increase the chance they will get merged during postreload CSE. */
3170 if (!reload_completed && pipe_ls >= 0)
3171 {
3172 insn = ready[pipe_ls];
3173 ready[pipe_ls] = ready[nready - 1];
3174 ready[nready - 1] = insn;
3175 return 1;
3176 }
3177
3178 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3179 if (pipe_hbrp >= 0)
3180 pipe_1 = pipe_hbrp;
3181
3182 /* When we have loads/stores in every cycle of the last 15 insns and
3183 we are about to schedule another load/store, emit an hbrp insn
3184 instead. */
3185 if (in_spu_reorg
3186 && spu_sched_length - spu_ls_first >= 4 * 15
3187 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3188 {
3189 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3190 recog_memoized (insn);
3191 if (pipe0_clock < clock)
3192 PUT_MODE (insn, TImode);
3193 spu_sched_variable_issue (file, verbose, insn, -1);
3194 return 0;
3195 }
3196
3197 /* In general, we want to emit nops to increase dual issue, but dual
3198 issue isn't faster when one of the insns could be scheduled later
 3199     without affecting the critical path.  We look at INSN_PRIORITY to
 3200     make a good guess, but it isn't perfect, so -mdual-nops=n can be
 3201     used to adjust it.  */
3202 if (in_spu_reorg && spu_dual_nops < 10)
3203 {
 3204      /* When we are at an even address and we are not issuing nops to
3205 improve scheduling then we need to advance the cycle. */
3206 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3207 && (spu_dual_nops == 0
3208 || (pipe_1 != -1
3209 && prev_priority >
3210 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3211 return 0;
3212
3213 /* When at an odd address, schedule the highest priority insn
3214 without considering pipeline. */
3215 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3216 && (spu_dual_nops == 0
3217 || (prev_priority >
3218 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3219 return 1;
3220 }
3221
3222
3223 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3224 pipe0 insn in the ready list, schedule it. */
3225 if (pipe0_clock < clock && pipe_0 >= 0)
3226 schedule_i = pipe_0;
3227
3228 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3229 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3230 else
3231 schedule_i = pipe_1;
3232
3233 if (schedule_i > -1)
3234 {
3235 insn = ready[schedule_i];
3236 ready[schedule_i] = ready[nready - 1];
3237 ready[nready - 1] = insn;
3238 return 1;
3239 }
3240 return 0;
644459d0 3241}
3242
3243/* INSN is dependent on DEP_INSN. */
3244static int
5a976006 3245spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3246{
5a976006 3247 rtx set;
3248
3249 /* The blockage pattern is used to prevent instructions from being
3250 moved across it and has no cost. */
3251 if (INSN_CODE (insn) == CODE_FOR_blockage
3252 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3253 return 0;
3254
9d98604b 3255 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3256 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3257 return 0;
3258
3259 /* Make sure hbrps are spread out. */
3260 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3261 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3262 return 8;
3263
3264 /* Make sure hints and hbrps are 2 cycles apart. */
3265 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3266 || INSN_CODE (insn) == CODE_FOR_hbr)
3267 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3268 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3269 return 2;
3270
3271 /* An hbrp has no real dependency on other insns. */
3272 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3273 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3274 return 0;
3275
3276 /* Assuming that it is unlikely an argument register will be used in
3277 the first cycle of the called function, we reduce the cost for
3278 slightly better scheduling of dep_insn. When not hinted, the
3279 mispredicted branch would hide the cost as well. */
3280 if (CALL_P (insn))
3281 {
3282 rtx target = get_branch_target (insn);
3283 if (GET_CODE (target) != REG || !set_of (target, insn))
3284 return cost - 2;
3285 return cost;
3286 }
3287
3288 /* And when returning from a function, let's assume the return values
3289 are completed sooner too. */
3290 if (CALL_P (dep_insn))
644459d0 3291 return cost - 2;
5a976006 3292
 3293  /* Make sure an instruction that loads from the back chain is scheduled
3294 away from the return instruction so a hint is more likely to get
3295 issued. */
3296 if (INSN_CODE (insn) == CODE_FOR__return
3297 && (set = single_set (dep_insn))
3298 && GET_CODE (SET_DEST (set)) == REG
3299 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3300 return 20;
3301
644459d0 3302 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3303 scheduler makes every insn in a block anti-dependent on the final
3304 jump_insn. We adjust here so higher cost insns will get scheduled
3305 earlier. */
5a976006 3306 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3307 return insn_cost (dep_insn) - 3;
5a976006 3308
644459d0 3309 return cost;
3310}
3311\f
3312/* Create a CONST_DOUBLE from a string. */
3313struct rtx_def *
3314spu_float_const (const char *string, enum machine_mode mode)
3315{
3316 REAL_VALUE_TYPE value;
3317 value = REAL_VALUE_ATOF (string, mode);
3318 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3319}
3320
644459d0 3321int
3322spu_constant_address_p (rtx x)
3323{
3324 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3325 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3326 || GET_CODE (x) == HIGH);
3327}
3328
3329static enum spu_immediate
3330which_immediate_load (HOST_WIDE_INT val)
3331{
3332 gcc_assert (val == trunc_int_for_mode (val, SImode));
3333
3334 if (val >= -0x8000 && val <= 0x7fff)
3335 return SPU_IL;
3336 if (val >= 0 && val <= 0x3ffff)
3337 return SPU_ILA;
3338 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3339 return SPU_ILH;
3340 if ((val & 0xffff) == 0)
3341 return SPU_ILHU;
3342
3343 return SPU_NONE;
3344}
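
/* Examples (illustrative): 0x1234 -> SPU_IL, 0x20000 -> SPU_ILA,
   0x00050005 -> SPU_ILH (both halfwords equal), and 0x12340000 ->
   SPU_ILHU (low halfword zero).  */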
3345
dea01258 3346/* Return true when OP can be loaded by one of the il instructions, or
3347 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3348int
3349immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3350{
3351 if (CONSTANT_P (op))
3352 {
3353 enum immediate_class c = classify_immediate (op, mode);
5df189be 3354 return c == IC_IL1 || c == IC_IL1s
3072d30e 3355 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3356 }
3357 return 0;
3358}
3359
3360/* Return true if the first SIZE bytes of arr is a constant that can be
3361 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3362 represent the size and offset of the instruction to use. */
3363static int
3364cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3365{
3366 int cpat, run, i, start;
3367 cpat = 1;
3368 run = 0;
3369 start = -1;
3370 for (i = 0; i < size && cpat; i++)
3371 if (arr[i] != i+16)
3372 {
3373 if (!run)
3374 {
3375 start = i;
3376 if (arr[i] == 3)
3377 run = 1;
3378 else if (arr[i] == 2 && arr[i+1] == 3)
3379 run = 2;
3380 else if (arr[i] == 0)
3381 {
3382 while (arr[i+run] == run && i+run < 16)
3383 run++;
3384 if (run != 4 && run != 8)
3385 cpat = 0;
3386 }
3387 else
3388 cpat = 0;
3389 if ((i & (run-1)) != 0)
3390 cpat = 0;
3391 i += run;
3392 }
3393 else
3394 cpat = 0;
3395 }
b01a6dc3 3396 if (cpat && (run || size < 16))
dea01258 3397 {
3398 if (run == 0)
3399 run = 1;
3400 if (prun)
3401 *prun = run;
3402 if (pstart)
3403 *pstart = start == -1 ? 16-run : start;
3404 return 1;
3405 }
3406 return 0;
3407}
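
/* For instance (illustrative): the shuffle-style byte pattern
   10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f, where only bytes 4..7
   differ from the identity, is recognized as a 4-byte run at offset 4,
   i.e. the control word cwd would generate.  */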
3408
3409/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3410 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3411static enum immediate_class
3412classify_immediate (rtx op, enum machine_mode mode)
644459d0 3413{
3414 HOST_WIDE_INT val;
3415 unsigned char arr[16];
5df189be 3416 int i, j, repeated, fsmbi, repeat;
dea01258 3417
3418 gcc_assert (CONSTANT_P (op));
3419
644459d0 3420 if (GET_MODE (op) != VOIDmode)
3421 mode = GET_MODE (op);
3422
dea01258 3423 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3424 if (!flag_pic
3425 && mode == V4SImode
dea01258 3426 && GET_CODE (op) == CONST_VECTOR
3427 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3428 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3429 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3430 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3431 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3432 op = CONST_VECTOR_ELT (op, 0);
644459d0 3433
dea01258 3434 switch (GET_CODE (op))
3435 {
3436 case SYMBOL_REF:
3437 case LABEL_REF:
3438 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3439
dea01258 3440 case CONST:
0cfc65d4 3441 /* We can never know if the resulting address fits in 18 bits and can be
3442 loaded with ila. For now, assume the address will not overflow if
3443 the displacement is "small" (fits 'K' constraint). */
3444 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3445 {
3446 rtx sym = XEXP (XEXP (op, 0), 0);
3447 rtx cst = XEXP (XEXP (op, 0), 1);
3448
3449 if (GET_CODE (sym) == SYMBOL_REF
3450 && GET_CODE (cst) == CONST_INT
3451 && satisfies_constraint_K (cst))
3452 return IC_IL1s;
3453 }
3454 return IC_IL2s;
644459d0 3455
dea01258 3456 case HIGH:
3457 return IC_IL1s;
3458
3459 case CONST_VECTOR:
3460 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3461 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3462 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3463 return IC_POOL;
3464 /* Fall through. */
3465
3466 case CONST_INT:
3467 case CONST_DOUBLE:
3468 constant_to_array (mode, op, arr);
644459d0 3469
dea01258 3470 /* Check that each 4-byte slot is identical. */
3471 repeated = 1;
3472 for (i = 4; i < 16; i += 4)
3473 for (j = 0; j < 4; j++)
3474 if (arr[j] != arr[i + j])
3475 repeated = 0;
3476
3477 if (repeated)
3478 {
3479 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3480 val = trunc_int_for_mode (val, SImode);
3481
3482 if (which_immediate_load (val) != SPU_NONE)
3483 return IC_IL1;
3484 }
3485
3486 /* Any mode of 2 bytes or smaller can be loaded with an il
3487 instruction. */
3488 gcc_assert (GET_MODE_SIZE (mode) > 2);
3489
3490 fsmbi = 1;
5df189be 3491 repeat = 0;
dea01258 3492 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3493 if (arr[i] != 0 && repeat == 0)
3494 repeat = arr[i];
3495 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3496 fsmbi = 0;
3497 if (fsmbi)
5df189be 3498 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3499
3500 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3501 return IC_CPAT;
3502
3503 if (repeated)
3504 return IC_IL2;
3505
3506 return IC_POOL;
3507 default:
3508 break;
3509 }
3510 gcc_unreachable ();
644459d0 3511}
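
/* Illustrative classifications: the SImode constant 0x12345678 repeats in
   every word but fits no single il-family instruction, so it is IC_IL2; a
   constant whose nonzero bytes are all 0xff is IC_FSMBI; a plain
   SYMBOL_REF is IC_IL1s, or IC_IL2s when TARGET_LARGE_MEM is set.  */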
3512
3513static enum spu_immediate
3514which_logical_immediate (HOST_WIDE_INT val)
3515{
3516 gcc_assert (val == trunc_int_for_mode (val, SImode));
3517
3518 if (val >= -0x200 && val <= 0x1ff)
3519 return SPU_ORI;
3520 if (val >= 0 && val <= 0xffff)
3521 return SPU_IOHL;
3522 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3523 {
3524 val = trunc_int_for_mode (val, HImode);
3525 if (val >= -0x200 && val <= 0x1ff)
3526 return SPU_ORHI;
3527 if ((val & 0xff) == ((val >> 8) & 0xff))
3528 {
3529 val = trunc_int_for_mode (val, QImode);
3530 if (val >= -0x200 && val <= 0x1ff)
3531 return SPU_ORBI;
3532 }
3533 }
3534 return SPU_NONE;
3535}
3536
5df189be 3537/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3538 CONST_DOUBLEs. */
3539static int
3540const_vector_immediate_p (rtx x)
3541{
3542 int i;
3543 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3544 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3545 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3546 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3547 return 0;
3548 return 1;
3549}
3550
644459d0 3551int
3552logical_immediate_p (rtx op, enum machine_mode mode)
3553{
3554 HOST_WIDE_INT val;
3555 unsigned char arr[16];
3556 int i, j;
3557
3558 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3559 || GET_CODE (op) == CONST_VECTOR);
3560
5df189be 3561 if (GET_CODE (op) == CONST_VECTOR
3562 && !const_vector_immediate_p (op))
3563 return 0;
3564
644459d0 3565 if (GET_MODE (op) != VOIDmode)
3566 mode = GET_MODE (op);
3567
3568 constant_to_array (mode, op, arr);
3569
3570 /* Check that bytes are repeated. */
3571 for (i = 4; i < 16; i += 4)
3572 for (j = 0; j < 4; j++)
3573 if (arr[j] != arr[i + j])
3574 return 0;
3575
3576 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3577 val = trunc_int_for_mode (val, SImode);
3578
3579 i = which_logical_immediate (val);
3580 return i != SPU_NONE && i != SPU_IOHL;
3581}
3582
3583int
3584iohl_immediate_p (rtx op, enum machine_mode mode)
3585{
3586 HOST_WIDE_INT val;
3587 unsigned char arr[16];
3588 int i, j;
3589
3590 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3591 || GET_CODE (op) == CONST_VECTOR);
3592
5df189be 3593 if (GET_CODE (op) == CONST_VECTOR
3594 && !const_vector_immediate_p (op))
3595 return 0;
3596
644459d0 3597 if (GET_MODE (op) != VOIDmode)
3598 mode = GET_MODE (op);
3599
3600 constant_to_array (mode, op, arr);
3601
3602 /* Check that bytes are repeated. */
3603 for (i = 4; i < 16; i += 4)
3604 for (j = 0; j < 4; j++)
3605 if (arr[j] != arr[i + j])
3606 return 0;
3607
3608 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3609 val = trunc_int_for_mode (val, SImode);
3610
3611 return val >= 0 && val <= 0xffff;
3612}
3613
3614int
3615arith_immediate_p (rtx op, enum machine_mode mode,
3616 HOST_WIDE_INT low, HOST_WIDE_INT high)
3617{
3618 HOST_WIDE_INT val;
3619 unsigned char arr[16];
3620 int bytes, i, j;
3621
3622 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3623 || GET_CODE (op) == CONST_VECTOR);
3624
5df189be 3625 if (GET_CODE (op) == CONST_VECTOR
3626 && !const_vector_immediate_p (op))
3627 return 0;
3628
644459d0 3629 if (GET_MODE (op) != VOIDmode)
3630 mode = GET_MODE (op);
3631
3632 constant_to_array (mode, op, arr);
3633
3634 if (VECTOR_MODE_P (mode))
3635 mode = GET_MODE_INNER (mode);
3636
3637 bytes = GET_MODE_SIZE (mode);
3638 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3639
3640 /* Check that bytes are repeated. */
3641 for (i = bytes; i < 16; i += bytes)
3642 for (j = 0; j < bytes; j++)
3643 if (arr[j] != arr[i + j])
3644 return 0;
3645
3646 val = arr[0];
3647 for (j = 1; j < bytes; j++)
3648 val = (val << 8) | arr[j];
3649
3650 val = trunc_int_for_mode (val, mode);
3651
3652 return val >= low && val <= high;
3653}
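
/* e.g. (illustrative): arith_immediate_p (GEN_INT (-200), SImode, -0x200,
   0x1ff) is true, since the splatted words are identical and -200 fits
   the signed 10-bit range, while 0x1234 in the same range is rejected.  */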
3654
56c7bfc2 3655/* TRUE when op is an immediate and an exact power of 2, and given that
3656 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3657 all entries must be the same. */
3658bool
3659exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3660{
3661 enum machine_mode int_mode;
3662 HOST_WIDE_INT val;
3663 unsigned char arr[16];
3664 int bytes, i, j;
3665
3666 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3667 || GET_CODE (op) == CONST_VECTOR);
3668
3669 if (GET_CODE (op) == CONST_VECTOR
3670 && !const_vector_immediate_p (op))
3671 return 0;
3672
3673 if (GET_MODE (op) != VOIDmode)
3674 mode = GET_MODE (op);
3675
3676 constant_to_array (mode, op, arr);
3677
3678 if (VECTOR_MODE_P (mode))
3679 mode = GET_MODE_INNER (mode);
3680
3681 bytes = GET_MODE_SIZE (mode);
3682 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3683
3684 /* Check that bytes are repeated. */
3685 for (i = bytes; i < 16; i += bytes)
3686 for (j = 0; j < bytes; j++)
3687 if (arr[j] != arr[i + j])
3688 return 0;
3689
3690 val = arr[0];
3691 for (j = 1; j < bytes; j++)
3692 val = (val << 8) | arr[j];
3693
3694 val = trunc_int_for_mode (val, int_mode);
3695
 3696 /* Currently, we only handle SFmode. */
3697 gcc_assert (mode == SFmode);
3698 if (mode == SFmode)
3699 {
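      /* IEEE single precision: bits 30-23 hold the biased exponent
         (bias 127) and bits 22-0 the fraction.  A positive value with a
         zero fraction is an exact power of 2.  */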
3700 int exp = (val >> 23) - 127;
3701 return val > 0 && (val & 0x007fffff) == 0
3702 && exp >= low && exp <= high;
3703 }
3704 return FALSE;
3705}
3706
6cf5579e 3707/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3708
3709static int
3710ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3711{
3712 rtx x = *px;
3713 tree decl;
3714
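  /* Look through a (const (plus SYMBOL_REF const_int)) offset so the
     underlying SYMBOL_REF itself is examined.  */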
3715 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3716 {
3717 rtx plus = XEXP (x, 0);
3718 rtx op0 = XEXP (plus, 0);
3719 rtx op1 = XEXP (plus, 1);
3720 if (GET_CODE (op1) == CONST_INT)
3721 x = op0;
3722 }
3723
3724 return (GET_CODE (x) == SYMBOL_REF
3725 && (decl = SYMBOL_REF_DECL (x)) != 0
3726 && TREE_CODE (decl) == VAR_DECL
3727 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3728}
3729
644459d0 3730/* We accept:
5b865faf 3731 - any 32-bit constant (SImode, SFmode)
644459d0 3732 - any constant that can be generated with fsmbi (any mode)
5b865faf 3733 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3734 (DImode, DFmode)
5b865faf 3735 - a 128-bit constant where the four 32-bit words match. */
644459d0 3736int
3737spu_legitimate_constant_p (rtx x)
3738{
5df189be 3739 if (GET_CODE (x) == HIGH)
3740 x = XEXP (x, 0);
6cf5579e 3741
 3742 /* Reject any __ea qualified reference. These cannot appear in
 3743 instructions, so they must be forced to the constant pool. */
3744 if (for_each_rtx (&x, ea_symbol_ref, 0))
3745 return 0;
3746
644459d0 3747 /* V4SI with all identical symbols is valid. */
5df189be 3748 if (!flag_pic
3749 && GET_MODE (x) == V4SImode
644459d0 3750 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3751 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3752 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3753 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3754 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3755 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3756
5df189be 3757 if (GET_CODE (x) == CONST_VECTOR
3758 && !const_vector_immediate_p (x))
3759 return 0;
644459d0 3760 return 1;
3761}
3762
 3763/* Valid addresses are:
3764 - symbol_ref, label_ref, const
3765 - reg
9d98604b 3766 - reg + const_int, where const_int is 16 byte aligned
644459d0 3767 - reg + reg, alignment doesn't matter
3768 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3769 ignore the 4 least significant bits of the const. We only care about
3770 16 byte modes because the expand phase will change all smaller MEM
3771 references to TImode. */
3772static bool
3773spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3774 rtx x, bool reg_ok_strict)
644459d0 3775{
9d98604b 3776 int aligned = GET_MODE_SIZE (mode) >= 16;
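  /* Addresses may be wrapped in (and X -16) to force 16-byte alignment;
     for accesses of 16 bytes or more, validate the inner address.  */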
3777 if (aligned
3778 && GET_CODE (x) == AND
644459d0 3779 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3780 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3781 x = XEXP (x, 0);
3782 switch (GET_CODE (x))
3783 {
644459d0 3784 case LABEL_REF:
6cf5579e 3785 return !TARGET_LARGE_MEM;
3786
9d98604b 3787 case SYMBOL_REF:
644459d0 3788 case CONST:
6cf5579e 3789 /* Keep __ea references until reload so that spu_expand_mov can see them
3790 in MEMs. */
3791 if (ea_symbol_ref (&x, 0))
3792 return !reload_in_progress && !reload_completed;
9d98604b 3793 return !TARGET_LARGE_MEM;
644459d0 3794
3795 case CONST_INT:
3796 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3797
3798 case SUBREG:
3799 x = XEXP (x, 0);
9d98604b 3800 if (!REG_P (x))
 3801 return 0;
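      /* Fall through: the inner REG is checked like any other base
         register.  */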
644459d0 3802
3803 case REG:
3804 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3805
3806 case PLUS:
3807 case LO_SUM:
3808 {
3809 rtx op0 = XEXP (x, 0);
3810 rtx op1 = XEXP (x, 1);
3811 if (GET_CODE (op0) == SUBREG)
3812 op0 = XEXP (op0, 0);
3813 if (GET_CODE (op1) == SUBREG)
3814 op1 = XEXP (op1, 0);
644459d0 3815 if (GET_CODE (op0) == REG
3816 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3817 && GET_CODE (op1) == CONST_INT
3818 && INTVAL (op1) >= -0x2000
3819 && INTVAL (op1) <= 0x1fff
9d98604b 3820 && (!aligned || (INTVAL (op1) & 15) == 0))
3821 return TRUE;
644459d0 3822 if (GET_CODE (op0) == REG
3823 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3824 && GET_CODE (op1) == REG
3825 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3826 return TRUE;
644459d0 3827 }
3828 break;
3829
3830 default:
3831 break;
3832 }
9d98604b 3833 return FALSE;
644459d0 3834}
3835
6cf5579e 3836/* Like spu_legitimate_address_p, except with named addresses. */
3837static bool
3838spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3839 bool reg_ok_strict, addr_space_t as)
3840{
3841 if (as == ADDR_SPACE_EA)
3842 return (REG_P (x) && (GET_MODE (x) == EAmode));
3843
3844 else if (as != ADDR_SPACE_GENERIC)
3845 gcc_unreachable ();
3846
3847 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3848}
3849
644459d0 3850/* When the address is reg + const_int, force the const_int into a
fa7637bd 3851 register. */
644459d0 3852rtx
3853spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3854 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3855{
3856 rtx op0, op1;
3857 /* Make sure both operands are registers. */
3858 if (GET_CODE (x) == PLUS)
3859 {
3860 op0 = XEXP (x, 0);
3861 op1 = XEXP (x, 1);
3862 if (ALIGNED_SYMBOL_REF_P (op0))
3863 {
3864 op0 = force_reg (Pmode, op0);
3865 mark_reg_pointer (op0, 128);
3866 }
3867 else if (GET_CODE (op0) != REG)
3868 op0 = force_reg (Pmode, op0);
3869 if (ALIGNED_SYMBOL_REF_P (op1))
3870 {
3871 op1 = force_reg (Pmode, op1);
3872 mark_reg_pointer (op1, 128);
3873 }
3874 else if (GET_CODE (op1) != REG)
3875 op1 = force_reg (Pmode, op1);
3876 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3877 }
41e3a0c7 3878 return x;
644459d0 3879}
3880
6cf5579e 3881/* Like spu_legitimate_address, except with named address support. */
3882static rtx
3883spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3884 addr_space_t as)
3885{
3886 if (as != ADDR_SPACE_GENERIC)
3887 return x;
3888
3889 return spu_legitimize_address (x, oldx, mode);
3890}
3891
644459d0 3892/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3893 struct attribute_spec.handler. */
3894static tree
3895spu_handle_fndecl_attribute (tree * node,
3896 tree name,
3897 tree args ATTRIBUTE_UNUSED,
3898 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3899{
3900 if (TREE_CODE (*node) != FUNCTION_DECL)
3901 {
67a779df 3902 warning (0, "%qE attribute only applies to functions",
3903 name);
644459d0 3904 *no_add_attrs = true;
3905 }
3906
3907 return NULL_TREE;
3908}
3909
3910/* Handle the "vector" attribute. */
3911static tree
3912spu_handle_vector_attribute (tree * node, tree name,
3913 tree args ATTRIBUTE_UNUSED,
3914 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3915{
3916 tree type = *node, result = NULL_TREE;
3917 enum machine_mode mode;
3918 int unsigned_p;
3919
3920 while (POINTER_TYPE_P (type)
3921 || TREE_CODE (type) == FUNCTION_TYPE
3922 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3923 type = TREE_TYPE (type);
3924
3925 mode = TYPE_MODE (type);
3926
3927 unsigned_p = TYPE_UNSIGNED (type);
3928 switch (mode)
3929 {
3930 case DImode:
3931 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3932 break;
3933 case SImode:
3934 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3935 break;
3936 case HImode:
3937 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3938 break;
3939 case QImode:
3940 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3941 break;
3942 case SFmode:
3943 result = V4SF_type_node;
3944 break;
3945 case DFmode:
3946 result = V2DF_type_node;
3947 break;
3948 default:
3949 break;
3950 }
3951
3952 /* Propagate qualifiers attached to the element type
3953 onto the vector type. */
3954 if (result && result != type && TYPE_QUALS (type))
3955 result = build_qualified_type (result, TYPE_QUALS (type));
3956
3957 *no_add_attrs = true; /* No need to hang on to the attribute. */
3958
3959 if (!result)
67a779df 3960 warning (0, "%qE attribute ignored", name);
644459d0 3961 else
d991e6e8 3962 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3963
3964 return NULL_TREE;
3965}
3966
f2b32076 3967/* Return nonzero if FUNC is a naked function. */
644459d0 3968static int
3969spu_naked_function_p (tree func)
3970{
3971 tree a;
3972
3973 if (TREE_CODE (func) != FUNCTION_DECL)
3974 abort ();
3975
3976 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3977 return a != NULL_TREE;
3978}
3979
3980int
3981spu_initial_elimination_offset (int from, int to)
3982{
3983 int saved_regs_size = spu_saved_regs_size ();
3984 int sp_offset = 0;
abe32cce 3985 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3986 || get_frame_size () || saved_regs_size)
3987 sp_offset = STACK_POINTER_OFFSET;
3988 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3989 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3990 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3991 return get_frame_size ();
644459d0 3992 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3993 return sp_offset + crtl->outgoing_args_size
644459d0 3994 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3995 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3996 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3997 else
3998 gcc_unreachable ();
644459d0 3999}
4000
4001rtx
fb80456a 4002spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 4003{
4004 enum machine_mode mode = TYPE_MODE (type);
4005 int byte_size = ((mode == BLKmode)
4006 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4007
4008 /* Make sure small structs are left justified in a register. */
4009 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4010 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4011 {
4012 enum machine_mode smode;
4013 rtvec v;
4014 int i;
4015 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4016 int n = byte_size / UNITS_PER_WORD;
4017 v = rtvec_alloc (nregs);
4018 for (i = 0; i < n; i++)
4019 {
4020 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4021 gen_rtx_REG (TImode,
4022 FIRST_RETURN_REGNUM
4023 + i),
4024 GEN_INT (UNITS_PER_WORD * i));
4025 byte_size -= UNITS_PER_WORD;
4026 }
4027
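      /* Any remaining bytes (less than a full word) are returned in the
         smallest integer mode that holds them, but no smaller than 4
         bytes.  */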
4028 if (n < nregs)
4029 {
4030 if (byte_size < 4)
4031 byte_size = 4;
4032 smode =
4033 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4034 RTVEC_ELT (v, n) =
4035 gen_rtx_EXPR_LIST (VOIDmode,
4036 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4037 GEN_INT (UNITS_PER_WORD * n));
4038 }
4039 return gen_rtx_PARALLEL (mode, v);
4040 }
4041 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4042}
4043
ee9034d4 4044static rtx
4045spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4046 enum machine_mode mode,
ee9034d4 4047 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4048{
4049 int byte_size;
4050
a08c5dd0 4051 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4052 return 0;
4053
4054 byte_size = ((mode == BLKmode)
4055 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4056
4057 /* The ABI does not allow parameters to be passed partially in
 4058 reg and partially on the stack. */
a08c5dd0 4059 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4060 return 0;
4061
4062 /* Make sure small structs are left justified in a register. */
4063 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4064 && byte_size < UNITS_PER_WORD && byte_size > 0)
4065 {
4066 enum machine_mode smode;
4067 rtx gr_reg;
4068 if (byte_size < 4)
4069 byte_size = 4;
4070 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4071 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4072 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4073 const0_rtx);
4074 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4075 }
4076 else
a08c5dd0 4077 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4078}
4079
ee9034d4 4080static void
4081spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4082 const_tree type, bool named ATTRIBUTE_UNUSED)
4083{
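  /* Arguments occupy whole 16-byte registers: variably sized types are
     passed by reference and so take one register for the pointer, BLKmode
     arguments take one register per 16 bytes, and everything else takes
     HARD_REGNO_NREGS registers.  */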
4084 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4085 ? 1
4086 : mode == BLKmode
4087 ? ((int_size_in_bytes (type) + 15) / 16)
4088 : mode == VOIDmode
4089 ? 1
4090 : HARD_REGNO_NREGS (cum, mode));
4091}
4092
644459d0 4093/* Variable sized types are passed by reference. */
4094static bool
4095spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4096 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4097 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4098{
4099 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4100}
4101\f
4102
4103/* Var args. */
4104
4105/* Create and return the va_list datatype.
4106
4107 On SPU, va_list is an array type equivalent to
4108
4109 typedef struct __va_list_tag
4110 {
4111 void *__args __attribute__((__aligned(16)));
4112 void *__skip __attribute__((__aligned(16)));
4113
4114 } va_list[1];
4115
fa7637bd 4116 where __args points to the arg that will be returned by the next
644459d0 4117 va_arg(), and __skip points to the previous stack frame such that
4118 when __args == __skip we should advance __args by 32 bytes. */
4119static tree
4120spu_build_builtin_va_list (void)
4121{
4122 tree f_args, f_skip, record, type_decl;
4123 bool owp;
4124
4125 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4126
4127 type_decl =
54e46243 4128 build_decl (BUILTINS_LOCATION,
4129 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4130
54e46243 4131 f_args = build_decl (BUILTINS_LOCATION,
4132 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4133 f_skip = build_decl (BUILTINS_LOCATION,
4134 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4135
4136 DECL_FIELD_CONTEXT (f_args) = record;
4137 DECL_ALIGN (f_args) = 128;
4138 DECL_USER_ALIGN (f_args) = 1;
4139
4140 DECL_FIELD_CONTEXT (f_skip) = record;
4141 DECL_ALIGN (f_skip) = 128;
4142 DECL_USER_ALIGN (f_skip) = 1;
4143
bc907808 4144 TYPE_STUB_DECL (record) = type_decl;
644459d0 4145 TYPE_NAME (record) = type_decl;
4146 TYPE_FIELDS (record) = f_args;
1767a056 4147 DECL_CHAIN (f_args) = f_skip;
644459d0 4148
 4149 /* We know this is being padded and we want it that way. It is an internal
 4150 type, so hide the warnings from the user. */
4151 owp = warn_padded;
4152 warn_padded = false;
4153
4154 layout_type (record);
4155
4156 warn_padded = owp;
4157
4158 /* The correct type is an array type of one element. */
4159 return build_array_type (record, build_index_type (size_zero_node));
4160}
4161
4162/* Implement va_start by filling the va_list structure VALIST.
4163 NEXTARG points to the first anonymous stack argument.
4164
4165 The following global variables are used to initialize
4166 the va_list structure:
4167
abe32cce 4168 crtl->args.info;
644459d0 4169 the CUMULATIVE_ARGS for this function
4170
abe32cce 4171 crtl->args.arg_offset_rtx:
644459d0 4172 holds the offset of the first anonymous stack argument
4173 (relative to the virtual arg pointer). */
4174
8a58ed0a 4175static void
644459d0 4176spu_va_start (tree valist, rtx nextarg)
4177{
4178 tree f_args, f_skip;
4179 tree args, skip, t;
4180
4181 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4182 f_skip = DECL_CHAIN (f_args);
644459d0 4183
170efcd4 4184 valist = build_simple_mem_ref (valist);
644459d0 4185 args =
4186 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4187 skip =
4188 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4189
4190 /* Find the __args area. */
4191 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4192 if (crtl->args.pretend_args_size > 0)
0de36bdb 4193 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4194 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4195 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4196 TREE_SIDE_EFFECTS (t) = 1;
4197 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4198
4199 /* Find the __skip area. */
4200 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4201 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4202 size_int (crtl->args.pretend_args_size
0de36bdb 4203 - STACK_POINTER_OFFSET));
75a70cf9 4204 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4205 TREE_SIDE_EFFECTS (t) = 1;
4206 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4207}
4208
4209/* Gimplify va_arg by updating the va_list structure
4210 VALIST as required to retrieve an argument of type
4211 TYPE, and returning that argument.
4212
4213 ret = va_arg(VALIST, TYPE);
4214
4215 generates code equivalent to:
4216
4217 paddedsize = (sizeof(TYPE) + 15) & -16;
4218 if (VALIST.__args + paddedsize > VALIST.__skip
4219 && VALIST.__args <= VALIST.__skip)
4220 addr = VALIST.__skip + 32;
4221 else
4222 addr = VALIST.__args;
4223 VALIST.__args = addr + paddedsize;
4224 ret = *(TYPE *)addr;
4225 */
4226static tree
75a70cf9 4227spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4228 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4229{
4230 tree f_args, f_skip;
4231 tree args, skip;
4232 HOST_WIDE_INT size, rsize;
4233 tree paddedsize, addr, tmp;
4234 bool pass_by_reference_p;
4235
4236 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4237 f_skip = DECL_CHAIN (f_args);
644459d0 4238
182cf5a9 4239 valist = build_simple_mem_ref (valist);
644459d0 4240 args =
4241 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4242 skip =
4243 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4244
4245 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4246
4247 /* if an object is dynamically sized, a pointer to it is passed
4248 instead of the object itself. */
4249 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4250 false);
4251 if (pass_by_reference_p)
4252 type = build_pointer_type (type);
4253 size = int_size_in_bytes (type);
4254 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4255
4256 /* build conditional expression to calculate addr. The expression
4257 will be gimplified later. */
0de36bdb 4258 paddedsize = size_int (rsize);
75a70cf9 4259 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4260 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4261 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4262 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4263 unshare_expr (skip)));
644459d0 4264
4265 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4266 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4267 size_int (32)), unshare_expr (args));
644459d0 4268
75a70cf9 4269 gimplify_assign (addr, tmp, pre_p);
644459d0 4270
4271 /* update VALIST.__args */
0de36bdb 4272 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4273 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4274
8115f0af 4275 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4276 addr);
644459d0 4277
4278 if (pass_by_reference_p)
4279 addr = build_va_arg_indirect_ref (addr);
4280
4281 return build_va_arg_indirect_ref (addr);
4282}
4283
4284/* Save parameter registers starting with the register that corresponds
 4285 to the first unnamed parameter. If the first unnamed parameter is
4286 in the stack then save no registers. Set pretend_args_size to the
4287 amount of space needed to save the registers. */
4288void
4289spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4290 tree type, int *pretend_size, int no_rtl)
4291{
4292 if (!no_rtl)
4293 {
4294 rtx tmp;
4295 int regno;
4296 int offset;
4297 int ncum = *cum;
4298
 4299 /* cum currently points to the last named argument; we want to
4300 start at the next argument. */
ee9034d4 4301 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4302
4303 offset = -STACK_POINTER_OFFSET;
4304 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4305 {
4306 tmp = gen_frame_mem (V4SImode,
4307 plus_constant (virtual_incoming_args_rtx,
4308 offset));
4309 emit_move_insn (tmp,
4310 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4311 offset += 16;
4312 }
4313 *pretend_size = offset + STACK_POINTER_OFFSET;
4314 }
4315}
4316\f
b2d7ede1 4317static void
644459d0 4318spu_conditional_register_usage (void)
4319{
4320 if (flag_pic)
4321 {
4322 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4323 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4324 }
644459d0 4325}
4326
9d98604b 4327/* This is called any time we inspect the alignment of a register for
4328 addresses. */
644459d0 4329static int
9d98604b 4330reg_aligned_for_addr (rtx x)
644459d0 4331{
9d98604b 4332 int regno =
4333 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4334 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4335}
4336
69ced2d6 4337/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4338 into its SYMBOL_REF_FLAGS. */
4339static void
4340spu_encode_section_info (tree decl, rtx rtl, int first)
4341{
4342 default_encode_section_info (decl, rtl, first);
4343
4344 /* If a variable has a forced alignment to < 16 bytes, mark it with
4345 SYMBOL_FLAG_ALIGN1. */
4346 if (TREE_CODE (decl) == VAR_DECL
4347 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4348 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4349}
4350
644459d0 4351/* Return TRUE if we are certain the mem refers to a complete object
4352 which is both 16-byte aligned and padded to a 16-byte boundary. This
4353 would make it safe to store with a single instruction.
4354 We guarantee the alignment and padding for static objects by aligning
4355 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4356 FIXME: We currently cannot guarantee this for objects on the stack
4357 because assign_parm_setup_stack calls assign_stack_local with the
4358 alignment of the parameter mode and in that case the alignment never
4359 gets adjusted by LOCAL_ALIGNMENT. */
4360static int
4361store_with_one_insn_p (rtx mem)
4362{
9d98604b 4363 enum machine_mode mode = GET_MODE (mem);
644459d0 4364 rtx addr = XEXP (mem, 0);
9d98604b 4365 if (mode == BLKmode)
644459d0 4366 return 0;
9d98604b 4367 if (GET_MODE_SIZE (mode) >= 16)
4368 return 1;
644459d0 4369 /* Only static objects. */
4370 if (GET_CODE (addr) == SYMBOL_REF)
4371 {
4372 /* We use the associated declaration to make sure the access is
fa7637bd 4373 referring to the whole object.
851d9296 4374 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
644459d0 4375 if it is necessary. Will there be cases where one exists, and
4376 the other does not? Will there be cases where both exist, but
4377 have different types? */
4378 tree decl = MEM_EXPR (mem);
4379 if (decl
4380 && TREE_CODE (decl) == VAR_DECL
4381 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4382 return 1;
4383 decl = SYMBOL_REF_DECL (addr);
4384 if (decl
4385 && TREE_CODE (decl) == VAR_DECL
4386 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4387 return 1;
4388 }
4389 return 0;
4390}
4391
9d98604b 4392/* Return 1 when the address is not valid for a simple load and store as
4393 required by the '_mov*' patterns. We could make this less strict
 4394 for loads, but we prefer MEMs to look the same so they are more
4395 likely to be merged. */
4396static int
4397address_needs_split (rtx mem)
4398{
4399 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4400 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4401 || !(store_with_one_insn_p (mem)
4402 || mem_is_padded_component_ref (mem))))
4403 return 1;
4404
4405 return 0;
4406}
4407
6cf5579e 4408static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4409static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4410static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4411
4412/* MEM is known to be an __ea qualified memory access. Emit a call to
 4413 fetch the PPU memory to local store, and return its address in local
4414 store. */
4415
4416static void
4417ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4418{
4419 if (is_store)
4420 {
4421 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4422 if (!cache_fetch_dirty)
4423 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4424 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4425 2, ea_addr, EAmode, ndirty, SImode);
4426 }
4427 else
4428 {
4429 if (!cache_fetch)
4430 cache_fetch = init_one_libfunc ("__cache_fetch");
4431 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4432 1, ea_addr, EAmode);
4433 }
4434}
4435
4436/* Like ea_load_store, but do the cache tag comparison and, for stores,
4437 dirty bit marking, inline.
4438
4439 The cache control data structure is an array of
4440
4441 struct __cache_tag_array
4442 {
4443 unsigned int tag_lo[4];
4444 unsigned int tag_hi[4];
4445 void *data_pointer[4];
4446 int reserved[4];
4447 vector unsigned short dirty_bits[4];
4448 } */
4449
4450static void
4451ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4452{
4453 rtx ea_addr_si;
4454 HOST_WIDE_INT v;
4455 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4456 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4457 rtx index_mask = gen_reg_rtx (SImode);
4458 rtx tag_arr = gen_reg_rtx (Pmode);
4459 rtx splat_mask = gen_reg_rtx (TImode);
4460 rtx splat = gen_reg_rtx (V4SImode);
4461 rtx splat_hi = NULL_RTX;
4462 rtx tag_index = gen_reg_rtx (Pmode);
4463 rtx block_off = gen_reg_rtx (SImode);
4464 rtx tag_addr = gen_reg_rtx (Pmode);
4465 rtx tag = gen_reg_rtx (V4SImode);
4466 rtx cache_tag = gen_reg_rtx (V4SImode);
4467 rtx cache_tag_hi = NULL_RTX;
4468 rtx cache_ptrs = gen_reg_rtx (TImode);
4469 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4470 rtx tag_equal = gen_reg_rtx (V4SImode);
4471 rtx tag_equal_hi = NULL_RTX;
4472 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4473 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4474 rtx eq_index = gen_reg_rtx (SImode);
4475 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4476
4477 if (spu_ea_model != 32)
4478 {
4479 splat_hi = gen_reg_rtx (V4SImode);
4480 cache_tag_hi = gen_reg_rtx (V4SImode);
4481 tag_equal_hi = gen_reg_rtx (V4SImode);
4482 }
4483
4484 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4485 emit_move_insn (tag_arr, tag_arr_sym);
4486 v = 0x0001020300010203LL;
4487 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
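  /* splat_mask is the shufb control word 00 01 02 03 repeated four times;
     it copies the first word of its source into all four word slots.  */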
4488 ea_addr_si = ea_addr;
4489 if (spu_ea_model != 32)
4490 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4491
4492 /* tag_index = ea_addr & (tag_array_size - 128) */
4493 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4494
4495 /* splat ea_addr to all 4 slots. */
4496 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4497 /* Similarly for high 32 bits of ea_addr. */
4498 if (spu_ea_model != 32)
4499 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4500
4501 /* block_off = ea_addr & 127 */
4502 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4503
4504 /* tag_addr = tag_arr + tag_index */
4505 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4506
4507 /* Read cache tags. */
4508 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4509 if (spu_ea_model != 32)
4510 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4511 plus_constant (tag_addr, 16)));
4512
4513 /* tag = ea_addr & -128 */
4514 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4515
4516 /* Read all four cache data pointers. */
4517 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4518 plus_constant (tag_addr, 32)));
4519
4520 /* Compare tags. */
4521 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4522 if (spu_ea_model != 32)
4523 {
4524 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4525 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4526 }
4527
4528 /* At most one of the tags compare equal, so tag_equal has one
4529 32-bit slot set to all 1's, with the other slots all zero.
4530 gbb picks off low bit from each byte in the 128-bit registers,
4531 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4532 we have a hit. */
4533 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4534 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4535
4536 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4537 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4538
 4539 /* This allows us to rotate the corresponding cache data pointer to slot 0
 4540 (rotating by eq_index mod 16 bytes). */
4541 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4542 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4543
4544 /* Add block offset to form final data address. */
4545 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4546
4547 /* Check that we did hit. */
4548 hit_label = gen_label_rtx ();
4549 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4550 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4551 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4552 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4553 hit_ref, pc_rtx)));
4554 /* Say that this branch is very likely to happen. */
4555 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4556 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4557
4558 ea_load_store (mem, is_store, ea_addr, data_addr);
4559 cont_label = gen_label_rtx ();
4560 emit_jump_insn (gen_jump (cont_label));
4561 emit_barrier ();
4562
4563 emit_label (hit_label);
4564
4565 if (is_store)
4566 {
4567 HOST_WIDE_INT v_hi;
4568 rtx dirty_bits = gen_reg_rtx (TImode);
4569 rtx dirty_off = gen_reg_rtx (SImode);
4570 rtx dirty_128 = gen_reg_rtx (TImode);
4571 rtx neg_block_off = gen_reg_rtx (SImode);
4572
4573 /* Set up mask with one dirty bit per byte of the mem we are
4574 writing, starting from top bit. */
4575 v_hi = v = -1;
4576 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4577 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4578 {
4579 v_hi = v;
4580 v = 0;
4581 }
4582 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4583
4584 /* Form index into cache dirty_bits. eq_index is one of
4585 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4586 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4587 offset to each of the four dirty_bits elements. */
4588 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4589
4590 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4591
4592 /* Rotate bit mask to proper bit. */
4593 emit_insn (gen_negsi2 (neg_block_off, block_off));
4594 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4595 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4596
4597 /* Or in the new dirty bits. */
4598 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4599
4600 /* Store. */
4601 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4602 }
4603
4604 emit_label (cont_label);
4605}
4606
4607static rtx
4608expand_ea_mem (rtx mem, bool is_store)
4609{
4610 rtx ea_addr;
4611 rtx data_addr = gen_reg_rtx (Pmode);
4612 rtx new_mem;
4613
4614 ea_addr = force_reg (EAmode, XEXP (mem, 0));
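  /* Use the out-of-line __cache_fetch call when not optimizing or when
     optimizing for size; otherwise inline the cache tag lookup.  */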
4615 if (optimize_size || optimize == 0)
4616 ea_load_store (mem, is_store, ea_addr, data_addr);
4617 else
4618 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4619
4620 if (ea_alias_set == -1)
4621 ea_alias_set = new_alias_set ();
4622
4623 /* We generate a new MEM RTX to refer to the copy of the data
4624 in the cache. We do not copy memory attributes (except the
4625 alignment) from the original MEM, as they may no longer apply
4626 to the cache copy. */
4627 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4628 set_mem_alias_set (new_mem, ea_alias_set);
4629 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4630
4631 return new_mem;
4632}
4633
644459d0 4634int
4635spu_expand_mov (rtx * ops, enum machine_mode mode)
4636{
4637 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4638 {
4639 /* Perform the move in the destination SUBREG's inner mode. */
4640 ops[0] = SUBREG_REG (ops[0]);
4641 mode = GET_MODE (ops[0]);
4642 ops[1] = gen_lowpart_common (mode, ops[1]);
4643 gcc_assert (ops[1]);
4644 }
644459d0 4645
4646 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4647 {
4648 rtx from = SUBREG_REG (ops[1]);
8d72495d 4649 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4650
4651 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4652 && GET_MODE_CLASS (imode) == MODE_INT
4653 && subreg_lowpart_p (ops[1]));
4654
4655 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4656 imode = SImode;
4657 if (imode != GET_MODE (from))
4658 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4659
4660 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4661 {
d6bf3b14 4662 enum insn_code icode = convert_optab_handler (trunc_optab,
4663 mode, imode);
644459d0 4664 emit_insn (GEN_FCN (icode) (ops[0], from));
4665 }
4666 else
4667 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4668 return 1;
4669 }
4670
4671 /* At least one of the operands needs to be a register. */
4672 if ((reload_in_progress | reload_completed) == 0
4673 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4674 {
4675 rtx temp = force_reg (mode, ops[1]);
4676 emit_move_insn (ops[0], temp);
4677 return 1;
4678 }
4679 if (reload_in_progress || reload_completed)
4680 {
dea01258 4681 if (CONSTANT_P (ops[1]))
4682 return spu_split_immediate (ops);
644459d0 4683 return 0;
4684 }
9d98604b 4685
4686 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4687 extend them. */
4688 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4689 {
9d98604b 4690 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4691 if (val != INTVAL (ops[1]))
644459d0 4692 {
9d98604b 4693 emit_move_insn (ops[0], GEN_INT (val));
4694 return 1;
644459d0 4695 }
4696 }
9d98604b 4697 if (MEM_P (ops[0]))
6cf5579e 4698 {
4699 if (MEM_ADDR_SPACE (ops[0]))
4700 ops[0] = expand_ea_mem (ops[0], true);
4701 return spu_split_store (ops);
4702 }
9d98604b 4703 if (MEM_P (ops[1]))
6cf5579e 4704 {
4705 if (MEM_ADDR_SPACE (ops[1]))
4706 ops[1] = expand_ea_mem (ops[1], false);
4707 return spu_split_load (ops);
4708 }
9d98604b 4709
644459d0 4710 return 0;
4711}
4712
9d98604b 4713static void
4714spu_convert_move (rtx dst, rtx src)
644459d0 4715{
9d98604b 4716 enum machine_mode mode = GET_MODE (dst);
4717 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4718 rtx reg;
4719 gcc_assert (GET_MODE (src) == TImode);
4720 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
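  /* The value lives in the most significant bytes of the TImode quantity
     (the preferred slot); shift it down to the low bits before
     truncating.  */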
4721 emit_insn (gen_rtx_SET (VOIDmode, reg,
4722 gen_rtx_TRUNCATE (int_mode,
4723 gen_rtx_LSHIFTRT (TImode, src,
4724 GEN_INT (int_mode == DImode ? 64 : 96)))));
4725 if (int_mode != mode)
4726 {
4727 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4728 emit_move_insn (dst, reg);
4729 }
4730}
644459d0 4731
9d98604b 4732/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4733 the address from SRC and SRC+16. Return a REG or CONST_INT that
4734 specifies how many bytes to rotate the loaded registers, plus any
4735 extra from EXTRA_ROTQBY. The address and rotate amounts are
4736 normalized to improve merging of loads and rotate computations. */
4737static rtx
4738spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4739{
4740 rtx addr = XEXP (src, 0);
4741 rtx p0, p1, rot, addr0, addr1;
4742 int rot_amt;
644459d0 4743
4744 rot = 0;
4745 rot_amt = 0;
9d98604b 4746
4747 if (MEM_ALIGN (src) >= 128)
4748 /* Address is already aligned; simply perform a TImode load. */ ;
4749 else if (GET_CODE (addr) == PLUS)
644459d0 4750 {
4751 /* 8 cases:
4752 aligned reg + aligned reg => lqx
4753 aligned reg + unaligned reg => lqx, rotqby
4754 aligned reg + aligned const => lqd
4755 aligned reg + unaligned const => lqd, rotqbyi
4756 unaligned reg + aligned reg => lqx, rotqby
4757 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4758 unaligned reg + aligned const => lqd, rotqby
4759 unaligned reg + unaligned const -> not allowed by legitimate address
4760 */
4761 p0 = XEXP (addr, 0);
4762 p1 = XEXP (addr, 1);
9d98604b 4763 if (!reg_aligned_for_addr (p0))
644459d0 4764 {
9d98604b 4765 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4766 {
9d98604b 4767 rot = gen_reg_rtx (SImode);
4768 emit_insn (gen_addsi3 (rot, p0, p1));
4769 }
4770 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4771 {
4772 if (INTVAL (p1) > 0
4773 && REG_POINTER (p0)
4774 && INTVAL (p1) * BITS_PER_UNIT
4775 < REGNO_POINTER_ALIGN (REGNO (p0)))
4776 {
4777 rot = gen_reg_rtx (SImode);
4778 emit_insn (gen_addsi3 (rot, p0, p1));
4779 addr = p0;
4780 }
4781 else
4782 {
4783 rtx x = gen_reg_rtx (SImode);
4784 emit_move_insn (x, p1);
4785 if (!spu_arith_operand (p1, SImode))
4786 p1 = x;
4787 rot = gen_reg_rtx (SImode);
4788 emit_insn (gen_addsi3 (rot, p0, p1));
4789 addr = gen_rtx_PLUS (Pmode, p0, x);
4790 }
644459d0 4791 }
4792 else
4793 rot = p0;
4794 }
4795 else
4796 {
4797 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4798 {
4799 rot_amt = INTVAL (p1) & 15;
9d98604b 4800 if (INTVAL (p1) & -16)
4801 {
4802 p1 = GEN_INT (INTVAL (p1) & -16);
4803 addr = gen_rtx_PLUS (SImode, p0, p1);
4804 }
4805 else
4806 addr = p0;
644459d0 4807 }
9d98604b 4808 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4809 rot = p1;
4810 }
4811 }
9d98604b 4812 else if (REG_P (addr))
644459d0 4813 {
9d98604b 4814 if (!reg_aligned_for_addr (addr))
644459d0 4815 rot = addr;
4816 }
4817 else if (GET_CODE (addr) == CONST)
4818 {
4819 if (GET_CODE (XEXP (addr, 0)) == PLUS
4820 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4821 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4822 {
4823 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4824 if (rot_amt & -16)
4825 addr = gen_rtx_CONST (Pmode,
4826 gen_rtx_PLUS (Pmode,
4827 XEXP (XEXP (addr, 0), 0),
4828 GEN_INT (rot_amt & -16)));
4829 else
4830 addr = XEXP (XEXP (addr, 0), 0);
4831 }
4832 else
9d98604b 4833 {
4834 rot = gen_reg_rtx (Pmode);
4835 emit_move_insn (rot, addr);
4836 }
644459d0 4837 }
4838 else if (GET_CODE (addr) == CONST_INT)
4839 {
4840 rot_amt = INTVAL (addr);
4841 addr = GEN_INT (rot_amt & -16);
4842 }
4843 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4844 {
4845 rot = gen_reg_rtx (Pmode);
4846 emit_move_insn (rot, addr);
4847 }
644459d0 4848
9d98604b 4849 rot_amt += extra_rotby;
644459d0 4850
4851 rot_amt &= 15;
4852
4853 if (rot && rot_amt)
4854 {
9d98604b 4855 rtx x = gen_reg_rtx (SImode);
4856 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4857 rot = x;
644459d0 4858 rot_amt = 0;
4859 }
9d98604b 4860 if (!rot && rot_amt)
4861 rot = GEN_INT (rot_amt);
4862
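  /* Mask off the low 4 bits so the TImode load comes from the aligned
     quadword containing the data; when DST1 is given, a second load from
     ADDR+16 supplies the following quadword.  */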
4863 addr0 = copy_rtx (addr);
4864 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4865 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4866
4867 if (dst1)
4868 {
4869 addr1 = plus_constant (copy_rtx (addr), 16);
4870 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4871 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4872 }
644459d0 4873
9d98604b 4874 return rot;
4875}
4876
4877int
4878spu_split_load (rtx * ops)
4879{
4880 enum machine_mode mode = GET_MODE (ops[0]);
4881 rtx addr, load, rot;
4882 int rot_amt;
644459d0 4883
9d98604b 4884 if (GET_MODE_SIZE (mode) >= 16)
4885 return 0;
644459d0 4886
9d98604b 4887 addr = XEXP (ops[1], 0);
4888 gcc_assert (GET_CODE (addr) != AND);
4889
4890 if (!address_needs_split (ops[1]))
4891 {
4892 ops[1] = change_address (ops[1], TImode, addr);
4893 load = gen_reg_rtx (TImode);
4894 emit_insn (gen__movti (load, ops[1]));
4895 spu_convert_move (ops[0], load);
4896 return 1;
4897 }
4898
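  /* For modes narrower than a word, rotate by an extra (size - 4) bytes
     so the value ends up right-justified in the preferred slot, where
     spu_convert_move expects it.  */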
4899 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4900
4901 load = gen_reg_rtx (TImode);
4902 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4903
4904 if (rot)
4905 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4906
9d98604b 4907 spu_convert_move (ops[0], load);
4908 return 1;
644459d0 4909}
4910
9d98604b 4911int
644459d0 4912spu_split_store (rtx * ops)
4913{
4914 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4915 rtx reg;
644459d0 4916 rtx addr, p0, p1, p1_lo, smem;
4917 int aform;
4918 int scalar;
4919
9d98604b 4920 if (GET_MODE_SIZE (mode) >= 16)
4921 return 0;
4922
644459d0 4923 addr = XEXP (ops[0], 0);
9d98604b 4924 gcc_assert (GET_CODE (addr) != AND);
4925
4926 if (!address_needs_split (ops[0]))
4927 {
4928 reg = gen_reg_rtx (TImode);
4929 emit_insn (gen_spu_convert (reg, ops[1]));
4930 ops[0] = change_address (ops[0], TImode, addr);
4931 emit_move_insn (ops[0], reg);
4932 return 1;
4933 }
644459d0 4934
4935 if (GET_CODE (addr) == PLUS)
4936 {
4937 /* 8 cases:
4938 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4939 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4940 aligned reg + aligned const => lqd, c?d, shuf, stqx
4941 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4942 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4943 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4944 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4945 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4946 */
4947 aform = 0;
4948 p0 = XEXP (addr, 0);
4949 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4950 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4951 {
4952 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4953 if (reg_aligned_for_addr (p0))
4954 {
4955 p1 = GEN_INT (INTVAL (p1) & -16);
4956 if (p1 == const0_rtx)
4957 addr = p0;
4958 else
4959 addr = gen_rtx_PLUS (SImode, p0, p1);
4960 }
4961 else
4962 {
4963 rtx x = gen_reg_rtx (SImode);
4964 emit_move_insn (x, p1);
4965 addr = gen_rtx_PLUS (SImode, p0, x);
4966 }
644459d0 4967 }
4968 }
9d98604b 4969 else if (REG_P (addr))
644459d0 4970 {
4971 aform = 0;
4972 p0 = addr;
4973 p1 = p1_lo = const0_rtx;
4974 }
4975 else
4976 {
4977 aform = 1;
4978 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4979 p1 = 0; /* aform doesn't use p1 */
4980 p1_lo = addr;
4981 if (ALIGNED_SYMBOL_REF_P (addr))
4982 p1_lo = const0_rtx;
9d98604b 4983 else if (GET_CODE (addr) == CONST
4984 && GET_CODE (XEXP (addr, 0)) == PLUS
4985 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4986 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4987 {
9d98604b 4988 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4989 if ((v & -16) != 0)
4990 addr = gen_rtx_CONST (Pmode,
4991 gen_rtx_PLUS (Pmode,
4992 XEXP (XEXP (addr, 0), 0),
4993 GEN_INT (v & -16)));
4994 else
4995 addr = XEXP (XEXP (addr, 0), 0);
4996 p1_lo = GEN_INT (v & 15);
644459d0 4997 }
4998 else if (GET_CODE (addr) == CONST_INT)
4999 {
5000 p1_lo = GEN_INT (INTVAL (addr) & 15);
5001 addr = GEN_INT (INTVAL (addr) & -16);
5002 }
9d98604b 5003 else
5004 {
5005 p1_lo = gen_reg_rtx (SImode);
5006 emit_move_insn (p1_lo, addr);
5007 }
644459d0 5008 }
5009
4cbad5bb 5010 gcc_assert (aform == 0 || aform == 1);
9d98604b 5011 reg = gen_reg_rtx (TImode);
e04cf423 5012
644459d0 5013 scalar = store_with_one_insn_p (ops[0]);
5014 if (!scalar)
5015 {
 5016 /* We could copy the flags from the ops[0] MEM to lmem here, but
 5017 we don't because we want this load to be optimized away if
5018 possible, and copying the flags will prevent that in certain
5019 cases, e.g. consider the volatile flag. */
5020
9d98604b 5021 rtx pat = gen_reg_rtx (TImode);
e04cf423 5022 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5023 set_mem_alias_set (lmem, 0);
5024 emit_insn (gen_movti (reg, lmem));
644459d0 5025
9d98604b 5026 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5027 p0 = stack_pointer_rtx;
5028 if (!p1_lo)
5029 p1_lo = const0_rtx;
5030
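      /* cpat computes the insertion control a c?d instruction would
         generate for this offset and size; shufb then merges ops[1] into
         the loaded quadword at that position.  */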
5031 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5032 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5033 }
644459d0 5034 else
5035 {
5036 if (GET_CODE (ops[1]) == REG)
5037 emit_insn (gen_spu_convert (reg, ops[1]));
5038 else if (GET_CODE (ops[1]) == SUBREG)
5039 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5040 else
5041 abort ();
5042 }
5043
5044 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5045 emit_insn (gen_ashlti3
5046 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5047
9d98604b 5048 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5049 /* We can't use the previous alias set because the memory has changed
5050 size and can potentially overlap objects of other types. */
5051 set_mem_alias_set (smem, 0);
5052
e04cf423 5053 emit_insn (gen_movti (smem, reg));
9d98604b 5054 return 1;
644459d0 5055}
5056
 5057/* Return TRUE if X is a MEM which is a struct member reference
5058 and the member can safely be loaded and stored with a single
5059 instruction because it is padded. */
5060static int
5061mem_is_padded_component_ref (rtx x)
5062{
5063 tree t = MEM_EXPR (x);
5064 tree r;
5065 if (!t || TREE_CODE (t) != COMPONENT_REF)
5066 return 0;
5067 t = TREE_OPERAND (t, 1);
5068 if (!t || TREE_CODE (t) != FIELD_DECL
5069 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5070 return 0;
5071 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5072 r = DECL_FIELD_CONTEXT (t);
5073 if (!r || TREE_CODE (r) != RECORD_TYPE)
5074 return 0;
5075 /* Make sure they are the same mode */
5076 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5077 return 0;
5078 /* If there are no following fields then the field alignment assures
fa7637bd 5079 the structure is padded to the alignment which means this field is
5080 padded too. */
644459d0 5081 if (TREE_CHAIN (t) == 0)
5082 return 1;
5083 /* If the following field is also aligned then this field will be
5084 padded. */
5085 t = TREE_CHAIN (t);
5086 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5087 return 1;
5088 return 0;
5089}
5090
c7b91b14 5091/* Parse the -mfixed-range= option string. */
5092static void
5093fix_range (const char *const_str)
5094{
5095 int i, first, last;
5096 char *str, *dash, *comma;
5097
 5098 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5099 REG2 are either register names or register numbers. The effect
5100 of this option is to mark the registers in the range from REG1 to
5101 REG2 as ``fixed'' so they won't be used by the compiler. */
5102
5103 i = strlen (const_str);
5104 str = (char *) alloca (i + 1);
5105 memcpy (str, const_str, i + 1);
5106
5107 while (1)
5108 {
5109 dash = strchr (str, '-');
5110 if (!dash)
5111 {
5112 warning (0, "value of -mfixed-range must have form REG1-REG2");
5113 return;
5114 }
5115 *dash = '\0';
5116 comma = strchr (dash + 1, ',');
5117 if (comma)
5118 *comma = '\0';
5119
5120 first = decode_reg_name (str);
5121 if (first < 0)
5122 {
5123 warning (0, "unknown register name: %s", str);
5124 return;
5125 }
5126
5127 last = decode_reg_name (dash + 1);
5128 if (last < 0)
5129 {
5130 warning (0, "unknown register name: %s", dash + 1);
5131 return;
5132 }
5133
5134 *dash = '-';
5135
5136 if (first > last)
5137 {
5138 warning (0, "%s-%s is an empty range", str, dash + 1);
5139 return;
5140 }
5141
5142 for (i = first; i <= last; ++i)
5143 fixed_regs[i] = call_used_regs[i] = 1;
5144
5145 if (!comma)
5146 break;
5147
5148 *comma = ',';
5149 str = comma + 1;
5150 }
5151}
5152
644459d0 5153/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5154 can be generated using the fsmbi instruction. */
5155int
5156fsmbi_const_p (rtx x)
5157{
dea01258 5158 if (CONSTANT_P (x))
5159 {
5df189be 5160 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5161 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5162 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5163 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5164 }
5165 return 0;
5166}
5167
5168/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5169 can be generated using the cbd, chd, cwd or cdd instruction. */
5170int
5171cpat_const_p (rtx x, enum machine_mode mode)
5172{
5173 if (CONSTANT_P (x))
5174 {
5175 enum immediate_class c = classify_immediate (x, mode);
5176 return c == IC_CPAT;
5177 }
5178 return 0;
5179}
644459d0 5180
dea01258 5181rtx
5182gen_cpat_const (rtx * ops)
5183{
5184 unsigned char dst[16];
5185 int i, offset, shift, isize;
5186 if (GET_CODE (ops[3]) != CONST_INT
5187 || GET_CODE (ops[2]) != CONST_INT
5188 || (GET_CODE (ops[1]) != CONST_INT
5189 && GET_CODE (ops[1]) != REG))
5190 return 0;
5191 if (GET_CODE (ops[1]) == REG
5192 && (!REG_POINTER (ops[1])
5193 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5194 return 0;
644459d0 5195
5196 for (i = 0; i < 16; i++)
dea01258 5197 dst[i] = i + 16;
5198 isize = INTVAL (ops[3]);
5199 if (isize == 1)
5200 shift = 3;
5201 else if (isize == 2)
5202 shift = 2;
5203 else
5204 shift = 0;
5205 offset = (INTVAL (ops[2]) +
5206 (GET_CODE (ops[1]) ==
5207 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5208 for (i = 0; i < isize; i++)
5209 dst[offset + i] = i + shift;
5210 return array_to_constant (TImode, dst);
644459d0 5211}
5212
5213/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5214 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5215 than 16 bytes, the value is repeated across the rest of the array. */
5216void
5217constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5218{
5219 HOST_WIDE_INT val;
5220 int i, j, first;
5221
5222 memset (arr, 0, 16);
5223 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5224 if (GET_CODE (x) == CONST_INT
5225 || (GET_CODE (x) == CONST_DOUBLE
5226 && (mode == SFmode || mode == DFmode)))
5227 {
5228 gcc_assert (mode != VOIDmode && mode != BLKmode);
5229
5230 if (GET_CODE (x) == CONST_DOUBLE)
5231 val = const_double_to_hwint (x);
5232 else
5233 val = INTVAL (x);
5234 first = GET_MODE_SIZE (mode) - 1;
5235 for (i = first; i >= 0; i--)
5236 {
5237 arr[i] = val & 0xff;
5238 val >>= 8;
5239 }
5240 /* Splat the constant across the whole array. */
5241 for (j = 0, i = first + 1; i < 16; i++)
5242 {
5243 arr[i] = arr[j];
5244 j = (j == first) ? 0 : j + 1;
5245 }
5246 }
5247 else if (GET_CODE (x) == CONST_DOUBLE)
5248 {
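      /* An integer CONST_DOUBLE: CONST_DOUBLE_HIGH supplies bytes 0-7 and
         CONST_DOUBLE_LOW bytes 8-15.  */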
5249 val = CONST_DOUBLE_LOW (x);
5250 for (i = 15; i >= 8; i--)
5251 {
5252 arr[i] = val & 0xff;
5253 val >>= 8;
5254 }
5255 val = CONST_DOUBLE_HIGH (x);
5256 for (i = 7; i >= 0; i--)
5257 {
5258 arr[i] = val & 0xff;
5259 val >>= 8;
5260 }
5261 }
5262 else if (GET_CODE (x) == CONST_VECTOR)
5263 {
5264 int units;
5265 rtx elt;
5266 mode = GET_MODE_INNER (mode);
5267 units = CONST_VECTOR_NUNITS (x);
5268 for (i = 0; i < units; i++)
5269 {
5270 elt = CONST_VECTOR_ELT (x, i);
5271 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5272 {
5273 if (GET_CODE (elt) == CONST_DOUBLE)
5274 val = const_double_to_hwint (elt);
5275 else
5276 val = INTVAL (elt);
5277 first = GET_MODE_SIZE (mode) - 1;
5278 if (first + i * GET_MODE_SIZE (mode) > 16)
5279 abort ();
5280 for (j = first; j >= 0; j--)
5281 {
5282 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5283 val >>= 8;
5284 }
5285 }
5286 }
5287 }
5288 else
5289 gcc_unreachable();
5290}
5291
5292/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5293 smaller than 16 bytes, use the bytes that would represent that value
5294 in a register, e.g., for QImode return the value of arr[3]. */
5295rtx
e96f2783 5296array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5297{
5298 enum machine_mode inner_mode;
5299 rtvec v;
5300 int units, size, i, j, k;
5301 HOST_WIDE_INT val;
5302
5303 if (GET_MODE_CLASS (mode) == MODE_INT
5304 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5305 {
5306 j = GET_MODE_SIZE (mode);
5307 i = j < 4 ? 4 - j : 0;
5308 for (val = 0; i < j; i++)
5309 val = (val << 8) | arr[i];
5310 val = trunc_int_for_mode (val, mode);
5311 return GEN_INT (val);
5312 }
5313
5314 if (mode == TImode)
5315 {
5316 HOST_WIDE_INT high;
5317 for (i = high = 0; i < 8; i++)
5318 high = (high << 8) | arr[i];
5319 for (i = 8, val = 0; i < 16; i++)
5320 val = (val << 8) | arr[i];
5321 return immed_double_const (val, high, TImode);
5322 }
5323 if (mode == SFmode)
5324 {
5325 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5326 val = trunc_int_for_mode (val, SImode);
171b6d22 5327 return hwint_to_const_double (SFmode, val);
644459d0 5328 }
5329 if (mode == DFmode)
5330 {
1f915911 5331 for (i = 0, val = 0; i < 8; i++)
5332 val = (val << 8) | arr[i];
171b6d22 5333 return hwint_to_const_double (DFmode, val);
644459d0 5334 }
5335
5336 if (!VECTOR_MODE_P (mode))
5337 abort ();
5338
5339 units = GET_MODE_NUNITS (mode);
5340 size = GET_MODE_UNIT_SIZE (mode);
5341 inner_mode = GET_MODE_INNER (mode);
5342 v = rtvec_alloc (units);
5343
5344 for (k = i = 0; i < units; ++i)
5345 {
5346 val = 0;
5347 for (j = 0; j < size; j++, k++)
5348 val = (val << 8) | arr[k];
5349
5350 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5351 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5352 else
5353 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5354 }
5355 if (k > 16)
5356 abort ();
5357
5358 return gen_rtx_CONST_VECTOR (mode, v);
5359}
5360
5361static void
5362reloc_diagnostic (rtx x)
5363{
712d2297 5364 tree decl = 0;
644459d0 5365 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5366 return;
5367
5368 if (GET_CODE (x) == SYMBOL_REF)
5369 decl = SYMBOL_REF_DECL (x);
5370 else if (GET_CODE (x) == CONST
5371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5372 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5373
5374 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5375 if (decl && !DECL_P (decl))
5376 decl = 0;
5377
644459d0 5378 /* The decl could be a string constant. */
5379 if (decl && DECL_P (decl))
712d2297 5380 {
5381 location_t loc;
5382 /* We use last_assemble_variable_decl to get line information. It's
5383 not always going to be right and might not even be close, but will
5384 be right for the more common cases. */
5385 if (!last_assemble_variable_decl || in_section == ctors_section)
5386 loc = DECL_SOURCE_LOCATION (decl);
5387 else
5388 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5389
712d2297 5390 if (TARGET_WARN_RELOC)
5391 warning_at (loc, 0,
5392 "creating run-time relocation for %qD", decl);
5393 else
5394 error_at (loc,
5395 "creating run-time relocation for %qD", decl);
5396 }
5397 else
5398 {
5399 if (TARGET_WARN_RELOC)
5400 warning_at (input_location, 0, "creating run-time relocation");
5401 else
5402 error_at (input_location, "creating run-time relocation");
5403 }
644459d0 5404}
5405
5406/* Hook into assemble_integer so we can generate an error for run-time
5407 relocations. The SPU ABI disallows them. */
5408static bool
5409spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5410{
5411 /* By default run-time relocations aren't supported, but we allow them
 5412 in case users support them in their own run-time loader. And we provide
5413 a warning for those users that don't. */
5414 if ((GET_CODE (x) == SYMBOL_REF)
5415 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5416 reloc_diagnostic (x);
5417
5418 return default_assemble_integer (x, size, aligned_p);
5419}
5420
5421static void
5422spu_asm_globalize_label (FILE * file, const char *name)
5423{
5424 fputs ("\t.global\t", file);
5425 assemble_name (file, name);
5426 fputs ("\n", file);
5427}
5428
5429static bool
f529eb25 5430spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5431 bool speed ATTRIBUTE_UNUSED)
644459d0 5432{
5433 enum machine_mode mode = GET_MODE (x);
5434 int cost = COSTS_N_INSNS (2);
5435
5436 /* Folding to a CONST_VECTOR will use extra space but there might
5437 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5438 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5439 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5440 because this cost will only be compared against a single insn.
5441 if (code == CONST_VECTOR)
5442 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5443 */
5444
5445 /* Use defaults for float operations. Not accurate but good enough. */
5446 if (mode == DFmode)
5447 {
5448 *total = COSTS_N_INSNS (13);
5449 return true;
5450 }
5451 if (mode == SFmode)
5452 {
5453 *total = COSTS_N_INSNS (6);
5454 return true;
5455 }
5456 switch (code)
5457 {
5458 case CONST_INT:
5459 if (satisfies_constraint_K (x))
5460 *total = 0;
5461 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5462 *total = COSTS_N_INSNS (1);
5463 else
5464 *total = COSTS_N_INSNS (3);
5465 return true;
5466
5467 case CONST:
5468 *total = COSTS_N_INSNS (3);
5469 return true;
5470
5471 case LABEL_REF:
5472 case SYMBOL_REF:
5473 *total = COSTS_N_INSNS (0);
5474 return true;
5475
5476 case CONST_DOUBLE:
5477 *total = COSTS_N_INSNS (5);
5478 return true;
5479
5480 case FLOAT_EXTEND:
5481 case FLOAT_TRUNCATE:
5482 case FLOAT:
5483 case UNSIGNED_FLOAT:
5484 case FIX:
5485 case UNSIGNED_FIX:
5486 *total = COSTS_N_INSNS (7);
5487 return true;
5488
5489 case PLUS:
5490 if (mode == TImode)
5491 {
5492 *total = COSTS_N_INSNS (9);
5493 return true;
5494 }
5495 break;
5496
5497 case MULT:
5498 cost =
5499 GET_CODE (XEXP (x, 0)) ==
5500 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5501 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5502 {
5503 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5504 {
5505 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5506 cost = COSTS_N_INSNS (14);
5507 if ((val & 0xffff) == 0)
5508 cost = COSTS_N_INSNS (9);
5509 else if (val > 0 && val < 0x10000)
5510 cost = COSTS_N_INSNS (11);
5511 }
5512 }
5513 *total = cost;
5514 return true;
5515 case DIV:
5516 case UDIV:
5517 case MOD:
5518 case UMOD:
5519 *total = COSTS_N_INSNS (20);
5520 return true;
5521 case ROTATE:
5522 case ROTATERT:
5523 case ASHIFT:
5524 case ASHIFTRT:
5525 case LSHIFTRT:
5526 *total = COSTS_N_INSNS (4);
5527 return true;
5528 case UNSPEC:
5529 if (XINT (x, 1) == UNSPEC_CONVERT)
5530 *total = COSTS_N_INSNS (0);
5531 else
5532 *total = COSTS_N_INSNS (4);
5533 return true;
5534 }
5535 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5536 if (GET_MODE_CLASS (mode) == MODE_INT
5537 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5538 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5539 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5540 *total = cost;
5541 return true;
5542}
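
/* Illustration of the scaling above (added commentary): for integer modes
   wider than SImode the cost grows quadratically with the number of word
   slots, reflecting that such operations are synthesized from word-sized
   pieces.  E.g. a DImode operation that falls through to the default cost
   of COSTS_N_INSNS (2) is scaled by (8/4) * (8/4) = 4, and a TImode one
   by (16/4) * (16/4) = 16.  */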
5543
1bd43494 5544static enum machine_mode
5545spu_unwind_word_mode (void)
644459d0 5546{
1bd43494 5547 return SImode;
644459d0 5548}
5549
5550/* Decide whether we can make a sibling call to a function. DECL is the
5551 declaration of the function being targeted by the call and EXP is the
5552 CALL_EXPR representing the call. */
5553static bool
5554spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5555{
5556 return decl && !TARGET_LARGE_MEM;
5557}
5558
5559/* We need to correctly update the back chain pointer and the Available
 5560   Stack Size (which is in the second slot of the sp register).  */
5561void
5562spu_allocate_stack (rtx op0, rtx op1)
5563{
5564 HOST_WIDE_INT v;
5565 rtx chain = gen_reg_rtx (V4SImode);
5566 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5567 rtx sp = gen_reg_rtx (V4SImode);
5568 rtx splatted = gen_reg_rtx (V4SImode);
5569 rtx pat = gen_reg_rtx (TImode);
5570
5571 /* copy the back chain so we can save it back again. */
5572 emit_move_insn (chain, stack_bot);
5573
5574 op1 = force_reg (SImode, op1);
5575
5576 v = 0x1020300010203ll;
5577 emit_move_insn (pat, immed_double_const (v, v, TImode));
5578 emit_insn (gen_shufb (splatted, op1, op1, pat));
5579
5580 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5581 emit_insn (gen_subv4si3 (sp, sp, splatted));
5582
5583 if (flag_stack_check)
5584 {
5585 rtx avail = gen_reg_rtx(SImode);
5586 rtx result = gen_reg_rtx(SImode);
5587 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5588 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5589 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5590 }
5591
5592 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5593
5594 emit_move_insn (stack_bot, chain);
5595
5596 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5597}
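
/* A reading of the constants used above (added commentary): the shufb
   pattern 0x0001020300010203 repeated in both halves is the byte sequence
   { 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 }, which copies bytes 0-3 of op1
   (the SImode allocation size in its preferred slot) into every word of
   `splatted'.  The single vector subtract then decrements both the stack
   pointer in word 0 and the Available Stack Size in word 1 by the same
   amount; words 2 and 3 are don't-cares.  */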
5598
5599void
5600spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5601{
5602 static unsigned char arr[16] =
5603 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5604 rtx temp = gen_reg_rtx (SImode);
5605 rtx temp2 = gen_reg_rtx (SImode);
5606 rtx temp3 = gen_reg_rtx (V4SImode);
5607 rtx temp4 = gen_reg_rtx (V4SImode);
5608 rtx pat = gen_reg_rtx (TImode);
5609 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5610
5611 /* Restore the backchain from the first word, sp from the second. */
5612 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5613 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5614
5615 emit_move_insn (pat, array_to_constant (TImode, arr));
5616
5617 /* Compute Available Stack Size for sp */
5618 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5619 emit_insn (gen_shufb (temp3, temp, temp, pat));
5620
5621 /* Compute Available Stack Size for back chain */
5622 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5623 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5624 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5625
5626 emit_insn (gen_addv4si3 (sp, sp, temp3));
5627 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5628}
5629
5630static void
5631spu_init_libfuncs (void)
5632{
5633 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5634 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5635 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5636 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5637 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5638 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5639 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5640 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5641 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5642 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5643 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5644
5645 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5646 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5647
5825ec3f 5648 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5649 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5650 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5651 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5652 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5653 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5654 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5655 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5656 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5657 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5658 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5659 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5660
19a53068 5661 set_optab_libfunc (smul_optab, TImode, "__multi3");
5662 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5663 set_optab_libfunc (smod_optab, TImode, "__modti3");
5664 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5665 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5666 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5667}
5668
5669/* Make a subreg, stripping any existing subreg. We could possibly just
5670 call simplify_subreg, but in this case we know what we want. */
5671rtx
5672spu_gen_subreg (enum machine_mode mode, rtx x)
5673{
5674 if (GET_CODE (x) == SUBREG)
5675 x = SUBREG_REG (x);
5676 if (GET_MODE (x) == mode)
5677 return x;
5678 return gen_rtx_SUBREG (mode, x, 0);
5679}
5680
5681static bool
fb80456a 5682spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5683{
5684 return (TYPE_MODE (type) == BLKmode
5685 && ((type) == 0
5686 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5687 || int_size_in_bytes (type) >
5688 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5689}
5690\f
5691/* Create the built-in types and functions */
5692
c2233b46 5693enum spu_function_code
5694{
5695#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5696#include "spu-builtins.def"
5697#undef DEF_BUILTIN
5698 NUM_SPU_BUILTINS
5699};
5700
5701extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5702
644459d0 5703struct spu_builtin_description spu_builtins[] = {
5704#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5705 {fcode, icode, name, type, params},
644459d0 5706#include "spu-builtins.def"
5707#undef DEF_BUILTIN
5708};
5709
0c5c4d59 5710static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5711
5712/* Returns the spu builtin decl for CODE. */
e6925042 5713
5714static tree
5715spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5716{
5717 if (code >= NUM_SPU_BUILTINS)
5718 return error_mark_node;
5719
0c5c4d59 5720 return spu_builtin_decls[code];
e6925042 5721}
5722
5723
644459d0 5724static void
5725spu_init_builtins (void)
5726{
5727 struct spu_builtin_description *d;
5728 unsigned int i;
5729
5730 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5731 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5732 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5733 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5734 V4SF_type_node = build_vector_type (float_type_node, 4);
5735 V2DF_type_node = build_vector_type (double_type_node, 2);
5736
5737 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5738 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5739 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5740 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5741
c4ecce0c 5742 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5743
5744 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5748 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5750 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5751 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5753 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5754 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5755 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5756
5757 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5758 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5759 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5760 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5761 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5762 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5763 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5764 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5765
5766 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5767 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5768
5769 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5770
5771 spu_builtin_types[SPU_BTI_PTR] =
5772 build_pointer_type (build_qualified_type
5773 (void_type_node,
5774 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5775
5776 /* For each builtin we build a new prototype. The tree code will make
5777 sure nodes are shared. */
5778 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5779 {
5780 tree p;
5781 char name[64]; /* build_function will make a copy. */
5782 int parm;
5783
5784 if (d->name == 0)
5785 continue;
5786
5dfbd18f 5787 /* Find last parm. */
644459d0 5788 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5789 ;
644459d0 5790
5791 p = void_list_node;
5792 while (parm > 1)
5793 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5794
5795 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5796
5797 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5798 spu_builtin_decls[i] =
3726fe5e 5799 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5800 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5801 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5802
5803 /* These builtins don't throw. */
0c5c4d59 5804 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5805 }
5806}
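
/* Illustration of the prototype construction above (added commentary,
   using a hypothetical entry): a builtin whose parm[] array is

       { SPU_BTI_V4SI, SPU_BTI_V4SI, SPU_BTI_INTSI, SPU_BTI_END_OF_PARAMS }

   ends up with the function type V4SI (V4SI, int): the first loop finds
   the last parameter, the while loop conses the argument types onto
   void_list_node in reverse order, and build_function_type attaches the
   return type parm[0].  */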
5807
cf31d486 5808void
5809spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5810{
5811 static unsigned char arr[16] =
5812 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5813
5814 rtx temp = gen_reg_rtx (Pmode);
5815 rtx temp2 = gen_reg_rtx (V4SImode);
5816 rtx temp3 = gen_reg_rtx (V4SImode);
5817 rtx pat = gen_reg_rtx (TImode);
5818 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5819
5820 emit_move_insn (pat, array_to_constant (TImode, arr));
5821
5822 /* Restore the sp. */
5823 emit_move_insn (temp, op1);
5824 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5825
5826 /* Compute available stack size for sp. */
5827 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5828 emit_insn (gen_shufb (temp3, temp, temp, pat));
5829
5830 emit_insn (gen_addv4si3 (sp, sp, temp3));
5831 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5832}
5833
644459d0 5834int
5835spu_safe_dma (HOST_WIDE_INT channel)
5836{
006e4b96 5837 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5838}
5839
5840void
5841spu_builtin_splats (rtx ops[])
5842{
5843 enum machine_mode mode = GET_MODE (ops[0]);
5844 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5845 {
5846 unsigned char arr[16];
5847 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5848 emit_move_insn (ops[0], array_to_constant (mode, arr));
5849 }
644459d0 5850 else
5851 {
5852 rtx reg = gen_reg_rtx (TImode);
5853 rtx shuf;
5854 if (GET_CODE (ops[1]) != REG
5855 && GET_CODE (ops[1]) != SUBREG)
5856 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5857 switch (mode)
5858 {
5859 case V2DImode:
5860 case V2DFmode:
5861 shuf =
5862 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5863 TImode);
5864 break;
5865 case V4SImode:
5866 case V4SFmode:
5867 shuf =
5868 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5869 TImode);
5870 break;
5871 case V8HImode:
5872 shuf =
5873 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5874 TImode);
5875 break;
5876 case V16QImode:
5877 shuf =
5878 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5879 TImode);
5880 break;
5881 default:
5882 abort ();
5883 }
5884 emit_move_insn (reg, shuf);
5885 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5886 }
5887}
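
/* A reading of the shufb control constants above (added commentary): each
   replicates the scalar from its preferred slot across the vector --
   bytes 0-7 for a doubleword, bytes 0-3 (pattern 0x00010203) for a word,
   bytes 2-3 (0x0203) for a halfword, and byte 3 (0x03) for a byte, since
   sub-word scalars live in the low part of the first word slot.  */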
5888
5889void
5890spu_builtin_extract (rtx ops[])
5891{
5892 enum machine_mode mode;
5893 rtx rot, from, tmp;
5894
5895 mode = GET_MODE (ops[1]);
5896
5897 if (GET_CODE (ops[2]) == CONST_INT)
5898 {
5899 switch (mode)
5900 {
5901 case V16QImode:
5902 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5903 break;
5904 case V8HImode:
5905 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5906 break;
5907 case V4SFmode:
5908 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5909 break;
5910 case V4SImode:
5911 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5912 break;
5913 case V2DImode:
5914 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5915 break;
5916 case V2DFmode:
5917 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5918 break;
5919 default:
5920 abort ();
5921 }
5922 return;
5923 }
5924
5925 from = spu_gen_subreg (TImode, ops[1]);
5926 rot = gen_reg_rtx (TImode);
5927 tmp = gen_reg_rtx (SImode);
5928
5929 switch (mode)
5930 {
5931 case V16QImode:
5932 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5933 break;
5934 case V8HImode:
5935 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5936 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5937 break;
5938 case V4SFmode:
5939 case V4SImode:
5940 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5941 break;
5942 case V2DImode:
5943 case V2DFmode:
5944 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5945 break;
5946 default:
5947 abort ();
5948 }
5949 emit_insn (gen_rotqby_ti (rot, from, tmp));
5950
5951 emit_insn (gen_spu_convert (ops[0], rot));
5952}
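
/* Illustration of the variable-index path above (added commentary): tmp is
   the byte rotation that brings the requested element into the scalar
   preferred slot before gen_spu_convert extracts it.  E.g. for V4SImode
   and index 2, tmp = 2 << 2 = 8, and rotqby by 8 bytes moves element 2
   (bytes 8-11) into bytes 0-3; for V16QImode and index 5, tmp = 5 - 3 = 2
   moves byte 5 into byte 3, the QImode preferred slot.  */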
5953
5954void
5955spu_builtin_insert (rtx ops[])
5956{
5957 enum machine_mode mode = GET_MODE (ops[0]);
5958 enum machine_mode imode = GET_MODE_INNER (mode);
5959 rtx mask = gen_reg_rtx (TImode);
5960 rtx offset;
5961
5962 if (GET_CODE (ops[3]) == CONST_INT)
5963 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5964 else
5965 {
5966 offset = gen_reg_rtx (SImode);
5967 emit_insn (gen_mulsi3
5968 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5969 }
5970 emit_insn (gen_cpat
5971 (mask, stack_pointer_rtx, offset,
5972 GEN_INT (GET_MODE_SIZE (imode))));
5973 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5974}
5975
5976void
5977spu_builtin_promote (rtx ops[])
5978{
5979 enum machine_mode mode, imode;
5980 rtx rot, from, offset;
5981 HOST_WIDE_INT pos;
5982
5983 mode = GET_MODE (ops[0]);
5984 imode = GET_MODE_INNER (mode);
5985
5986 from = gen_reg_rtx (TImode);
5987 rot = spu_gen_subreg (TImode, ops[0]);
5988
5989 emit_insn (gen_spu_convert (from, ops[1]));
5990
5991 if (GET_CODE (ops[2]) == CONST_INT)
5992 {
5993 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5994 if (GET_MODE_SIZE (imode) < 4)
5995 pos += 4 - GET_MODE_SIZE (imode);
5996 offset = GEN_INT (pos & 15);
5997 }
5998 else
5999 {
6000 offset = gen_reg_rtx (SImode);
6001 switch (mode)
6002 {
6003 case V16QImode:
6004 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6005 break;
6006 case V8HImode:
6007 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6008 emit_insn (gen_addsi3 (offset, offset, offset));
6009 break;
6010 case V4SFmode:
6011 case V4SImode:
6012 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6013 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6014 break;
6015 case V2DImode:
6016 case V2DFmode:
6017 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6018 break;
6019 default:
6020 abort ();
6021 }
6022 }
6023 emit_insn (gen_rotqby_ti (rot, from, offset));
6024}
6025
e96f2783 6026static void
6027spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6028{
e96f2783 6029 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6030 rtx shuf = gen_reg_rtx (V4SImode);
6031 rtx insn = gen_reg_rtx (V4SImode);
6032 rtx shufc;
6033 rtx insnc;
6034 rtx mem;
6035
6036 fnaddr = force_reg (SImode, fnaddr);
6037 cxt = force_reg (SImode, cxt);
6038
6039 if (TARGET_LARGE_MEM)
6040 {
6041 rtx rotl = gen_reg_rtx (V4SImode);
6042 rtx mask = gen_reg_rtx (V4SImode);
6043 rtx bi = gen_reg_rtx (SImode);
e96f2783 6044 static unsigned char const shufa[16] = {
644459d0 6045 2, 3, 0, 1, 18, 19, 16, 17,
6046 0, 1, 2, 3, 16, 17, 18, 19
6047 };
e96f2783 6048 static unsigned char const insna[16] = {
644459d0 6049 0x41, 0, 0, 79,
6050 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6051 0x60, 0x80, 0, 79,
6052 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6053 };
6054
6055 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6056 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6057
6058 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6059 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6060 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6061 emit_insn (gen_selb (insn, insnc, rotl, mask));
6062
e96f2783 6063 mem = adjust_address (m_tramp, V4SImode, 0);
6064 emit_move_insn (mem, insn);
644459d0 6065
6066 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6067 mem = adjust_address (m_tramp, Pmode, 16);
6068 emit_move_insn (mem, bi);
644459d0 6069 }
6070 else
6071 {
6072 rtx scxt = gen_reg_rtx (SImode);
6073 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6074 static unsigned char const insna[16] = {
644459d0 6075 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6076 0x30, 0, 0, 0,
6077 0, 0, 0, 0,
6078 0, 0, 0, 0
6079 };
6080
6081 shufc = gen_reg_rtx (TImode);
6082 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6083
6084 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6085 fits 18 bits and the last 4 are zeros. This will be true if
6086 the stack pointer is initialized to 0x3fff0 at program start,
6087 otherwise the ila instruction will be garbage. */
6088
6089 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6090 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6091 emit_insn (gen_cpat
6092 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6093 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6094 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6095
e96f2783 6096 mem = adjust_address (m_tramp, V4SImode, 0);
6097 emit_move_insn (mem, insn);
644459d0 6098 }
6099 emit_insn (gen_sync ());
6100}
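
/* A reading of the trampoline encodings above (added commentary, an
   interpretation rather than a change): in the small-memory case the
   0x42... template word is an ila loading the static chain register, with
   cxt shifted left by 7 into the immediate field above the 7-bit RT field,
   and the 0x30... word is an absolute branch whose word-address field gets
   fnaddr shifted left by 5 (i.e. fnaddr / 4 placed at bit 7).  The
   large-memory case instead builds load-immediate pairs for both fnaddr
   and cxt and stores a word of 0x35000000 + (79 << 7) at offset 16, which
   reads as an indirect branch through register 79.  */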
6101
6102void
6103spu_expand_sign_extend (rtx ops[])
6104{
6105 unsigned char arr[16];
6106 rtx pat = gen_reg_rtx (TImode);
6107 rtx sign, c;
6108 int i, last;
6109 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6110 if (GET_MODE (ops[1]) == QImode)
6111 {
6112 sign = gen_reg_rtx (HImode);
6113 emit_insn (gen_extendqihi2 (sign, ops[1]));
6114 for (i = 0; i < 16; i++)
6115 arr[i] = 0x12;
6116 arr[last] = 0x13;
6117 }
6118 else
6119 {
6120 for (i = 0; i < 16; i++)
6121 arr[i] = 0x10;
6122 switch (GET_MODE (ops[1]))
6123 {
6124 case HImode:
6125 sign = gen_reg_rtx (SImode);
6126 emit_insn (gen_extendhisi2 (sign, ops[1]));
6127 arr[last] = 0x03;
6128 arr[last - 1] = 0x02;
6129 break;
6130 case SImode:
6131 sign = gen_reg_rtx (SImode);
6132 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6133 for (i = 0; i < 4; i++)
6134 arr[last - i] = 3 - i;
6135 break;
6136 case DImode:
6137 sign = gen_reg_rtx (SImode);
6138 c = gen_reg_rtx (SImode);
6139 emit_insn (gen_spu_convert (c, ops[1]));
6140 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6141 for (i = 0; i < 8; i++)
6142 arr[last - i] = 7 - i;
6143 break;
6144 default:
6145 abort ();
6146 }
6147 }
6148 emit_move_insn (pat, array_to_constant (TImode, arr));
6149 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6150}
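
/* Illustration of the shuffle control above (added commentary): shufb
   control bytes 0x10-0x1f select from the second source, here the register
   holding the replicated sign.  For an SImode to DImode extension the
   array becomes { 0x10,0x10,0x10,0x10, 0,1,2,3, ... }: result bytes 0-3
   are filled from the sign word (all zeros or all ones after the
   arithmetic shift by 31) and bytes 4-7 take the original value from the
   first source.  */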
6151
 6152/* Expand vector initialization.  If there are any constant parts,
6153 load constant parts first. Then load any non-constant parts. */
6154void
6155spu_expand_vector_init (rtx target, rtx vals)
6156{
6157 enum machine_mode mode = GET_MODE (target);
6158 int n_elts = GET_MODE_NUNITS (mode);
6159 int n_var = 0;
6160 bool all_same = true;
790c536c 6161 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6162 int i;
6163
6164 first = XVECEXP (vals, 0, 0);
6165 for (i = 0; i < n_elts; ++i)
6166 {
6167 x = XVECEXP (vals, 0, i);
e442af0b 6168 if (!(CONST_INT_P (x)
6169 || GET_CODE (x) == CONST_DOUBLE
6170 || GET_CODE (x) == CONST_FIXED))
644459d0 6171 ++n_var;
6172 else
6173 {
6174 if (first_constant == NULL_RTX)
6175 first_constant = x;
6176 }
6177 if (i > 0 && !rtx_equal_p (x, first))
6178 all_same = false;
6179 }
6180
6181 /* if all elements are the same, use splats to repeat elements */
6182 if (all_same)
6183 {
6184 if (!CONSTANT_P (first)
6185 && !register_operand (first, GET_MODE (x)))
6186 first = force_reg (GET_MODE (first), first);
6187 emit_insn (gen_spu_splats (target, first));
6188 return;
6189 }
6190
6191 /* load constant parts */
6192 if (n_var != n_elts)
6193 {
6194 if (n_var == 0)
6195 {
6196 emit_move_insn (target,
6197 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6198 }
6199 else
6200 {
6201 rtx constant_parts_rtx = copy_rtx (vals);
6202
6203 gcc_assert (first_constant != NULL_RTX);
6204 /* fill empty slots with the first constant, this increases
6205 our chance of using splats in the recursive call below. */
6206 for (i = 0; i < n_elts; ++i)
e442af0b 6207 {
6208 x = XVECEXP (constant_parts_rtx, 0, i);
6209 if (!(CONST_INT_P (x)
6210 || GET_CODE (x) == CONST_DOUBLE
6211 || GET_CODE (x) == CONST_FIXED))
6212 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6213 }
644459d0 6214
6215 spu_expand_vector_init (target, constant_parts_rtx);
6216 }
6217 }
6218
6219 /* load variable parts */
6220 if (n_var != 0)
6221 {
6222 rtx insert_operands[4];
6223
6224 insert_operands[0] = target;
6225 insert_operands[2] = target;
6226 for (i = 0; i < n_elts; ++i)
6227 {
6228 x = XVECEXP (vals, 0, i);
e442af0b 6229 if (!(CONST_INT_P (x)
6230 || GET_CODE (x) == CONST_DOUBLE
6231 || GET_CODE (x) == CONST_FIXED))
644459d0 6232 {
6233 if (!register_operand (x, GET_MODE (x)))
6234 x = force_reg (GET_MODE (x), x);
6235 insert_operands[1] = x;
6236 insert_operands[3] = GEN_INT (i);
6237 spu_builtin_insert (insert_operands);
6238 }
6239 }
6240 }
6241}
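
/* Worked example (added commentary): for a V4SImode init { x, 1, 2, 3 }
   with x in a register, n_var is 1 and first_constant is 1, so the
   recursive call first loads the constant vector { 1, 1, 2, 3 } (the
   variable slot padded with first_constant), and spu_builtin_insert then
   overwrites element 0 with x.  If every element had been x, the splat
   path above would have been used instead.  */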
6352eedf 6242
5474166e 6243/* Return insn index for the vector compare instruction for given CODE,
6244 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6245
6246static int
6247get_vec_cmp_insn (enum rtx_code code,
6248 enum machine_mode dest_mode,
6249 enum machine_mode op_mode)
6250
6251{
6252 switch (code)
6253 {
6254 case EQ:
6255 if (dest_mode == V16QImode && op_mode == V16QImode)
6256 return CODE_FOR_ceq_v16qi;
6257 if (dest_mode == V8HImode && op_mode == V8HImode)
6258 return CODE_FOR_ceq_v8hi;
6259 if (dest_mode == V4SImode && op_mode == V4SImode)
6260 return CODE_FOR_ceq_v4si;
6261 if (dest_mode == V4SImode && op_mode == V4SFmode)
6262 return CODE_FOR_ceq_v4sf;
6263 if (dest_mode == V2DImode && op_mode == V2DFmode)
6264 return CODE_FOR_ceq_v2df;
6265 break;
6266 case GT:
6267 if (dest_mode == V16QImode && op_mode == V16QImode)
6268 return CODE_FOR_cgt_v16qi;
6269 if (dest_mode == V8HImode && op_mode == V8HImode)
6270 return CODE_FOR_cgt_v8hi;
6271 if (dest_mode == V4SImode && op_mode == V4SImode)
6272 return CODE_FOR_cgt_v4si;
6273 if (dest_mode == V4SImode && op_mode == V4SFmode)
6274 return CODE_FOR_cgt_v4sf;
6275 if (dest_mode == V2DImode && op_mode == V2DFmode)
6276 return CODE_FOR_cgt_v2df;
6277 break;
6278 case GTU:
6279 if (dest_mode == V16QImode && op_mode == V16QImode)
6280 return CODE_FOR_clgt_v16qi;
6281 if (dest_mode == V8HImode && op_mode == V8HImode)
6282 return CODE_FOR_clgt_v8hi;
6283 if (dest_mode == V4SImode && op_mode == V4SImode)
6284 return CODE_FOR_clgt_v4si;
6285 break;
6286 default:
6287 break;
6288 }
6289 return -1;
6290}
6291
6292/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6293 DMODE is expected destination mode. This is a recursive function. */
6294
6295static rtx
6296spu_emit_vector_compare (enum rtx_code rcode,
6297 rtx op0, rtx op1,
6298 enum machine_mode dmode)
6299{
6300 int vec_cmp_insn;
6301 rtx mask;
6302 enum machine_mode dest_mode;
6303 enum machine_mode op_mode = GET_MODE (op1);
6304
6305 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6306
 6307  /* Floating-point vector compare instructions use a V4SImode destination.
 6308     Double-precision vector compare instructions use a V2DImode destination.
 6309     Move the destination to the appropriate mode later.  */
6310 if (dmode == V4SFmode)
6311 dest_mode = V4SImode;
6312 else if (dmode == V2DFmode)
6313 dest_mode = V2DImode;
6314 else
6315 dest_mode = dmode;
6316
6317 mask = gen_reg_rtx (dest_mode);
6318 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6319
6320 if (vec_cmp_insn == -1)
6321 {
6322 bool swap_operands = false;
6323 bool try_again = false;
6324 switch (rcode)
6325 {
6326 case LT:
6327 rcode = GT;
6328 swap_operands = true;
6329 try_again = true;
6330 break;
6331 case LTU:
6332 rcode = GTU;
6333 swap_operands = true;
6334 try_again = true;
6335 break;
6336 case NE:
6337 /* Treat A != B as ~(A==B). */
6338 {
6339 enum insn_code nor_code;
6340 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6341 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6342 gcc_assert (nor_code != CODE_FOR_nothing);
6343 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6344 if (dmode != dest_mode)
6345 {
6346 rtx temp = gen_reg_rtx (dest_mode);
6347 convert_move (temp, mask, 0);
6348 return temp;
6349 }
6350 return mask;
6351 }
6352 break;
6353 case GE:
6354 case GEU:
6355 case LE:
6356 case LEU:
6357 /* Try GT/GTU/LT/LTU OR EQ */
6358 {
6359 rtx c_rtx, eq_rtx;
6360 enum insn_code ior_code;
6361 enum rtx_code new_code;
6362
6363 switch (rcode)
6364 {
6365 case GE: new_code = GT; break;
6366 case GEU: new_code = GTU; break;
6367 case LE: new_code = LT; break;
6368 case LEU: new_code = LTU; break;
6369 default:
6370 gcc_unreachable ();
6371 }
6372
6373 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6374 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6375
d6bf3b14 6376 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6377 gcc_assert (ior_code != CODE_FOR_nothing);
6378 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6379 if (dmode != dest_mode)
6380 {
6381 rtx temp = gen_reg_rtx (dest_mode);
6382 convert_move (temp, mask, 0);
6383 return temp;
6384 }
6385 return mask;
6386 }
6387 break;
6388 default:
6389 gcc_unreachable ();
6390 }
6391
6392 /* You only get two chances. */
6393 if (try_again)
6394 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6395
6396 gcc_assert (vec_cmp_insn != -1);
6397
6398 if (swap_operands)
6399 {
6400 rtx tmp;
6401 tmp = op0;
6402 op0 = op1;
6403 op1 = tmp;
6404 }
6405 }
6406
6407 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6408 if (dmode != dest_mode)
6409 {
6410 rtx temp = gen_reg_rtx (dest_mode);
6411 convert_move (temp, mask, 0);
6412 return temp;
6413 }
6414 return mask;
6415}
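
/* Worked example (added commentary): a V4SImode LE comparison has no
   direct insn, so the code above rewrites it as (LT or EQ); the recursive
   LT in turn has no insn either and becomes GT with the operands swapped.
   The two masks are then combined with the ior_optab handler.
   Floating-point requests additionally convert the V4SImode/V2DImode mask
   back to the requested destination mode at the end.  */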
6416
6417
6418/* Emit vector conditional expression.
6419 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6420 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6421
6422int
6423spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6424 rtx cond, rtx cc_op0, rtx cc_op1)
6425{
6426 enum machine_mode dest_mode = GET_MODE (dest);
6427 enum rtx_code rcode = GET_CODE (cond);
6428 rtx mask;
6429
6430 /* Get the vector mask for the given relational operations. */
6431 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6432
6433 emit_insn(gen_selb (dest, op2, op1, mask));
6434
6435 return 1;
6436}
6437
6352eedf 6438static rtx
6439spu_force_reg (enum machine_mode mode, rtx op)
6440{
6441 rtx x, r;
6442 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6443 {
6444 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6445 || GET_MODE (op) == BLKmode)
6446 return force_reg (mode, convert_to_mode (mode, op, 0));
6447 abort ();
6448 }
6449
6450 r = force_reg (GET_MODE (op), op);
6451 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6452 {
6453 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6454 if (x)
6455 return x;
6456 }
6457
6458 x = gen_reg_rtx (mode);
6459 emit_insn (gen_spu_convert (x, r));
6460 return x;
6461}
6462
6463static void
6464spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6465{
6466 HOST_WIDE_INT v = 0;
6467 int lsbits;
6468 /* Check the range of immediate operands. */
6469 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6470 {
6471 int range = p - SPU_BTI_7;
5df189be 6472
6473 if (!CONSTANT_P (op))
bf776685 6474 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6475 d->name,
6476 spu_builtin_range[range].low, spu_builtin_range[range].high);
6477
6478 if (GET_CODE (op) == CONST
6479 && (GET_CODE (XEXP (op, 0)) == PLUS
6480 || GET_CODE (XEXP (op, 0)) == MINUS))
6481 {
6482 v = INTVAL (XEXP (XEXP (op, 0), 1));
6483 op = XEXP (XEXP (op, 0), 0);
6484 }
6485 else if (GET_CODE (op) == CONST_INT)
6486 v = INTVAL (op);
5df189be 6487 else if (GET_CODE (op) == CONST_VECTOR
6488 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6489 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6490
6491 /* The default for v is 0 which is valid in every range. */
6492 if (v < spu_builtin_range[range].low
6493 || v > spu_builtin_range[range].high)
bf776685 6494 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6495 d->name,
6496 spu_builtin_range[range].low, spu_builtin_range[range].high,
6497 v);
6352eedf 6498
6499 switch (p)
6500 {
6501 case SPU_BTI_S10_4:
6502 lsbits = 4;
6503 break;
6504 case SPU_BTI_U16_2:
 6505	      /* This is only used in lqa and stqa.  Even though the insns
6506 encode 16 bits of the address (all but the 2 least
6507 significant), only 14 bits are used because it is masked to
6508 be 16 byte aligned. */
6509 lsbits = 4;
6510 break;
6511 case SPU_BTI_S16_2:
6512 /* This is used for lqr and stqr. */
6513 lsbits = 2;
6514 break;
6515 default:
6516 lsbits = 0;
6517 }
6518
6519 if (GET_CODE (op) == LABEL_REF
6520 || (GET_CODE (op) == SYMBOL_REF
6521 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6522 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6523 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6524 d->name);
6525 }
6526}
6527
6528
70ca06f8 6529static int
5df189be 6530expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6531 rtx target, rtx ops[])
6532{
bc620c5c 6533 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6534 int i = 0, a;
6352eedf 6535
6536 /* Expand the arguments into rtl. */
6537
6538 if (d->parm[0] != SPU_BTI_VOID)
6539 ops[i++] = target;
6540
70ca06f8 6541 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6542 {
5df189be 6543 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6544 if (arg == 0)
6545 abort ();
b9c74b4d 6546 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6547 }
70ca06f8 6548
32f79657 6549 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6550 return i;
6352eedf 6551}
6552
6553static rtx
6554spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6555 tree exp, rtx target)
6352eedf 6556{
6557 rtx pat;
6558 rtx ops[8];
bc620c5c 6559 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6560 enum machine_mode mode, tmode;
6561 int i, p;
70ca06f8 6562 int n_operands;
6352eedf 6563 tree return_type;
6564
6565 /* Set up ops[] with values from arglist. */
70ca06f8 6566 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6567
6568 /* Handle the target operand which must be operand 0. */
6569 i = 0;
6570 if (d->parm[0] != SPU_BTI_VOID)
6571 {
6572
6573 /* We prefer the mode specified for the match_operand otherwise
6574 use the mode from the builtin function prototype. */
6575 tmode = insn_data[d->icode].operand[0].mode;
6576 if (tmode == VOIDmode)
6577 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6578
6579 /* Try to use target because not using it can lead to extra copies
6580 and when we are using all of the registers extra copies leads
6581 to extra spills. */
6582 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6583 ops[0] = target;
6584 else
6585 target = ops[0] = gen_reg_rtx (tmode);
6586
6587 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6588 abort ();
6589
6590 i++;
6591 }
6592
a76866d3 6593 if (d->fcode == SPU_MASK_FOR_LOAD)
6594 {
6595 enum machine_mode mode = insn_data[icode].operand[1].mode;
6596 tree arg;
6597 rtx addr, op, pat;
6598
6599 /* get addr */
5df189be 6600 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6601 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6602 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6603 addr = memory_address (mode, op);
6604
6605 /* negate addr */
6606 op = gen_reg_rtx (GET_MODE (addr));
6607 emit_insn (gen_rtx_SET (VOIDmode, op,
6608 gen_rtx_NEG (GET_MODE (addr), addr)));
6609 op = gen_rtx_MEM (mode, op);
6610
6611 pat = GEN_FCN (icode) (target, op);
6612 if (!pat)
6613 return 0;
6614 emit_insn (pat);
6615 return target;
6616 }
6617
6352eedf 6618  /* Ignore align_hint, but still expand its args in case they have
6619 side effects. */
6620 if (icode == CODE_FOR_spu_align_hint)
6621 return 0;
6622
6623 /* Handle the rest of the operands. */
70ca06f8 6624 for (p = 1; i < n_operands; i++, p++)
6352eedf 6625 {
6626 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6627 mode = insn_data[d->icode].operand[i].mode;
6628 else
6629 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6630
6631 /* mode can be VOIDmode here for labels */
6632
6633 /* For specific intrinsics with an immediate operand, e.g.,
6634 si_ai(), we sometimes need to convert the scalar argument to a
6635 vector argument by splatting the scalar. */
6636 if (VECTOR_MODE_P (mode)
6637 && (GET_CODE (ops[i]) == CONST_INT
6638 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6639 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6640 {
6641 if (GET_CODE (ops[i]) == CONST_INT)
6642 ops[i] = spu_const (mode, INTVAL (ops[i]));
6643 else
6644 {
6645 rtx reg = gen_reg_rtx (mode);
6646 enum machine_mode imode = GET_MODE_INNER (mode);
6647 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6648 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6649 if (imode != GET_MODE (ops[i]))
6650 ops[i] = convert_to_mode (imode, ops[i],
6651 TYPE_UNSIGNED (spu_builtin_types
6652 [d->parm[i]]));
6653 emit_insn (gen_spu_splats (reg, ops[i]));
6654 ops[i] = reg;
6655 }
6656 }
6657
5df189be 6658 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6659
6352eedf 6660 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6661 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6662 }
6663
70ca06f8 6664 switch (n_operands)
6352eedf 6665 {
6666 case 0:
6667 pat = GEN_FCN (icode) (0);
6668 break;
6669 case 1:
6670 pat = GEN_FCN (icode) (ops[0]);
6671 break;
6672 case 2:
6673 pat = GEN_FCN (icode) (ops[0], ops[1]);
6674 break;
6675 case 3:
6676 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6677 break;
6678 case 4:
6679 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6680 break;
6681 case 5:
6682 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6683 break;
6684 case 6:
6685 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6686 break;
6687 default:
6688 abort ();
6689 }
6690
6691 if (!pat)
6692 abort ();
6693
6694 if (d->type == B_CALL || d->type == B_BISLED)
6695 emit_call_insn (pat);
6696 else if (d->type == B_JUMP)
6697 {
6698 emit_jump_insn (pat);
6699 emit_barrier ();
6700 }
6701 else
6702 emit_insn (pat);
6703
6704 return_type = spu_builtin_types[d->parm[0]];
6705 if (d->parm[0] != SPU_BTI_VOID
6706 && GET_MODE (target) != TYPE_MODE (return_type))
6707 {
6708 /* target is the return value. It should always be the mode of
6709 the builtin function prototype. */
6710 target = spu_force_reg (TYPE_MODE (return_type), target);
6711 }
6712
6713 return target;
6714}
6715
6716rtx
6717spu_expand_builtin (tree exp,
6718 rtx target,
6719 rtx subtarget ATTRIBUTE_UNUSED,
6720 enum machine_mode mode ATTRIBUTE_UNUSED,
6721 int ignore ATTRIBUTE_UNUSED)
6722{
5df189be 6723 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6724 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6725 struct spu_builtin_description *d;
6726
6727 if (fcode < NUM_SPU_BUILTINS)
6728 {
6729 d = &spu_builtins[fcode];
6730
5df189be 6731 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6732 }
6733 abort ();
6734}
6735
e99f512d 6736/* Implement targetm.vectorize.builtin_mul_widen_even. */
6737static tree
6738spu_builtin_mul_widen_even (tree type)
6739{
e99f512d 6740 switch (TYPE_MODE (type))
6741 {
6742 case V8HImode:
6743 if (TYPE_UNSIGNED (type))
0c5c4d59 6744 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6745 else
0c5c4d59 6746 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6747 break;
6748 default:
6749 return NULL_TREE;
6750 }
6751}
6752
6753/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6754static tree
6755spu_builtin_mul_widen_odd (tree type)
6756{
6757 switch (TYPE_MODE (type))
6758 {
6759 case V8HImode:
6760 if (TYPE_UNSIGNED (type))
0c5c4d59 6761 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6762 else
0c5c4d59 6763 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6764 break;
6765 default:
6766 return NULL_TREE;
6767 }
6768}
6769
a76866d3 6770/* Implement targetm.vectorize.builtin_mask_for_load. */
6771static tree
6772spu_builtin_mask_for_load (void)
6773{
0c5c4d59 6774 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6775}
5df189be 6776
a28df51d 6777/* Implement targetm.vectorize.builtin_vectorization_cost. */
6778static int
0822b158 6779spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6780 tree vectype ATTRIBUTE_UNUSED,
6781 int misalign ATTRIBUTE_UNUSED)
559093aa 6782{
6783 switch (type_of_cost)
6784 {
6785 case scalar_stmt:
6786 case vector_stmt:
6787 case vector_load:
6788 case vector_store:
6789 case vec_to_scalar:
6790 case scalar_to_vec:
6791 case cond_branch_not_taken:
6792 case vec_perm:
6793 return 1;
6794
6795 case scalar_store:
6796 return 10;
6797
6798 case scalar_load:
6799 /* Load + rotate. */
6800 return 2;
6801
6802 case unaligned_load:
6803 return 2;
6804
6805 case cond_branch_taken:
6806 return 6;
6807
6808 default:
6809 gcc_unreachable ();
6810 }
a28df51d 6811}
6812
0e87db76 6813/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6814   after applying N iterations.  This routine does not determine how many
 6815   iterations are required to reach the desired alignment.  */
6816
6817static bool
a9f1838b 6818spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6819{
6820 if (is_packed)
6821 return false;
6822
6823 /* All other types are naturally aligned. */
6824 return true;
6825}
6826
a0515226 6827/* Implement targetm.vectorize.builtin_vec_perm. */
6828tree
6829spu_builtin_vec_perm (tree type, tree *mask_element_type)
6830{
a0515226 6831 *mask_element_type = unsigned_char_type_node;
6832
6833 switch (TYPE_MODE (type))
6834 {
6835 case V16QImode:
6836 if (TYPE_UNSIGNED (type))
0c5c4d59 6837 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6838 else
0c5c4d59 6839 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6840
6841 case V8HImode:
6842 if (TYPE_UNSIGNED (type))
0c5c4d59 6843 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6844 else
0c5c4d59 6845 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6846
6847 case V4SImode:
6848 if (TYPE_UNSIGNED (type))
0c5c4d59 6849 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6850 else
0c5c4d59 6851 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6852
6853 case V2DImode:
6854 if (TYPE_UNSIGNED (type))
0c5c4d59 6855 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6856 else
0c5c4d59 6857 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6858
6859 case V4SFmode:
0c5c4d59 6860 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6861
6862 case V2DFmode:
0c5c4d59 6863 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6864
6865 default:
6866 return NULL_TREE;
6867 }
a0515226 6868}
6869
6cf5579e 6870/* Return the appropriate mode for a named address pointer. */
6871static enum machine_mode
6872spu_addr_space_pointer_mode (addr_space_t addrspace)
6873{
6874 switch (addrspace)
6875 {
6876 case ADDR_SPACE_GENERIC:
6877 return ptr_mode;
6878 case ADDR_SPACE_EA:
6879 return EAmode;
6880 default:
6881 gcc_unreachable ();
6882 }
6883}
6884
6885/* Return the appropriate mode for a named address address. */
6886static enum machine_mode
6887spu_addr_space_address_mode (addr_space_t addrspace)
6888{
6889 switch (addrspace)
6890 {
6891 case ADDR_SPACE_GENERIC:
6892 return Pmode;
6893 case ADDR_SPACE_EA:
6894 return EAmode;
6895 default:
6896 gcc_unreachable ();
6897 }
6898}
6899
6900/* Determine if one named address space is a subset of another. */
6901
6902static bool
6903spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6904{
6905 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6906 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6907
6908 if (subset == superset)
6909 return true;
6910
6911 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6912 being subsets but instead as disjoint address spaces. */
6913 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6914 return false;
6915
6916 else
6917 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6918}
6919
6920/* Convert from one address space to another. */
6921static rtx
6922spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6923{
6924 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6925 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6926
6927 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6928 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6929
6930 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6931 {
6932 rtx result, ls;
6933
6934 ls = gen_const_mem (DImode,
6935 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6936 set_mem_align (ls, 128);
6937
6938 result = gen_reg_rtx (Pmode);
6939 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6940 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6941 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6942 ls, const0_rtx, Pmode, 1);
6943
6944 emit_insn (gen_subsi3 (result, op, ls));
6945
6946 return result;
6947 }
6948
6949 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6950 {
6951 rtx result, ls;
6952
6953 ls = gen_const_mem (DImode,
6954 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6955 set_mem_align (ls, 128);
6956
6957 result = gen_reg_rtx (EAmode);
6958 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6959 op = force_reg (Pmode, op);
6960 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6961 ls, const0_rtx, EAmode, 1);
6962 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6963
6964 if (EAmode == SImode)
6965 emit_insn (gen_addsi3 (result, op, ls));
6966 else
6967 emit_insn (gen_adddi3 (result, op, ls));
6968
6969 return result;
6970 }
6971
6972 else
6973 gcc_unreachable ();
6974}
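
/* A reading of the conversion above (added commentary): __ea_local_store
   holds the effective address at which this SPU's local store is mapped,
   so an __ea pointer is converted to a generic (local-store) pointer by
   subtracting that base, and a generic pointer to __ea by adding it.  The
   conditional moves force the base to 0 when the operand is 0, so a NULL
   pointer converts to NULL in either direction.  */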
6975
6976
d52fd16a 6977/* Count the total number of instructions in each pipe and return the
6978 maximum, which is used as the Minimum Iteration Interval (MII)
6979 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6980   A value of -2 means the instruction can go in either pipe0 or pipe1.  */
6981static int
6982spu_sms_res_mii (struct ddg *g)
6983{
6984 int i;
6985 unsigned t[4] = {0, 0, 0, 0};
6986
6987 for (i = 0; i < g->num_nodes; i++)
6988 {
6989 rtx insn = g->nodes[i].insn;
6990 int p = get_pipe (insn) + 2;
6991
1e944a0b 6992 gcc_assert (p >= 0);
6993 gcc_assert (p < 4);
d52fd16a 6994
6995 t[p]++;
6996 if (dump_file && INSN_P (insn))
6997 fprintf (dump_file, "i%d %s %d %d\n",
6998 INSN_UID (insn),
6999 insn_data[INSN_CODE(insn)].name,
7000 p, t[p]);
7001 }
7002 if (dump_file)
7003 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7004
7005 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7006}
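
/* Worked example (added commentary): t[0] counts either-pipe instructions,
   t[2] pipe0-only and t[3] pipe1-only (t[1] is unused by the bound).  With
   t[0] = 4, t[2] = 3 and t[3] = 2 the result is
   MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2)) = MAX (5, 3) = 5, i.e. the MII is
   limited here by total dual-issue bandwidth rather than by either pipe.  */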
7007
7008
5df189be 7009void
7010spu_init_expanders (void)
9d98604b 7011{
5df189be 7012 if (cfun)
9d98604b 7013 {
7014 rtx r0, r1;
 7015      /* The hard frame pointer register is only 128-bit aligned when
7016 frame_pointer_needed is true. We don't know that until we're
7017 expanding the prologue. */
7018 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7019
7020 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7021 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7022 to be treated as aligned, so generate them here. */
7023 r0 = gen_reg_rtx (SImode);
7024 r1 = gen_reg_rtx (SImode);
7025 mark_reg_pointer (r0, 128);
7026 mark_reg_pointer (r1, 128);
7027 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7028 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7029 }
ea32e033 7030}
7031
7032static enum machine_mode
7033spu_libgcc_cmp_return_mode (void)
7034{
7035
 7036/* For SPU, word mode is TImode, so it is better to use SImode
 7037   for compare returns.  */
7038 return SImode;
7039}
7040
7041static enum machine_mode
7042spu_libgcc_shift_count_mode (void)
7043{
 7044/* For SPU, word mode is TImode, so it is better to use SImode
 7045   for shift counts.  */
7046 return SImode;
7047}
5a976006 7048
7049/* An early place to adjust some flags after GCC has finished processing
7050 * them. */
7051static void
7052asm_file_start (void)
7053{
7054 /* Variable tracking should be run after all optimizations which
0ef14db8 7055 change order of insns. It also needs a valid CFG. Therefore,
7056 *if* we make nontrivial changes in machine-dependent reorg,
7057 run variable tracking after those. However, if we do not run
7058 our machine-dependent reorg pass, we must still run the normal
7059 variable tracking pass (or else we will ICE in final since
7060 debug insns have not been removed). */
7061 if (TARGET_BRANCH_HINTS && optimize)
7062 {
7063 spu_flag_var_tracking = flag_var_tracking;
7064 flag_var_tracking = 0;
7065 }
5a976006 7066
7067 default_file_start ();
7068}
7069
a08dfd55 7070/* Implement targetm.section_type_flags. */
7071static unsigned int
7072spu_section_type_flags (tree decl, const char *name, int reloc)
7073{
7074 /* .toe needs to have type @nobits. */
7075 if (strcmp (name, ".toe") == 0)
7076 return SECTION_BSS;
6cf5579e 7077 /* Don't load _ea into the current address space. */
7078 if (strcmp (name, "._ea") == 0)
7079 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7080 return default_section_type_flags (decl, name, reloc);
7081}
c2233b46 7082
6cf5579e 7083/* Implement targetm.select_section. */
7084static section *
7085spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7086{
7087 /* Variables and constants defined in the __ea address space
7088 go into a special section named "._ea". */
7089 if (TREE_TYPE (decl) != error_mark_node
7090 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7091 {
7092 /* We might get called with string constants, but get_named_section
7093 doesn't like them as they are not DECLs. Also, we need to set
7094 flags in that case. */
7095 if (!DECL_P (decl))
7096 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7097
7098 return get_named_section (decl, "._ea", reloc);
7099 }
7100
7101 return default_elf_select_section (decl, reloc, align);
7102}
7103
7104/* Implement targetm.unique_section. */
7105static void
7106spu_unique_section (tree decl, int reloc)
7107{
7108 /* We don't support unique section names in the __ea address
7109 space for now. */
7110 if (TREE_TYPE (decl) != error_mark_node
7111 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7112 return;
7113
7114 default_unique_section (decl, reloc);
7115}
7116
56c7bfc2 7117/* Generate a constant or register which contains 2^SCALE. We assume
7118 the result is valid for MODE. Currently, MODE must be V4SFmode and
7119 SCALE must be SImode. */
7120rtx
7121spu_gen_exp2 (enum machine_mode mode, rtx scale)
7122{
7123 gcc_assert (mode == V4SFmode);
7124 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7125 if (GET_CODE (scale) != CONST_INT)
7126 {
7127 /* unsigned int exp = (127 + scale) << 23;
7128 __vector float m = (__vector float) spu_splats (exp); */
7129 rtx reg = force_reg (SImode, scale);
7130 rtx exp = gen_reg_rtx (SImode);
7131 rtx mul = gen_reg_rtx (mode);
7132 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7133 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7134 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7135 return mul;
7136 }
7137 else
7138 {
7139 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7140 unsigned char arr[16];
7141 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7142 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7143 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7144 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7145 return array_to_constant (mode, arr);
7146 }
7147}
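
/* Worked example of the constant path above (added commentary): for
   SCALE = 2 the biased exponent is exp = 127 + 2 = 129, so each element's
   bytes are { exp >> 1, exp << 7, 0, 0 } = { 0x40, 0x80, 0, 0 }, i.e. the
   IEEE single 0x40800000 = 4.0f = 2^2.  The non-constant path computes the
   same bit pattern at run time, (127 + scale) << 23, and splats it.  */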
7148
9d98604b 7149/* After reload, just change the convert into a move instruction
7150 or a dead instruction. */
7151void
7152spu_split_convert (rtx ops[])
7153{
7154 if (REGNO (ops[0]) == REGNO (ops[1]))
7155 emit_note (NOTE_INSN_DELETED);
7156 else
7157 {
7158 /* Use TImode always as this might help hard reg copyprop. */
7159 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7160 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7161 emit_insn (gen_move_insn (op0, op1));
7162 }
7163}
7164
b3878a6c 7165void
4cbad5bb 7166spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7167{
7168 fprintf (file, "# profile\n");
7169 fprintf (file, "brsl $75, _mcount\n");
7170}
7171
329c1e4e 7172/* Implement targetm.ref_may_alias_errno. */
7173static bool
7174spu_ref_may_alias_errno (ao_ref *ref)
7175{
7176 tree base = ao_ref_base (ref);
7177
7178 /* With SPU newlib, errno is defined as something like
7179 _impure_data._errno
7180 The default implementation of this target macro does not
7181 recognize such expressions, so special-code for it here. */
7182
7183 if (TREE_CODE (base) == VAR_DECL
7184 && !TREE_STATIC (base)
7185 && DECL_EXTERNAL (base)
7186 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7187 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7188 "_impure_data") == 0
7189 /* _errno is the first member of _impure_data. */
7190 && ref->offset == 0)
7191 return true;
7192
7193 return default_ref_may_alias_errno (ref);
7194}
7195
c2233b46 7196#include "gt-spu.h"