7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "ggc.h"
40#include "hashtab.h"
41#include "tm_p.h"
42#include "target.h"
43#include "target-def.h"
44#include "langhooks.h"
45#include "reload.h"
46#include "cfglayout.h"
47#include "sched-int.h"
48#include "params.h"
644459d0 49#include "machmode.h"
75a70cf9 50#include "gimple.h"
644459d0 51#include "tm-constrs.h"
d52fd16a 52#include "ddg.h"
5a976006 53#include "sbitmap.h"
54#include "timevar.h"
55#include "df.h"
6352eedf 56
57/* Builtin types, data and prototypes. */
c2233b46 58
59enum spu_builtin_type_index
60{
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110};
111
112#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
6352eedf 125struct spu_builtin_range
126{
127 int low, high;
128};
129
130static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143};
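 /* For example, SPU_BTI_S10 above accepts a signed 10-bit immediate,
    i.e. values in [-0x200, 0x1ff] (-512..511), and SPU_BTI_U7 accepts an
    unsigned 7-bit immediate in [0, 0x7f].  The builtin expanders use this
    table to range-check constant operands that correspond to the
    SPU_BTI_7 .. SPU_BTI_U18 codes above.  */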
144
644459d0 145\f
146/* Target specific attribute specifications. */
147char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149/* Prototypes and external defs. */
4c834714 150static void spu_option_override (void);
cc07c468 151static void spu_option_init_struct (struct gcc_options *opts);
686e2769 152static void spu_option_default_params (void);
644459d0 153static void spu_init_builtins (void);
e6925042 154static tree spu_builtin_decl (unsigned, bool);
b62e30b8 155static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 157static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 158static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
644459d0 160static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161static rtx get_pic_reg (void);
162static int need_to_save_reg (int regno, int saving);
163static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167static void emit_nop_for_insn (rtx insn);
168static bool insn_clobbers_hbr (rtx insn);
169static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 170 int distance, sbitmap blocks);
5474166e 171static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
644459d0 173static rtx get_branch_target (rtx branch);
644459d0 174static void spu_machine_dependent_reorg (void);
175static int spu_sched_issue_rate (void);
176static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178static int get_pipe (rtx insn);
644459d0 179static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 180static void spu_sched_init_global (FILE *, int, int);
181static void spu_sched_init (FILE *, int, int);
182static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 183static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
b62e30b8 185 bool *no_add_attrs);
644459d0 186static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
b62e30b8 188 bool *no_add_attrs);
644459d0 189static int spu_naked_function_p (tree func);
b62e30b8 190static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
ee9034d4 192static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
644459d0 196static tree spu_build_builtin_va_list (void);
8a58ed0a 197static void spu_va_start (tree, rtx);
75a70cf9 198static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 200static int store_with_one_insn_p (rtx mem);
644459d0 201static int mem_is_padded_component_ref (rtx x);
9d98604b 202static int reg_aligned_for_addr (rtx x);
644459d0 203static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 205static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 208static void spu_init_libfuncs (void);
fb80456a 209static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 210static void fix_range (const char *);
69ced2d6 211static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 212static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 213static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
e99f512d 215static tree spu_builtin_mul_widen_even (tree);
216static tree spu_builtin_mul_widen_odd (tree);
a76866d3 217static tree spu_builtin_mask_for_load (void);
0822b158 218static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 219static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 220static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
5a976006 226static void asm_file_start (void);
a08dfd55 227static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 228static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229static void spu_unique_section (tree, int);
9d98604b 230static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 231static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 232static void spu_conditional_register_usage (void);
329c1e4e 233static bool spu_ref_may_alias_errno (ao_ref *);
644459d0 234
5474166e 235/* Which instruction set architecture to use. */
236int spu_arch;
237/* Which cpu are we tuning for. */
238int spu_tune;
239
5a976006 240/* The hardware requires 8 insns between a hint and the branch it
 241 affects. This variable describes how many rtl instructions the
242 compiler needs to see before inserting a hint, and then the compiler
243 will insert enough nops to make it at least 8 insns. The default is
 244 for the compiler to allow up to 2 nops to be emitted. The nops are
245 inserted in pairs, so we round down. */
246int spu_hint_dist = (8*4) - (2*4);
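/* For example: 8 insns * 4 bytes = 32 bytes between hint and branch,
   minus the default budget of 2 nops * 4 bytes, gives the 24-byte value
   above.  spu_option_override below recomputes this as
   8*4 - spu_max_nops*4, clamping the result at 0.  */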
247
248/* Determines whether we run variable tracking in machine dependent
249 reorganization. */
250static int spu_flag_var_tracking;
251
644459d0 252enum spu_immediate {
253 SPU_NONE,
254 SPU_IL,
255 SPU_ILA,
256 SPU_ILH,
257 SPU_ILHU,
258 SPU_ORI,
259 SPU_ORHI,
260 SPU_ORBI,
99369027 261 SPU_IOHL
644459d0 262};
dea01258 263enum immediate_class
264{
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
5df189be 272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 273};
644459d0 274
275static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 277static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
644459d0 280
1bd43494 281static enum machine_mode spu_unwind_word_mode (void);
282
ea32e033 283static enum machine_mode
284spu_libgcc_cmp_return_mode (void);
285
286static enum machine_mode
287spu_libgcc_shift_count_mode (void);
6cf5579e 288
289/* Pointer mode for __ea references. */
290#define EAmode (spu_ea_model != 32 ? DImode : SImode)
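/* E.g. with the 64-bit __ea model (-mea64, spu_ea_model != 32) an __ea
   pointer is kept in DImode; with -mea32 it fits in SImode.  */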
291
ef51d1e3 292\f
293/* Table of machine attributes. */
294static const struct attribute_spec spu_attribute_table[] =
295{
ac86af5d 296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
297 affects_type_identity } */
298 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
299 false },
300 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
301 false },
302 { NULL, 0, 0, false, false, false, NULL, false }
ef51d1e3 303};
644459d0 304\f
305/* TARGET overrides. */
306
6cf5579e 307#undef TARGET_ADDR_SPACE_POINTER_MODE
308#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
309
310#undef TARGET_ADDR_SPACE_ADDRESS_MODE
311#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
312
313#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
314#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
315 spu_addr_space_legitimate_address_p
316
317#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
318#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
319
320#undef TARGET_ADDR_SPACE_SUBSET_P
321#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
322
323#undef TARGET_ADDR_SPACE_CONVERT
324#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
325
644459d0 326#undef TARGET_INIT_BUILTINS
327#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 328#undef TARGET_BUILTIN_DECL
329#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 330
644459d0 331#undef TARGET_EXPAND_BUILTIN
332#define TARGET_EXPAND_BUILTIN spu_expand_builtin
333
1bd43494 334#undef TARGET_UNWIND_WORD_MODE
335#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 336
41e3a0c7 337#undef TARGET_LEGITIMIZE_ADDRESS
338#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
339
6cf5579e 340/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
341 and .quad for the debugger. When it is known that the assembler is fixed,
342 these can be removed. */
343#undef TARGET_ASM_UNALIGNED_SI_OP
344#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
345
346#undef TARGET_ASM_ALIGNED_DI_OP
347#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
348
644459d0 349/* The .8byte directive doesn't seem to work well for a 32 bit
350 architecture. */
351#undef TARGET_ASM_UNALIGNED_DI_OP
352#define TARGET_ASM_UNALIGNED_DI_OP NULL
353
354#undef TARGET_RTX_COSTS
355#define TARGET_RTX_COSTS spu_rtx_costs
356
357#undef TARGET_ADDRESS_COST
f529eb25 358#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 359
360#undef TARGET_SCHED_ISSUE_RATE
361#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
362
5a976006 363#undef TARGET_SCHED_INIT_GLOBAL
364#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
365
366#undef TARGET_SCHED_INIT
367#define TARGET_SCHED_INIT spu_sched_init
368
644459d0 369#undef TARGET_SCHED_VARIABLE_ISSUE
370#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
371
5a976006 372#undef TARGET_SCHED_REORDER
373#define TARGET_SCHED_REORDER spu_sched_reorder
374
375#undef TARGET_SCHED_REORDER2
376#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 377
378#undef TARGET_SCHED_ADJUST_COST
379#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
380
644459d0 381#undef TARGET_ATTRIBUTE_TABLE
382#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
383
384#undef TARGET_ASM_INTEGER
385#define TARGET_ASM_INTEGER spu_assemble_integer
386
387#undef TARGET_SCALAR_MODE_SUPPORTED_P
388#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
389
390#undef TARGET_VECTOR_MODE_SUPPORTED_P
391#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
392
393#undef TARGET_FUNCTION_OK_FOR_SIBCALL
394#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
395
396#undef TARGET_ASM_GLOBALIZE_LABEL
397#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
398
399#undef TARGET_PASS_BY_REFERENCE
400#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
401
ee9034d4 402#undef TARGET_FUNCTION_ARG
403#define TARGET_FUNCTION_ARG spu_function_arg
404
405#undef TARGET_FUNCTION_ARG_ADVANCE
406#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
407
644459d0 408#undef TARGET_MUST_PASS_IN_STACK
409#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
410
411#undef TARGET_BUILD_BUILTIN_VA_LIST
412#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
413
8a58ed0a 414#undef TARGET_EXPAND_BUILTIN_VA_START
415#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
416
644459d0 417#undef TARGET_SETUP_INCOMING_VARARGS
418#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
419
420#undef TARGET_MACHINE_DEPENDENT_REORG
421#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
422
423#undef TARGET_GIMPLIFY_VA_ARG_EXPR
424#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
425
426#undef TARGET_DEFAULT_TARGET_FLAGS
427#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
428
429#undef TARGET_INIT_LIBFUNCS
430#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
431
432#undef TARGET_RETURN_IN_MEMORY
433#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
434
69ced2d6 435#undef TARGET_ENCODE_SECTION_INFO
436#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
437
e99f512d 438#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
439#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
440
441#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
442#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
443
a76866d3 444#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
445#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
446
a28df51d 447#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
448#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
449
202d6e5f 450#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
451#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 452
a0515226 453#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
454#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
455
ea32e033 456#undef TARGET_LIBGCC_CMP_RETURN_MODE
457#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
458
459#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
460#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
461
d52fd16a 462#undef TARGET_SCHED_SMS_RES_MII
463#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
464
5a976006 465#undef TARGET_ASM_FILE_START
466#define TARGET_ASM_FILE_START asm_file_start
467
a08dfd55 468#undef TARGET_SECTION_TYPE_FLAGS
469#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
470
6cf5579e 471#undef TARGET_ASM_SELECT_SECTION
472#define TARGET_ASM_SELECT_SECTION spu_select_section
473
474#undef TARGET_ASM_UNIQUE_SECTION
475#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
476
fd50b071 477#undef TARGET_LEGITIMATE_ADDRESS_P
478#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
479
e96f2783 480#undef TARGET_TRAMPOLINE_INIT
481#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
482
4c834714 483#undef TARGET_OPTION_OVERRIDE
484#define TARGET_OPTION_OVERRIDE spu_option_override
485
cc07c468 486#undef TARGET_OPTION_INIT_STRUCT
487#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
02e53c17 488
686e2769 489#undef TARGET_OPTION_DEFAULT_PARAMS
490#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
491
ed6befa5 492#undef TARGET_EXCEPT_UNWIND_INFO
493#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
494
b2d7ede1 495#undef TARGET_CONDITIONAL_REGISTER_USAGE
496#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
497
329c1e4e 498#undef TARGET_REF_MAY_ALIAS_ERRNO
499#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
500
644459d0 501struct gcc_target targetm = TARGET_INITIALIZER;
502
02e53c17 503static void
cc07c468 504spu_option_init_struct (struct gcc_options *opts)
5df189be 505{
5df189be 506 /* With so many registers this is better on by default. */
cc07c468 507 opts->x_flag_rename_registers = 1;
5df189be 508}
509
686e2769 510/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
511static void
512spu_option_default_params (void)
513{
514 /* Override some of the default param values. With so many registers
515 larger values are better for these params. */
516 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
517}
518
4c834714 519/* Implement TARGET_OPTION_OVERRIDE. */
520static void
521spu_option_override (void)
644459d0 522{
14d408d9 523 /* Small loops will be unrolled at -O3. For SPU it is more important
524 to keep code small by default. */
686e2769 525 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 526 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 527 global_options.x_param_values,
528 global_options_set.x_param_values);
14d408d9 529
644459d0 530 flag_omit_frame_pointer = 1;
531
5a976006 532 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 533 if (align_functions < 8)
534 align_functions = 8;
c7b91b14 535
5a976006 536 spu_hint_dist = 8*4 - spu_max_nops*4;
537 if (spu_hint_dist < 0)
538 spu_hint_dist = 0;
539
c7b91b14 540 if (spu_fixed_range_string)
541 fix_range (spu_fixed_range_string);
5474166e 542
543 /* Determine processor architectural level. */
544 if (spu_arch_string)
545 {
546 if (strcmp (&spu_arch_string[0], "cell") == 0)
547 spu_arch = PROCESSOR_CELL;
548 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
549 spu_arch = PROCESSOR_CELLEDP;
550 else
8e181c9d 551 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 552 }
553
554 /* Determine processor to tune for. */
555 if (spu_tune_string)
556 {
557 if (strcmp (&spu_tune_string[0], "cell") == 0)
558 spu_tune = PROCESSOR_CELL;
559 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
560 spu_tune = PROCESSOR_CELLEDP;
561 else
8e181c9d 562 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 563 }
98bbec1e 564
13684256 565 /* Change defaults according to the processor architecture. */
566 if (spu_arch == PROCESSOR_CELLEDP)
567 {
568 /* If no command line option has been otherwise specified, change
569 the default to -mno-safe-hints on celledp -- only the original
570 Cell/B.E. processors require this workaround. */
571 if (!(target_flags_explicit & MASK_SAFE_HINTS))
572 target_flags &= ~MASK_SAFE_HINTS;
573 }
574
98bbec1e 575 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 576}
577\f
578/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
579 struct attribute_spec.handler. */
580
644459d0 581/* True if MODE is valid for the target. By "valid", we mean able to
582 be manipulated in non-trivial ways. In particular, this means all
583 the arithmetic is supported. */
584static bool
585spu_scalar_mode_supported_p (enum machine_mode mode)
586{
587 switch (mode)
588 {
589 case QImode:
590 case HImode:
591 case SImode:
592 case SFmode:
593 case DImode:
594 case TImode:
595 case DFmode:
596 return true;
597
598 default:
599 return false;
600 }
601}
602
603/* Similarly for vector modes. "Supported" here is less strict. At
604 least some operations are supported; need to check optabs or builtins
605 for further details. */
606static bool
607spu_vector_mode_supported_p (enum machine_mode mode)
608{
609 switch (mode)
610 {
611 case V16QImode:
612 case V8HImode:
613 case V4SImode:
614 case V2DImode:
615 case V4SFmode:
616 case V2DFmode:
617 return true;
618
619 default:
620 return false;
621 }
622}
623
624/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
625 least significant bytes of the outer mode. This function returns
626 TRUE for the SUBREG's where this is correct. */
627int
628valid_subreg (rtx op)
629{
630 enum machine_mode om = GET_MODE (op);
631 enum machine_mode im = GET_MODE (SUBREG_REG (op));
632 return om != VOIDmode && im != VOIDmode
633 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 634 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
635 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 636}
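/* For instance, (subreg:SI (reg:QI)) is valid here because both modes are
   at most 4 bytes, and (subreg:V4SI (reg:TI)) is valid because both are
   16 bytes, but (subreg:DI (reg:SI)) is rejected since the 8-byte/4-byte
   pair falls into neither category.  */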
637
 638/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 639 and adjust the start offset. */
644459d0 640static rtx
641adjust_operand (rtx op, HOST_WIDE_INT * start)
642{
643 enum machine_mode mode;
644 int op_size;
38aca5eb 645 /* Strip any paradoxical SUBREG. */
646 if (GET_CODE (op) == SUBREG
647 && (GET_MODE_BITSIZE (GET_MODE (op))
648 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 649 {
650 if (start)
651 *start -=
652 GET_MODE_BITSIZE (GET_MODE (op)) -
653 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
654 op = SUBREG_REG (op);
655 }
 656 /* If it is smaller than SI, ensure a SUBREG */
657 op_size = GET_MODE_BITSIZE (GET_MODE (op));
658 if (op_size < 32)
659 {
660 if (start)
661 *start += 32 - op_size;
662 op_size = 32;
663 }
664 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
665 mode = mode_for_size (op_size, MODE_INT, 0);
666 if (mode != GET_MODE (op))
667 op = gen_rtx_SUBREG (mode, op, 0);
668 return op;
669}
670
671void
672spu_expand_extv (rtx ops[], int unsignedp)
673{
9d98604b 674 rtx dst = ops[0], src = ops[1];
644459d0 675 HOST_WIDE_INT width = INTVAL (ops[2]);
676 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 677 HOST_WIDE_INT align_mask;
678 rtx s0, s1, mask, r0;
644459d0 679
9d98604b 680 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 681
9d98604b 682 if (MEM_P (src))
644459d0 683 {
9d98604b 684 /* First, determine if we need 1 TImode load or 2. We need only 1
685 if the bits being extracted do not cross the alignment boundary
686 as determined by the MEM and its address. */
687
688 align_mask = -MEM_ALIGN (src);
689 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 690 {
9d98604b 691 /* Alignment is sufficient for 1 load. */
692 s0 = gen_reg_rtx (TImode);
693 r0 = spu_expand_load (s0, 0, src, start / 8);
694 start &= 7;
695 if (r0)
696 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 697 }
9d98604b 698 else
699 {
700 /* Need 2 loads. */
701 s0 = gen_reg_rtx (TImode);
702 s1 = gen_reg_rtx (TImode);
703 r0 = spu_expand_load (s0, s1, src, start / 8);
704 start &= 7;
705
706 gcc_assert (start + width <= 128);
707 if (r0)
708 {
709 rtx r1 = gen_reg_rtx (SImode);
710 mask = gen_reg_rtx (TImode);
711 emit_move_insn (mask, GEN_INT (-1));
712 emit_insn (gen_rotqby_ti (s0, s0, r0));
713 emit_insn (gen_rotqby_ti (s1, s1, r0));
714 if (GET_CODE (r0) == CONST_INT)
715 r1 = GEN_INT (INTVAL (r0) & 15);
716 else
717 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
718 emit_insn (gen_shlqby_ti (mask, mask, r1));
719 emit_insn (gen_selb (s0, s1, s0, mask));
720 }
721 }
722
723 }
724 else if (GET_CODE (src) == SUBREG)
725 {
726 rtx r = SUBREG_REG (src);
727 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
728 s0 = gen_reg_rtx (TImode);
729 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
730 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
731 else
732 emit_move_insn (s0, src);
733 }
734 else
735 {
736 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
737 s0 = gen_reg_rtx (TImode);
738 emit_move_insn (s0, src);
644459d0 739 }
740
9d98604b 741 /* Now s0 is TImode and contains the bits to extract at start. */
742
743 if (start)
744 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
745
746 if (128 - width)
644459d0 747 {
9d98604b 748 tree c = build_int_cst (NULL_TREE, 128 - width);
749 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 750 }
751
9d98604b 752 emit_move_insn (dst, s0);
644459d0 753}
754
755void
756spu_expand_insv (rtx ops[])
757{
758 HOST_WIDE_INT width = INTVAL (ops[1]);
759 HOST_WIDE_INT start = INTVAL (ops[2]);
760 HOST_WIDE_INT maskbits;
4cbad5bb 761 enum machine_mode dst_mode;
644459d0 762 rtx dst = ops[0], src = ops[3];
4cbad5bb 763 int dst_size;
644459d0 764 rtx mask;
765 rtx shift_reg;
766 int shift;
767
768
769 if (GET_CODE (ops[0]) == MEM)
770 dst = gen_reg_rtx (TImode);
771 else
772 dst = adjust_operand (dst, &start);
773 dst_mode = GET_MODE (dst);
774 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
775
776 if (CONSTANT_P (src))
777 {
778 enum machine_mode m =
779 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
780 src = force_reg (m, convert_to_mode (m, src, 0));
781 }
782 src = adjust_operand (src, 0);
644459d0 783
784 mask = gen_reg_rtx (dst_mode);
785 shift_reg = gen_reg_rtx (dst_mode);
786 shift = dst_size - start - width;
787
788 /* It's not safe to use subreg here because the compiler assumes
789 that the SUBREG_REG is right justified in the SUBREG. */
790 convert_move (shift_reg, src, 1);
791
792 if (shift > 0)
793 {
794 switch (dst_mode)
795 {
796 case SImode:
797 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
798 break;
799 case DImode:
800 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
801 break;
802 case TImode:
803 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
804 break;
805 default:
806 abort ();
807 }
808 }
809 else if (shift < 0)
810 abort ();
811
812 switch (dst_size)
813 {
814 case 32:
815 maskbits = (-1ll << (32 - width - start));
816 if (start)
817 maskbits += (1ll << (32 - start));
818 emit_move_insn (mask, GEN_INT (maskbits));
819 break;
820 case 64:
821 maskbits = (-1ll << (64 - width - start));
822 if (start)
823 maskbits += (1ll << (64 - start));
824 emit_move_insn (mask, GEN_INT (maskbits));
825 break;
826 case 128:
827 {
828 unsigned char arr[16];
829 int i = start / 8;
830 memset (arr, 0, sizeof (arr));
831 arr[i] = 0xff >> (start & 7);
832 for (i++; i <= (start + width - 1) / 8; i++)
833 arr[i] = 0xff;
834 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
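	/* For instance, start=4, width=8 yields arr = { 0x0f, 0xf0, 0, ... },
	   i.e. a mask covering bit positions 4..11 of the quadword.  */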
835 emit_move_insn (mask, array_to_constant (TImode, arr));
836 }
837 break;
838 default:
839 abort ();
840 }
841 if (GET_CODE (ops[0]) == MEM)
842 {
644459d0 843 rtx low = gen_reg_rtx (SImode);
644459d0 844 rtx rotl = gen_reg_rtx (SImode);
845 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 846 rtx addr;
847 rtx addr0;
848 rtx addr1;
644459d0 849 rtx mem;
850
9d98604b 851 addr = force_reg (Pmode, XEXP (ops[0], 0));
852 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 853 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
854 emit_insn (gen_negsi2 (rotl, low));
855 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
856 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 857 mem = change_address (ops[0], TImode, addr0);
644459d0 858 set_mem_alias_set (mem, 0);
859 emit_move_insn (dst, mem);
860 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 861 if (start + width > MEM_ALIGN (ops[0]))
862 {
863 rtx shl = gen_reg_rtx (SImode);
864 rtx mask1 = gen_reg_rtx (TImode);
865 rtx dst1 = gen_reg_rtx (TImode);
866 rtx mem1;
9d98604b 867 addr1 = plus_constant (addr, 16);
868 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 869 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
870 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 871 mem1 = change_address (ops[0], TImode, addr1);
644459d0 872 set_mem_alias_set (mem1, 0);
873 emit_move_insn (dst1, mem1);
874 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
875 emit_move_insn (mem1, dst1);
876 }
9d98604b 877 emit_move_insn (mem, dst);
644459d0 878 }
879 else
71cd778d 880 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 881}
882
883
884int
885spu_expand_block_move (rtx ops[])
886{
887 HOST_WIDE_INT bytes, align, offset;
888 rtx src, dst, sreg, dreg, target;
889 int i;
890 if (GET_CODE (ops[2]) != CONST_INT
891 || GET_CODE (ops[3]) != CONST_INT
48eb4342 892 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 893 return 0;
894
895 bytes = INTVAL (ops[2]);
896 align = INTVAL (ops[3]);
897
898 if (bytes <= 0)
899 return 1;
900
901 dst = ops[0];
902 src = ops[1];
903
904 if (align == 16)
905 {
906 for (offset = 0; offset + 16 <= bytes; offset += 16)
907 {
908 dst = adjust_address (ops[0], V16QImode, offset);
909 src = adjust_address (ops[1], V16QImode, offset);
910 emit_move_insn (dst, src);
911 }
912 if (offset < bytes)
913 {
914 rtx mask;
915 unsigned char arr[16] = { 0 };
916 for (i = 0; i < bytes - offset; i++)
917 arr[i] = 0xff;
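	  /* E.g. for a 20-byte copy the loop above leaves 4 bytes; arr
	     becomes ff ff ff ff 00 ... and the selb below merges just those
	     4 source bytes into the destination quadword.  */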
918 dst = adjust_address (ops[0], V16QImode, offset);
919 src = adjust_address (ops[1], V16QImode, offset);
920 mask = gen_reg_rtx (V16QImode);
921 sreg = gen_reg_rtx (V16QImode);
922 dreg = gen_reg_rtx (V16QImode);
923 target = gen_reg_rtx (V16QImode);
924 emit_move_insn (mask, array_to_constant (V16QImode, arr));
925 emit_move_insn (dreg, dst);
926 emit_move_insn (sreg, src);
927 emit_insn (gen_selb (target, dreg, sreg, mask));
928 emit_move_insn (dst, target);
929 }
930 return 1;
931 }
932 return 0;
933}
934
935enum spu_comp_code
936{ SPU_EQ, SPU_GT, SPU_GTU };
937
5474166e 938int spu_comp_icode[12][3] = {
939 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
940 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
941 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
942 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
943 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
944 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
945 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
946 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
947 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
948 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
949 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
950 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 951};
952
953/* Generate a compare for CODE. Return a brand-new rtx that represents
954 the result of the compare. GCC can figure this out too if we don't
 955 provide all variations of compares, but since GCC always wants to use
956 WORD_MODE, we can generate better code in most cases if we do it
957 ourselves. */
958void
74f4459c 959spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 960{
961 int reverse_compare = 0;
962 int reverse_test = 0;
5d70b918 963 rtx compare_result, eq_result;
964 rtx comp_rtx, eq_rtx;
644459d0 965 enum machine_mode comp_mode;
966 enum machine_mode op_mode;
b9c74b4d 967 enum spu_comp_code scode, eq_code;
968 enum insn_code ior_code;
74f4459c 969 enum rtx_code code = GET_CODE (cmp);
970 rtx op0 = XEXP (cmp, 0);
971 rtx op1 = XEXP (cmp, 1);
644459d0 972 int index;
5d70b918 973 int eq_test = 0;
644459d0 974
74f4459c 975 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 976 and so on, to keep the constant in operand 1. */
74f4459c 977 if (GET_CODE (op1) == CONST_INT)
644459d0 978 {
74f4459c 979 HOST_WIDE_INT val = INTVAL (op1) - 1;
980 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 981 switch (code)
982 {
983 case GE:
74f4459c 984 op1 = GEN_INT (val);
644459d0 985 code = GT;
986 break;
987 case LT:
74f4459c 988 op1 = GEN_INT (val);
644459d0 989 code = LE;
990 break;
991 case GEU:
74f4459c 992 op1 = GEN_INT (val);
644459d0 993 code = GTU;
994 break;
995 case LTU:
74f4459c 996 op1 = GEN_INT (val);
644459d0 997 code = LEU;
998 break;
999 default:
1000 break;
1001 }
1002 }
1003
5d70b918 1004 comp_mode = SImode;
74f4459c 1005 op_mode = GET_MODE (op0);
5d70b918 1006
644459d0 1007 switch (code)
1008 {
1009 case GE:
644459d0 1010 scode = SPU_GT;
07027691 1011 if (HONOR_NANS (op_mode))
5d70b918 1012 {
1013 reverse_compare = 0;
1014 reverse_test = 0;
1015 eq_test = 1;
1016 eq_code = SPU_EQ;
1017 }
1018 else
1019 {
1020 reverse_compare = 1;
1021 reverse_test = 1;
1022 }
644459d0 1023 break;
1024 case LE:
644459d0 1025 scode = SPU_GT;
07027691 1026 if (HONOR_NANS (op_mode))
5d70b918 1027 {
1028 reverse_compare = 1;
1029 reverse_test = 0;
1030 eq_test = 1;
1031 eq_code = SPU_EQ;
1032 }
1033 else
1034 {
1035 reverse_compare = 0;
1036 reverse_test = 1;
1037 }
644459d0 1038 break;
1039 case LT:
1040 reverse_compare = 1;
1041 reverse_test = 0;
1042 scode = SPU_GT;
1043 break;
1044 case GEU:
1045 reverse_compare = 1;
1046 reverse_test = 1;
1047 scode = SPU_GTU;
1048 break;
1049 case LEU:
1050 reverse_compare = 0;
1051 reverse_test = 1;
1052 scode = SPU_GTU;
1053 break;
1054 case LTU:
1055 reverse_compare = 1;
1056 reverse_test = 0;
1057 scode = SPU_GTU;
1058 break;
1059 case NE:
1060 reverse_compare = 0;
1061 reverse_test = 1;
1062 scode = SPU_EQ;
1063 break;
1064
1065 case EQ:
1066 scode = SPU_EQ;
1067 break;
1068 case GT:
1069 scode = SPU_GT;
1070 break;
1071 case GTU:
1072 scode = SPU_GTU;
1073 break;
1074 default:
1075 scode = SPU_EQ;
1076 break;
1077 }
1078
644459d0 1079 switch (op_mode)
1080 {
1081 case QImode:
1082 index = 0;
1083 comp_mode = QImode;
1084 break;
1085 case HImode:
1086 index = 1;
1087 comp_mode = HImode;
1088 break;
1089 case SImode:
1090 index = 2;
1091 break;
1092 case DImode:
1093 index = 3;
1094 break;
1095 case TImode:
1096 index = 4;
1097 break;
1098 case SFmode:
1099 index = 5;
1100 break;
1101 case DFmode:
1102 index = 6;
1103 break;
1104 case V16QImode:
5474166e 1105 index = 7;
1106 comp_mode = op_mode;
1107 break;
644459d0 1108 case V8HImode:
5474166e 1109 index = 8;
1110 comp_mode = op_mode;
1111 break;
644459d0 1112 case V4SImode:
5474166e 1113 index = 9;
1114 comp_mode = op_mode;
1115 break;
644459d0 1116 case V4SFmode:
5474166e 1117 index = 10;
1118 comp_mode = V4SImode;
1119 break;
644459d0 1120 case V2DFmode:
5474166e 1121 index = 11;
1122 comp_mode = V2DImode;
644459d0 1123 break;
5474166e 1124 case V2DImode:
644459d0 1125 default:
1126 abort ();
1127 }
1128
74f4459c 1129 if (GET_MODE (op1) == DFmode
07027691 1130 && (scode != SPU_GT && scode != SPU_EQ))
1131 abort ();
644459d0 1132
74f4459c 1133 if (is_set == 0 && op1 == const0_rtx
1134 && (GET_MODE (op0) == SImode
1135 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1136 {
1137 /* Don't need to set a register with the result when we are
1138 comparing against zero and branching. */
1139 reverse_test = !reverse_test;
74f4459c 1140 compare_result = op0;
644459d0 1141 }
1142 else
1143 {
1144 compare_result = gen_reg_rtx (comp_mode);
1145
1146 if (reverse_compare)
1147 {
74f4459c 1148 rtx t = op1;
1149 op1 = op0;
1150 op0 = t;
644459d0 1151 }
1152
1153 if (spu_comp_icode[index][scode] == 0)
1154 abort ();
1155
1156 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1157 (op0, op_mode))
1158 op0 = force_reg (op_mode, op0);
644459d0 1159 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1160 (op1, op_mode))
1161 op1 = force_reg (op_mode, op1);
644459d0 1162 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1163 op0, op1);
644459d0 1164 if (comp_rtx == 0)
1165 abort ();
1166 emit_insn (comp_rtx);
1167
5d70b918 1168 if (eq_test)
1169 {
1170 eq_result = gen_reg_rtx (comp_mode);
1171 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1172 op0, op1);
5d70b918 1173 if (eq_rtx == 0)
1174 abort ();
1175 emit_insn (eq_rtx);
d6bf3b14 1176 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1177 gcc_assert (ior_code != CODE_FOR_nothing);
1178 emit_insn (GEN_FCN (ior_code)
1179 (compare_result, compare_result, eq_result));
1180 }
644459d0 1181 }
1182
1183 if (is_set == 0)
1184 {
1185 rtx bcomp;
1186 rtx loc_ref;
1187
1188 /* We don't have branch on QI compare insns, so we convert the
1189 QI compare result to a HI result. */
1190 if (comp_mode == QImode)
1191 {
1192 rtx old_res = compare_result;
1193 compare_result = gen_reg_rtx (HImode);
1194 comp_mode = HImode;
1195 emit_insn (gen_extendqihi2 (compare_result, old_res));
1196 }
1197
1198 if (reverse_test)
1199 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1200 else
1201 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1202
74f4459c 1203 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1204 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1205 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1206 loc_ref, pc_rtx)));
1207 }
1208 else if (is_set == 2)
1209 {
74f4459c 1210 rtx target = operands[0];
644459d0 1211 int compare_size = GET_MODE_BITSIZE (comp_mode);
1212 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1213 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1214 rtx select_mask;
1215 rtx op_t = operands[2];
1216 rtx op_f = operands[3];
1217
1218 /* The result of the comparison can be SI, HI or QI mode. Create a
1219 mask based on that result. */
1220 if (target_size > compare_size)
1221 {
1222 select_mask = gen_reg_rtx (mode);
1223 emit_insn (gen_extend_compare (select_mask, compare_result));
1224 }
1225 else if (target_size < compare_size)
1226 select_mask =
1227 gen_rtx_SUBREG (mode, compare_result,
1228 (compare_size - target_size) / BITS_PER_UNIT);
1229 else if (comp_mode != mode)
1230 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1231 else
1232 select_mask = compare_result;
1233
1234 if (GET_MODE (target) != GET_MODE (op_t)
1235 || GET_MODE (target) != GET_MODE (op_f))
1236 abort ();
1237
1238 if (reverse_test)
1239 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1240 else
1241 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1242 }
1243 else
1244 {
74f4459c 1245 rtx target = operands[0];
644459d0 1246 if (reverse_test)
1247 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1248 gen_rtx_NOT (comp_mode, compare_result)));
1249 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1250 emit_insn (gen_extendhisi2 (target, compare_result));
1251 else if (GET_MODE (target) == SImode
1252 && GET_MODE (compare_result) == QImode)
1253 emit_insn (gen_extend_compare (target, compare_result));
1254 else
1255 emit_move_insn (target, compare_result);
1256 }
1257}
1258
1259HOST_WIDE_INT
1260const_double_to_hwint (rtx x)
1261{
1262 HOST_WIDE_INT val;
1263 REAL_VALUE_TYPE rv;
1264 if (GET_MODE (x) == SFmode)
1265 {
1266 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1267 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1268 }
1269 else if (GET_MODE (x) == DFmode)
1270 {
1271 long l[2];
1272 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1273 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1274 val = l[0];
1275 val = (val << 32) | (l[1] & 0xffffffff);
1276 }
1277 else
1278 abort ();
1279 return val;
1280}
1281
1282rtx
1283hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1284{
1285 long tv[2];
1286 REAL_VALUE_TYPE rv;
1287 gcc_assert (mode == SFmode || mode == DFmode);
1288
1289 if (mode == SFmode)
1290 tv[0] = (v << 32) >> 32;
1291 else if (mode == DFmode)
1292 {
1293 tv[1] = (v << 32) >> 32;
1294 tv[0] = v >> 32;
1295 }
1296 real_from_target (&rv, tv, mode);
1297 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1298}
1299
1300void
1301print_operand_address (FILE * file, register rtx addr)
1302{
1303 rtx reg;
1304 rtx offset;
1305
e04cf423 1306 if (GET_CODE (addr) == AND
1307 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1308 && INTVAL (XEXP (addr, 1)) == -16)
1309 addr = XEXP (addr, 0);
1310
644459d0 1311 switch (GET_CODE (addr))
1312 {
1313 case REG:
1314 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1315 break;
1316
1317 case PLUS:
1318 reg = XEXP (addr, 0);
1319 offset = XEXP (addr, 1);
1320 if (GET_CODE (offset) == REG)
1321 {
1322 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1323 reg_names[REGNO (offset)]);
1324 }
1325 else if (GET_CODE (offset) == CONST_INT)
1326 {
1327 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1328 INTVAL (offset), reg_names[REGNO (reg)]);
1329 }
1330 else
1331 abort ();
1332 break;
1333
1334 case CONST:
1335 case LABEL_REF:
1336 case SYMBOL_REF:
1337 case CONST_INT:
1338 output_addr_const (file, addr);
1339 break;
1340
1341 default:
1342 debug_rtx (addr);
1343 abort ();
1344 }
1345}
1346
1347void
1348print_operand (FILE * file, rtx x, int code)
1349{
1350 enum machine_mode mode = GET_MODE (x);
1351 HOST_WIDE_INT val;
1352 unsigned char arr[16];
1353 int xcode = GET_CODE (x);
dea01258 1354 int i, info;
644459d0 1355 if (GET_MODE (x) == VOIDmode)
1356 switch (code)
1357 {
644459d0 1358 case 'L': /* 128 bits, signed */
1359 case 'm': /* 128 bits, signed */
1360 case 'T': /* 128 bits, signed */
1361 case 't': /* 128 bits, signed */
1362 mode = TImode;
1363 break;
644459d0 1364 case 'K': /* 64 bits, signed */
1365 case 'k': /* 64 bits, signed */
1366 case 'D': /* 64 bits, signed */
1367 case 'd': /* 64 bits, signed */
1368 mode = DImode;
1369 break;
644459d0 1370 case 'J': /* 32 bits, signed */
1371 case 'j': /* 32 bits, signed */
1372 case 's': /* 32 bits, signed */
1373 case 'S': /* 32 bits, signed */
1374 mode = SImode;
1375 break;
1376 }
1377 switch (code)
1378 {
1379
1380 case 'j': /* 32 bits, signed */
1381 case 'k': /* 64 bits, signed */
1382 case 'm': /* 128 bits, signed */
1383 if (xcode == CONST_INT
1384 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1385 {
1386 gcc_assert (logical_immediate_p (x, mode));
1387 constant_to_array (mode, x, arr);
1388 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1389 val = trunc_int_for_mode (val, SImode);
1390 switch (which_logical_immediate (val))
1391 {
1392 case SPU_ORI:
1393 break;
1394 case SPU_ORHI:
1395 fprintf (file, "h");
1396 break;
1397 case SPU_ORBI:
1398 fprintf (file, "b");
1399 break;
1400 default:
1401 gcc_unreachable();
1402 }
1403 }
1404 else
1405 gcc_unreachable();
1406 return;
1407
1408 case 'J': /* 32 bits, signed */
1409 case 'K': /* 64 bits, signed */
1410 case 'L': /* 128 bits, signed */
1411 if (xcode == CONST_INT
1412 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1413 {
1414 gcc_assert (logical_immediate_p (x, mode)
1415 || iohl_immediate_p (x, mode));
1416 constant_to_array (mode, x, arr);
1417 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1418 val = trunc_int_for_mode (val, SImode);
1419 switch (which_logical_immediate (val))
1420 {
1421 case SPU_ORI:
1422 case SPU_IOHL:
1423 break;
1424 case SPU_ORHI:
1425 val = trunc_int_for_mode (val, HImode);
1426 break;
1427 case SPU_ORBI:
1428 val = trunc_int_for_mode (val, QImode);
1429 break;
1430 default:
1431 gcc_unreachable();
1432 }
1433 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1434 }
1435 else
1436 gcc_unreachable();
1437 return;
1438
1439 case 't': /* 128 bits, signed */
1440 case 'd': /* 64 bits, signed */
1441 case 's': /* 32 bits, signed */
dea01258 1442 if (CONSTANT_P (x))
644459d0 1443 {
dea01258 1444 enum immediate_class c = classify_immediate (x, mode);
1445 switch (c)
1446 {
1447 case IC_IL1:
1448 constant_to_array (mode, x, arr);
1449 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1450 val = trunc_int_for_mode (val, SImode);
1451 switch (which_immediate_load (val))
1452 {
1453 case SPU_IL:
1454 break;
1455 case SPU_ILA:
1456 fprintf (file, "a");
1457 break;
1458 case SPU_ILH:
1459 fprintf (file, "h");
1460 break;
1461 case SPU_ILHU:
1462 fprintf (file, "hu");
1463 break;
1464 default:
1465 gcc_unreachable ();
1466 }
1467 break;
1468 case IC_CPAT:
1469 constant_to_array (mode, x, arr);
1470 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1471 if (info == 1)
1472 fprintf (file, "b");
1473 else if (info == 2)
1474 fprintf (file, "h");
1475 else if (info == 4)
1476 fprintf (file, "w");
1477 else if (info == 8)
1478 fprintf (file, "d");
1479 break;
1480 case IC_IL1s:
1481 if (xcode == CONST_VECTOR)
1482 {
1483 x = CONST_VECTOR_ELT (x, 0);
1484 xcode = GET_CODE (x);
1485 }
1486 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1487 fprintf (file, "a");
1488 else if (xcode == HIGH)
1489 fprintf (file, "hu");
1490 break;
1491 case IC_FSMBI:
5df189be 1492 case IC_FSMBI2:
dea01258 1493 case IC_IL2:
1494 case IC_IL2s:
1495 case IC_POOL:
1496 abort ();
1497 }
644459d0 1498 }
644459d0 1499 else
1500 gcc_unreachable ();
1501 return;
1502
1503 case 'T': /* 128 bits, signed */
1504 case 'D': /* 64 bits, signed */
1505 case 'S': /* 32 bits, signed */
dea01258 1506 if (CONSTANT_P (x))
644459d0 1507 {
dea01258 1508 enum immediate_class c = classify_immediate (x, mode);
1509 switch (c)
644459d0 1510 {
dea01258 1511 case IC_IL1:
1512 constant_to_array (mode, x, arr);
1513 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1514 val = trunc_int_for_mode (val, SImode);
1515 switch (which_immediate_load (val))
1516 {
1517 case SPU_IL:
1518 case SPU_ILA:
1519 break;
1520 case SPU_ILH:
1521 case SPU_ILHU:
1522 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1523 break;
1524 default:
1525 gcc_unreachable ();
1526 }
1527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1528 break;
1529 case IC_FSMBI:
1530 constant_to_array (mode, x, arr);
1531 val = 0;
1532 for (i = 0; i < 16; i++)
1533 {
1534 val <<= 1;
1535 val |= arr[i] & 1;
1536 }
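	  /* Every byte of an fsmbi-class constant is 0x00 or 0xff, so
	     collecting the low bit of each byte above reconstructs the
	     16-bit fsmbi immediate that regenerates the constant.  */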
1537 print_operand (file, GEN_INT (val), 0);
1538 break;
1539 case IC_CPAT:
1540 constant_to_array (mode, x, arr);
1541 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1542 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1543 break;
dea01258 1544 case IC_IL1s:
dea01258 1545 if (xcode == HIGH)
5df189be 1546 x = XEXP (x, 0);
1547 if (GET_CODE (x) == CONST_VECTOR)
1548 x = CONST_VECTOR_ELT (x, 0);
1549 output_addr_const (file, x);
1550 if (xcode == HIGH)
1551 fprintf (file, "@h");
644459d0 1552 break;
dea01258 1553 case IC_IL2:
1554 case IC_IL2s:
5df189be 1555 case IC_FSMBI2:
dea01258 1556 case IC_POOL:
1557 abort ();
644459d0 1558 }
c8befdb9 1559 }
644459d0 1560 else
1561 gcc_unreachable ();
1562 return;
1563
644459d0 1564 case 'C':
1565 if (xcode == CONST_INT)
1566 {
 1567 /* Only the 4 least significant bits are relevant for generating
1568 control word instructions. */
1569 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1570 return;
1571 }
1572 break;
1573
1574 case 'M': /* print code for c*d */
1575 if (GET_CODE (x) == CONST_INT)
1576 switch (INTVAL (x))
1577 {
1578 case 1:
1579 fprintf (file, "b");
1580 break;
1581 case 2:
1582 fprintf (file, "h");
1583 break;
1584 case 4:
1585 fprintf (file, "w");
1586 break;
1587 case 8:
1588 fprintf (file, "d");
1589 break;
1590 default:
1591 gcc_unreachable();
1592 }
1593 else
1594 gcc_unreachable();
1595 return;
1596
1597 case 'N': /* Negate the operand */
1598 if (xcode == CONST_INT)
1599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1600 else if (xcode == CONST_VECTOR)
1601 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1602 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1603 return;
1604
1605 case 'I': /* enable/disable interrupts */
1606 if (xcode == CONST_INT)
1607 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1608 return;
1609
1610 case 'b': /* branch modifiers */
1611 if (xcode == REG)
1612 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1613 else if (COMPARISON_P (x))
1614 fprintf (file, "%s", xcode == NE ? "n" : "");
1615 return;
1616
1617 case 'i': /* indirect call */
1618 if (xcode == MEM)
1619 {
1620 if (GET_CODE (XEXP (x, 0)) == REG)
1621 /* Used in indirect function calls. */
1622 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1623 else
1624 output_address (XEXP (x, 0));
1625 }
1626 return;
1627
1628 case 'p': /* load/store */
1629 if (xcode == MEM)
1630 {
1631 x = XEXP (x, 0);
1632 xcode = GET_CODE (x);
1633 }
e04cf423 1634 if (xcode == AND)
1635 {
1636 x = XEXP (x, 0);
1637 xcode = GET_CODE (x);
1638 }
644459d0 1639 if (xcode == REG)
1640 fprintf (file, "d");
1641 else if (xcode == CONST_INT)
1642 fprintf (file, "a");
1643 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1644 fprintf (file, "r");
1645 else if (xcode == PLUS || xcode == LO_SUM)
1646 {
1647 if (GET_CODE (XEXP (x, 1)) == REG)
1648 fprintf (file, "x");
1649 else
1650 fprintf (file, "d");
1651 }
1652 return;
1653
5df189be 1654 case 'e':
1655 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1656 val &= 0x7;
1657 output_addr_const (file, GEN_INT (val));
1658 return;
1659
1660 case 'f':
1661 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1662 val &= 0x1f;
1663 output_addr_const (file, GEN_INT (val));
1664 return;
1665
1666 case 'g':
1667 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1668 val &= 0x3f;
1669 output_addr_const (file, GEN_INT (val));
1670 return;
1671
1672 case 'h':
1673 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1674 val = (val >> 3) & 0x1f;
1675 output_addr_const (file, GEN_INT (val));
1676 return;
1677
1678 case 'E':
1679 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 val = -val;
1681 val &= 0x7;
1682 output_addr_const (file, GEN_INT (val));
1683 return;
1684
1685 case 'F':
1686 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 val = -val;
1688 val &= 0x1f;
1689 output_addr_const (file, GEN_INT (val));
1690 return;
1691
1692 case 'G':
1693 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1694 val = -val;
1695 val &= 0x3f;
1696 output_addr_const (file, GEN_INT (val));
1697 return;
1698
1699 case 'H':
1700 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1701 val = -(val & -8ll);
1702 val = (val >> 3) & 0x1f;
1703 output_addr_const (file, GEN_INT (val));
1704 return;
1705
56c7bfc2 1706 case 'v':
1707 case 'w':
1708 constant_to_array (mode, x, arr);
1709 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1710 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1711 return;
1712
644459d0 1713 case 0:
1714 if (xcode == REG)
1715 fprintf (file, "%s", reg_names[REGNO (x)]);
1716 else if (xcode == MEM)
1717 output_address (XEXP (x, 0));
1718 else if (xcode == CONST_VECTOR)
dea01258 1719 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1720 else
1721 output_addr_const (file, x);
1722 return;
1723
f6a0d06f 1724 /* unused letters
56c7bfc2 1725 o qr u yz
5df189be 1726 AB OPQR UVWXYZ */
644459d0 1727 default:
1728 output_operand_lossage ("invalid %%xn code");
1729 }
1730 gcc_unreachable ();
1731}
1732
644459d0 1733/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1734 caller saved register. For leaf functions it is more efficient to
1735 use a volatile register because we won't need to save and restore the
1736 pic register. This routine is only valid after register allocation
1737 is completed, so we can pick an unused register. */
1738static rtx
1739get_pic_reg (void)
1740{
1741 rtx pic_reg = pic_offset_table_rtx;
1742 if (!reload_completed && !reload_in_progress)
1743 abort ();
87a95921 1744 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1745 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1746 return pic_reg;
1747}
1748
5df189be 1749/* Split constant addresses to handle cases that are too large.
1750 Add in the pic register when in PIC mode.
1751 Split immediates that require more than 1 instruction. */
dea01258 1752int
1753spu_split_immediate (rtx * ops)
c8befdb9 1754{
dea01258 1755 enum machine_mode mode = GET_MODE (ops[0]);
1756 enum immediate_class c = classify_immediate (ops[1], mode);
1757
1758 switch (c)
c8befdb9 1759 {
dea01258 1760 case IC_IL2:
1761 {
1762 unsigned char arrhi[16];
1763 unsigned char arrlo[16];
98bbec1e 1764 rtx to, temp, hi, lo;
dea01258 1765 int i;
98bbec1e 1766 enum machine_mode imode = mode;
1767 /* We need to do reals as ints because the constant used in the
1768 IOR might not be a legitimate real constant. */
1769 imode = int_mode_for_mode (mode);
dea01258 1770 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1771 if (imode != mode)
1772 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1773 else
1774 to = ops[0];
1775 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1776 for (i = 0; i < 16; i += 4)
1777 {
1778 arrlo[i + 2] = arrhi[i + 2];
1779 arrlo[i + 3] = arrhi[i + 3];
1780 arrlo[i + 0] = arrlo[i + 1] = 0;
1781 arrhi[i + 2] = arrhi[i + 3] = 0;
1782 }
98bbec1e 1783 hi = array_to_constant (imode, arrhi);
1784 lo = array_to_constant (imode, arrlo);
1785 emit_move_insn (temp, hi);
dea01258 1786 emit_insn (gen_rtx_SET
98bbec1e 1787 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1788 return 1;
1789 }
5df189be 1790 case IC_FSMBI2:
1791 {
1792 unsigned char arr_fsmbi[16];
1793 unsigned char arr_andbi[16];
1794 rtx to, reg_fsmbi, reg_and;
1795 int i;
1796 enum machine_mode imode = mode;
1797 /* We need to do reals as ints because the constant used in the
1798 * AND might not be a legitimate real constant. */
1799 imode = int_mode_for_mode (mode);
1800 constant_to_array (mode, ops[1], arr_fsmbi);
1801 if (imode != mode)
1802 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1803 else
1804 to = ops[0];
1805 for (i = 0; i < 16; i++)
1806 if (arr_fsmbi[i] != 0)
1807 {
1808 arr_andbi[0] = arr_fsmbi[i];
1809 arr_fsmbi[i] = 0xff;
1810 }
1811 for (i = 1; i < 16; i++)
1812 arr_andbi[i] = arr_andbi[0];
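	/* E.g. a vector whose bytes are all either 0x00 or 0x21 becomes an
	   fsmbi that puts 0xff in the nonzero byte positions, followed by an
	   AND with 0x21 replicated across the quadword.  */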
1813 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1814 reg_and = array_to_constant (imode, arr_andbi);
1815 emit_move_insn (to, reg_fsmbi);
1816 emit_insn (gen_rtx_SET
1817 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1818 return 1;
1819 }
dea01258 1820 case IC_POOL:
1821 if (reload_in_progress || reload_completed)
1822 {
1823 rtx mem = force_const_mem (mode, ops[1]);
1824 if (TARGET_LARGE_MEM)
1825 {
1826 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1827 emit_move_insn (addr, XEXP (mem, 0));
1828 mem = replace_equiv_address (mem, addr);
1829 }
1830 emit_move_insn (ops[0], mem);
1831 return 1;
1832 }
1833 break;
1834 case IC_IL1s:
1835 case IC_IL2s:
1836 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1837 {
1838 if (c == IC_IL2s)
1839 {
5df189be 1840 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1841 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1842 }
1843 else if (flag_pic)
1844 emit_insn (gen_pic (ops[0], ops[1]));
1845 if (flag_pic)
1846 {
1847 rtx pic_reg = get_pic_reg ();
1848 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1849 crtl->uses_pic_offset_table = 1;
dea01258 1850 }
1851 return flag_pic || c == IC_IL2s;
1852 }
1853 break;
1854 case IC_IL1:
1855 case IC_FSMBI:
1856 case IC_CPAT:
1857 break;
c8befdb9 1858 }
dea01258 1859 return 0;
c8befdb9 1860}
1861
644459d0 1862/* SAVING is TRUE when we are generating the actual load and store
1863 instructions for REGNO. When determining the size of the stack
 1864   needed for saving registers we must allocate enough space for the
1865 worst case, because we don't always have the information early enough
1866 to not allocate it. But we can at least eliminate the actual loads
1867 and stores during the prologue/epilogue. */
1868static int
1869need_to_save_reg (int regno, int saving)
1870{
3072d30e 1871 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1872 return 1;
1873 if (flag_pic
1874 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1875 && (!saving || crtl->uses_pic_offset_table)
644459d0 1876 && (!saving
3072d30e 1877 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1878 return 1;
1879 return 0;
1880}
1881
1882/* This function is only correct starting with local register
 1883   allocation.  */
1884int
1885spu_saved_regs_size (void)
1886{
1887 int reg_save_size = 0;
1888 int regno;
1889
1890 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1891 if (need_to_save_reg (regno, 0))
1892 reg_save_size += 0x10;
1893 return reg_save_size;
1894}
1895
1896static rtx
1897frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1898{
1899 rtx reg = gen_rtx_REG (V4SImode, regno);
1900 rtx mem =
1901 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1902 return emit_insn (gen_movv4si (mem, reg));
1903}
1904
1905static rtx
1906frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1907{
1908 rtx reg = gen_rtx_REG (V4SImode, regno);
1909 rtx mem =
1910 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1911 return emit_insn (gen_movv4si (reg, mem));
1912}
1913
1914/* This happens after reload, so we need to expand it. */
1915static rtx
1916frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1917{
1918 rtx insn;
1919 if (satisfies_constraint_K (GEN_INT (imm)))
1920 {
1921 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1922 }
1923 else
1924 {
3072d30e 1925 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1926 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1927 if (REGNO (src) == REGNO (scratch))
1928 abort ();
1929 }
644459d0 1930 return insn;
1931}
1932
1933/* Return nonzero if this function is known to have a null epilogue. */
1934
1935int
1936direct_return (void)
1937{
1938 if (reload_completed)
1939 {
1940 if (cfun->static_chain_decl == 0
1941 && (spu_saved_regs_size ()
1942 + get_frame_size ()
abe32cce 1943 + crtl->outgoing_args_size
1944 + crtl->args.pretend_args_size == 0)
644459d0 1945 && current_function_is_leaf)
1946 return 1;
1947 }
1948 return 0;
1949}
1950
1951/*
1952 The stack frame looks like this:
1953 +-------------+
1954 | incoming |
a8e019fa 1955 | args |
1956 AP -> +-------------+
644459d0 1957 | $lr save |
1958 +-------------+
1959 prev SP | back chain |
1960 +-------------+
1961 | var args |
abe32cce 1962 | reg save | crtl->args.pretend_args_size bytes
644459d0 1963 +-------------+
1964 | ... |
1965 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1966 FP -> +-------------+
644459d0 1967 | ... |
a8e019fa 1968 | vars | get_frame_size() bytes
1969 HFP -> +-------------+
644459d0 1970 | ... |
1971 | outgoing |
abe32cce 1972 | args | crtl->outgoing_args_size bytes
644459d0 1973 +-------------+
1974 | $lr of next |
1975 | frame |
1976 +-------------+
a8e019fa 1977 | back chain |
1978 SP -> +-------------+
644459d0 1979
1980*/
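/* To give a rough idea of the sizes involved (the figures are only an
   example): a non-leaf function with 48 bytes of locals, two call-saved
   registers (2 * 16 bytes) and 32 bytes of outgoing arguments reserves
   48 + 32 + 32 + STACK_POINTER_OFFSET bytes below the incoming $sp.  */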
1981void
1982spu_expand_prologue (void)
1983{
1984 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1985 HOST_WIDE_INT total_size;
1986 HOST_WIDE_INT saved_regs_size;
1987 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1988 rtx scratch_reg_0, scratch_reg_1;
1989 rtx insn, real;
1990
644459d0 1991 if (flag_pic && optimize == 0)
18d50ae6 1992 crtl->uses_pic_offset_table = 1;
644459d0 1993
1994 if (spu_naked_function_p (current_function_decl))
1995 return;
1996
1997 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1998 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1999
2000 saved_regs_size = spu_saved_regs_size ();
2001 total_size = size + saved_regs_size
abe32cce 2002 + crtl->outgoing_args_size
2003 + crtl->args.pretend_args_size;
644459d0 2004
2005 if (!current_function_is_leaf
18d50ae6 2006 || cfun->calls_alloca || total_size > 0)
644459d0 2007 total_size += STACK_POINTER_OFFSET;
2008
2009 /* Save this first because code after this might use the link
2010 register as a scratch register. */
2011 if (!current_function_is_leaf)
2012 {
2013 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2014 RTX_FRAME_RELATED_P (insn) = 1;
2015 }
2016
2017 if (total_size > 0)
2018 {
abe32cce 2019 offset = -crtl->args.pretend_args_size;
644459d0 2020 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2021 if (need_to_save_reg (regno, 1))
2022 {
2023 offset -= 16;
2024 insn = frame_emit_store (regno, sp_reg, offset);
2025 RTX_FRAME_RELATED_P (insn) = 1;
2026 }
2027 }
2028
18d50ae6 2029 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2030 {
2031 rtx pic_reg = get_pic_reg ();
2032 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2033 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2034 }
2035
2036 if (total_size > 0)
2037 {
2038 if (flag_stack_check)
2039 {
d819917f 2040 /* We compare against total_size-1 because
644459d0 2041 ($sp >= total_size) <=> ($sp > total_size-1) */
2042 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2043 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2044 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2045 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2046 {
2047 emit_move_insn (scratch_v4si, size_v4si);
2048 size_v4si = scratch_v4si;
2049 }
2050 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2051 emit_insn (gen_vec_extractv4si
2052 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2053 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2054 }
2055
2056 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2057 the value of the previous $sp because we save it as the back
2058 chain. */
2059 if (total_size <= 2000)
2060 {
2061 /* In this case we save the back chain first. */
2062 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2063 insn =
2064 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2065 }
644459d0 2066 else
2067 {
2068 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2069 insn =
2070 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2071 }
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2074 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2075
2076 if (total_size > 2000)
2077 {
2078 /* Save the back chain ptr */
2079 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2080 }
2081
2082 if (frame_pointer_needed)
2083 {
2084 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2085 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2086 + crtl->outgoing_args_size;
644459d0 2087 /* Set the new frame_pointer */
d8dfeb55 2088 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2089 RTX_FRAME_RELATED_P (insn) = 1;
2090 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2091 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2092 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2093 }
2094 }
2095
a512540d 2096 if (flag_stack_usage)
2097 current_function_static_stack_size = total_size;
644459d0 2098}
2099
2100void
2101spu_expand_epilogue (bool sibcall_p)
2102{
2103 int size = get_frame_size (), offset, regno;
2104 HOST_WIDE_INT saved_regs_size, total_size;
2105 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2106 rtx jump, scratch_reg_0;
2107
644459d0 2108 if (spu_naked_function_p (current_function_decl))
2109 return;
2110
2111 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2112
2113 saved_regs_size = spu_saved_regs_size ();
2114 total_size = size + saved_regs_size
abe32cce 2115 + crtl->outgoing_args_size
2116 + crtl->args.pretend_args_size;
644459d0 2117
2118 if (!current_function_is_leaf
18d50ae6 2119 || cfun->calls_alloca || total_size > 0)
644459d0 2120 total_size += STACK_POINTER_OFFSET;
2121
2122 if (total_size > 0)
2123 {
18d50ae6 2124 if (cfun->calls_alloca)
644459d0 2125 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2126 else
2127 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2128
2129
2130 if (saved_regs_size > 0)
2131 {
abe32cce 2132 offset = -crtl->args.pretend_args_size;
644459d0 2133 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2134 if (need_to_save_reg (regno, 1))
2135 {
2136 offset -= 0x10;
2137 frame_emit_load (regno, sp_reg, offset);
2138 }
2139 }
2140 }
2141
2142 if (!current_function_is_leaf)
2143 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2144
2145 if (!sibcall_p)
2146 {
18b42941 2147 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2148 jump = emit_jump_insn (gen__return ());
2149 emit_barrier_after (jump);
2150 }
2151
644459d0 2152}
2153
2154rtx
2155spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2156{
2157 if (count != 0)
2158 return 0;
2159 /* This is inefficient because it ends up copying to a save-register
2160 which then gets saved even though $lr has already been saved. But
2161 it does generate better code for leaf functions and we don't need
2162 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2163 used for __builtin_return_address anyway, so maybe we don't care if
2164 it's inefficient. */
2165 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2166}
2167\f
2168
2169/* Given VAL, generate a constant appropriate for MODE.
2170 If MODE is a vector mode, every element will be VAL.
2171 For TImode, VAL will be zero extended to 128 bits. */
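/* For example, spu_const (V4SImode, 7) yields the vector {7, 7, 7, 7};
   for a float mode, VAL is interpreted as the bit pattern of the value
   rather than its numeric value.  */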
2172rtx
2173spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2174{
2175 rtx inner;
2176 rtvec v;
2177 int units, i;
2178
2179 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2180 || GET_MODE_CLASS (mode) == MODE_FLOAT
2181 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2182 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2183
2184 if (GET_MODE_CLASS (mode) == MODE_INT)
2185 return immed_double_const (val, 0, mode);
2186
2187 /* val is the bit representation of the float */
2188 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2189 return hwint_to_const_double (mode, val);
2190
2191 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2192 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2193 else
2194 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2195
2196 units = GET_MODE_NUNITS (mode);
2197
2198 v = rtvec_alloc (units);
2199
2200 for (i = 0; i < units; ++i)
2201 RTVEC_ELT (v, i) = inner;
2202
2203 return gen_rtx_CONST_VECTOR (mode, v);
2204}
644459d0 2205
5474166e 2206/* Create a MODE vector constant from 4 ints. */
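/* For instance, spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) produces the quadword whose bytes are
   0x00, 0x01, ..., 0x0f in order: A supplies bytes 0-3, B bytes 4-7,
   and so on, each stored most-significant byte first.  */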
2207rtx
2208spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2209{
2210 unsigned char arr[16];
2211 arr[0] = (a >> 24) & 0xff;
2212 arr[1] = (a >> 16) & 0xff;
2213 arr[2] = (a >> 8) & 0xff;
2214 arr[3] = (a >> 0) & 0xff;
2215 arr[4] = (b >> 24) & 0xff;
2216 arr[5] = (b >> 16) & 0xff;
2217 arr[6] = (b >> 8) & 0xff;
2218 arr[7] = (b >> 0) & 0xff;
2219 arr[8] = (c >> 24) & 0xff;
2220 arr[9] = (c >> 16) & 0xff;
2221 arr[10] = (c >> 8) & 0xff;
2222 arr[11] = (c >> 0) & 0xff;
2223 arr[12] = (d >> 24) & 0xff;
2224 arr[13] = (d >> 16) & 0xff;
2225 arr[14] = (d >> 8) & 0xff;
2226 arr[15] = (d >> 0) & 0xff;
2227 return array_to_constant(mode, arr);
2228}
5a976006 2229\f
2230/* branch hint stuff */
5474166e 2231
644459d0 2232/* An array of these is used to propagate hints to predecessor blocks. */
2233struct spu_bb_info
2234{
5a976006 2235 rtx prop_jump; /* propagated from another block */
2236 int bb_index; /* the original block. */
644459d0 2237};
5a976006 2238static struct spu_bb_info *spu_bb_info;
644459d0 2239
5a976006 2240#define STOP_HINT_P(INSN) \
2241 (GET_CODE(INSN) == CALL_INSN \
2242 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2243 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2244
2245/* 1 when RTX is a hinted branch or its target. We keep track of
2246 what has been hinted so the safe-hint code can test it easily. */
2247#define HINTED_P(RTX) \
2248 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2249
2250/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2251#define SCHED_ON_EVEN_P(RTX) \
2252 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2253
2254/* Emit a nop for INSN such that the two will dual issue. This assumes
2255 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2256 We check for TImode to handle a MULTI1 insn which has dual issued its
2257 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2258 ADDR_VEC insns. */
2259static void
2260emit_nop_for_insn (rtx insn)
644459d0 2261{
5a976006 2262 int p;
2263 rtx new_insn;
2264 p = get_pipe (insn);
2265 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2266 new_insn = emit_insn_after (gen_lnop (), insn);
2267 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2268 {
5a976006 2269 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2270 PUT_MODE (new_insn, TImode);
2271 PUT_MODE (insn, VOIDmode);
2272 }
2273 else
2274 new_insn = emit_insn_after (gen_lnop (), insn);
2275 recog_memoized (new_insn);
2fbdf9ef 2276 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2277}
2278
2279/* Insert nops in basic blocks to meet dual issue alignment
2280 requirements. Also make sure hbrp and hint instructions are at least
2281 one cycle apart, possibly inserting a nop. */
2282static void
2283pad_bb(void)
2284{
2285 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2286 int length;
2287 int addr;
2288
2289 /* This sets up INSN_ADDRESSES. */
2290 shorten_branches (get_insns ());
2291
2292 /* Keep track of length added by nops. */
2293 length = 0;
2294
2295 prev_insn = 0;
2296 insn = get_insns ();
2297 if (!active_insn_p (insn))
2298 insn = next_active_insn (insn);
2299 for (; insn; insn = next_insn)
2300 {
2301 next_insn = next_active_insn (insn);
2302 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2303 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2304 {
5a976006 2305 if (hbr_insn)
2306 {
2307 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2308 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2309 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2310 || (a1 - a0 == 4))
2311 {
2312 prev_insn = emit_insn_before (gen_lnop (), insn);
2313 PUT_MODE (prev_insn, GET_MODE (insn));
2314 PUT_MODE (insn, TImode);
2fbdf9ef 2315 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2316 length += 4;
2317 }
2318 }
2319 hbr_insn = insn;
2320 }
2321 if (INSN_CODE (insn) == CODE_FOR_blockage)
2322 {
2323 if (GET_MODE (insn) == TImode)
2324 PUT_MODE (next_insn, TImode);
2325 insn = next_insn;
2326 next_insn = next_active_insn (insn);
2327 }
2328 addr = INSN_ADDRESSES (INSN_UID (insn));
2329 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2330 {
2331 if (((addr + length) & 7) != 0)
2332 {
2333 emit_nop_for_insn (prev_insn);
2334 length += 4;
2335 }
644459d0 2336 }
5a976006 2337 else if (GET_MODE (insn) == TImode
2338 && ((next_insn && GET_MODE (next_insn) != TImode)
2339 || get_attr_type (insn) == TYPE_MULTI0)
2340 && ((addr + length) & 7) != 0)
2341 {
2342 /* prev_insn will always be set because the first insn is
2343 always 8-byte aligned. */
2344 emit_nop_for_insn (prev_insn);
2345 length += 4;
2346 }
2347 prev_insn = insn;
644459d0 2348 }
644459d0 2349}
2350
5a976006 2351\f
2352/* Routines for branch hints. */
2353
644459d0 2354static void
5a976006 2355spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2356 int distance, sbitmap blocks)
644459d0 2357{
5a976006 2358 rtx branch_label = 0;
2359 rtx hint;
2360 rtx insn;
2361 rtx table;
644459d0 2362
2363 if (before == 0 || branch == 0 || target == 0)
2364 return;
2365
5a976006 2366  /* While scheduling we require hints to be no further than 600 bytes
 2367     from the branch, so we need to enforce that here too.  */
644459d0 2368 if (distance > 600)
2369 return;
2370
5a976006 2371  /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2372 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2373 before = NEXT_INSN (before);
644459d0 2374
2375 branch_label = gen_label_rtx ();
2376 LABEL_NUSES (branch_label)++;
2377 LABEL_PRESERVE_P (branch_label) = 1;
2378 insn = emit_label_before (branch_label, branch);
2379 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2380 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2381
2382 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2383 recog_memoized (hint);
2fbdf9ef 2384 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2385 HINTED_P (branch) = 1;
644459d0 2386
5a976006 2387 if (GET_CODE (target) == LABEL_REF)
2388 HINTED_P (XEXP (target, 0)) = 1;
2389 else if (tablejump_p (branch, 0, &table))
644459d0 2390 {
5a976006 2391 rtvec vec;
2392 int j;
2393 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2394 vec = XVEC (PATTERN (table), 0);
2395 else
2396 vec = XVEC (PATTERN (table), 1);
2397 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2398 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2399 }
5a976006 2400
2401 if (distance >= 588)
644459d0 2402 {
5a976006 2403 /* Make sure the hint isn't scheduled any earlier than this point,
 2404     which could make it too far for the branch offset to fit.  */
2fbdf9ef 2405 insn = emit_insn_before (gen_blockage (), hint);
2406 recog_memoized (insn);
2407 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2408 }
2409 else if (distance <= 8 * 4)
2410 {
2411 /* To guarantee at least 8 insns between the hint and branch we
2412 insert nops. */
2413 int d;
2414 for (d = distance; d < 8 * 4; d += 4)
2415 {
2416 insn =
2417 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2418 recog_memoized (insn);
2fbdf9ef 2419 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2420 }
2421
2422 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2423 insn = emit_insn_after (gen_blockage (), hint);
2424 recog_memoized (insn);
2425 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2426
2427 /* Make sure any nops inserted aren't scheduled after the call. */
2428 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2429 {
2430 insn = emit_insn_before (gen_blockage (), branch);
2431 recog_memoized (insn);
2432 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2433 }
644459d0 2434 }
644459d0 2435}
2436
2437/* Returns 0 if we don't want a hint for this branch. Otherwise return
2438 the rtx for the branch target. */
2439static rtx
2440get_branch_target (rtx branch)
2441{
2442 if (GET_CODE (branch) == JUMP_INSN)
2443 {
2444 rtx set, src;
2445
2446 /* Return statements */
2447 if (GET_CODE (PATTERN (branch)) == RETURN)
2448 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2449
2450 /* jump table */
2451 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2452 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2453 return 0;
2454
fcc31b99 2455 /* ASM GOTOs. */
604157f6 2456 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2457 return NULL;
2458
644459d0 2459 set = single_set (branch);
2460 src = SET_SRC (set);
2461 if (GET_CODE (SET_DEST (set)) != PC)
2462 abort ();
2463
2464 if (GET_CODE (src) == IF_THEN_ELSE)
2465 {
2466 rtx lab = 0;
2467 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2468 if (note)
2469 {
2470 /* If the more probable case is not a fall through, then
2471 try a branch hint. */
2472 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2473 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2474 && GET_CODE (XEXP (src, 1)) != PC)
2475 lab = XEXP (src, 1);
2476 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2477 && GET_CODE (XEXP (src, 2)) != PC)
2478 lab = XEXP (src, 2);
2479 }
2480 if (lab)
2481 {
2482 if (GET_CODE (lab) == RETURN)
2483 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2484 return lab;
2485 }
2486 return 0;
2487 }
2488
2489 return src;
2490 }
2491 else if (GET_CODE (branch) == CALL_INSN)
2492 {
2493 rtx call;
2494 /* All of our call patterns are in a PARALLEL and the CALL is
2495 the first pattern in the PARALLEL. */
2496 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2497 abort ();
2498 call = XVECEXP (PATTERN (branch), 0, 0);
2499 if (GET_CODE (call) == SET)
2500 call = SET_SRC (call);
2501 if (GET_CODE (call) != CALL)
2502 abort ();
2503 return XEXP (XEXP (call, 0), 0);
2504 }
2505 return 0;
2506}
2507
5a976006 2508/* The special $hbr register is used to prevent the insn scheduler from
2509 moving hbr insns across instructions which invalidate them. It
2510 should only be used in a clobber, and this function searches for
2511 insns which clobber it. */
2512static bool
2513insn_clobbers_hbr (rtx insn)
2514{
2515 if (INSN_P (insn)
2516 && GET_CODE (PATTERN (insn)) == PARALLEL)
2517 {
2518 rtx parallel = PATTERN (insn);
2519 rtx clobber;
2520 int j;
2521 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2522 {
2523 clobber = XVECEXP (parallel, 0, j);
2524 if (GET_CODE (clobber) == CLOBBER
2525 && GET_CODE (XEXP (clobber, 0)) == REG
2526 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2527 return 1;
2528 }
2529 }
2530 return 0;
2531}
2532
2533/* Search up to 32 insns starting at FIRST:
2534 - at any kind of hinted branch, just return
2535 - at any unconditional branch in the first 15 insns, just return
2536 - at a call or indirect branch, after the first 15 insns, force it to
2537 an even address and return
2538 - at any unconditional branch, after the first 15 insns, force it to
2539 an even address.
 2540   At the end of the search, insert an hbrp within 4 insns of FIRST,
2541 and an hbrp within 16 instructions of FIRST.
2542 */
644459d0 2543static void
5a976006 2544insert_hbrp_for_ilb_runout (rtx first)
644459d0 2545{
5a976006 2546 rtx insn, before_4 = 0, before_16 = 0;
2547 int addr = 0, length, first_addr = -1;
2548 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2549 int insert_lnop_after = 0;
2550 for (insn = first; insn; insn = NEXT_INSN (insn))
2551 if (INSN_P (insn))
2552 {
2553 if (first_addr == -1)
2554 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2555 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2556 length = get_attr_length (insn);
2557
2558 if (before_4 == 0 && addr + length >= 4 * 4)
2559 before_4 = insn;
2560 /* We test for 14 instructions because the first hbrp will add
2561 up to 2 instructions. */
2562 if (before_16 == 0 && addr + length >= 14 * 4)
2563 before_16 = insn;
2564
2565 if (INSN_CODE (insn) == CODE_FOR_hbr)
2566 {
2567 /* Make sure an hbrp is at least 2 cycles away from a hint.
2568 Insert an lnop after the hbrp when necessary. */
2569 if (before_4 == 0 && addr > 0)
2570 {
2571 before_4 = insn;
2572 insert_lnop_after |= 1;
2573 }
2574 else if (before_4 && addr <= 4 * 4)
2575 insert_lnop_after |= 1;
2576 if (before_16 == 0 && addr > 10 * 4)
2577 {
2578 before_16 = insn;
2579 insert_lnop_after |= 2;
2580 }
2581 else if (before_16 && addr <= 14 * 4)
2582 insert_lnop_after |= 2;
2583 }
644459d0 2584
5a976006 2585 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2586 {
2587 if (addr < hbrp_addr0)
2588 hbrp_addr0 = addr;
2589 else if (addr < hbrp_addr1)
2590 hbrp_addr1 = addr;
2591 }
644459d0 2592
5a976006 2593 if (CALL_P (insn) || JUMP_P (insn))
2594 {
2595 if (HINTED_P (insn))
2596 return;
2597
2598 /* Any branch after the first 15 insns should be on an even
2599 address to avoid a special case branch. There might be
2600 some nops and/or hbrps inserted, so we test after 10
2601 insns. */
2602 if (addr > 10 * 4)
2603 SCHED_ON_EVEN_P (insn) = 1;
2604 }
644459d0 2605
5a976006 2606 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2607 return;
2608
2609
2610 if (addr + length >= 32 * 4)
644459d0 2611 {
5a976006 2612 gcc_assert (before_4 && before_16);
2613 if (hbrp_addr0 > 4 * 4)
644459d0 2614 {
5a976006 2615 insn =
2616 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2617 recog_memoized (insn);
2fbdf9ef 2618 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2619 INSN_ADDRESSES_NEW (insn,
2620 INSN_ADDRESSES (INSN_UID (before_4)));
2621 PUT_MODE (insn, GET_MODE (before_4));
2622 PUT_MODE (before_4, TImode);
2623 if (insert_lnop_after & 1)
644459d0 2624 {
5a976006 2625 insn = emit_insn_before (gen_lnop (), before_4);
2626 recog_memoized (insn);
2fbdf9ef 2627 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2628 INSN_ADDRESSES_NEW (insn,
2629 INSN_ADDRESSES (INSN_UID (before_4)));
2630 PUT_MODE (insn, TImode);
644459d0 2631 }
644459d0 2632 }
5a976006 2633 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2634 && hbrp_addr1 > 16 * 4)
644459d0 2635 {
5a976006 2636 insn =
2637 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2638 recog_memoized (insn);
2fbdf9ef 2639 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2640 INSN_ADDRESSES_NEW (insn,
2641 INSN_ADDRESSES (INSN_UID (before_16)));
2642 PUT_MODE (insn, GET_MODE (before_16));
2643 PUT_MODE (before_16, TImode);
2644 if (insert_lnop_after & 2)
644459d0 2645 {
5a976006 2646 insn = emit_insn_before (gen_lnop (), before_16);
2647 recog_memoized (insn);
2fbdf9ef 2648 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2649 INSN_ADDRESSES_NEW (insn,
2650 INSN_ADDRESSES (INSN_UID
2651 (before_16)));
2652 PUT_MODE (insn, TImode);
644459d0 2653 }
2654 }
5a976006 2655 return;
644459d0 2656 }
644459d0 2657 }
5a976006 2658 else if (BARRIER_P (insn))
2659 return;
644459d0 2660
644459d0 2661}
5a976006 2662
2663/* The SPU might hang when it executes 48 inline instructions after a
2664 hinted branch jumps to its hinted target. The beginning of a
2665 function and the return from a call might have been hinted, and must
2666 be handled as well. To prevent a hang we insert 2 hbrps. The first
2667 should be within 6 insns of the branch target. The second should be
2668 within 22 insns of the branch target. When determining if hbrps are
 2669   necessary, we look for only 32 inline instructions, because up to
 2670   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2671 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2672static void
5a976006 2673insert_hbrp (void)
644459d0 2674{
5a976006 2675 rtx insn;
2676 if (TARGET_SAFE_HINTS)
644459d0 2677 {
5a976006 2678 shorten_branches (get_insns ());
2679 /* Insert hbrp at beginning of function */
2680 insn = next_active_insn (get_insns ());
2681 if (insn)
2682 insert_hbrp_for_ilb_runout (insn);
2683 /* Insert hbrp after hinted targets. */
2684 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2685 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2686 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2687 }
644459d0 2688}
2689
5a976006 2690static int in_spu_reorg;
2691
2692/* Insert branch hints. There are no branch optimizations after this
2693 pass, so it's safe to set our branch hints now. */
644459d0 2694static void
5a976006 2695spu_machine_dependent_reorg (void)
644459d0 2696{
5a976006 2697 sbitmap blocks;
2698 basic_block bb;
2699 rtx branch, insn;
2700 rtx branch_target = 0;
2701 int branch_addr = 0, insn_addr, required_dist = 0;
2702 int i;
2703 unsigned int j;
644459d0 2704
5a976006 2705 if (!TARGET_BRANCH_HINTS || optimize == 0)
2706 {
2707 /* We still do it for unoptimized code because an external
2708 function might have hinted a call or return. */
2709 insert_hbrp ();
2710 pad_bb ();
2711 return;
2712 }
644459d0 2713
5a976006 2714 blocks = sbitmap_alloc (last_basic_block);
2715 sbitmap_zero (blocks);
644459d0 2716
5a976006 2717 in_spu_reorg = 1;
2718 compute_bb_for_insn ();
2719
2720 compact_blocks ();
2721
2722 spu_bb_info =
2723 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2724 sizeof (struct spu_bb_info));
2725
2726 /* We need exact insn addresses and lengths. */
2727 shorten_branches (get_insns ());
2728
2729 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2730 {
5a976006 2731 bb = BASIC_BLOCK (i);
2732 branch = 0;
2733 if (spu_bb_info[i].prop_jump)
644459d0 2734 {
5a976006 2735 branch = spu_bb_info[i].prop_jump;
2736 branch_target = get_branch_target (branch);
2737 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2738 required_dist = spu_hint_dist;
2739 }
2740 /* Search from end of a block to beginning. In this loop, find
 2741     jumps which need a branch hint and emit the hint only when:
2742 - it's an indirect branch and we're at the insn which sets
2743 the register
2744 - we're at an insn that will invalidate the hint. e.g., a
2745 call, another hint insn, inline asm that clobbers $hbr, and
2746 some inlined operations (divmodsi4). Don't consider jumps
2747 because they are only at the end of a block and are
2748 considered when we are deciding whether to propagate
2749 - we're getting too far away from the branch. The hbr insns
2750 only have a signed 10 bit offset
2751 We go back as far as possible so the branch will be considered
2752 for propagation when we get to the beginning of the block. */
2753 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2754 {
2755 if (INSN_P (insn))
2756 {
2757 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2758 if (branch
2759 && ((GET_CODE (branch_target) == REG
2760 && set_of (branch_target, insn) != NULL_RTX)
2761 || insn_clobbers_hbr (insn)
2762 || branch_addr - insn_addr > 600))
2763 {
2764 rtx next = NEXT_INSN (insn);
2765 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2766 if (insn != BB_END (bb)
2767 && branch_addr - next_addr >= required_dist)
2768 {
2769 if (dump_file)
2770 fprintf (dump_file,
2771 "hint for %i in block %i before %i\n",
2772 INSN_UID (branch), bb->index,
2773 INSN_UID (next));
2774 spu_emit_branch_hint (next, branch, branch_target,
2775 branch_addr - next_addr, blocks);
2776 }
2777 branch = 0;
2778 }
2779
2780 /* JUMP_P will only be true at the end of a block. When
2781 branch is already set it means we've previously decided
2782 to propagate a hint for that branch into this block. */
2783 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2784 {
2785 branch = 0;
2786 if ((branch_target = get_branch_target (insn)))
2787 {
2788 branch = insn;
2789 branch_addr = insn_addr;
2790 required_dist = spu_hint_dist;
2791 }
2792 }
2793 }
2794 if (insn == BB_HEAD (bb))
2795 break;
2796 }
2797
2798 if (branch)
2799 {
2800 /* If we haven't emitted a hint for this branch yet, it might
2801 be profitable to emit it in one of the predecessor blocks,
2802 especially for loops. */
2803 rtx bbend;
2804 basic_block prev = 0, prop = 0, prev2 = 0;
2805 int loop_exit = 0, simple_loop = 0;
2806 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2807
2808 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2809 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2810 prev = EDGE_PRED (bb, j)->src;
2811 else
2812 prev2 = EDGE_PRED (bb, j)->src;
2813
2814 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2815 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2816 loop_exit = 1;
2817 else if (EDGE_SUCC (bb, j)->dest == bb)
2818 simple_loop = 1;
2819
2820 /* If this branch is a loop exit then propagate to previous
2821 fallthru block. This catches the cases when it is a simple
2822 loop or when there is an initial branch into the loop. */
2823 if (prev && (loop_exit || simple_loop)
2824 && prev->loop_depth <= bb->loop_depth)
2825 prop = prev;
2826
 2827	  /* If there is only one adjacent predecessor, don't propagate
2828 outside this loop. This loop_depth test isn't perfect, but
2829 I'm not sure the loop_father member is valid at this point. */
2830 else if (prev && single_pred_p (bb)
2831 && prev->loop_depth == bb->loop_depth)
2832 prop = prev;
2833
2834 /* If this is the JOIN block of a simple IF-THEN then
 2835	     propagate the hint to the HEADER block.  */
2836 else if (prev && prev2
2837 && EDGE_COUNT (bb->preds) == 2
2838 && EDGE_COUNT (prev->preds) == 1
2839 && EDGE_PRED (prev, 0)->src == prev2
2840 && prev2->loop_depth == bb->loop_depth
2841 && GET_CODE (branch_target) != REG)
2842 prop = prev;
2843
2844 /* Don't propagate when:
2845 - this is a simple loop and the hint would be too far
2846 - this is not a simple loop and there are 16 insns in
2847 this block already
2848 - the predecessor block ends in a branch that will be
2849 hinted
2850 - the predecessor block ends in an insn that invalidates
2851 the hint */
2852 if (prop
2853 && prop->index >= 0
2854 && (bbend = BB_END (prop))
2855 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2856 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2857 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2858 {
2859 if (dump_file)
2860 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2861 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2862 bb->index, prop->index, bb->loop_depth,
2863 INSN_UID (branch), loop_exit, simple_loop,
2864 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2865
2866 spu_bb_info[prop->index].prop_jump = branch;
2867 spu_bb_info[prop->index].bb_index = i;
2868 }
2869 else if (branch_addr - next_addr >= required_dist)
2870 {
2871 if (dump_file)
2872 fprintf (dump_file, "hint for %i in block %i before %i\n",
2873 INSN_UID (branch), bb->index,
2874 INSN_UID (NEXT_INSN (insn)));
2875 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2876 branch_addr - next_addr, blocks);
2877 }
2878 branch = 0;
644459d0 2879 }
644459d0 2880 }
5a976006 2881 free (spu_bb_info);
644459d0 2882
5a976006 2883 if (!sbitmap_empty_p (blocks))
2884 find_many_sub_basic_blocks (blocks);
2885
2886 /* We have to schedule to make sure alignment is ok. */
2887 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2888
 2889  /* The hints need to be scheduled, so run the scheduler again.  */
2890 schedule_insns ();
2fbdf9ef 2891 df_finish_pass (true);
5a976006 2892
2893 insert_hbrp ();
2894
2895 pad_bb ();
2896
8f1d58ad 2897 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2898 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2899 {
2900 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2901	 between its branch label and the branch.  We don't move the
2902 label because GCC expects it at the beginning of the block. */
2903 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2904 rtx label_ref = XVECEXP (unspec, 0, 0);
2905 rtx label = XEXP (label_ref, 0);
2906 rtx branch;
2907 int offset = 0;
2908 for (branch = NEXT_INSN (label);
2909 !JUMP_P (branch) && !CALL_P (branch);
2910 branch = NEXT_INSN (branch))
2911 if (NONJUMP_INSN_P (branch))
2912 offset += get_attr_length (branch);
2913 if (offset > 0)
2914 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2915 }
5a976006 2916
2917 if (spu_flag_var_tracking)
644459d0 2918 {
5a976006 2919 df_analyze ();
2920 timevar_push (TV_VAR_TRACKING);
2921 variable_tracking_main ();
2922 timevar_pop (TV_VAR_TRACKING);
2923 df_finish_pass (false);
644459d0 2924 }
5a976006 2925
2926 free_bb_for_insn ();
2927
2928 in_spu_reorg = 0;
644459d0 2929}
2930\f
2931
2932/* Insn scheduling routines, primarily for dual issue. */
2933static int
2934spu_sched_issue_rate (void)
2935{
2936 return 2;
2937}
2938
2939static int
5a976006 2940uses_ls_unit(rtx insn)
644459d0 2941{
5a976006 2942 rtx set = single_set (insn);
2943 if (set != 0
2944 && (GET_CODE (SET_DEST (set)) == MEM
2945 || GET_CODE (SET_SRC (set)) == MEM))
2946 return 1;
2947 return 0;
644459d0 2948}
2949
2950static int
2951get_pipe (rtx insn)
2952{
2953 enum attr_type t;
2954 /* Handle inline asm */
2955 if (INSN_CODE (insn) == -1)
2956 return -1;
2957 t = get_attr_type (insn);
2958 switch (t)
2959 {
2960 case TYPE_CONVERT:
2961 return -2;
2962 case TYPE_MULTI0:
2963 return -1;
2964
2965 case TYPE_FX2:
2966 case TYPE_FX3:
2967 case TYPE_SPR:
2968 case TYPE_NOP:
2969 case TYPE_FXB:
2970 case TYPE_FPD:
2971 case TYPE_FP6:
2972 case TYPE_FP7:
644459d0 2973 return 0;
2974
2975 case TYPE_LNOP:
2976 case TYPE_SHUF:
2977 case TYPE_LOAD:
2978 case TYPE_STORE:
2979 case TYPE_BR:
2980 case TYPE_MULTI1:
2981 case TYPE_HBR:
5a976006 2982 case TYPE_IPREFETCH:
644459d0 2983 return 1;
2984 default:
2985 abort ();
2986 }
2987}
2988
5a976006 2989
2990/* haifa-sched.c has a static variable that keeps track of the current
2991 cycle. It is passed to spu_sched_reorder, and we record it here for
2992 use by spu_sched_variable_issue. It won't be accurate if the
 2993   scheduler updates its clock_var between the two calls.  */
2994static int clock_var;
2995
2996/* This is used to keep track of insn alignment. Set to 0 at the
2997 beginning of each block and increased by the "length" attr of each
2998 insn scheduled. */
2999static int spu_sched_length;
3000
3001/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3002 ready list appropriately in spu_sched_reorder(). */
3003static int pipe0_clock;
3004static int pipe1_clock;
3005
3006static int prev_clock_var;
3007
3008static int prev_priority;
3009
3010/* The SPU needs to load the next ilb sometime during the execution of
3011 the previous ilb. There is a potential conflict if every cycle has a
3012 load or store. To avoid the conflict we make sure the load/store
3013 unit is free for at least one cycle during the execution of insns in
3014 the previous ilb. */
3015static int spu_ls_first;
3016static int prev_ls_clock;
3017
3018static void
3019spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3020 int max_ready ATTRIBUTE_UNUSED)
3021{
3022 spu_sched_length = 0;
3023}
3024
3025static void
3026spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3027 int max_ready ATTRIBUTE_UNUSED)
3028{
3029 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3030 {
3031 /* When any block might be at least 8-byte aligned, assume they
3032 will all be at least 8-byte aligned to make sure dual issue
3033 works out correctly. */
3034 spu_sched_length = 0;
3035 }
3036 spu_ls_first = INT_MAX;
3037 clock_var = -1;
3038 prev_ls_clock = -1;
3039 pipe0_clock = -1;
3040 pipe1_clock = -1;
3041 prev_clock_var = -1;
3042 prev_priority = -1;
3043}
3044
644459d0 3045static int
5a976006 3046spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3047 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3048{
5a976006 3049 int len;
3050 int p;
644459d0 3051 if (GET_CODE (PATTERN (insn)) == USE
3052 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3053 || (len = get_attr_length (insn)) == 0)
3054 return more;
3055
3056 spu_sched_length += len;
3057
3058 /* Reset on inline asm */
3059 if (INSN_CODE (insn) == -1)
3060 {
3061 spu_ls_first = INT_MAX;
3062 pipe0_clock = -1;
3063 pipe1_clock = -1;
3064 return 0;
3065 }
3066 p = get_pipe (insn);
3067 if (p == 0)
3068 pipe0_clock = clock_var;
3069 else
3070 pipe1_clock = clock_var;
3071
3072 if (in_spu_reorg)
3073 {
3074 if (clock_var - prev_ls_clock > 1
3075 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3076 spu_ls_first = INT_MAX;
3077 if (uses_ls_unit (insn))
3078 {
3079 if (spu_ls_first == INT_MAX)
3080 spu_ls_first = spu_sched_length;
3081 prev_ls_clock = clock_var;
3082 }
3083
3084 /* The scheduler hasn't inserted the nop, but we will later on.
3085 Include those nops in spu_sched_length. */
3086 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3087 spu_sched_length += 4;
3088 prev_clock_var = clock_var;
3089
3090 /* more is -1 when called from spu_sched_reorder for new insns
3091 that don't have INSN_PRIORITY */
3092 if (more >= 0)
3093 prev_priority = INSN_PRIORITY (insn);
3094 }
3095
 3096  /* Always try issuing more insns.  spu_sched_reorder will decide
3097 when the cycle should be advanced. */
3098 return 1;
3099}
3100
3101/* This function is called for both TARGET_SCHED_REORDER and
3102 TARGET_SCHED_REORDER2. */
3103static int
3104spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3105 rtx *ready, int *nreadyp, int clock)
3106{
3107 int i, nready = *nreadyp;
3108 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3109 rtx insn;
3110
3111 clock_var = clock;
3112
3113 if (nready <= 0 || pipe1_clock >= clock)
3114 return 0;
3115
3116 /* Find any rtl insns that don't generate assembly insns and schedule
3117 them first. */
3118 for (i = nready - 1; i >= 0; i--)
3119 {
3120 insn = ready[i];
3121 if (INSN_CODE (insn) == -1
3122 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3123 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3124 {
3125 ready[i] = ready[nready - 1];
3126 ready[nready - 1] = insn;
3127 return 1;
3128 }
3129 }
3130
3131 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3132 for (i = 0; i < nready; i++)
3133 if (INSN_CODE (ready[i]) != -1)
3134 {
3135 insn = ready[i];
3136 switch (get_attr_type (insn))
3137 {
3138 default:
3139 case TYPE_MULTI0:
3140 case TYPE_CONVERT:
3141 case TYPE_FX2:
3142 case TYPE_FX3:
3143 case TYPE_SPR:
3144 case TYPE_NOP:
3145 case TYPE_FXB:
3146 case TYPE_FPD:
3147 case TYPE_FP6:
3148 case TYPE_FP7:
3149 pipe_0 = i;
3150 break;
3151 case TYPE_LOAD:
3152 case TYPE_STORE:
3153 pipe_ls = i;
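	  /* Fall through: loads and stores issue on pipe 1 as well.  */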
3154 case TYPE_LNOP:
3155 case TYPE_SHUF:
3156 case TYPE_BR:
3157 case TYPE_MULTI1:
3158 case TYPE_HBR:
3159 pipe_1 = i;
3160 break;
3161 case TYPE_IPREFETCH:
3162 pipe_hbrp = i;
3163 break;
3164 }
3165 }
3166
3167 /* In the first scheduling phase, schedule loads and stores together
3168 to increase the chance they will get merged during postreload CSE. */
3169 if (!reload_completed && pipe_ls >= 0)
3170 {
3171 insn = ready[pipe_ls];
3172 ready[pipe_ls] = ready[nready - 1];
3173 ready[nready - 1] = insn;
3174 return 1;
3175 }
3176
3177 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3178 if (pipe_hbrp >= 0)
3179 pipe_1 = pipe_hbrp;
3180
3181 /* When we have loads/stores in every cycle of the last 15 insns and
3182 we are about to schedule another load/store, emit an hbrp insn
3183 instead. */
3184 if (in_spu_reorg
3185 && spu_sched_length - spu_ls_first >= 4 * 15
3186 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3187 {
3188 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3189 recog_memoized (insn);
3190 if (pipe0_clock < clock)
3191 PUT_MODE (insn, TImode);
3192 spu_sched_variable_issue (file, verbose, insn, -1);
3193 return 0;
3194 }
3195
3196 /* In general, we want to emit nops to increase dual issue, but dual
3197 issue isn't faster when one of the insns could be scheduled later
 3198     without affecting the critical path.  We look at INSN_PRIORITY to
 3199     make a good guess, but it isn't perfect, so -mdual-nops=n can be
 3200     used to adjust it.  */
3201 if (in_spu_reorg && spu_dual_nops < 10)
3202 {
 3203      /* When we are at an even address and we are not issuing nops to
 3204         improve scheduling, then we need to advance the cycle.  */
3205 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3206 && (spu_dual_nops == 0
3207 || (pipe_1 != -1
3208 && prev_priority >
3209 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3210 return 0;
3211
3212 /* When at an odd address, schedule the highest priority insn
3213 without considering pipeline. */
3214 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3215 && (spu_dual_nops == 0
3216 || (prev_priority >
3217 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3218 return 1;
3219 }
3220
3221
3222 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3223 pipe0 insn in the ready list, schedule it. */
3224 if (pipe0_clock < clock && pipe_0 >= 0)
3225 schedule_i = pipe_0;
3226
3227 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3228 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3229 else
3230 schedule_i = pipe_1;
3231
3232 if (schedule_i > -1)
3233 {
3234 insn = ready[schedule_i];
3235 ready[schedule_i] = ready[nready - 1];
3236 ready[nready - 1] = insn;
3237 return 1;
3238 }
3239 return 0;
644459d0 3240}
3241
3242/* INSN is dependent on DEP_INSN. */
3243static int
5a976006 3244spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3245{
5a976006 3246 rtx set;
3247
3248 /* The blockage pattern is used to prevent instructions from being
3249 moved across it and has no cost. */
3250 if (INSN_CODE (insn) == CODE_FOR_blockage
3251 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3252 return 0;
3253
9d98604b 3254 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3255 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3256 return 0;
3257
3258 /* Make sure hbrps are spread out. */
3259 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3260 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3261 return 8;
3262
3263 /* Make sure hints and hbrps are 2 cycles apart. */
3264 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3265 || INSN_CODE (insn) == CODE_FOR_hbr)
3266 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3267 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3268 return 2;
3269
3270 /* An hbrp has no real dependency on other insns. */
3271 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3272 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3273 return 0;
3274
3275 /* Assuming that it is unlikely an argument register will be used in
3276 the first cycle of the called function, we reduce the cost for
3277 slightly better scheduling of dep_insn. When not hinted, the
3278 mispredicted branch would hide the cost as well. */
3279 if (CALL_P (insn))
3280 {
3281 rtx target = get_branch_target (insn);
3282 if (GET_CODE (target) != REG || !set_of (target, insn))
3283 return cost - 2;
3284 return cost;
3285 }
3286
3287 /* And when returning from a function, let's assume the return values
3288 are completed sooner too. */
3289 if (CALL_P (dep_insn))
644459d0 3290 return cost - 2;
5a976006 3291
 3292  /* Make sure an instruction that loads from the back chain is scheduled
3293 away from the return instruction so a hint is more likely to get
3294 issued. */
3295 if (INSN_CODE (insn) == CODE_FOR__return
3296 && (set = single_set (dep_insn))
3297 && GET_CODE (SET_DEST (set)) == REG
3298 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3299 return 20;
3300
644459d0 3301 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3302 scheduler makes every insn in a block anti-dependent on the final
3303 jump_insn. We adjust here so higher cost insns will get scheduled
3304 earlier. */
5a976006 3305 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3306 return insn_cost (dep_insn) - 3;
5a976006 3307
644459d0 3308 return cost;
3309}
3310\f
3311/* Create a CONST_DOUBLE from a string. */
3312struct rtx_def *
3313spu_float_const (const char *string, enum machine_mode mode)
3314{
3315 REAL_VALUE_TYPE value;
3316 value = REAL_VALUE_ATOF (string, mode);
3317 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3318}
3319
644459d0 3320int
3321spu_constant_address_p (rtx x)
3322{
3323 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3324 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3325 || GET_CODE (x) == HIGH);
3326}
3327
3328static enum spu_immediate
3329which_immediate_load (HOST_WIDE_INT val)
3330{
3331 gcc_assert (val == trunc_int_for_mode (val, SImode));
3332
3333 if (val >= -0x8000 && val <= 0x7fff)
3334 return SPU_IL;
3335 if (val >= 0 && val <= 0x3ffff)
3336 return SPU_ILA;
3337 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3338 return SPU_ILH;
3339 if ((val & 0xffff) == 0)
3340 return SPU_ILHU;
3341
3342 return SPU_NONE;
3343}
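/* Some examples of the classification above: 0x00001234 fits il (SPU_IL),
   0x00012345 fits ila (SPU_ILA), 0x12341234 has equal halfwords and fits
   ilh (SPU_ILH), 0x56780000 has a zero low halfword and fits ilhu
   (SPU_ILHU), while a value such as 0x12345678 returns SPU_NONE and
   needs two instructions.  */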
3344
dea01258 3345/* Return true when OP can be loaded by one of the il instructions, or
 3346   when !epilogue_completed and OP can be loaded using ilhu and iohl.  */
644459d0 3347int
3348immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3349{
3350 if (CONSTANT_P (op))
3351 {
3352 enum immediate_class c = classify_immediate (op, mode);
5df189be 3353 return c == IC_IL1 || c == IC_IL1s
3072d30e 3354 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3355 }
3356 return 0;
3357}
3358
 3359/* Return true if the first SIZE bytes of ARR form a constant that can be
3360 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3361 represent the size and offset of the instruction to use. */
3362static int
3363cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3364{
3365 int cpat, run, i, start;
3366 cpat = 1;
3367 run = 0;
3368 start = -1;
3369 for (i = 0; i < size && cpat; i++)
3370 if (arr[i] != i+16)
3371 {
3372 if (!run)
3373 {
3374 start = i;
3375 if (arr[i] == 3)
3376 run = 1;
3377 else if (arr[i] == 2 && arr[i+1] == 3)
3378 run = 2;
3379 else if (arr[i] == 0)
3380 {
3381 while (arr[i+run] == run && i+run < 16)
3382 run++;
3383 if (run != 4 && run != 8)
3384 cpat = 0;
3385 }
3386 else
3387 cpat = 0;
3388 if ((i & (run-1)) != 0)
3389 cpat = 0;
3390 i += run;
3391 }
3392 else
3393 cpat = 0;
3394 }
b01a6dc3 3395 if (cpat && (run || size < 16))
dea01258 3396 {
3397 if (run == 0)
3398 run = 1;
3399 if (prun)
3400 *prun = run;
3401 if (pstart)
3402 *pstart = start == -1 ? 16-run : start;
3403 return 1;
3404 }
3405 return 0;
3406}
3407
3408/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3409 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3410static enum immediate_class
3411classify_immediate (rtx op, enum machine_mode mode)
644459d0 3412{
3413 HOST_WIDE_INT val;
3414 unsigned char arr[16];
5df189be 3415 int i, j, repeated, fsmbi, repeat;
dea01258 3416
3417 gcc_assert (CONSTANT_P (op));
3418
644459d0 3419 if (GET_MODE (op) != VOIDmode)
3420 mode = GET_MODE (op);
3421
dea01258 3422 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3423 if (!flag_pic
3424 && mode == V4SImode
dea01258 3425 && GET_CODE (op) == CONST_VECTOR
3426 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3427 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3428 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3429 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3430 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3431 op = CONST_VECTOR_ELT (op, 0);
644459d0 3432
dea01258 3433 switch (GET_CODE (op))
3434 {
3435 case SYMBOL_REF:
3436 case LABEL_REF:
3437 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3438
dea01258 3439 case CONST:
0cfc65d4 3440 /* We can never know if the resulting address fits in 18 bits and can be
3441 loaded with ila. For now, assume the address will not overflow if
3442 the displacement is "small" (fits 'K' constraint). */
3443 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3444 {
3445 rtx sym = XEXP (XEXP (op, 0), 0);
3446 rtx cst = XEXP (XEXP (op, 0), 1);
3447
3448 if (GET_CODE (sym) == SYMBOL_REF
3449 && GET_CODE (cst) == CONST_INT
3450 && satisfies_constraint_K (cst))
3451 return IC_IL1s;
3452 }
3453 return IC_IL2s;
644459d0 3454
dea01258 3455 case HIGH:
3456 return IC_IL1s;
3457
3458 case CONST_VECTOR:
3459 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3460 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3461 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3462 return IC_POOL;
3463 /* Fall through. */
3464
3465 case CONST_INT:
3466 case CONST_DOUBLE:
3467 constant_to_array (mode, op, arr);
644459d0 3468
dea01258 3469 /* Check that each 4-byte slot is identical. */
3470 repeated = 1;
3471 for (i = 4; i < 16; i += 4)
3472 for (j = 0; j < 4; j++)
3473 if (arr[j] != arr[i + j])
3474 repeated = 0;
3475
3476 if (repeated)
3477 {
3478 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3479 val = trunc_int_for_mode (val, SImode);
3480
3481 if (which_immediate_load (val) != SPU_NONE)
3482 return IC_IL1;
3483 }
3484
3485 /* Any mode of 2 bytes or smaller can be loaded with an il
3486 instruction. */
3487 gcc_assert (GET_MODE_SIZE (mode) > 2);
3488
3489 fsmbi = 1;
5df189be 3490 repeat = 0;
dea01258 3491 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3492 if (arr[i] != 0 && repeat == 0)
3493 repeat = arr[i];
3494 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3495 fsmbi = 0;
3496 if (fsmbi)
5df189be 3497 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3498
3499 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3500 return IC_CPAT;
3501
3502 if (repeated)
3503 return IC_IL2;
3504
3505 return IC_POOL;
3506 default:
3507 break;
3508 }
3509 gcc_unreachable ();
644459d0 3510}
3511
3512static enum spu_immediate
3513which_logical_immediate (HOST_WIDE_INT val)
3514{
3515 gcc_assert (val == trunc_int_for_mode (val, SImode));
3516
3517 if (val >= -0x200 && val <= 0x1ff)
3518 return SPU_ORI;
3519 if (val >= 0 && val <= 0xffff)
3520 return SPU_IOHL;
3521 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3522 {
3523 val = trunc_int_for_mode (val, HImode);
3524 if (val >= -0x200 && val <= 0x1ff)
3525 return SPU_ORHI;
3526 if ((val & 0xff) == ((val >> 8) & 0xff))
3527 {
3528 val = trunc_int_for_mode (val, QImode);
3529 if (val >= -0x200 && val <= 0x1ff)
3530 return SPU_ORBI;
3531 }
3532 }
3533 return SPU_NONE;
3534}
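/* By way of example for the classification above: 0x000001ff can be
   generated with ori (SPU_ORI), 0x0000ffff with iohl (SPU_IOHL),
   0xfffefffe repeats a 16-bit value and fits orhi (SPU_ORHI), and
   0x05050505 repeats a single byte and fits orbi (SPU_ORBI).  */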
3535
5df189be 3536/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3537 CONST_DOUBLEs. */
3538static int
3539const_vector_immediate_p (rtx x)
3540{
3541 int i;
3542 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3543 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3544 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3545 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3546 return 0;
3547 return 1;
3548}
3549
644459d0 3550int
3551logical_immediate_p (rtx op, enum machine_mode mode)
3552{
3553 HOST_WIDE_INT val;
3554 unsigned char arr[16];
3555 int i, j;
3556
3557 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3558 || GET_CODE (op) == CONST_VECTOR);
3559
5df189be 3560 if (GET_CODE (op) == CONST_VECTOR
3561 && !const_vector_immediate_p (op))
3562 return 0;
3563
644459d0 3564 if (GET_MODE (op) != VOIDmode)
3565 mode = GET_MODE (op);
3566
3567 constant_to_array (mode, op, arr);
3568
3569 /* Check that bytes are repeated. */
3570 for (i = 4; i < 16; i += 4)
3571 for (j = 0; j < 4; j++)
3572 if (arr[j] != arr[i + j])
3573 return 0;
3574
3575 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3576 val = trunc_int_for_mode (val, SImode);
3577
3578 i = which_logical_immediate (val);
3579 return i != SPU_NONE && i != SPU_IOHL;
3580}
3581
3582int
3583iohl_immediate_p (rtx op, enum machine_mode mode)
3584{
3585 HOST_WIDE_INT val;
3586 unsigned char arr[16];
3587 int i, j;
3588
3589 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3590 || GET_CODE (op) == CONST_VECTOR);
3591
5df189be 3592 if (GET_CODE (op) == CONST_VECTOR
3593 && !const_vector_immediate_p (op))
3594 return 0;
3595
644459d0 3596 if (GET_MODE (op) != VOIDmode)
3597 mode = GET_MODE (op);
3598
3599 constant_to_array (mode, op, arr);
3600
3601 /* Check that bytes are repeated. */
3602 for (i = 4; i < 16; i += 4)
3603 for (j = 0; j < 4; j++)
3604 if (arr[j] != arr[i + j])
3605 return 0;
3606
3607 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3608 val = trunc_int_for_mode (val, SImode);
3609
3610 return val >= 0 && val <= 0xffff;
3611}
3612
3613int
3614arith_immediate_p (rtx op, enum machine_mode mode,
3615 HOST_WIDE_INT low, HOST_WIDE_INT high)
3616{
3617 HOST_WIDE_INT val;
3618 unsigned char arr[16];
3619 int bytes, i, j;
3620
3621 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3622 || GET_CODE (op) == CONST_VECTOR);
3623
5df189be 3624 if (GET_CODE (op) == CONST_VECTOR
3625 && !const_vector_immediate_p (op))
3626 return 0;
3627
644459d0 3628 if (GET_MODE (op) != VOIDmode)
3629 mode = GET_MODE (op);
3630
3631 constant_to_array (mode, op, arr);
3632
3633 if (VECTOR_MODE_P (mode))
3634 mode = GET_MODE_INNER (mode);
3635
3636 bytes = GET_MODE_SIZE (mode);
3637 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3638
3639 /* Check that bytes are repeated. */
3640 for (i = bytes; i < 16; i += bytes)
3641 for (j = 0; j < bytes; j++)
3642 if (arr[j] != arr[i + j])
3643 return 0;
3644
3645 val = arr[0];
3646 for (j = 1; j < bytes; j++)
3647 val = (val << 8) | arr[j];
3648
3649 val = trunc_int_for_mode (val, mode);
3650
3651 return val >= low && val <= high;
3652}
3653
56c7bfc2 3654/* TRUE when OP is an immediate and an exact power of 2, and given that
3655 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3656 all entries must be the same. */
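/* For instance, with SFmode the value 2.0f is encoded as 0x40000000: its
   mantissa bits are all zero and (0x40000000 >> 23) - 127 gives an
   exponent of 1, so it is accepted whenever LOW <= 1 <= HIGH, whereas
   3.0f (0x40400000) has nonzero mantissa bits and is rejected.  */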
3657bool
3658exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3659{
3660 enum machine_mode int_mode;
3661 HOST_WIDE_INT val;
3662 unsigned char arr[16];
3663 int bytes, i, j;
3664
3665 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3666 || GET_CODE (op) == CONST_VECTOR);
3667
3668 if (GET_CODE (op) == CONST_VECTOR
3669 && !const_vector_immediate_p (op))
3670 return 0;
3671
3672 if (GET_MODE (op) != VOIDmode)
3673 mode = GET_MODE (op);
3674
3675 constant_to_array (mode, op, arr);
3676
3677 if (VECTOR_MODE_P (mode))
3678 mode = GET_MODE_INNER (mode);
3679
3680 bytes = GET_MODE_SIZE (mode);
3681 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3682
3683 /* Check that bytes are repeated. */
3684 for (i = bytes; i < 16; i += bytes)
3685 for (j = 0; j < bytes; j++)
3686 if (arr[j] != arr[i + j])
3687 return 0;
3688
3689 val = arr[0];
3690 for (j = 1; j < bytes; j++)
3691 val = (val << 8) | arr[j];
3692
3693 val = trunc_int_for_mode (val, int_mode);
3694
 3695 /* Currently, we only handle SFmode. */
3696 gcc_assert (mode == SFmode);
3697 if (mode == SFmode)
3698 {
3699 int exp = (val >> 23) - 127;
3700 return val > 0 && (val & 0x007fffff) == 0
3701 && exp >= low && exp <= high;
3702 }
3703 return FALSE;
3704}
3705
6cf5579e 3706/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3707
3708static int
3709ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3710{
3711 rtx x = *px;
3712 tree decl;
3713
3714 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3715 {
3716 rtx plus = XEXP (x, 0);
3717 rtx op0 = XEXP (plus, 0);
3718 rtx op1 = XEXP (plus, 1);
3719 if (GET_CODE (op1) == CONST_INT)
3720 x = op0;
3721 }
3722
3723 return (GET_CODE (x) == SYMBOL_REF
3724 && (decl = SYMBOL_REF_DECL (x)) != 0
3725 && TREE_CODE (decl) == VAR_DECL
3726 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3727}
3728
644459d0 3729/* We accept:
5b865faf 3730 - any 32-bit constant (SImode, SFmode)
644459d0 3731 - any constant that can be generated with fsmbi (any mode)
5b865faf 3732 - a 64-bit constant where the high and low bits are identical
644459d0 3733 (DImode, DFmode)
5b865faf 3734 - a 128-bit constant where the four 32-bit words match. */
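/* For example, a V4SImode vector whose four elements are the same SYMBOL_REF
   is accepted below when not compiling PIC, while any constant referring to
   an __ea qualified variable is rejected here and forced to the constant
   pool instead.  */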
644459d0 3735int
3736spu_legitimate_constant_p (rtx x)
3737{
5df189be 3738 if (GET_CODE (x) == HIGH)
3739 x = XEXP (x, 0);
6cf5579e 3740
3741 /* Reject any __ea qualified reference. These can't appear in
3742 instructions but must be forced to the constant pool. */
3743 if (for_each_rtx (&x, ea_symbol_ref, 0))
3744 return 0;
3745
644459d0 3746 /* V4SI with all identical symbols is valid. */
5df189be 3747 if (!flag_pic
3748 && GET_MODE (x) == V4SImode
644459d0 3749 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3750 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3751 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3752 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3753 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3754 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3755
5df189be 3756 if (GET_CODE (x) == CONST_VECTOR
3757 && !const_vector_immediate_p (x))
3758 return 0;
644459d0 3759 return 1;
3760}
3761
 3762/* Valid addresses are:
3763 - symbol_ref, label_ref, const
3764 - reg
9d98604b 3765 - reg + const_int, where const_int is 16 byte aligned
644459d0 3766 - reg + reg, alignment doesn't matter
3767 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3768 ignore the 4 least significant bits of the const. We only care about
3769 16 byte modes because the expand phase will change all smaller MEM
3770 references to TImode. */
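/* E.g. for a 16-byte access, (reg) and (plus (reg) (const_int 32)) are
   accepted, but (plus (reg) (const_int 4)) is rejected because lqd/stqd
   would silently drop the low bits of the offset.  */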
3771static bool
3772spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3773 rtx x, bool reg_ok_strict)
644459d0 3774{
9d98604b 3775 int aligned = GET_MODE_SIZE (mode) >= 16;
3776 if (aligned
3777 && GET_CODE (x) == AND
644459d0 3778 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3779 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3780 x = XEXP (x, 0);
3781 switch (GET_CODE (x))
3782 {
644459d0 3783 case LABEL_REF:
6cf5579e 3784 return !TARGET_LARGE_MEM;
3785
9d98604b 3786 case SYMBOL_REF:
644459d0 3787 case CONST:
6cf5579e 3788 /* Keep __ea references until reload so that spu_expand_mov can see them
3789 in MEMs. */
3790 if (ea_symbol_ref (&x, 0))
3791 return !reload_in_progress && !reload_completed;
9d98604b 3792 return !TARGET_LARGE_MEM;
644459d0 3793
3794 case CONST_INT:
3795 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3796
3797 case SUBREG:
3798 x = XEXP (x, 0);
9d98604b 3799 if (!REG_P (x))
3800 return 0;
644459d0 3801
3802 case REG:
3803 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3804
3805 case PLUS:
3806 case LO_SUM:
3807 {
3808 rtx op0 = XEXP (x, 0);
3809 rtx op1 = XEXP (x, 1);
3810 if (GET_CODE (op0) == SUBREG)
3811 op0 = XEXP (op0, 0);
3812 if (GET_CODE (op1) == SUBREG)
3813 op1 = XEXP (op1, 0);
644459d0 3814 if (GET_CODE (op0) == REG
3815 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3816 && GET_CODE (op1) == CONST_INT
3817 && INTVAL (op1) >= -0x2000
3818 && INTVAL (op1) <= 0x1fff
9d98604b 3819 && (!aligned || (INTVAL (op1) & 15) == 0))
3820 return TRUE;
644459d0 3821 if (GET_CODE (op0) == REG
3822 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3823 && GET_CODE (op1) == REG
3824 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3825 return TRUE;
644459d0 3826 }
3827 break;
3828
3829 default:
3830 break;
3831 }
9d98604b 3832 return FALSE;
644459d0 3833}
3834
6cf5579e 3835/* Like spu_legitimate_address_p, except with named addresses. */
3836static bool
3837spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3838 bool reg_ok_strict, addr_space_t as)
3839{
3840 if (as == ADDR_SPACE_EA)
3841 return (REG_P (x) && (GET_MODE (x) == EAmode));
3842
3843 else if (as != ADDR_SPACE_GENERIC)
3844 gcc_unreachable ();
3845
3846 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3847}
3848
644459d0 3849/* When the address is reg + const_int, force the const_int into a
fa7637bd 3850 register. */
644459d0 3851rtx
3852spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3853 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3854{
3855 rtx op0, op1;
3856 /* Make sure both operands are registers. */
3857 if (GET_CODE (x) == PLUS)
3858 {
3859 op0 = XEXP (x, 0);
3860 op1 = XEXP (x, 1);
3861 if (ALIGNED_SYMBOL_REF_P (op0))
3862 {
3863 op0 = force_reg (Pmode, op0);
3864 mark_reg_pointer (op0, 128);
3865 }
3866 else if (GET_CODE (op0) != REG)
3867 op0 = force_reg (Pmode, op0);
3868 if (ALIGNED_SYMBOL_REF_P (op1))
3869 {
3870 op1 = force_reg (Pmode, op1);
3871 mark_reg_pointer (op1, 128);
3872 }
3873 else if (GET_CODE (op1) != REG)
3874 op1 = force_reg (Pmode, op1);
3875 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3876 }
41e3a0c7 3877 return x;
644459d0 3878}
3879
6cf5579e 3880/* Like spu_legitimize_address, except with named address support. */
3881static rtx
3882spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3883 addr_space_t as)
3884{
3885 if (as != ADDR_SPACE_GENERIC)
3886 return x;
3887
3888 return spu_legitimize_address (x, oldx, mode);
3889}
3890
644459d0 3891/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3892 struct attribute_spec.handler. */
3893static tree
3894spu_handle_fndecl_attribute (tree * node,
3895 tree name,
3896 tree args ATTRIBUTE_UNUSED,
3897 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3898{
3899 if (TREE_CODE (*node) != FUNCTION_DECL)
3900 {
67a779df 3901 warning (0, "%qE attribute only applies to functions",
3902 name);
644459d0 3903 *no_add_attrs = true;
3904 }
3905
3906 return NULL_TREE;
3907}
3908
3909/* Handle the "vector" attribute. */
3910static tree
3911spu_handle_vector_attribute (tree * node, tree name,
3912 tree args ATTRIBUTE_UNUSED,
3913 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3914{
3915 tree type = *node, result = NULL_TREE;
3916 enum machine_mode mode;
3917 int unsigned_p;
3918
3919 while (POINTER_TYPE_P (type)
3920 || TREE_CODE (type) == FUNCTION_TYPE
3921 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3922 type = TREE_TYPE (type);
3923
3924 mode = TYPE_MODE (type);
3925
3926 unsigned_p = TYPE_UNSIGNED (type);
3927 switch (mode)
3928 {
3929 case DImode:
3930 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3931 break;
3932 case SImode:
3933 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3934 break;
3935 case HImode:
3936 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3937 break;
3938 case QImode:
3939 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3940 break;
3941 case SFmode:
3942 result = V4SF_type_node;
3943 break;
3944 case DFmode:
3945 result = V2DF_type_node;
3946 break;
3947 default:
3948 break;
3949 }
3950
3951 /* Propagate qualifiers attached to the element type
3952 onto the vector type. */
3953 if (result && result != type && TYPE_QUALS (type))
3954 result = build_qualified_type (result, TYPE_QUALS (type));
3955
3956 *no_add_attrs = true; /* No need to hang on to the attribute. */
3957
3958 if (!result)
67a779df 3959 warning (0, "%qE attribute ignored", name);
644459d0 3960 else
d991e6e8 3961 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3962
3963 return NULL_TREE;
3964}
3965
f2b32076 3966/* Return nonzero if FUNC is a naked function. */
644459d0 3967static int
3968spu_naked_function_p (tree func)
3969{
3970 tree a;
3971
3972 if (TREE_CODE (func) != FUNCTION_DECL)
3973 abort ();
3974
3975 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3976 return a != NULL_TREE;
3977}
3978
3979int
3980spu_initial_elimination_offset (int from, int to)
3981{
3982 int saved_regs_size = spu_saved_regs_size ();
3983 int sp_offset = 0;
abe32cce 3984 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3985 || get_frame_size () || saved_regs_size)
3986 sp_offset = STACK_POINTER_OFFSET;
3987 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3988 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3989 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3990 return get_frame_size ();
644459d0 3991 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3992 return sp_offset + crtl->outgoing_args_size
644459d0 3993 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3994 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3995 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3996 else
3997 gcc_unreachable ();
644459d0 3998}
3999
4000rtx
fb80456a 4001spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 4002{
4003 enum machine_mode mode = TYPE_MODE (type);
4004 int byte_size = ((mode == BLKmode)
4005 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4006
4007 /* Make sure small structs are left justified in a register. */
4008 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4009 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4010 {
4011 enum machine_mode smode;
4012 rtvec v;
4013 int i;
4014 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4015 int n = byte_size / UNITS_PER_WORD;
4016 v = rtvec_alloc (nregs);
4017 for (i = 0; i < n; i++)
4018 {
4019 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4020 gen_rtx_REG (TImode,
4021 FIRST_RETURN_REGNUM
4022 + i),
4023 GEN_INT (UNITS_PER_WORD * i));
4024 byte_size -= UNITS_PER_WORD;
4025 }
4026
4027 if (n < nregs)
4028 {
4029 if (byte_size < 4)
4030 byte_size = 4;
4031 smode =
4032 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4033 RTVEC_ELT (v, n) =
4034 gen_rtx_EXPR_LIST (VOIDmode,
4035 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4036 GEN_INT (UNITS_PER_WORD * n));
4037 }
4038 return gen_rtx_PARALLEL (mode, v);
4039 }
4040 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4041}
4042
ee9034d4 4043static rtx
4044spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4045 enum machine_mode mode,
ee9034d4 4046 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4047{
4048 int byte_size;
4049
a08c5dd0 4050 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4051 return 0;
4052
4053 byte_size = ((mode == BLKmode)
4054 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4055
4056 /* The ABI does not allow parameters to be passed partially in
 4057 a register and partially on the stack. */
a08c5dd0 4058 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4059 return 0;
4060
4061 /* Make sure small structs are left justified in a register. */
4062 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4063 && byte_size < UNITS_PER_WORD && byte_size > 0)
4064 {
4065 enum machine_mode smode;
4066 rtx gr_reg;
4067 if (byte_size < 4)
4068 byte_size = 4;
4069 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4070 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4071 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4072 const0_rtx);
4073 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4074 }
4075 else
a08c5dd0 4076 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4077}
4078
ee9034d4 4079static void
4080spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4081 const_tree type, bool named ATTRIBUTE_UNUSED)
4082{
4083 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4084 ? 1
4085 : mode == BLKmode
4086 ? ((int_size_in_bytes (type) + 15) / 16)
4087 : mode == VOIDmode
4088 ? 1
4089 : HARD_REGNO_NREGS (cum, mode));
4090}
4091
644459d0 4092/* Variable sized types are passed by reference. */
4093static bool
4094spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4095 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4096 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4097{
4098 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4099}
4100\f
4101
4102/* Var args. */
4103
4104/* Create and return the va_list datatype.
4105
4106 On SPU, va_list is an array type equivalent to
4107
4108 typedef struct __va_list_tag
4109 {
4110 void *__args __attribute__((__aligned(16)));
4111 void *__skip __attribute__((__aligned(16)));
4112
4113 } va_list[1];
4114
fa7637bd 4115 where __args points to the arg that will be returned by the next
644459d0 4116 va_arg(), and __skip points to the previous stack frame such that
4117 when __args == __skip we should advance __args by 32 bytes. */
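/* Both fields are kept 16-byte aligned below, so the structure occupies two
   quadwords; spu_va_start fills them in and spu_gimplify_va_arg_expr walks
   __args forward as arguments are consumed.  */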
4118static tree
4119spu_build_builtin_va_list (void)
4120{
4121 tree f_args, f_skip, record, type_decl;
4122 bool owp;
4123
4124 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4125
4126 type_decl =
54e46243 4127 build_decl (BUILTINS_LOCATION,
4128 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4129
54e46243 4130 f_args = build_decl (BUILTINS_LOCATION,
4131 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4132 f_skip = build_decl (BUILTINS_LOCATION,
4133 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4134
4135 DECL_FIELD_CONTEXT (f_args) = record;
4136 DECL_ALIGN (f_args) = 128;
4137 DECL_USER_ALIGN (f_args) = 1;
4138
4139 DECL_FIELD_CONTEXT (f_skip) = record;
4140 DECL_ALIGN (f_skip) = 128;
4141 DECL_USER_ALIGN (f_skip) = 1;
4142
bc907808 4143 TYPE_STUB_DECL (record) = type_decl;
644459d0 4144 TYPE_NAME (record) = type_decl;
4145 TYPE_FIELDS (record) = f_args;
1767a056 4146 DECL_CHAIN (f_args) = f_skip;
644459d0 4147
4148 /* We know this is being padded and we want it too. It is an internal
4149 type so hide the warnings from the user. */
4150 owp = warn_padded;
4151 warn_padded = false;
4152
4153 layout_type (record);
4154
4155 warn_padded = owp;
4156
4157 /* The correct type is an array type of one element. */
4158 return build_array_type (record, build_index_type (size_zero_node));
4159}
4160
4161/* Implement va_start by filling the va_list structure VALIST.
4162 NEXTARG points to the first anonymous stack argument.
4163
4164 The following global variables are used to initialize
4165 the va_list structure:
4166
abe32cce 4167 crtl->args.info;
644459d0 4168 the CUMULATIVE_ARGS for this function
4169
abe32cce 4170 crtl->args.arg_offset_rtx:
644459d0 4171 holds the offset of the first anonymous stack argument
4172 (relative to the virtual arg pointer). */
4173
8a58ed0a 4174static void
644459d0 4175spu_va_start (tree valist, rtx nextarg)
4176{
4177 tree f_args, f_skip;
4178 tree args, skip, t;
4179
4180 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4181 f_skip = DECL_CHAIN (f_args);
644459d0 4182
170efcd4 4183 valist = build_simple_mem_ref (valist);
644459d0 4184 args =
4185 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4186 skip =
4187 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4188
4189 /* Find the __args area. */
4190 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4191 if (crtl->args.pretend_args_size > 0)
0de36bdb 4192 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4193 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4194 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4195 TREE_SIDE_EFFECTS (t) = 1;
4196 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4197
4198 /* Find the __skip area. */
4199 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4200 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4201 size_int (crtl->args.pretend_args_size
0de36bdb 4202 - STACK_POINTER_OFFSET));
75a70cf9 4203 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4204 TREE_SIDE_EFFECTS (t) = 1;
4205 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4206}
4207
4208/* Gimplify va_arg by updating the va_list structure
4209 VALIST as required to retrieve an argument of type
4210 TYPE, and returning that argument.
4211
4212 ret = va_arg(VALIST, TYPE);
4213
4214 generates code equivalent to:
4215
4216 paddedsize = (sizeof(TYPE) + 15) & -16;
4217 if (VALIST.__args + paddedsize > VALIST.__skip
4218 && VALIST.__args <= VALIST.__skip)
4219 addr = VALIST.__skip + 32;
4220 else
4221 addr = VALIST.__args;
4222 VALIST.__args = addr + paddedsize;
4223 ret = *(TYPE *)addr;
4224 */
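/* For example, with TYPE == int the pseudocode above gives paddedsize == 16,
   so successive va_arg calls advance __args in 16-byte steps; only when the
   padded argument would cross __skip does addr jump to __skip + 32.  */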
4225static tree
75a70cf9 4226spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4227 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4228{
4229 tree f_args, f_skip;
4230 tree args, skip;
4231 HOST_WIDE_INT size, rsize;
4232 tree paddedsize, addr, tmp;
4233 bool pass_by_reference_p;
4234
4235 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4236 f_skip = DECL_CHAIN (f_args);
644459d0 4237
182cf5a9 4238 valist = build_simple_mem_ref (valist);
644459d0 4239 args =
4240 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4241 skip =
4242 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4243
4244 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4245
4246 /* if an object is dynamically sized, a pointer to it is passed
4247 instead of the object itself. */
4248 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4249 false);
4250 if (pass_by_reference_p)
4251 type = build_pointer_type (type);
4252 size = int_size_in_bytes (type);
4253 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4254
4255 /* build conditional expression to calculate addr. The expression
4256 will be gimplified later. */
0de36bdb 4257 paddedsize = size_int (rsize);
75a70cf9 4258 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4259 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4260 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4261 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4262 unshare_expr (skip)));
644459d0 4263
4264 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4265 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4266 size_int (32)), unshare_expr (args));
644459d0 4267
75a70cf9 4268 gimplify_assign (addr, tmp, pre_p);
644459d0 4269
4270 /* update VALIST.__args */
0de36bdb 4271 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4272 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4273
8115f0af 4274 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4275 addr);
644459d0 4276
4277 if (pass_by_reference_p)
4278 addr = build_va_arg_indirect_ref (addr);
4279
4280 return build_va_arg_indirect_ref (addr);
4281}
4282
4283/* Save parameter registers starting with the register that corresponds
 4284 to the first unnamed parameter. If the first unnamed parameter is
4285 in the stack then save no registers. Set pretend_args_size to the
4286 amount of space needed to save the registers. */
4287void
4288spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4289 tree type, int *pretend_size, int no_rtl)
4290{
4291 if (!no_rtl)
4292 {
4293 rtx tmp;
4294 int regno;
4295 int offset;
4296 int ncum = *cum;
4297
 4298 /* cum currently points to the last named argument; we want to
4299 start at the next argument. */
ee9034d4 4300 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4301
4302 offset = -STACK_POINTER_OFFSET;
4303 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4304 {
4305 tmp = gen_frame_mem (V4SImode,
4306 plus_constant (virtual_incoming_args_rtx,
4307 offset));
4308 emit_move_insn (tmp,
4309 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4310 offset += 16;
4311 }
4312 *pretend_size = offset + STACK_POINTER_OFFSET;
4313 }
4314}
4315\f
b2d7ede1 4316static void
644459d0 4317spu_conditional_register_usage (void)
4318{
4319 if (flag_pic)
4320 {
4321 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4322 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4323 }
644459d0 4324}
4325
9d98604b 4326/* This is called any time we inspect the alignment of a register for
4327 addresses. */
644459d0 4328static int
9d98604b 4329reg_aligned_for_addr (rtx x)
644459d0 4330{
9d98604b 4331 int regno =
4332 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4333 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4334}
4335
69ced2d6 4336/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4337 into its SYMBOL_REF_FLAGS. */
4338static void
4339spu_encode_section_info (tree decl, rtx rtl, int first)
4340{
4341 default_encode_section_info (decl, rtl, first);
4342
4343 /* If a variable has a forced alignment to < 16 bytes, mark it with
4344 SYMBOL_FLAG_ALIGN1. */
4345 if (TREE_CODE (decl) == VAR_DECL
4346 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4347 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4348}
4349
644459d0 4350/* Return TRUE if we are certain the mem refers to a complete object
4351 which is both 16-byte aligned and padded to a 16-byte boundary. This
4352 would make it safe to store with a single instruction.
4353 We guarantee the alignment and padding for static objects by aligning
4354 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4355 FIXME: We currently cannot guarantee this for objects on the stack
4356 because assign_parm_setup_stack calls assign_stack_local with the
4357 alignment of the parameter mode and in that case the alignment never
4358 gets adjusted by LOCAL_ALIGNMENT. */
4359static int
4360store_with_one_insn_p (rtx mem)
4361{
9d98604b 4362 enum machine_mode mode = GET_MODE (mem);
644459d0 4363 rtx addr = XEXP (mem, 0);
9d98604b 4364 if (mode == BLKmode)
644459d0 4365 return 0;
9d98604b 4366 if (GET_MODE_SIZE (mode) >= 16)
4367 return 1;
644459d0 4368 /* Only static objects. */
4369 if (GET_CODE (addr) == SYMBOL_REF)
4370 {
4371 /* We use the associated declaration to make sure the access is
fa7637bd 4372 referring to the whole object.
644459d0 4373 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4374 if it is necessary. Will there be cases where one exists, and
4375 the other does not? Will there be cases where both exist, but
4376 have different types? */
4377 tree decl = MEM_EXPR (mem);
4378 if (decl
4379 && TREE_CODE (decl) == VAR_DECL
4380 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4381 return 1;
4382 decl = SYMBOL_REF_DECL (addr);
4383 if (decl
4384 && TREE_CODE (decl) == VAR_DECL
4385 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4386 return 1;
4387 }
4388 return 0;
4389}
4390
9d98604b 4391/* Return 1 when the address is not valid for a simple load and store as
4392 required by the '_mov*' patterns. We could make this less strict
 4393 for loads, but we prefer MEMs to look the same so they are more
4394 likely to be merged. */
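/* For example, a QImode access always needs the split (its size is below
   4 bytes), while an SImode store to a padded, 16-byte aligned static
   object can be done with a single instruction and is left alone.  */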
4395static int
4396address_needs_split (rtx mem)
4397{
4398 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4399 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4400 || !(store_with_one_insn_p (mem)
4401 || mem_is_padded_component_ref (mem))))
4402 return 1;
4403
4404 return 0;
4405}
4406
6cf5579e 4407static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4408static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4409static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4410
4411/* MEM is known to be an __ea qualified memory access. Emit a call to
 4412 fetch the PPU memory to local store, and set DATA_ADDR to its address
 4413 in local store. */
4414
4415static void
4416ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4417{
4418 if (is_store)
4419 {
4420 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4421 if (!cache_fetch_dirty)
4422 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4423 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4424 2, ea_addr, EAmode, ndirty, SImode);
4425 }
4426 else
4427 {
4428 if (!cache_fetch)
4429 cache_fetch = init_one_libfunc ("__cache_fetch");
4430 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4431 1, ea_addr, EAmode);
4432 }
4433}
4434
4435/* Like ea_load_store, but do the cache tag comparison and, for stores,
4436 dirty bit marking, inline.
4437
4438 The cache control data structure is an array of
4439
4440 struct __cache_tag_array
4441 {
4442 unsigned int tag_lo[4];
4443 unsigned int tag_hi[4];
4444 void *data_pointer[4];
4445 int reserved[4];
4446 vector unsigned short dirty_bits[4];
4447 } */
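/* In other words (as the code below implements it): each cache line holds
   128 bytes of __ea data, the set index is taken directly from the address,
   the four tags of a set are compared in parallel with a single vector
   compare, and a miss falls back to the __cache_fetch library call.  */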
4448
4449static void
4450ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4451{
4452 rtx ea_addr_si;
4453 HOST_WIDE_INT v;
4454 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4455 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4456 rtx index_mask = gen_reg_rtx (SImode);
4457 rtx tag_arr = gen_reg_rtx (Pmode);
4458 rtx splat_mask = gen_reg_rtx (TImode);
4459 rtx splat = gen_reg_rtx (V4SImode);
4460 rtx splat_hi = NULL_RTX;
4461 rtx tag_index = gen_reg_rtx (Pmode);
4462 rtx block_off = gen_reg_rtx (SImode);
4463 rtx tag_addr = gen_reg_rtx (Pmode);
4464 rtx tag = gen_reg_rtx (V4SImode);
4465 rtx cache_tag = gen_reg_rtx (V4SImode);
4466 rtx cache_tag_hi = NULL_RTX;
4467 rtx cache_ptrs = gen_reg_rtx (TImode);
4468 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4469 rtx tag_equal = gen_reg_rtx (V4SImode);
4470 rtx tag_equal_hi = NULL_RTX;
4471 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4472 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4473 rtx eq_index = gen_reg_rtx (SImode);
4474 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4475
4476 if (spu_ea_model != 32)
4477 {
4478 splat_hi = gen_reg_rtx (V4SImode);
4479 cache_tag_hi = gen_reg_rtx (V4SImode);
4480 tag_equal_hi = gen_reg_rtx (V4SImode);
4481 }
4482
4483 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4484 emit_move_insn (tag_arr, tag_arr_sym);
4485 v = 0x0001020300010203LL;
4486 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4487 ea_addr_si = ea_addr;
4488 if (spu_ea_model != 32)
4489 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4490
4491 /* tag_index = ea_addr & (tag_array_size - 128) */
4492 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4493
4494 /* splat ea_addr to all 4 slots. */
4495 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4496 /* Similarly for high 32 bits of ea_addr. */
4497 if (spu_ea_model != 32)
4498 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4499
4500 /* block_off = ea_addr & 127 */
4501 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4502
4503 /* tag_addr = tag_arr + tag_index */
4504 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4505
4506 /* Read cache tags. */
4507 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4508 if (spu_ea_model != 32)
4509 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4510 plus_constant (tag_addr, 16)));
4511
4512 /* tag = ea_addr & -128 */
4513 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4514
4515 /* Read all four cache data pointers. */
4516 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4517 plus_constant (tag_addr, 32)));
4518
4519 /* Compare tags. */
4520 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4521 if (spu_ea_model != 32)
4522 {
4523 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4524 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4525 }
4526
4527 /* At most one of the tags compare equal, so tag_equal has one
4528 32-bit slot set to all 1's, with the other slots all zero.
4529 gbb picks off low bit from each byte in the 128-bit registers,
4530 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4531 we have a hit. */
4532 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4533 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4534
4535 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4536 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4537
4538 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4539 (rotating eq_index mod 16 bytes). */
4540 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4541 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4542
4543 /* Add block offset to form final data address. */
4544 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4545
4546 /* Check that we did hit. */
4547 hit_label = gen_label_rtx ();
4548 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4549 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4550 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4551 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4552 hit_ref, pc_rtx)));
4553 /* Say that this branch is very likely to happen. */
4554 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4555 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4556
4557 ea_load_store (mem, is_store, ea_addr, data_addr);
4558 cont_label = gen_label_rtx ();
4559 emit_jump_insn (gen_jump (cont_label));
4560 emit_barrier ();
4561
4562 emit_label (hit_label);
4563
4564 if (is_store)
4565 {
4566 HOST_WIDE_INT v_hi;
4567 rtx dirty_bits = gen_reg_rtx (TImode);
4568 rtx dirty_off = gen_reg_rtx (SImode);
4569 rtx dirty_128 = gen_reg_rtx (TImode);
4570 rtx neg_block_off = gen_reg_rtx (SImode);
4571
4572 /* Set up mask with one dirty bit per byte of the mem we are
4573 writing, starting from top bit. */
4574 v_hi = v = -1;
4575 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4576 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4577 {
4578 v_hi = v;
4579 v = 0;
4580 }
4581 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4582
4583 /* Form index into cache dirty_bits. eq_index is one of
4584 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4585 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4586 offset to each of the four dirty_bits elements. */
4587 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4588
4589 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4590
4591 /* Rotate bit mask to proper bit. */
4592 emit_insn (gen_negsi2 (neg_block_off, block_off));
4593 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4594 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4595
4596 /* Or in the new dirty bits. */
4597 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4598
4599 /* Store. */
4600 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4601 }
4602
4603 emit_label (cont_label);
4604}
4605
4606static rtx
4607expand_ea_mem (rtx mem, bool is_store)
4608{
4609 rtx ea_addr;
4610 rtx data_addr = gen_reg_rtx (Pmode);
4611 rtx new_mem;
4612
4613 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4614 if (optimize_size || optimize == 0)
4615 ea_load_store (mem, is_store, ea_addr, data_addr);
4616 else
4617 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4618
4619 if (ea_alias_set == -1)
4620 ea_alias_set = new_alias_set ();
4621
4622 /* We generate a new MEM RTX to refer to the copy of the data
4623 in the cache. We do not copy memory attributes (except the
4624 alignment) from the original MEM, as they may no longer apply
4625 to the cache copy. */
4626 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4627 set_mem_alias_set (new_mem, ea_alias_set);
4628 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4629
4630 return new_mem;
4631}
4632
644459d0 4633int
4634spu_expand_mov (rtx * ops, enum machine_mode mode)
4635{
4636 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4637 {
4638 /* Perform the move in the destination SUBREG's inner mode. */
4639 ops[0] = SUBREG_REG (ops[0]);
4640 mode = GET_MODE (ops[0]);
4641 ops[1] = gen_lowpart_common (mode, ops[1]);
4642 gcc_assert (ops[1]);
4643 }
644459d0 4644
4645 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4646 {
4647 rtx from = SUBREG_REG (ops[1]);
8d72495d 4648 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4649
4650 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4651 && GET_MODE_CLASS (imode) == MODE_INT
4652 && subreg_lowpart_p (ops[1]));
4653
4654 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4655 imode = SImode;
4656 if (imode != GET_MODE (from))
4657 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4658
4659 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4660 {
d6bf3b14 4661 enum insn_code icode = convert_optab_handler (trunc_optab,
4662 mode, imode);
644459d0 4663 emit_insn (GEN_FCN (icode) (ops[0], from));
4664 }
4665 else
4666 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4667 return 1;
4668 }
4669
4670 /* At least one of the operands needs to be a register. */
4671 if ((reload_in_progress | reload_completed) == 0
4672 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4673 {
4674 rtx temp = force_reg (mode, ops[1]);
4675 emit_move_insn (ops[0], temp);
4676 return 1;
4677 }
4678 if (reload_in_progress || reload_completed)
4679 {
dea01258 4680 if (CONSTANT_P (ops[1]))
4681 return spu_split_immediate (ops);
644459d0 4682 return 0;
4683 }
9d98604b 4684
4685 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4686 extend them. */
4687 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4688 {
9d98604b 4689 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4690 if (val != INTVAL (ops[1]))
644459d0 4691 {
9d98604b 4692 emit_move_insn (ops[0], GEN_INT (val));
4693 return 1;
644459d0 4694 }
4695 }
9d98604b 4696 if (MEM_P (ops[0]))
6cf5579e 4697 {
4698 if (MEM_ADDR_SPACE (ops[0]))
4699 ops[0] = expand_ea_mem (ops[0], true);
4700 return spu_split_store (ops);
4701 }
9d98604b 4702 if (MEM_P (ops[1]))
6cf5579e 4703 {
4704 if (MEM_ADDR_SPACE (ops[1]))
4705 ops[1] = expand_ea_mem (ops[1], false);
4706 return spu_split_load (ops);
4707 }
9d98604b 4708
644459d0 4709 return 0;
4710}
4711
9d98604b 4712static void
4713spu_convert_move (rtx dst, rtx src)
644459d0 4714{
9d98604b 4715 enum machine_mode mode = GET_MODE (dst);
4716 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4717 rtx reg;
4718 gcc_assert (GET_MODE (src) == TImode);
4719 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4720 emit_insn (gen_rtx_SET (VOIDmode, reg,
4721 gen_rtx_TRUNCATE (int_mode,
4722 gen_rtx_LSHIFTRT (TImode, src,
4723 GEN_INT (int_mode == DImode ? 64 : 96)))));
4724 if (int_mode != mode)
4725 {
4726 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4727 emit_move_insn (dst, reg);
4728 }
4729}
644459d0 4730
9d98604b 4731/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4732 the address from SRC and SRC+16. Return a REG or CONST_INT that
4733 specifies how many bytes to rotate the loaded registers, plus any
4734 extra from EXTRA_ROTQBY. The address and rotate amounts are
4735 normalized to improve merging of loads and rotate computations. */
4736static rtx
4737spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4738{
4739 rtx addr = XEXP (src, 0);
4740 rtx p0, p1, rot, addr0, addr1;
4741 int rot_amt;
644459d0 4742
4743 rot = 0;
4744 rot_amt = 0;
9d98604b 4745
4746 if (MEM_ALIGN (src) >= 128)
4747 /* Address is already aligned; simply perform a TImode load. */ ;
4748 else if (GET_CODE (addr) == PLUS)
644459d0 4749 {
4750 /* 8 cases:
4751 aligned reg + aligned reg => lqx
4752 aligned reg + unaligned reg => lqx, rotqby
4753 aligned reg + aligned const => lqd
4754 aligned reg + unaligned const => lqd, rotqbyi
4755 unaligned reg + aligned reg => lqx, rotqby
4756 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4757 unaligned reg + aligned const => lqd, rotqby
4758 unaligned reg + unaligned const -> not allowed by legitimate address
4759 */
4760 p0 = XEXP (addr, 0);
4761 p1 = XEXP (addr, 1);
9d98604b 4762 if (!reg_aligned_for_addr (p0))
644459d0 4763 {
9d98604b 4764 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4765 {
9d98604b 4766 rot = gen_reg_rtx (SImode);
4767 emit_insn (gen_addsi3 (rot, p0, p1));
4768 }
4769 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4770 {
4771 if (INTVAL (p1) > 0
4772 && REG_POINTER (p0)
4773 && INTVAL (p1) * BITS_PER_UNIT
4774 < REGNO_POINTER_ALIGN (REGNO (p0)))
4775 {
4776 rot = gen_reg_rtx (SImode);
4777 emit_insn (gen_addsi3 (rot, p0, p1));
4778 addr = p0;
4779 }
4780 else
4781 {
4782 rtx x = gen_reg_rtx (SImode);
4783 emit_move_insn (x, p1);
4784 if (!spu_arith_operand (p1, SImode))
4785 p1 = x;
4786 rot = gen_reg_rtx (SImode);
4787 emit_insn (gen_addsi3 (rot, p0, p1));
4788 addr = gen_rtx_PLUS (Pmode, p0, x);
4789 }
644459d0 4790 }
4791 else
4792 rot = p0;
4793 }
4794 else
4795 {
4796 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4797 {
4798 rot_amt = INTVAL (p1) & 15;
9d98604b 4799 if (INTVAL (p1) & -16)
4800 {
4801 p1 = GEN_INT (INTVAL (p1) & -16);
4802 addr = gen_rtx_PLUS (SImode, p0, p1);
4803 }
4804 else
4805 addr = p0;
644459d0 4806 }
9d98604b 4807 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4808 rot = p1;
4809 }
4810 }
9d98604b 4811 else if (REG_P (addr))
644459d0 4812 {
9d98604b 4813 if (!reg_aligned_for_addr (addr))
644459d0 4814 rot = addr;
4815 }
4816 else if (GET_CODE (addr) == CONST)
4817 {
4818 if (GET_CODE (XEXP (addr, 0)) == PLUS
4819 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4820 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4821 {
4822 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4823 if (rot_amt & -16)
4824 addr = gen_rtx_CONST (Pmode,
4825 gen_rtx_PLUS (Pmode,
4826 XEXP (XEXP (addr, 0), 0),
4827 GEN_INT (rot_amt & -16)));
4828 else
4829 addr = XEXP (XEXP (addr, 0), 0);
4830 }
4831 else
9d98604b 4832 {
4833 rot = gen_reg_rtx (Pmode);
4834 emit_move_insn (rot, addr);
4835 }
644459d0 4836 }
4837 else if (GET_CODE (addr) == CONST_INT)
4838 {
4839 rot_amt = INTVAL (addr);
4840 addr = GEN_INT (rot_amt & -16);
4841 }
4842 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4843 {
4844 rot = gen_reg_rtx (Pmode);
4845 emit_move_insn (rot, addr);
4846 }
644459d0 4847
9d98604b 4848 rot_amt += extra_rotby;
644459d0 4849
4850 rot_amt &= 15;
4851
4852 if (rot && rot_amt)
4853 {
9d98604b 4854 rtx x = gen_reg_rtx (SImode);
4855 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4856 rot = x;
644459d0 4857 rot_amt = 0;
4858 }
9d98604b 4859 if (!rot && rot_amt)
4860 rot = GEN_INT (rot_amt);
4861
4862 addr0 = copy_rtx (addr);
4863 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4864 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4865
4866 if (dst1)
4867 {
4868 addr1 = plus_constant (copy_rtx (addr), 16);
4869 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4870 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4871 }
644459d0 4872
9d98604b 4873 return rot;
4874}
4875
4876int
4877spu_split_load (rtx * ops)
4878{
4879 enum machine_mode mode = GET_MODE (ops[0]);
4880 rtx addr, load, rot;
4881 int rot_amt;
644459d0 4882
9d98604b 4883 if (GET_MODE_SIZE (mode) >= 16)
4884 return 0;
644459d0 4885
9d98604b 4886 addr = XEXP (ops[1], 0);
4887 gcc_assert (GET_CODE (addr) != AND);
4888
4889 if (!address_needs_split (ops[1]))
4890 {
4891 ops[1] = change_address (ops[1], TImode, addr);
4892 load = gen_reg_rtx (TImode);
4893 emit_insn (gen__movti (load, ops[1]));
4894 spu_convert_move (ops[0], load);
4895 return 1;
4896 }
4897
4898 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4899
4900 load = gen_reg_rtx (TImode);
4901 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4902
4903 if (rot)
4904 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4905
9d98604b 4906 spu_convert_move (ops[0], load);
4907 return 1;
644459d0 4908}
4909
9d98604b 4910int
644459d0 4911spu_split_store (rtx * ops)
4912{
4913 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4914 rtx reg;
644459d0 4915 rtx addr, p0, p1, p1_lo, smem;
4916 int aform;
4917 int scalar;
4918
9d98604b 4919 if (GET_MODE_SIZE (mode) >= 16)
4920 return 0;
4921
644459d0 4922 addr = XEXP (ops[0], 0);
9d98604b 4923 gcc_assert (GET_CODE (addr) != AND);
4924
4925 if (!address_needs_split (ops[0]))
4926 {
4927 reg = gen_reg_rtx (TImode);
4928 emit_insn (gen_spu_convert (reg, ops[1]));
4929 ops[0] = change_address (ops[0], TImode, addr);
4930 emit_move_insn (ops[0], reg);
4931 return 1;
4932 }
644459d0 4933
4934 if (GET_CODE (addr) == PLUS)
4935 {
4936 /* 8 cases:
4937 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4938 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4939 aligned reg + aligned const => lqd, c?d, shuf, stqx
4940 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4941 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4942 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4943 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4944 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4945 */
4946 aform = 0;
4947 p0 = XEXP (addr, 0);
4948 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4949 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4950 {
4951 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4952 if (reg_aligned_for_addr (p0))
4953 {
4954 p1 = GEN_INT (INTVAL (p1) & -16);
4955 if (p1 == const0_rtx)
4956 addr = p0;
4957 else
4958 addr = gen_rtx_PLUS (SImode, p0, p1);
4959 }
4960 else
4961 {
4962 rtx x = gen_reg_rtx (SImode);
4963 emit_move_insn (x, p1);
4964 addr = gen_rtx_PLUS (SImode, p0, x);
4965 }
644459d0 4966 }
4967 }
9d98604b 4968 else if (REG_P (addr))
644459d0 4969 {
4970 aform = 0;
4971 p0 = addr;
4972 p1 = p1_lo = const0_rtx;
4973 }
4974 else
4975 {
4976 aform = 1;
4977 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4978 p1 = 0; /* aform doesn't use p1 */
4979 p1_lo = addr;
4980 if (ALIGNED_SYMBOL_REF_P (addr))
4981 p1_lo = const0_rtx;
9d98604b 4982 else if (GET_CODE (addr) == CONST
4983 && GET_CODE (XEXP (addr, 0)) == PLUS
4984 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4985 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4986 {
9d98604b 4987 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4988 if ((v & -16) != 0)
4989 addr = gen_rtx_CONST (Pmode,
4990 gen_rtx_PLUS (Pmode,
4991 XEXP (XEXP (addr, 0), 0),
4992 GEN_INT (v & -16)));
4993 else
4994 addr = XEXP (XEXP (addr, 0), 0);
4995 p1_lo = GEN_INT (v & 15);
644459d0 4996 }
4997 else if (GET_CODE (addr) == CONST_INT)
4998 {
4999 p1_lo = GEN_INT (INTVAL (addr) & 15);
5000 addr = GEN_INT (INTVAL (addr) & -16);
5001 }
9d98604b 5002 else
5003 {
5004 p1_lo = gen_reg_rtx (SImode);
5005 emit_move_insn (p1_lo, addr);
5006 }
644459d0 5007 }
5008
4cbad5bb 5009 gcc_assert (aform == 0 || aform == 1);
9d98604b 5010 reg = gen_reg_rtx (TImode);
e04cf423 5011
644459d0 5012 scalar = store_with_one_insn_p (ops[0]);
5013 if (!scalar)
5014 {
 5015 /* We could copy the flags from the ops[0] MEM to mem here.
5016 We don't because we want this load to be optimized away if
5017 possible, and copying the flags will prevent that in certain
5018 cases, e.g. consider the volatile flag. */
5019
9d98604b 5020 rtx pat = gen_reg_rtx (TImode);
e04cf423 5021 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5022 set_mem_alias_set (lmem, 0);
5023 emit_insn (gen_movti (reg, lmem));
644459d0 5024
9d98604b 5025 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5026 p0 = stack_pointer_rtx;
5027 if (!p1_lo)
5028 p1_lo = const0_rtx;
5029
5030 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5031 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5032 }
644459d0 5033 else
5034 {
5035 if (GET_CODE (ops[1]) == REG)
5036 emit_insn (gen_spu_convert (reg, ops[1]));
5037 else if (GET_CODE (ops[1]) == SUBREG)
5038 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5039 else
5040 abort ();
5041 }
5042
5043 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5044 emit_insn (gen_ashlti3
5045 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5046
9d98604b 5047 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5048 /* We can't use the previous alias set because the memory has changed
5049 size and can potentially overlap objects of other types. */
5050 set_mem_alias_set (smem, 0);
5051
e04cf423 5052 emit_insn (gen_movti (smem, reg));
9d98604b 5053 return 1;
644459d0 5054}
5055
 5056 /* Return TRUE if X is a MEM which is a struct member reference
5057 and the member can safely be loaded and stored with a single
5058 instruction because it is padded. */
5059static int
5060mem_is_padded_component_ref (rtx x)
5061{
5062 tree t = MEM_EXPR (x);
5063 tree r;
5064 if (!t || TREE_CODE (t) != COMPONENT_REF)
5065 return 0;
5066 t = TREE_OPERAND (t, 1);
5067 if (!t || TREE_CODE (t) != FIELD_DECL
5068 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5069 return 0;
5070 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5071 r = DECL_FIELD_CONTEXT (t);
5072 if (!r || TREE_CODE (r) != RECORD_TYPE)
5073 return 0;
5074 /* Make sure they are the same mode */
5075 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5076 return 0;
5077 /* If there are no following fields then the field alignment assures
fa7637bd 5078 the structure is padded to the alignment which means this field is
5079 padded too. */
644459d0 5080 if (TREE_CHAIN (t) == 0)
5081 return 1;
5082 /* If the following field is also aligned then this field will be
5083 padded. */
5084 t = TREE_CHAIN (t);
5085 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5086 return 1;
5087 return 0;
5088}
5089
c7b91b14 5090/* Parse the -mfixed-range= option string. */
5091static void
5092fix_range (const char *const_str)
5093{
5094 int i, first, last;
5095 char *str, *dash, *comma;
5096
 5097 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5098 REG2 are either register names or register numbers. The effect
5099 of this option is to mark the registers in the range from REG1 to
5100 REG2 as ``fixed'' so they won't be used by the compiler. */
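  /* For example, "-mfixed-range=80-85,90-93" marks registers 80..85 and
     90..93 as fixed so the register allocator never uses them; several
     ranges may be given, separated by commas.  */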
5101
5102 i = strlen (const_str);
5103 str = (char *) alloca (i + 1);
5104 memcpy (str, const_str, i + 1);
5105
5106 while (1)
5107 {
5108 dash = strchr (str, '-');
5109 if (!dash)
5110 {
5111 warning (0, "value of -mfixed-range must have form REG1-REG2");
5112 return;
5113 }
5114 *dash = '\0';
5115 comma = strchr (dash + 1, ',');
5116 if (comma)
5117 *comma = '\0';
5118
5119 first = decode_reg_name (str);
5120 if (first < 0)
5121 {
5122 warning (0, "unknown register name: %s", str);
5123 return;
5124 }
5125
5126 last = decode_reg_name (dash + 1);
5127 if (last < 0)
5128 {
5129 warning (0, "unknown register name: %s", dash + 1);
5130 return;
5131 }
5132
5133 *dash = '-';
5134
5135 if (first > last)
5136 {
5137 warning (0, "%s-%s is an empty range", str, dash + 1);
5138 return;
5139 }
5140
5141 for (i = first; i <= last; ++i)
5142 fixed_regs[i] = call_used_regs[i] = 1;
5143
5144 if (!comma)
5145 break;
5146
5147 *comma = ',';
5148 str = comma + 1;
5149 }
5150}
5151
644459d0 5152/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5153 can be generated using the fsmbi instruction. */
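/* E.g. a V8HImode vector with every element 0x00ff consists solely of 0x00
   and 0xff bytes, so a single fsmbi (whose 16-bit mask selects, per byte,
   either 0x00 or 0xff) can build it.  */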
5154int
5155fsmbi_const_p (rtx x)
5156{
dea01258 5157 if (CONSTANT_P (x))
5158 {
5df189be 5159 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5160 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5161 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5162 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5163 }
5164 return 0;
5165}
5166
5167/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5168 can be generated using the cbd, chd, cwd or cdd instruction. */
5169int
5170cpat_const_p (rtx x, enum machine_mode mode)
5171{
5172 if (CONSTANT_P (x))
5173 {
5174 enum immediate_class c = classify_immediate (x, mode);
5175 return c == IC_CPAT;
5176 }
5177 return 0;
5178}
644459d0 5179
dea01258 5180rtx
5181gen_cpat_const (rtx * ops)
5182{
5183 unsigned char dst[16];
5184 int i, offset, shift, isize;
5185 if (GET_CODE (ops[3]) != CONST_INT
5186 || GET_CODE (ops[2]) != CONST_INT
5187 || (GET_CODE (ops[1]) != CONST_INT
5188 && GET_CODE (ops[1]) != REG))
5189 return 0;
5190 if (GET_CODE (ops[1]) == REG
5191 && (!REG_POINTER (ops[1])
5192 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5193 return 0;
644459d0 5194
5195 for (i = 0; i < 16; i++)
dea01258 5196 dst[i] = i + 16;
5197 isize = INTVAL (ops[3]);
5198 if (isize == 1)
5199 shift = 3;
5200 else if (isize == 2)
5201 shift = 2;
5202 else
5203 shift = 0;
5204 offset = (INTVAL (ops[2]) +
5205 (GET_CODE (ops[1]) ==
5206 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5207 for (i = 0; i < isize; i++)
5208 dst[offset + i] = i + shift;
5209 return array_to_constant (TImode, dst);
644459d0 5210}
5211
5212/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5213 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5214 than 16 bytes, the value is repeated across the rest of the array. */
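/* For example, the SImode constant 0x12345678 produces
   { 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, ... }: the big-endian bytes of the
   value, repeated until all 16 array slots are filled.  */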
5215void
5216constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5217{
5218 HOST_WIDE_INT val;
5219 int i, j, first;
5220
5221 memset (arr, 0, 16);
5222 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5223 if (GET_CODE (x) == CONST_INT
5224 || (GET_CODE (x) == CONST_DOUBLE
5225 && (mode == SFmode || mode == DFmode)))
5226 {
5227 gcc_assert (mode != VOIDmode && mode != BLKmode);
5228
5229 if (GET_CODE (x) == CONST_DOUBLE)
5230 val = const_double_to_hwint (x);
5231 else
5232 val = INTVAL (x);
5233 first = GET_MODE_SIZE (mode) - 1;
5234 for (i = first; i >= 0; i--)
5235 {
5236 arr[i] = val & 0xff;
5237 val >>= 8;
5238 }
5239 /* Splat the constant across the whole array. */
5240 for (j = 0, i = first + 1; i < 16; i++)
5241 {
5242 arr[i] = arr[j];
5243 j = (j == first) ? 0 : j + 1;
5244 }
5245 }
5246 else if (GET_CODE (x) == CONST_DOUBLE)
5247 {
5248 val = CONST_DOUBLE_LOW (x);
5249 for (i = 15; i >= 8; i--)
5250 {
5251 arr[i] = val & 0xff;
5252 val >>= 8;
5253 }
5254 val = CONST_DOUBLE_HIGH (x);
5255 for (i = 7; i >= 0; i--)
5256 {
5257 arr[i] = val & 0xff;
5258 val >>= 8;
5259 }
5260 }
5261 else if (GET_CODE (x) == CONST_VECTOR)
5262 {
5263 int units;
5264 rtx elt;
5265 mode = GET_MODE_INNER (mode);
5266 units = CONST_VECTOR_NUNITS (x);
5267 for (i = 0; i < units; i++)
5268 {
5269 elt = CONST_VECTOR_ELT (x, i);
5270 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5271 {
5272 if (GET_CODE (elt) == CONST_DOUBLE)
5273 val = const_double_to_hwint (elt);
5274 else
5275 val = INTVAL (elt);
5276 first = GET_MODE_SIZE (mode) - 1;
5277 if (first + i * GET_MODE_SIZE (mode) > 16)
5278 abort ();
5279 for (j = first; j >= 0; j--)
5280 {
5281 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5282 val >>= 8;
5283 }
5284 }
5285 }
5286 }
5287 else
5288 gcc_unreachable();
5289}
5290
5291/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5292 smaller than 16 bytes, use the bytes that would represent that value
5293 in a register, e.g., for QImode return the value of arr[3]. */
5294rtx
e96f2783 5295array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5296{
5297 enum machine_mode inner_mode;
5298 rtvec v;
5299 int units, size, i, j, k;
5300 HOST_WIDE_INT val;
5301
5302 if (GET_MODE_CLASS (mode) == MODE_INT
5303 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5304 {
5305 j = GET_MODE_SIZE (mode);
5306 i = j < 4 ? 4 - j : 0;
5307 for (val = 0; i < j; i++)
5308 val = (val << 8) | arr[i];
5309 val = trunc_int_for_mode (val, mode);
5310 return GEN_INT (val);
5311 }
5312
5313 if (mode == TImode)
5314 {
5315 HOST_WIDE_INT high;
5316 for (i = high = 0; i < 8; i++)
5317 high = (high << 8) | arr[i];
5318 for (i = 8, val = 0; i < 16; i++)
5319 val = (val << 8) | arr[i];
5320 return immed_double_const (val, high, TImode);
5321 }
5322 if (mode == SFmode)
5323 {
5324 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5325 val = trunc_int_for_mode (val, SImode);
171b6d22 5326 return hwint_to_const_double (SFmode, val);
644459d0 5327 }
5328 if (mode == DFmode)
5329 {
1f915911 5330 for (i = 0, val = 0; i < 8; i++)
5331 val = (val << 8) | arr[i];
171b6d22 5332 return hwint_to_const_double (DFmode, val);
644459d0 5333 }
5334
5335 if (!VECTOR_MODE_P (mode))
5336 abort ();
5337
5338 units = GET_MODE_NUNITS (mode);
5339 size = GET_MODE_UNIT_SIZE (mode);
5340 inner_mode = GET_MODE_INNER (mode);
5341 v = rtvec_alloc (units);
5342
5343 for (k = i = 0; i < units; ++i)
5344 {
5345 val = 0;
5346 for (j = 0; j < size; j++, k++)
5347 val = (val << 8) | arr[k];
5348
5349 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5350 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5351 else
5352 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5353 }
5354 if (k > 16)
5355 abort ();
5356
5357 return gen_rtx_CONST_VECTOR (mode, v);
5358}
5359
5360static void
5361reloc_diagnostic (rtx x)
5362{
712d2297 5363 tree decl = 0;
644459d0 5364 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5365 return;
5366
5367 if (GET_CODE (x) == SYMBOL_REF)
5368 decl = SYMBOL_REF_DECL (x);
5369 else if (GET_CODE (x) == CONST
5370 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5371 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5372
5373 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5374 if (decl && !DECL_P (decl))
5375 decl = 0;
5376
644459d0 5377 /* The decl could be a string constant. */
5378 if (decl && DECL_P (decl))
712d2297 5379 {
5380 location_t loc;
5381 /* We use last_assemble_variable_decl to get line information. It's
5382 not always going to be right and might not even be close, but will
5383 be right for the more common cases. */
5384 if (!last_assemble_variable_decl || in_section == ctors_section)
5385 loc = DECL_SOURCE_LOCATION (decl);
5386 else
5387 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5388
712d2297 5389 if (TARGET_WARN_RELOC)
5390 warning_at (loc, 0,
5391 "creating run-time relocation for %qD", decl);
5392 else
5393 error_at (loc,
5394 "creating run-time relocation for %qD", decl);
5395 }
5396 else
5397 {
5398 if (TARGET_WARN_RELOC)
5399 warning_at (input_location, 0, "creating run-time relocation");
5400 else
5401 error_at (input_location, "creating run-time relocation");
5402 }
644459d0 5403}
5404
5405/* Hook into assemble_integer so we can generate an error for run-time
5406 relocations. The SPU ABI disallows them. */
5407static bool
5408spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5409{
5410 /* By default run-time relocations aren't supported, but we allow them
5411 in case users support it in their own run-time loader. And we provide
5412 a warning for those users that don't. */
5413 if ((GET_CODE (x) == SYMBOL_REF)
5414 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5415 reloc_diagnostic (x);
5416
5417 return default_assemble_integer (x, size, aligned_p);
5418}
5419
5420static void
5421spu_asm_globalize_label (FILE * file, const char *name)
5422{
5423 fputs ("\t.global\t", file);
5424 assemble_name (file, name);
5425 fputs ("\n", file);
5426}
5427
5428static bool
f529eb25 5429spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5430 bool speed ATTRIBUTE_UNUSED)
644459d0 5431{
5432 enum machine_mode mode = GET_MODE (x);
5433 int cost = COSTS_N_INSNS (2);
5434
5435 /* Folding to a CONST_VECTOR will use extra space but there might
5436 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5437 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5438 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5439 because this cost will only be compared against a single insn.
5440 if (code == CONST_VECTOR)
5441 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5442 */
5443
5444 /* Use defaults for float operations. Not accurate but good enough. */
5445 if (mode == DFmode)
5446 {
5447 *total = COSTS_N_INSNS (13);
5448 return true;
5449 }
5450 if (mode == SFmode)
5451 {
5452 *total = COSTS_N_INSNS (6);
5453 return true;
5454 }
5455 switch (code)
5456 {
5457 case CONST_INT:
5458 if (satisfies_constraint_K (x))
5459 *total = 0;
5460 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5461 *total = COSTS_N_INSNS (1);
5462 else
5463 *total = COSTS_N_INSNS (3);
5464 return true;
5465
5466 case CONST:
5467 *total = COSTS_N_INSNS (3);
5468 return true;
5469
5470 case LABEL_REF:
5471 case SYMBOL_REF:
5472 *total = COSTS_N_INSNS (0);
5473 return true;
5474
5475 case CONST_DOUBLE:
5476 *total = COSTS_N_INSNS (5);
5477 return true;
5478
5479 case FLOAT_EXTEND:
5480 case FLOAT_TRUNCATE:
5481 case FLOAT:
5482 case UNSIGNED_FLOAT:
5483 case FIX:
5484 case UNSIGNED_FIX:
5485 *total = COSTS_N_INSNS (7);
5486 return true;
5487
5488 case PLUS:
5489 if (mode == TImode)
5490 {
5491 *total = COSTS_N_INSNS (9);
5492 return true;
5493 }
5494 break;
5495
5496 case MULT:
5497 cost =
5498 GET_CODE (XEXP (x, 0)) ==
5499 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5500 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5501 {
5502 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5503 {
5504 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5505 cost = COSTS_N_INSNS (14);
5506 if ((val & 0xffff) == 0)
5507 cost = COSTS_N_INSNS (9);
5508 else if (val > 0 && val < 0x10000)
5509 cost = COSTS_N_INSNS (11);
5510 }
5511 }
5512 *total = cost;
5513 return true;
5514 case DIV:
5515 case UDIV:
5516 case MOD:
5517 case UMOD:
5518 *total = COSTS_N_INSNS (20);
5519 return true;
5520 case ROTATE:
5521 case ROTATERT:
5522 case ASHIFT:
5523 case ASHIFTRT:
5524 case LSHIFTRT:
5525 *total = COSTS_N_INSNS (4);
5526 return true;
5527 case UNSPEC:
5528 if (XINT (x, 1) == UNSPEC_CONVERT)
5529 *total = COSTS_N_INSNS (0);
5530 else
5531 *total = COSTS_N_INSNS (4);
5532 return true;
5533 }
5534 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5535 if (GET_MODE_CLASS (mode) == MODE_INT
5536 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5537 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5538 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5539 *total = cost;
5540 return true;
5541}
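/* Worked example (illustration only) of the costs above: an SImode multiply
   of a register by 0x30000 costs 9 insns (low 16 bits zero), by 100 costs
   11 insns (positive and below 0x10000), and by 0x12345 costs 14 insns.
   The final scaling squares the word ratio for wide integer modes, e.g. a
   DImode operation that falls through the switch with the default cost of
   2 insns is scaled by (8/4) * (8/4) = 4 to 8 insns.  */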
5542
1bd43494 5543static enum machine_mode
5544spu_unwind_word_mode (void)
644459d0 5545{
1bd43494 5546 return SImode;
644459d0 5547}
5548
5549/* Decide whether we can make a sibling call to a function. DECL is the
5550 declaration of the function being targeted by the call and EXP is the
5551 CALL_EXPR representing the call. */
5552static bool
5553spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5554{
5555 return decl && !TARGET_LARGE_MEM;
5556}
5557
5558/* We need to correctly update the back chain pointer and the Available
 5559 Stack Size (which is in the second slot of the sp register). */
5560void
5561spu_allocate_stack (rtx op0, rtx op1)
5562{
5563 HOST_WIDE_INT v;
5564 rtx chain = gen_reg_rtx (V4SImode);
5565 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5566 rtx sp = gen_reg_rtx (V4SImode);
5567 rtx splatted = gen_reg_rtx (V4SImode);
5568 rtx pat = gen_reg_rtx (TImode);
5569
5570 /* copy the back chain so we can save it back again. */
5571 emit_move_insn (chain, stack_bot);
5572
5573 op1 = force_reg (SImode, op1);
5574
5575 v = 0x1020300010203ll;
5576 emit_move_insn (pat, immed_double_const (v, v, TImode));
5577 emit_insn (gen_shufb (splatted, op1, op1, pat));
5578
5579 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5580 emit_insn (gen_subv4si3 (sp, sp, splatted));
5581
5582 if (flag_stack_check)
5583 {
5584 rtx avail = gen_reg_rtx(SImode);
5585 rtx result = gen_reg_rtx(SImode);
5586 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5587 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5588 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5589 }
5590
5591 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5592
5593 emit_move_insn (stack_bot, chain);
5594
5595 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5596}
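/* Note (illustration only): the shuffle constant 0x0001020300010203 in both
   halves of PAT is the byte control { 0,1,2,3 } repeated four times, so the
   shufb above replicates the requested allocation size (the first word of
   OP1) into every slot of SPLATTED.  The single subv4si then decrements both
   the stack pointer in slot 0 and the Available Stack Size in slot 1 at
   once.  */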
5597
5598void
5599spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5600{
5601 static unsigned char arr[16] =
5602 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5603 rtx temp = gen_reg_rtx (SImode);
5604 rtx temp2 = gen_reg_rtx (SImode);
5605 rtx temp3 = gen_reg_rtx (V4SImode);
5606 rtx temp4 = gen_reg_rtx (V4SImode);
5607 rtx pat = gen_reg_rtx (TImode);
5608 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5609
5610 /* Restore the backchain from the first word, sp from the second. */
5611 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5612 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5613
5614 emit_move_insn (pat, array_to_constant (TImode, arr));
5615
5616 /* Compute Available Stack Size for sp */
5617 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5618 emit_insn (gen_shufb (temp3, temp, temp, pat));
5619
5620 /* Compute Available Stack Size for back chain */
5621 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5622 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5623 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5624
5625 emit_insn (gen_addv4si3 (sp, sp, temp3));
5626 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5627}
5628
5629static void
5630spu_init_libfuncs (void)
5631{
5632 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5633 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5634 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5635 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5636 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5637 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5638 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5639 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5640 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5641 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5642 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5643
5644 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5645 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5646
5825ec3f 5647 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5648 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5649 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5650 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5651 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5652 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5653 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5654 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5655 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5656 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5657 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5658 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5659
19a53068 5660 set_optab_libfunc (smul_optab, TImode, "__multi3");
5661 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5662 set_optab_libfunc (smod_optab, TImode, "__modti3");
5663 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5664 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5665 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5666}
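/* For illustration (my example): once these libfuncs are registered, DImode
   and TImode operations without a machine pattern are emitted as calls,
   e.g. the division below becomes a call to __divdi3 rather than inline
   code.  */
#if 0 /* example input program */
long long
quot (long long a, long long b)
{
  return a / b;		/* expands to a __divdi3 libcall */
}
#endif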
5667
5668/* Make a subreg, stripping any existing subreg. We could possibly just
5669 call simplify_subreg, but in this case we know what we want. */
5670rtx
5671spu_gen_subreg (enum machine_mode mode, rtx x)
5672{
5673 if (GET_CODE (x) == SUBREG)
5674 x = SUBREG_REG (x);
5675 if (GET_MODE (x) == mode)
5676 return x;
5677 return gen_rtx_SUBREG (mode, x, 0);
5678}
5679
5680static bool
fb80456a 5681spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5682{
5683 return (TYPE_MODE (type) == BLKmode
5684 && ((type) == 0
5685 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5686 || int_size_in_bytes (type) >
5687 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5688}
5689\f
5690/* Create the built-in types and functions */
5691
c2233b46 5692enum spu_function_code
5693{
5694#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5695#include "spu-builtins.def"
5696#undef DEF_BUILTIN
5697 NUM_SPU_BUILTINS
5698};
5699
5700extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5701
644459d0 5702struct spu_builtin_description spu_builtins[] = {
5703#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5704 {fcode, icode, name, type, params},
644459d0 5705#include "spu-builtins.def"
5706#undef DEF_BUILTIN
5707};
5708
0c5c4d59 5709static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5710
5711/* Returns the spu builtin decl for CODE. */
e6925042 5712
5713static tree
5714spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5715{
5716 if (code >= NUM_SPU_BUILTINS)
5717 return error_mark_node;
5718
0c5c4d59 5719 return spu_builtin_decls[code];
e6925042 5720}
5721
5722
644459d0 5723static void
5724spu_init_builtins (void)
5725{
5726 struct spu_builtin_description *d;
5727 unsigned int i;
5728
5729 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5730 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5731 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5732 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5733 V4SF_type_node = build_vector_type (float_type_node, 4);
5734 V2DF_type_node = build_vector_type (double_type_node, 2);
5735
5736 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5737 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5738 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5739 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5740
c4ecce0c 5741 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5742
5743 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5744 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5745 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5746 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5748 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5749 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5750 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5751 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5752 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5753 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5754 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5755
5756 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5757 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5758 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5759 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5760 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5761 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5762 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5763 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5764
5765 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5766 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5767
5768 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5769
5770 spu_builtin_types[SPU_BTI_PTR] =
5771 build_pointer_type (build_qualified_type
5772 (void_type_node,
5773 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5774
5775 /* For each builtin we build a new prototype. The tree code will make
5776 sure nodes are shared. */
5777 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5778 {
5779 tree p;
 5780 char name[64]; /* add_builtin_function will make a copy. */
5781 int parm;
5782
5783 if (d->name == 0)
5784 continue;
5785
5dfbd18f 5786 /* Find last parm. */
644459d0 5787 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5788 ;
644459d0 5789
5790 p = void_list_node;
5791 while (parm > 1)
5792 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5793
5794 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5795
5796 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5797 spu_builtin_decls[i] =
3726fe5e 5798 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5799 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5800 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5801
5802 /* These builtins don't throw. */
0c5c4d59 5803 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5804 }
5805}
5806
cf31d486 5807void
5808spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5809{
5810 static unsigned char arr[16] =
5811 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5812
5813 rtx temp = gen_reg_rtx (Pmode);
5814 rtx temp2 = gen_reg_rtx (V4SImode);
5815 rtx temp3 = gen_reg_rtx (V4SImode);
5816 rtx pat = gen_reg_rtx (TImode);
5817 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5818
5819 emit_move_insn (pat, array_to_constant (TImode, arr));
5820
5821 /* Restore the sp. */
5822 emit_move_insn (temp, op1);
5823 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5824
5825 /* Compute available stack size for sp. */
5826 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5827 emit_insn (gen_shufb (temp3, temp, temp, pat));
5828
5829 emit_insn (gen_addv4si3 (sp, sp, temp3));
5830 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5831}
5832
644459d0 5833int
5834spu_safe_dma (HOST_WIDE_INT channel)
5835{
006e4b96 5836 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5837}
5838
5839void
5840spu_builtin_splats (rtx ops[])
5841{
5842 enum machine_mode mode = GET_MODE (ops[0]);
5843 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5844 {
5845 unsigned char arr[16];
5846 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5847 emit_move_insn (ops[0], array_to_constant (mode, arr));
5848 }
644459d0 5849 else
5850 {
5851 rtx reg = gen_reg_rtx (TImode);
5852 rtx shuf;
5853 if (GET_CODE (ops[1]) != REG
5854 && GET_CODE (ops[1]) != SUBREG)
5855 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5856 switch (mode)
5857 {
5858 case V2DImode:
5859 case V2DFmode:
5860 shuf =
5861 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5862 TImode);
5863 break;
5864 case V4SImode:
5865 case V4SFmode:
5866 shuf =
5867 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5868 TImode);
5869 break;
5870 case V8HImode:
5871 shuf =
5872 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5873 TImode);
5874 break;
5875 case V16QImode:
5876 shuf =
5877 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5878 TImode);
5879 break;
5880 default:
5881 abort ();
5882 }
5883 emit_move_insn (reg, shuf);
5884 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5885 }
5886}
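/* Illustrative model (my sketch) of how the shuffle controls above splat.
   Only the plain byte-index behaviour of shufb is modelled here, assuming
   the low five control bits index the 32 bytes of A:B; the special control
   encodings are ignored.  shuffle_bytes is a hypothetical helper.  */
#if 0 /* example only */
static void
shuffle_bytes (unsigned char *dst, const unsigned char *a,
	       const unsigned char *b, const unsigned char *control)
{
  int i;
  for (i = 0; i < 16; i++)
    {
      int c = control[i] & 0x1f;	/* index into the 32 bytes of a:b */
      dst[i] = c < 16 ? a[c] : b[c - 16];
    }
}
/* With the V4SI/V4SF control { 0,1,2,3, 0,1,2,3, 0,1,2,3, 0,1,2,3 } every
   4-byte slot of DST receives bytes 0..3 of A, i.e. the scalar in the
   preferred slot is replicated into all four elements.  */
#endif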
5887
5888void
5889spu_builtin_extract (rtx ops[])
5890{
5891 enum machine_mode mode;
5892 rtx rot, from, tmp;
5893
5894 mode = GET_MODE (ops[1]);
5895
5896 if (GET_CODE (ops[2]) == CONST_INT)
5897 {
5898 switch (mode)
5899 {
5900 case V16QImode:
5901 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5902 break;
5903 case V8HImode:
5904 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5905 break;
5906 case V4SFmode:
5907 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5908 break;
5909 case V4SImode:
5910 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5911 break;
5912 case V2DImode:
5913 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5914 break;
5915 case V2DFmode:
5916 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5917 break;
5918 default:
5919 abort ();
5920 }
5921 return;
5922 }
5923
5924 from = spu_gen_subreg (TImode, ops[1]);
5925 rot = gen_reg_rtx (TImode);
5926 tmp = gen_reg_rtx (SImode);
5927
5928 switch (mode)
5929 {
5930 case V16QImode:
5931 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5932 break;
5933 case V8HImode:
5934 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5935 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5936 break;
5937 case V4SFmode:
5938 case V4SImode:
5939 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5940 break;
5941 case V2DImode:
5942 case V2DFmode:
5943 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5944 break;
5945 default:
5946 abort ();
5947 }
5948 emit_insn (gen_rotqby_ti (rot, from, tmp));
5949
5950 emit_insn (gen_spu_convert (ops[0], rot));
5951}
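/* Worked example (illustration only) of the variable-index path above: for a
   V4SImode source and index 2, TMP becomes 2 << 2 = 8, so the rotqby rotates
   the quadword by 8 bytes and element 2 ends up in bytes 0..3, the preferred
   slot from which spu_convert then extracts the scalar.  */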
5952
5953void
5954spu_builtin_insert (rtx ops[])
5955{
5956 enum machine_mode mode = GET_MODE (ops[0]);
5957 enum machine_mode imode = GET_MODE_INNER (mode);
5958 rtx mask = gen_reg_rtx (TImode);
5959 rtx offset;
5960
5961 if (GET_CODE (ops[3]) == CONST_INT)
5962 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5963 else
5964 {
5965 offset = gen_reg_rtx (SImode);
5966 emit_insn (gen_mulsi3
5967 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5968 }
5969 emit_insn (gen_cpat
5970 (mask, stack_pointer_rtx, offset,
5971 GEN_INT (GET_MODE_SIZE (imode))));
5972 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5973}
5974
5975void
5976spu_builtin_promote (rtx ops[])
5977{
5978 enum machine_mode mode, imode;
5979 rtx rot, from, offset;
5980 HOST_WIDE_INT pos;
5981
5982 mode = GET_MODE (ops[0]);
5983 imode = GET_MODE_INNER (mode);
5984
5985 from = gen_reg_rtx (TImode);
5986 rot = spu_gen_subreg (TImode, ops[0]);
5987
5988 emit_insn (gen_spu_convert (from, ops[1]));
5989
5990 if (GET_CODE (ops[2]) == CONST_INT)
5991 {
5992 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5993 if (GET_MODE_SIZE (imode) < 4)
5994 pos += 4 - GET_MODE_SIZE (imode);
5995 offset = GEN_INT (pos & 15);
5996 }
5997 else
5998 {
5999 offset = gen_reg_rtx (SImode);
6000 switch (mode)
6001 {
6002 case V16QImode:
6003 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
6004 break;
6005 case V8HImode:
6006 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
6007 emit_insn (gen_addsi3 (offset, offset, offset));
6008 break;
6009 case V4SFmode:
6010 case V4SImode:
6011 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
6012 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
6013 break;
6014 case V2DImode:
6015 case V2DFmode:
6016 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6017 break;
6018 default:
6019 abort ();
6020 }
6021 }
6022 emit_insn (gen_rotqby_ti (rot, from, offset));
6023}
6024
e96f2783 6025static void
6026spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6027{
e96f2783 6028 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6029 rtx shuf = gen_reg_rtx (V4SImode);
6030 rtx insn = gen_reg_rtx (V4SImode);
6031 rtx shufc;
6032 rtx insnc;
6033 rtx mem;
6034
6035 fnaddr = force_reg (SImode, fnaddr);
6036 cxt = force_reg (SImode, cxt);
6037
6038 if (TARGET_LARGE_MEM)
6039 {
6040 rtx rotl = gen_reg_rtx (V4SImode);
6041 rtx mask = gen_reg_rtx (V4SImode);
6042 rtx bi = gen_reg_rtx (SImode);
e96f2783 6043 static unsigned char const shufa[16] = {
644459d0 6044 2, 3, 0, 1, 18, 19, 16, 17,
6045 0, 1, 2, 3, 16, 17, 18, 19
6046 };
e96f2783 6047 static unsigned char const insna[16] = {
644459d0 6048 0x41, 0, 0, 79,
6049 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6050 0x60, 0x80, 0, 79,
6051 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6052 };
6053
6054 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6055 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6056
6057 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6058 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6059 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6060 emit_insn (gen_selb (insn, insnc, rotl, mask));
6061
e96f2783 6062 mem = adjust_address (m_tramp, V4SImode, 0);
6063 emit_move_insn (mem, insn);
644459d0 6064
6065 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6066 mem = adjust_address (m_tramp, Pmode, 16);
6067 emit_move_insn (mem, bi);
644459d0 6068 }
6069 else
6070 {
6071 rtx scxt = gen_reg_rtx (SImode);
6072 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6073 static unsigned char const insna[16] = {
644459d0 6074 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6075 0x30, 0, 0, 0,
6076 0, 0, 0, 0,
6077 0, 0, 0, 0
6078 };
6079
6080 shufc = gen_reg_rtx (TImode);
6081 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6082
6083 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6084 fits 18 bits and the last 4 are zeros. This will be true if
6085 the stack pointer is initialized to 0x3fff0 at program start,
6086 otherwise the ila instruction will be garbage. */
6087
6088 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6089 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6090 emit_insn (gen_cpat
6091 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6092 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6093 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6094
e96f2783 6095 mem = adjust_address (m_tramp, V4SImode, 0);
6096 emit_move_insn (mem, insn);
644459d0 6097 }
6098 emit_insn (gen_sync ());
6099}
6100
6101void
6102spu_expand_sign_extend (rtx ops[])
6103{
6104 unsigned char arr[16];
6105 rtx pat = gen_reg_rtx (TImode);
6106 rtx sign, c;
6107 int i, last;
6108 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6109 if (GET_MODE (ops[1]) == QImode)
6110 {
6111 sign = gen_reg_rtx (HImode);
6112 emit_insn (gen_extendqihi2 (sign, ops[1]));
6113 for (i = 0; i < 16; i++)
6114 arr[i] = 0x12;
6115 arr[last] = 0x13;
6116 }
6117 else
6118 {
6119 for (i = 0; i < 16; i++)
6120 arr[i] = 0x10;
6121 switch (GET_MODE (ops[1]))
6122 {
6123 case HImode:
6124 sign = gen_reg_rtx (SImode);
6125 emit_insn (gen_extendhisi2 (sign, ops[1]));
6126 arr[last] = 0x03;
6127 arr[last - 1] = 0x02;
6128 break;
6129 case SImode:
6130 sign = gen_reg_rtx (SImode);
6131 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6132 for (i = 0; i < 4; i++)
6133 arr[last - i] = 3 - i;
6134 break;
6135 case DImode:
6136 sign = gen_reg_rtx (SImode);
6137 c = gen_reg_rtx (SImode);
6138 emit_insn (gen_spu_convert (c, ops[1]));
6139 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6140 for (i = 0; i < 8; i++)
6141 arr[last - i] = 7 - i;
6142 break;
6143 default:
6144 abort ();
6145 }
6146 }
6147 emit_move_insn (pat, array_to_constant (TImode, arr));
6148 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6149}
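/* Worked example (illustration only): sign-extending an SImode value to
   DImode builds PAT = { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, ... }.
   Control bytes 0x10..0x13 select bytes 0..3 of SIGN (the source shifted
   arithmetically right by 31, i.e. all sign bits) and 0x00..0x03 select the
   original word, so the shufb leaves a correctly sign-extended DImode value
   in the preferred slot.  */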
6150
6151/* Expand vector initialization. If there are any constant parts,
6152 load constant parts first. Then load any non-constant parts. */
6153void
6154spu_expand_vector_init (rtx target, rtx vals)
6155{
6156 enum machine_mode mode = GET_MODE (target);
6157 int n_elts = GET_MODE_NUNITS (mode);
6158 int n_var = 0;
6159 bool all_same = true;
790c536c 6160 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6161 int i;
6162
6163 first = XVECEXP (vals, 0, 0);
6164 for (i = 0; i < n_elts; ++i)
6165 {
6166 x = XVECEXP (vals, 0, i);
e442af0b 6167 if (!(CONST_INT_P (x)
6168 || GET_CODE (x) == CONST_DOUBLE
6169 || GET_CODE (x) == CONST_FIXED))
644459d0 6170 ++n_var;
6171 else
6172 {
6173 if (first_constant == NULL_RTX)
6174 first_constant = x;
6175 }
6176 if (i > 0 && !rtx_equal_p (x, first))
6177 all_same = false;
6178 }
6179
6180 /* if all elements are the same, use splats to repeat elements */
6181 if (all_same)
6182 {
6183 if (!CONSTANT_P (first)
6184 && !register_operand (first, GET_MODE (x)))
6185 first = force_reg (GET_MODE (first), first);
6186 emit_insn (gen_spu_splats (target, first));
6187 return;
6188 }
6189
6190 /* load constant parts */
6191 if (n_var != n_elts)
6192 {
6193 if (n_var == 0)
6194 {
6195 emit_move_insn (target,
6196 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6197 }
6198 else
6199 {
6200 rtx constant_parts_rtx = copy_rtx (vals);
6201
6202 gcc_assert (first_constant != NULL_RTX);
 6203 /* Fill empty slots with the first constant; this increases
6204 our chance of using splats in the recursive call below. */
6205 for (i = 0; i < n_elts; ++i)
e442af0b 6206 {
6207 x = XVECEXP (constant_parts_rtx, 0, i);
6208 if (!(CONST_INT_P (x)
6209 || GET_CODE (x) == CONST_DOUBLE
6210 || GET_CODE (x) == CONST_FIXED))
6211 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6212 }
644459d0 6213
6214 spu_expand_vector_init (target, constant_parts_rtx);
6215 }
6216 }
6217
6218 /* load variable parts */
6219 if (n_var != 0)
6220 {
6221 rtx insert_operands[4];
6222
6223 insert_operands[0] = target;
6224 insert_operands[2] = target;
6225 for (i = 0; i < n_elts; ++i)
6226 {
6227 x = XVECEXP (vals, 0, i);
e442af0b 6228 if (!(CONST_INT_P (x)
6229 || GET_CODE (x) == CONST_DOUBLE
6230 || GET_CODE (x) == CONST_FIXED))
644459d0 6231 {
6232 if (!register_operand (x, GET_MODE (x)))
6233 x = force_reg (GET_MODE (x), x);
6234 insert_operands[1] = x;
6235 insert_operands[3] = GEN_INT (i);
6236 spu_builtin_insert (insert_operands);
6237 }
6238 }
6239 }
6240}
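/* For illustration (my example): an initializer that mixes constants and
   variables takes both paths above -- the constant elements are loaded
   first, with the variable slots padded by the first constant so the
   recursive call can still use a splat where possible, and the variable
   elements are then inserted one by one.  */
#if 0 /* example input program */
typedef int v4si __attribute__ ((vector_size (16)));
v4si
make (int x)
{
  v4si v = { x, 1, 2, 3 };	/* loads { 1, 1, 2, 3 }, then inserts X
				   into element 0 */
  return v;
}
#endif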
6352eedf 6241
5474166e 6242/* Return insn index for the vector compare instruction for given CODE,
6243 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6244
6245static int
6246get_vec_cmp_insn (enum rtx_code code,
6247 enum machine_mode dest_mode,
6248 enum machine_mode op_mode)
6249
6250{
6251 switch (code)
6252 {
6253 case EQ:
6254 if (dest_mode == V16QImode && op_mode == V16QImode)
6255 return CODE_FOR_ceq_v16qi;
6256 if (dest_mode == V8HImode && op_mode == V8HImode)
6257 return CODE_FOR_ceq_v8hi;
6258 if (dest_mode == V4SImode && op_mode == V4SImode)
6259 return CODE_FOR_ceq_v4si;
6260 if (dest_mode == V4SImode && op_mode == V4SFmode)
6261 return CODE_FOR_ceq_v4sf;
6262 if (dest_mode == V2DImode && op_mode == V2DFmode)
6263 return CODE_FOR_ceq_v2df;
6264 break;
6265 case GT:
6266 if (dest_mode == V16QImode && op_mode == V16QImode)
6267 return CODE_FOR_cgt_v16qi;
6268 if (dest_mode == V8HImode && op_mode == V8HImode)
6269 return CODE_FOR_cgt_v8hi;
6270 if (dest_mode == V4SImode && op_mode == V4SImode)
6271 return CODE_FOR_cgt_v4si;
6272 if (dest_mode == V4SImode && op_mode == V4SFmode)
6273 return CODE_FOR_cgt_v4sf;
6274 if (dest_mode == V2DImode && op_mode == V2DFmode)
6275 return CODE_FOR_cgt_v2df;
6276 break;
6277 case GTU:
6278 if (dest_mode == V16QImode && op_mode == V16QImode)
6279 return CODE_FOR_clgt_v16qi;
6280 if (dest_mode == V8HImode && op_mode == V8HImode)
6281 return CODE_FOR_clgt_v8hi;
6282 if (dest_mode == V4SImode && op_mode == V4SImode)
6283 return CODE_FOR_clgt_v4si;
6284 break;
6285 default:
6286 break;
6287 }
6288 return -1;
6289}
6290
6291/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6292 DMODE is expected destination mode. This is a recursive function. */
6293
6294static rtx
6295spu_emit_vector_compare (enum rtx_code rcode,
6296 rtx op0, rtx op1,
6297 enum machine_mode dmode)
6298{
6299 int vec_cmp_insn;
6300 rtx mask;
6301 enum machine_mode dest_mode;
6302 enum machine_mode op_mode = GET_MODE (op1);
6303
6304 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6305
 6306 /* Single precision floating point vector compare instructions use a
 6307 V4SImode destination; double precision ones use a V2DImode destination.
 6308 Move the destination to the appropriate mode later. */
6309 if (dmode == V4SFmode)
6310 dest_mode = V4SImode;
6311 else if (dmode == V2DFmode)
6312 dest_mode = V2DImode;
6313 else
6314 dest_mode = dmode;
6315
6316 mask = gen_reg_rtx (dest_mode);
6317 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6318
6319 if (vec_cmp_insn == -1)
6320 {
6321 bool swap_operands = false;
6322 bool try_again = false;
6323 switch (rcode)
6324 {
6325 case LT:
6326 rcode = GT;
6327 swap_operands = true;
6328 try_again = true;
6329 break;
6330 case LTU:
6331 rcode = GTU;
6332 swap_operands = true;
6333 try_again = true;
6334 break;
6335 case NE:
6336 /* Treat A != B as ~(A==B). */
6337 {
6338 enum insn_code nor_code;
6339 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6340 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6341 gcc_assert (nor_code != CODE_FOR_nothing);
6342 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6343 if (dmode != dest_mode)
6344 {
6345 rtx temp = gen_reg_rtx (dest_mode);
6346 convert_move (temp, mask, 0);
6347 return temp;
6348 }
6349 return mask;
6350 }
6351 break;
6352 case GE:
6353 case GEU:
6354 case LE:
6355 case LEU:
6356 /* Try GT/GTU/LT/LTU OR EQ */
6357 {
6358 rtx c_rtx, eq_rtx;
6359 enum insn_code ior_code;
6360 enum rtx_code new_code;
6361
6362 switch (rcode)
6363 {
6364 case GE: new_code = GT; break;
6365 case GEU: new_code = GTU; break;
6366 case LE: new_code = LT; break;
6367 case LEU: new_code = LTU; break;
6368 default:
6369 gcc_unreachable ();
6370 }
6371
6372 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6373 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6374
d6bf3b14 6375 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6376 gcc_assert (ior_code != CODE_FOR_nothing);
6377 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6378 if (dmode != dest_mode)
6379 {
6380 rtx temp = gen_reg_rtx (dest_mode);
6381 convert_move (temp, mask, 0);
6382 return temp;
6383 }
6384 return mask;
6385 }
6386 break;
6387 default:
6388 gcc_unreachable ();
6389 }
6390
6391 /* You only get two chances. */
6392 if (try_again)
6393 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6394
6395 gcc_assert (vec_cmp_insn != -1);
6396
6397 if (swap_operands)
6398 {
6399 rtx tmp;
6400 tmp = op0;
6401 op0 = op1;
6402 op1 = tmp;
6403 }
6404 }
6405
6406 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6407 if (dmode != dest_mode)
6408 {
6409 rtx temp = gen_reg_rtx (dest_mode);
6410 convert_move (temp, mask, 0);
6411 return temp;
6412 }
6413 return mask;
6414}
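/* Scalar analogue (illustration only) of the fallback identities used above;
   the vector code complements or ORs the element masks instead of using !
   and ||.  The helper names are hypothetical.  */
#if 0 /* example only */
static int lt_via_gt (int a, int b) { return b > a; }		/* swap    */
static int ne_via_eq (int a, int b) { return !(a == b); }	/* ~EQ     */
static int ge_via_gt_eq (int a, int b) { return (a > b) || (a == b); }
#endif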
6415
6416
6417/* Emit vector conditional expression.
6418 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6419 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6420
6421int
6422spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6423 rtx cond, rtx cc_op0, rtx cc_op1)
6424{
6425 enum machine_mode dest_mode = GET_MODE (dest);
6426 enum rtx_code rcode = GET_CODE (cond);
6427 rtx mask;
6428
6429 /* Get the vector mask for the given relational operations. */
6430 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6431
6432 emit_insn(gen_selb (dest, op2, op1, mask));
6433
6434 return 1;
6435}
6436
6352eedf 6437static rtx
6438spu_force_reg (enum machine_mode mode, rtx op)
6439{
6440 rtx x, r;
6441 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6442 {
6443 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6444 || GET_MODE (op) == BLKmode)
6445 return force_reg (mode, convert_to_mode (mode, op, 0));
6446 abort ();
6447 }
6448
6449 r = force_reg (GET_MODE (op), op);
6450 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6451 {
6452 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6453 if (x)
6454 return x;
6455 }
6456
6457 x = gen_reg_rtx (mode);
6458 emit_insn (gen_spu_convert (x, r));
6459 return x;
6460}
6461
6462static void
6463spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6464{
6465 HOST_WIDE_INT v = 0;
6466 int lsbits;
6467 /* Check the range of immediate operands. */
6468 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6469 {
6470 int range = p - SPU_BTI_7;
5df189be 6471
6472 if (!CONSTANT_P (op))
bf776685 6473 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6474 d->name,
6475 spu_builtin_range[range].low, spu_builtin_range[range].high);
6476
6477 if (GET_CODE (op) == CONST
6478 && (GET_CODE (XEXP (op, 0)) == PLUS
6479 || GET_CODE (XEXP (op, 0)) == MINUS))
6480 {
6481 v = INTVAL (XEXP (XEXP (op, 0), 1));
6482 op = XEXP (XEXP (op, 0), 0);
6483 }
6484 else if (GET_CODE (op) == CONST_INT)
6485 v = INTVAL (op);
5df189be 6486 else if (GET_CODE (op) == CONST_VECTOR
6487 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6488 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6489
6490 /* The default for v is 0 which is valid in every range. */
6491 if (v < spu_builtin_range[range].low
6492 || v > spu_builtin_range[range].high)
bf776685 6493 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6494 d->name,
6495 spu_builtin_range[range].low, spu_builtin_range[range].high,
6496 v);
6352eedf 6497
6498 switch (p)
6499 {
6500 case SPU_BTI_S10_4:
6501 lsbits = 4;
6502 break;
6503 case SPU_BTI_U16_2:
6504 /* This is only used in lqa, and stqa. Even though the insns
6505 encode 16 bits of the address (all but the 2 least
6506 significant), only 14 bits are used because it is masked to
6507 be 16 byte aligned. */
6508 lsbits = 4;
6509 break;
6510 case SPU_BTI_S16_2:
6511 /* This is used for lqr and stqr. */
6512 lsbits = 2;
6513 break;
6514 default:
6515 lsbits = 0;
6516 }
6517
6518 if (GET_CODE (op) == LABEL_REF
6519 || (GET_CODE (op) == SYMBOL_REF
6520 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6521 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6522 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6523 d->name);
6524 }
6525}
6526
6527
70ca06f8 6528static int
5df189be 6529expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6530 rtx target, rtx ops[])
6531{
bc620c5c 6532 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6533 int i = 0, a;
6352eedf 6534
6535 /* Expand the arguments into rtl. */
6536
6537 if (d->parm[0] != SPU_BTI_VOID)
6538 ops[i++] = target;
6539
70ca06f8 6540 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6541 {
5df189be 6542 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6543 if (arg == 0)
6544 abort ();
b9c74b4d 6545 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6546 }
70ca06f8 6547
32f79657 6548 gcc_assert (i == insn_data[icode].n_generator_args);
70ca06f8 6549 return i;
6352eedf 6550}
6551
6552static rtx
6553spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6554 tree exp, rtx target)
6352eedf 6555{
6556 rtx pat;
6557 rtx ops[8];
bc620c5c 6558 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6559 enum machine_mode mode, tmode;
6560 int i, p;
70ca06f8 6561 int n_operands;
6352eedf 6562 tree return_type;
6563
6564 /* Set up ops[] with values from arglist. */
70ca06f8 6565 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6566
6567 /* Handle the target operand which must be operand 0. */
6568 i = 0;
6569 if (d->parm[0] != SPU_BTI_VOID)
6570 {
6571
 6572 /* We prefer the mode specified for the match_operand, otherwise
6573 use the mode from the builtin function prototype. */
6574 tmode = insn_data[d->icode].operand[0].mode;
6575 if (tmode == VOIDmode)
6576 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6577
 6578 /* Try to use target because not using it can lead to extra copies,
 6579 and when all of the registers are in use those extra copies lead
 6580 to extra spills. */
6581 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6582 ops[0] = target;
6583 else
6584 target = ops[0] = gen_reg_rtx (tmode);
6585
6586 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6587 abort ();
6588
6589 i++;
6590 }
6591
a76866d3 6592 if (d->fcode == SPU_MASK_FOR_LOAD)
6593 {
6594 enum machine_mode mode = insn_data[icode].operand[1].mode;
6595 tree arg;
6596 rtx addr, op, pat;
6597
6598 /* get addr */
5df189be 6599 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6600 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6601 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6602 addr = memory_address (mode, op);
6603
6604 /* negate addr */
6605 op = gen_reg_rtx (GET_MODE (addr));
6606 emit_insn (gen_rtx_SET (VOIDmode, op,
6607 gen_rtx_NEG (GET_MODE (addr), addr)));
6608 op = gen_rtx_MEM (mode, op);
6609
6610 pat = GEN_FCN (icode) (target, op);
6611 if (!pat)
6612 return 0;
6613 emit_insn (pat);
6614 return target;
6615 }
6616
 6352eedf 6617 /* Ignore align_hint, but still expand its args in case they have
6618 side effects. */
6619 if (icode == CODE_FOR_spu_align_hint)
6620 return 0;
6621
6622 /* Handle the rest of the operands. */
70ca06f8 6623 for (p = 1; i < n_operands; i++, p++)
6352eedf 6624 {
6625 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6626 mode = insn_data[d->icode].operand[i].mode;
6627 else
6628 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6629
6630 /* mode can be VOIDmode here for labels */
6631
6632 /* For specific intrinsics with an immediate operand, e.g.,
6633 si_ai(), we sometimes need to convert the scalar argument to a
6634 vector argument by splatting the scalar. */
6635 if (VECTOR_MODE_P (mode)
6636 && (GET_CODE (ops[i]) == CONST_INT
6637 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6638 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6639 {
6640 if (GET_CODE (ops[i]) == CONST_INT)
6641 ops[i] = spu_const (mode, INTVAL (ops[i]));
6642 else
6643 {
6644 rtx reg = gen_reg_rtx (mode);
6645 enum machine_mode imode = GET_MODE_INNER (mode);
6646 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6647 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6648 if (imode != GET_MODE (ops[i]))
6649 ops[i] = convert_to_mode (imode, ops[i],
6650 TYPE_UNSIGNED (spu_builtin_types
6651 [d->parm[i]]));
6652 emit_insn (gen_spu_splats (reg, ops[i]));
6653 ops[i] = reg;
6654 }
6655 }
6656
5df189be 6657 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6658
6352eedf 6659 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6660 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6661 }
6662
70ca06f8 6663 switch (n_operands)
6352eedf 6664 {
6665 case 0:
6666 pat = GEN_FCN (icode) (0);
6667 break;
6668 case 1:
6669 pat = GEN_FCN (icode) (ops[0]);
6670 break;
6671 case 2:
6672 pat = GEN_FCN (icode) (ops[0], ops[1]);
6673 break;
6674 case 3:
6675 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6676 break;
6677 case 4:
6678 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6679 break;
6680 case 5:
6681 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6682 break;
6683 case 6:
6684 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6685 break;
6686 default:
6687 abort ();
6688 }
6689
6690 if (!pat)
6691 abort ();
6692
6693 if (d->type == B_CALL || d->type == B_BISLED)
6694 emit_call_insn (pat);
6695 else if (d->type == B_JUMP)
6696 {
6697 emit_jump_insn (pat);
6698 emit_barrier ();
6699 }
6700 else
6701 emit_insn (pat);
6702
6703 return_type = spu_builtin_types[d->parm[0]];
6704 if (d->parm[0] != SPU_BTI_VOID
6705 && GET_MODE (target) != TYPE_MODE (return_type))
6706 {
 6707 /* target is the return value. It should always be in the mode of
6708 the builtin function prototype. */
6709 target = spu_force_reg (TYPE_MODE (return_type), target);
6710 }
6711
6712 return target;
6713}
6714
6715rtx
6716spu_expand_builtin (tree exp,
6717 rtx target,
6718 rtx subtarget ATTRIBUTE_UNUSED,
6719 enum machine_mode mode ATTRIBUTE_UNUSED,
6720 int ignore ATTRIBUTE_UNUSED)
6721{
5df189be 6722 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6723 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6724 struct spu_builtin_description *d;
6725
6726 if (fcode < NUM_SPU_BUILTINS)
6727 {
6728 d = &spu_builtins[fcode];
6729
5df189be 6730 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6731 }
6732 abort ();
6733}
6734
e99f512d 6735/* Implement targetm.vectorize.builtin_mul_widen_even. */
6736static tree
6737spu_builtin_mul_widen_even (tree type)
6738{
e99f512d 6739 switch (TYPE_MODE (type))
6740 {
6741 case V8HImode:
6742 if (TYPE_UNSIGNED (type))
0c5c4d59 6743 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6744 else
0c5c4d59 6745 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6746 break;
6747 default:
6748 return NULL_TREE;
6749 }
6750}
6751
6752/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6753static tree
6754spu_builtin_mul_widen_odd (tree type)
6755{
6756 switch (TYPE_MODE (type))
6757 {
6758 case V8HImode:
6759 if (TYPE_UNSIGNED (type))
0c5c4d59 6760 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6761 else
0c5c4d59 6762 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6763 break;
6764 default:
6765 return NULL_TREE;
6766 }
6767}
6768
a76866d3 6769/* Implement targetm.vectorize.builtin_mask_for_load. */
6770static tree
6771spu_builtin_mask_for_load (void)
6772{
0c5c4d59 6773 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6774}
5df189be 6775
a28df51d 6776/* Implement targetm.vectorize.builtin_vectorization_cost. */
6777static int
0822b158 6778spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6779 tree vectype ATTRIBUTE_UNUSED,
6780 int misalign ATTRIBUTE_UNUSED)
559093aa 6781{
6782 switch (type_of_cost)
6783 {
6784 case scalar_stmt:
6785 case vector_stmt:
6786 case vector_load:
6787 case vector_store:
6788 case vec_to_scalar:
6789 case scalar_to_vec:
6790 case cond_branch_not_taken:
6791 case vec_perm:
6792 return 1;
6793
6794 case scalar_store:
6795 return 10;
6796
6797 case scalar_load:
6798 /* Load + rotate. */
6799 return 2;
6800
6801 case unaligned_load:
6802 return 2;
6803
6804 case cond_branch_taken:
6805 return 6;
6806
6807 default:
6808 gcc_unreachable ();
6809 }
a28df51d 6810}
6811
0e87db76 6812/* Return true iff, data reference of TYPE can reach vector alignment (16)
6813 after applying N number of iterations. This routine does not determine
6814 how may iterations are required to reach desired alignment. */
6815
6816static bool
a9f1838b 6817spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6818{
6819 if (is_packed)
6820 return false;
6821
6822 /* All other types are naturally aligned. */
6823 return true;
6824}
6825
a0515226 6826/* Implement targetm.vectorize.builtin_vec_perm. */
6827tree
6828spu_builtin_vec_perm (tree type, tree *mask_element_type)
6829{
a0515226 6830 *mask_element_type = unsigned_char_type_node;
6831
6832 switch (TYPE_MODE (type))
6833 {
6834 case V16QImode:
6835 if (TYPE_UNSIGNED (type))
0c5c4d59 6836 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6837 else
0c5c4d59 6838 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6839
6840 case V8HImode:
6841 if (TYPE_UNSIGNED (type))
0c5c4d59 6842 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6843 else
0c5c4d59 6844 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6845
6846 case V4SImode:
6847 if (TYPE_UNSIGNED (type))
0c5c4d59 6848 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6849 else
0c5c4d59 6850 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6851
6852 case V2DImode:
6853 if (TYPE_UNSIGNED (type))
0c5c4d59 6854 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6855 else
0c5c4d59 6856 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6857
6858 case V4SFmode:
0c5c4d59 6859 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6860
6861 case V2DFmode:
0c5c4d59 6862 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6863
6864 default:
6865 return NULL_TREE;
6866 }
a0515226 6867}
6868
6cf5579e 6869/* Return the appropriate mode for a named address pointer. */
6870static enum machine_mode
6871spu_addr_space_pointer_mode (addr_space_t addrspace)
6872{
6873 switch (addrspace)
6874 {
6875 case ADDR_SPACE_GENERIC:
6876 return ptr_mode;
6877 case ADDR_SPACE_EA:
6878 return EAmode;
6879 default:
6880 gcc_unreachable ();
6881 }
6882}
6883
6884/* Return the appropriate mode for a named address address. */
6885static enum machine_mode
6886spu_addr_space_address_mode (addr_space_t addrspace)
6887{
6888 switch (addrspace)
6889 {
6890 case ADDR_SPACE_GENERIC:
6891 return Pmode;
6892 case ADDR_SPACE_EA:
6893 return EAmode;
6894 default:
6895 gcc_unreachable ();
6896 }
6897}
6898
6899/* Determine if one named address space is a subset of another. */
6900
6901static bool
6902spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6903{
6904 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6905 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6906
6907 if (subset == superset)
6908 return true;
6909
6910 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6911 being subsets but instead as disjoint address spaces. */
6912 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6913 return false;
6914
6915 else
6916 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6917}
6918
6919/* Convert from one address space to another. */
6920static rtx
6921spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6922{
6923 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6924 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6925
6926 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6927 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6928
6929 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6930 {
6931 rtx result, ls;
6932
6933 ls = gen_const_mem (DImode,
6934 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6935 set_mem_align (ls, 128);
6936
6937 result = gen_reg_rtx (Pmode);
6938 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6939 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6940 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6941 ls, const0_rtx, Pmode, 1);
6942
6943 emit_insn (gen_subsi3 (result, op, ls));
6944
6945 return result;
6946 }
6947
6948 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6949 {
6950 rtx result, ls;
6951
6952 ls = gen_const_mem (DImode,
6953 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6954 set_mem_align (ls, 128);
6955
6956 result = gen_reg_rtx (EAmode);
6957 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6958 op = force_reg (Pmode, op);
6959 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6960 ls, const0_rtx, EAmode, 1);
6961 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6962
6963 if (EAmode == SImode)
6964 emit_insn (gen_addsi3 (result, op, ls));
6965 else
6966 emit_insn (gen_adddi3 (result, op, ls));
6967
6968 return result;
6969 }
6970
6971 else
6972 gcc_unreachable ();
6973}
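/* Illustration only: the mapping implemented above written as plain
   arithmetic, assuming __ea_local_store holds the local-store base within
   the (here 64-bit) __ea address space.  NULL maps to NULL in both
   directions.  The variable and function names are hypothetical.  */
#if 0 /* example only */
unsigned long long ea_local_store;	/* value loaded from __ea_local_store */

static unsigned int
ea_to_generic (unsigned long long ea)
{
  return ea ? (unsigned int) (ea - ea_local_store) : 0;
}

static unsigned long long
generic_to_ea (unsigned int lsa)
{
  return lsa ? ea_local_store + lsa : 0;
}
#endif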
6974
6975
d52fd16a 6976/* Count the total number of instructions in each pipe and return the
6977 maximum, which is used as the Minimum Iteration Interval (MII)
6978 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6979 -2 means the instruction can go in either pipe0 or pipe1. */
6980static int
6981spu_sms_res_mii (struct ddg *g)
6982{
6983 int i;
6984 unsigned t[4] = {0, 0, 0, 0};
6985
6986 for (i = 0; i < g->num_nodes; i++)
6987 {
6988 rtx insn = g->nodes[i].insn;
6989 int p = get_pipe (insn) + 2;
6990
1e944a0b 6991 gcc_assert (p >= 0);
6992 gcc_assert (p < 4);
d52fd16a 6993
6994 t[p]++;
6995 if (dump_file && INSN_P (insn))
6996 fprintf (dump_file, "i%d %s %d %d\n",
6997 INSN_UID (insn),
6998 insn_data[INSN_CODE(insn)].name,
6999 p, t[p]);
7000 }
7001 if (dump_file)
7002 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
7003
7004 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
7005}
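/* Worked example (illustration only): with pipe counts t = { 3, 0, 4, 2 }
   the result is MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5,
   i.e. the total instruction count spread over the two pipes (rounded up),
   but never less than the demand on either specific pipe.  */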
7006
7007
5df189be 7008void
7009spu_init_expanders (void)
9d98604b 7010{
5df189be 7011 if (cfun)
9d98604b 7012 {
7013 rtx r0, r1;
 7014 /* The hard frame pointer register is only 128-bit aligned when
7015 frame_pointer_needed is true. We don't know that until we're
7016 expanding the prologue. */
7017 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7018
7019 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7020 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7021 to be treated as aligned, so generate them here. */
7022 r0 = gen_reg_rtx (SImode);
7023 r1 = gen_reg_rtx (SImode);
7024 mark_reg_pointer (r0, 128);
7025 mark_reg_pointer (r1, 128);
7026 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7027 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7028 }
ea32e033 7029}
7030
7031static enum machine_mode
7032spu_libgcc_cmp_return_mode (void)
7033{
7034
7035/* For SPU, word mode is TImode, so it is better to use SImode
7036 for compare returns. */
7037 return SImode;
7038}
7039
7040static enum machine_mode
7041spu_libgcc_shift_count_mode (void)
7042{
7043/* For SPU, word mode is TImode, so it is better to use SImode
7044 for shift counts. */
7045 return SImode;
7046}
5a976006 7047
7048/* An early place to adjust some flags after GCC has finished processing
 7049 them. */
7050static void
7051asm_file_start (void)
7052{
7053 /* Variable tracking should be run after all optimizations which
0ef14db8 7054 change order of insns. It also needs a valid CFG. Therefore,
7055 *if* we make nontrivial changes in machine-dependent reorg,
7056 run variable tracking after those. However, if we do not run
7057 our machine-dependent reorg pass, we must still run the normal
7058 variable tracking pass (or else we will ICE in final since
7059 debug insns have not been removed). */
7060 if (TARGET_BRANCH_HINTS && optimize)
7061 {
7062 spu_flag_var_tracking = flag_var_tracking;
7063 flag_var_tracking = 0;
7064 }
5a976006 7065
7066 default_file_start ();
7067}
7068
a08dfd55 7069/* Implement targetm.section_type_flags. */
7070static unsigned int
7071spu_section_type_flags (tree decl, const char *name, int reloc)
7072{
7073 /* .toe needs to have type @nobits. */
7074 if (strcmp (name, ".toe") == 0)
7075 return SECTION_BSS;
6cf5579e 7076 /* Don't load _ea into the current address space. */
7077 if (strcmp (name, "._ea") == 0)
7078 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7079 return default_section_type_flags (decl, name, reloc);
7080}
c2233b46 7081
6cf5579e 7082/* Implement targetm.select_section. */
7083static section *
7084spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7085{
7086 /* Variables and constants defined in the __ea address space
7087 go into a special section named "._ea". */
7088 if (TREE_TYPE (decl) != error_mark_node
7089 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7090 {
7091 /* We might get called with string constants, but get_named_section
7092 doesn't like them as they are not DECLs. Also, we need to set
7093 flags in that case. */
7094 if (!DECL_P (decl))
7095 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7096
7097 return get_named_section (decl, "._ea", reloc);
7098 }
7099
7100 return default_elf_select_section (decl, reloc, align);
7101}
7102
7103/* Implement targetm.unique_section. */
7104static void
7105spu_unique_section (tree decl, int reloc)
7106{
7107 /* We don't support unique section names in the __ea address
7108 space for now. */
7109 if (TREE_TYPE (decl) != error_mark_node
7110 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7111 return;
7112
7113 default_unique_section (decl, reloc);
7114}
7115
56c7bfc2 7116/* Generate a constant or register which contains 2^SCALE. We assume
7117 the result is valid for MODE. Currently, MODE must be V4SFmode and
7118 SCALE must be SImode. */
7119rtx
7120spu_gen_exp2 (enum machine_mode mode, rtx scale)
7121{
7122 gcc_assert (mode == V4SFmode);
7123 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7124 if (GET_CODE (scale) != CONST_INT)
7125 {
7126 /* unsigned int exp = (127 + scale) << 23;
7127 __vector float m = (__vector float) spu_splats (exp); */
7128 rtx reg = force_reg (SImode, scale);
7129 rtx exp = gen_reg_rtx (SImode);
7130 rtx mul = gen_reg_rtx (mode);
7131 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7132 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7133 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7134 return mul;
7135 }
7136 else
7137 {
7138 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7139 unsigned char arr[16];
7140 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7141 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7142 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7143 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7144 return array_to_constant (mode, arr);
7145 }
7146}
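/* Worked example (illustration only) of the constant branch above: for
   SCALE = 3, exp = 127 + 3 = 130 and each word gets the bytes
   { exp >> 1, (exp << 7) & 0xff, 0, 0 } = { 0x41, 0x00, 0x00, 0x00 },
   i.e. 0x41000000, the IEEE single-precision encoding of 8.0f = 2^3.
   Byte 0 holds the top seven bits of the exponent field (bits 30..23) and
   byte 1 its low bit.  */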
7147
9d98604b 7148/* After reload, just change the convert into a move instruction
7149 or a dead instruction. */
7150void
7151spu_split_convert (rtx ops[])
7152{
7153 if (REGNO (ops[0]) == REGNO (ops[1]))
7154 emit_note (NOTE_INSN_DELETED);
7155 else
7156 {
7157 /* Use TImode always as this might help hard reg copyprop. */
7158 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7159 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7160 emit_insn (gen_move_insn (op0, op1));
7161 }
7162}
7163
b3878a6c 7164void
4cbad5bb 7165spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7166{
7167 fprintf (file, "# profile\n");
7168 fprintf (file, "brsl $75, _mcount\n");
7169}
7170
329c1e4e 7171/* Implement targetm.ref_may_alias_errno. */
7172static bool
7173spu_ref_may_alias_errno (ao_ref *ref)
7174{
7175 tree base = ao_ref_base (ref);
7176
7177 /* With SPU newlib, errno is defined as something like
7178 _impure_data._errno
7179 The default implementation of this target macro does not
 7180 recognize such expressions, so we special-case it here. */
7181
7182 if (TREE_CODE (base) == VAR_DECL
7183 && !TREE_STATIC (base)
7184 && DECL_EXTERNAL (base)
7185 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7186 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7187 "_impure_data") == 0
7188 /* _errno is the first member of _impure_data. */
7189 && ref->offset == 0)
7190 return true;
7191
7192 return default_ref_may_alias_errno (ref);
7193}
7194
c2233b46 7195#include "gt-spu.h"