7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "ggc.h"
40#include "hashtab.h"
41#include "tm_p.h"
42#include "target.h"
43#include "target-def.h"
44#include "langhooks.h"
45#include "reload.h"
46#include "cfglayout.h"
47#include "sched-int.h"
48#include "params.h"
644459d0 49#include "machmode.h"
75a70cf9 50#include "gimple.h"
644459d0 51#include "tm-constrs.h"
d52fd16a 52#include "ddg.h"
5a976006 53#include "sbitmap.h"
54#include "timevar.h"
55#include "df.h"
6352eedf 56
57/* Builtin types, data and prototypes. */
c2233b46 58
59enum spu_builtin_type_index
60{
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110};
111
112#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
6352eedf 125struct spu_builtin_range
126{
127 int low, high;
128};
129
130static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143};
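/* Each entry above gives the inclusive range accepted for the matching
   SPU_BTI_* immediate operand -- e.g. an SPU_BTI_U7 operand must lie in
   [0, 0x7f] and an SPU_BTI_S10 operand in [-0x200, 0x1ff].  These bounds
   are presumably what builtin arguments declared with those immediate
   types are checked against.  */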
144
644459d0 145\f
146/* Target specific attribute specifications. */
147char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149/* Prototypes and external defs. */
4c834714 150static void spu_option_override (void);
cc07c468 151static void spu_option_init_struct (struct gcc_options *opts);
686e2769 152static void spu_option_default_params (void);
644459d0 153static void spu_init_builtins (void);
e6925042 154static tree spu_builtin_decl (unsigned, bool);
b62e30b8 155static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 157static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 158static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
644459d0 160static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161static rtx get_pic_reg (void);
162static int need_to_save_reg (int regno, int saving);
163static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167static void emit_nop_for_insn (rtx insn);
168static bool insn_clobbers_hbr (rtx insn);
169static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 170 int distance, sbitmap blocks);
5474166e 171static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
644459d0 173static rtx get_branch_target (rtx branch);
644459d0 174static void spu_machine_dependent_reorg (void);
175static int spu_sched_issue_rate (void);
176static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178static int get_pipe (rtx insn);
644459d0 179static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 180static void spu_sched_init_global (FILE *, int, int);
181static void spu_sched_init (FILE *, int, int);
182static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 183static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
b62e30b8 185 bool *no_add_attrs);
644459d0 186static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
b62e30b8 188 bool *no_add_attrs);
644459d0 189static int spu_naked_function_p (tree func);
b62e30b8 190static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
ee9034d4 192static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
644459d0 196static tree spu_build_builtin_va_list (void);
8a58ed0a 197static void spu_va_start (tree, rtx);
75a70cf9 198static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 200static int store_with_one_insn_p (rtx mem);
644459d0 201static int mem_is_padded_component_ref (rtx x);
9d98604b 202static int reg_aligned_for_addr (rtx x);
644459d0 203static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 205static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 208static void spu_init_libfuncs (void);
fb80456a 209static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 210static void fix_range (const char *);
69ced2d6 211static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 212static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 213static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
e99f512d 215static tree spu_builtin_mul_widen_even (tree);
216static tree spu_builtin_mul_widen_odd (tree);
a76866d3 217static tree spu_builtin_mask_for_load (void);
0822b158 218static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 219static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 220static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
5a976006 226static void asm_file_start (void);
a08dfd55 227static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 228static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229static void spu_unique_section (tree, int);
9d98604b 230static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 231static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 232static void spu_conditional_register_usage (void);
644459d0 233
5474166e 234/* Which instruction set architecture to use. */
235int spu_arch;
236/* Which cpu are we tuning for. */
237int spu_tune;
238
5a976006 239/* The hardware requires 8 insns between a hint and the branch it
 240 affects. This variable describes how many rtl instructions the
241 compiler needs to see before inserting a hint, and then the compiler
242 will insert enough nops to make it at least 8 insns. The default is
 243 for the compiler to allow up to 2 nops to be emitted. The nops are
244 inserted in pairs, so we round down. */
245int spu_hint_dist = (8*4) - (2*4);
246
247/* Determines whether we run variable tracking in machine dependent
248 reorganization. */
249static int spu_flag_var_tracking;
250
644459d0 251enum spu_immediate {
252 SPU_NONE,
253 SPU_IL,
254 SPU_ILA,
255 SPU_ILH,
256 SPU_ILHU,
257 SPU_ORI,
258 SPU_ORHI,
259 SPU_ORBI,
99369027 260 SPU_IOHL
644459d0 261};
dea01258 262enum immediate_class
263{
264 IC_POOL, /* constant pool */
265 IC_IL1, /* one il* instruction */
266 IC_IL2, /* both ilhu and iohl instructions */
267 IC_IL1s, /* one il* instruction */
268 IC_IL2s, /* both ilhu and iohl instructions */
269 IC_FSMBI, /* the fsmbi instruction */
270 IC_CPAT, /* one of the c*d instructions */
5df189be 271 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 272};
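/* Roughly, as used by classify_immediate and print_operand below:
   IC_IL1/IC_IL1s constants load with a single il, ila, ilh or ilhu;
   IC_IL2/IC_IL2s need the ilhu+iohl pair; IC_FSMBI/IC_FSMBI2 are byte
   mask patterns; IC_CPAT matches the "generate controls" c*d family
   (presumably cbd/chd/cwd/cdd, given the b/h/w/d suffixes printed for
   it); IC_POOL falls back to the constant pool.  */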
644459d0 273
274static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
275static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 276static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
277static enum immediate_class classify_immediate (rtx op,
278 enum machine_mode mode);
644459d0 279
1bd43494 280static enum machine_mode spu_unwind_word_mode (void);
281
ea32e033 282static enum machine_mode
283spu_libgcc_cmp_return_mode (void);
284
285static enum machine_mode
286spu_libgcc_shift_count_mode (void);
6cf5579e 287
288/* Pointer mode for __ea references. */
289#define EAmode (spu_ea_model != 32 ? DImode : SImode)
290
ef51d1e3 291\f
292/* Table of machine attributes. */
293static const struct attribute_spec spu_attribute_table[] =
294{
295 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
296 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
297 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
298 { NULL, 0, 0, false, false, false, NULL }
299};
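/* Illustrative use of the attributes above (not from this file): "naked"
   attaches to a function declaration and "spu_vector" to a type, matching
   the decl_req/type_req flags in the table, e.g.

     void isr (void) __attribute__ ((naked));

   The user-level spelling of the vector attribute is whatever
   spu_handle_vector_attribute accepts.  */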
644459d0 300\f
301/* TARGET overrides. */
302
6cf5579e 303#undef TARGET_ADDR_SPACE_POINTER_MODE
304#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
305
306#undef TARGET_ADDR_SPACE_ADDRESS_MODE
307#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
308
309#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
310#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
311 spu_addr_space_legitimate_address_p
312
313#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
314#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
315
316#undef TARGET_ADDR_SPACE_SUBSET_P
317#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
318
319#undef TARGET_ADDR_SPACE_CONVERT
320#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
321
644459d0 322#undef TARGET_INIT_BUILTINS
323#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 324#undef TARGET_BUILTIN_DECL
325#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 326
644459d0 327#undef TARGET_EXPAND_BUILTIN
328#define TARGET_EXPAND_BUILTIN spu_expand_builtin
329
1bd43494 330#undef TARGET_UNWIND_WORD_MODE
331#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 332
41e3a0c7 333#undef TARGET_LEGITIMIZE_ADDRESS
334#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
335
6cf5579e 336/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
337 and .quad for the debugger. When it is known that the assembler is fixed,
338 these can be removed. */
339#undef TARGET_ASM_UNALIGNED_SI_OP
340#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
341
342#undef TARGET_ASM_ALIGNED_DI_OP
343#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
344
644459d0 345/* The .8byte directive doesn't seem to work well for a 32 bit
346 architecture. */
347#undef TARGET_ASM_UNALIGNED_DI_OP
348#define TARGET_ASM_UNALIGNED_DI_OP NULL
349
350#undef TARGET_RTX_COSTS
351#define TARGET_RTX_COSTS spu_rtx_costs
352
353#undef TARGET_ADDRESS_COST
f529eb25 354#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 355
356#undef TARGET_SCHED_ISSUE_RATE
357#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
358
5a976006 359#undef TARGET_SCHED_INIT_GLOBAL
360#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
361
362#undef TARGET_SCHED_INIT
363#define TARGET_SCHED_INIT spu_sched_init
364
644459d0 365#undef TARGET_SCHED_VARIABLE_ISSUE
366#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
367
5a976006 368#undef TARGET_SCHED_REORDER
369#define TARGET_SCHED_REORDER spu_sched_reorder
370
371#undef TARGET_SCHED_REORDER2
372#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 373
374#undef TARGET_SCHED_ADJUST_COST
375#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
376
644459d0 377#undef TARGET_ATTRIBUTE_TABLE
378#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
379
380#undef TARGET_ASM_INTEGER
381#define TARGET_ASM_INTEGER spu_assemble_integer
382
383#undef TARGET_SCALAR_MODE_SUPPORTED_P
384#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
385
386#undef TARGET_VECTOR_MODE_SUPPORTED_P
387#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
388
389#undef TARGET_FUNCTION_OK_FOR_SIBCALL
390#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
391
392#undef TARGET_ASM_GLOBALIZE_LABEL
393#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
394
395#undef TARGET_PASS_BY_REFERENCE
396#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
397
ee9034d4 398#undef TARGET_FUNCTION_ARG
399#define TARGET_FUNCTION_ARG spu_function_arg
400
401#undef TARGET_FUNCTION_ARG_ADVANCE
402#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
403
644459d0 404#undef TARGET_MUST_PASS_IN_STACK
405#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
406
407#undef TARGET_BUILD_BUILTIN_VA_LIST
408#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
409
8a58ed0a 410#undef TARGET_EXPAND_BUILTIN_VA_START
411#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
412
644459d0 413#undef TARGET_SETUP_INCOMING_VARARGS
414#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
415
416#undef TARGET_MACHINE_DEPENDENT_REORG
417#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
418
419#undef TARGET_GIMPLIFY_VA_ARG_EXPR
420#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
421
422#undef TARGET_DEFAULT_TARGET_FLAGS
423#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
424
425#undef TARGET_INIT_LIBFUNCS
426#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
427
428#undef TARGET_RETURN_IN_MEMORY
429#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
430
69ced2d6 431#undef TARGET_ENCODE_SECTION_INFO
432#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
433
e99f512d 434#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
435#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
436
437#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
438#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
439
a76866d3 440#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
441#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
442
a28df51d 443#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
444#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
445
202d6e5f 446#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
447#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 448
a0515226 449#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
450#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
451
ea32e033 452#undef TARGET_LIBGCC_CMP_RETURN_MODE
453#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
454
455#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
456#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
457
d52fd16a 458#undef TARGET_SCHED_SMS_RES_MII
459#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
460
5a976006 461#undef TARGET_ASM_FILE_START
462#define TARGET_ASM_FILE_START asm_file_start
463
a08dfd55 464#undef TARGET_SECTION_TYPE_FLAGS
465#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
466
6cf5579e 467#undef TARGET_ASM_SELECT_SECTION
468#define TARGET_ASM_SELECT_SECTION spu_select_section
469
470#undef TARGET_ASM_UNIQUE_SECTION
471#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
472
fd50b071 473#undef TARGET_LEGITIMATE_ADDRESS_P
474#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
475
e96f2783 476#undef TARGET_TRAMPOLINE_INIT
477#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
478
4c834714 479#undef TARGET_OPTION_OVERRIDE
480#define TARGET_OPTION_OVERRIDE spu_option_override
481
cc07c468 482#undef TARGET_OPTION_INIT_STRUCT
483#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
02e53c17 484
686e2769 485#undef TARGET_OPTION_DEFAULT_PARAMS
486#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
487
ed6befa5 488#undef TARGET_EXCEPT_UNWIND_INFO
489#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
490
b2d7ede1 491#undef TARGET_CONDITIONAL_REGISTER_USAGE
492#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
493
644459d0 494struct gcc_target targetm = TARGET_INITIALIZER;
495
02e53c17 496static void
cc07c468 497spu_option_init_struct (struct gcc_options *opts)
5df189be 498{
5df189be 499 /* With so many registers this is better on by default. */
cc07c468 500 opts->x_flag_rename_registers = 1;
5df189be 501}
502
686e2769 503/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
504static void
505spu_option_default_params (void)
506{
507 /* Override some of the default param values. With so many registers
508 larger values are better for these params. */
509 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
510}
511
4c834714 512/* Implement TARGET_OPTION_OVERRIDE. */
513static void
514spu_option_override (void)
644459d0 515{
14d408d9 516 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it is more important
517 to keep code small by default. */
686e2769 518 if (!flag_unroll_loops && !flag_peel_loops)
56f280c4 519 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1,
520 global_options.x_param_values,
521 global_options_set.x_param_values);
14d408d9 522
644459d0 523 flag_omit_frame_pointer = 1;
524
5a976006 525 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 526 if (align_functions < 8)
527 align_functions = 8;
c7b91b14 528
5a976006 529 spu_hint_dist = 8*4 - spu_max_nops*4;
530 if (spu_hint_dist < 0)
531 spu_hint_dist = 0;
532
c7b91b14 533 if (spu_fixed_range_string)
534 fix_range (spu_fixed_range_string);
5474166e 535
536 /* Determine processor architectural level. */
537 if (spu_arch_string)
538 {
539 if (strcmp (&spu_arch_string[0], "cell") == 0)
540 spu_arch = PROCESSOR_CELL;
541 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
542 spu_arch = PROCESSOR_CELLEDP;
543 else
8e181c9d 544 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 545 }
546
547 /* Determine processor to tune for. */
548 if (spu_tune_string)
549 {
550 if (strcmp (&spu_tune_string[0], "cell") == 0)
551 spu_tune = PROCESSOR_CELL;
552 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
553 spu_tune = PROCESSOR_CELLEDP;
554 else
8e181c9d 555 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 556 }
98bbec1e 557
13684256 558 /* Change defaults according to the processor architecture. */
559 if (spu_arch == PROCESSOR_CELLEDP)
560 {
561 /* If no command line option has been otherwise specified, change
562 the default to -mno-safe-hints on celledp -- only the original
563 Cell/B.E. processors require this workaround. */
564 if (!(target_flags_explicit & MASK_SAFE_HINTS))
565 target_flags &= ~MASK_SAFE_HINTS;
566 }
567
98bbec1e 568 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 569}
570\f
571/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
572 struct attribute_spec.handler. */
573
644459d0 574/* True if MODE is valid for the target. By "valid", we mean able to
575 be manipulated in non-trivial ways. In particular, this means all
576 the arithmetic is supported. */
577static bool
578spu_scalar_mode_supported_p (enum machine_mode mode)
579{
580 switch (mode)
581 {
582 case QImode:
583 case HImode:
584 case SImode:
585 case SFmode:
586 case DImode:
587 case TImode:
588 case DFmode:
589 return true;
590
591 default:
592 return false;
593 }
594}
595
596/* Similarly for vector modes. "Supported" here is less strict. At
 597 least some operations are supported; callers need to check optabs or builtins
598 for further details. */
599static bool
600spu_vector_mode_supported_p (enum machine_mode mode)
601{
602 switch (mode)
603 {
604 case V16QImode:
605 case V8HImode:
606 case V4SImode:
607 case V2DImode:
608 case V4SFmode:
609 case V2DFmode:
610 return true;
611
612 default:
613 return false;
614 }
615}
616
617/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
618 least significant bytes of the outer mode. This function returns
619 TRUE for the SUBREG's where this is correct. */
620int
621valid_subreg (rtx op)
622{
623 enum machine_mode om = GET_MODE (op);
624 enum machine_mode im = GET_MODE (SUBREG_REG (op));
625 return om != VOIDmode && im != VOIDmode
626 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 627 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
628 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 629}
630
 631/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 632 and adjust the start offset. */
644459d0 633static rtx
634adjust_operand (rtx op, HOST_WIDE_INT * start)
635{
636 enum machine_mode mode;
637 int op_size;
38aca5eb 638 /* Strip any paradoxical SUBREG. */
639 if (GET_CODE (op) == SUBREG
640 && (GET_MODE_BITSIZE (GET_MODE (op))
641 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 642 {
643 if (start)
644 *start -=
645 GET_MODE_BITSIZE (GET_MODE (op)) -
646 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
647 op = SUBREG_REG (op);
648 }
 649 /* If it is smaller than SI, ensure a SUBREG. */
650 op_size = GET_MODE_BITSIZE (GET_MODE (op));
651 if (op_size < 32)
652 {
653 if (start)
654 *start += 32 - op_size;
655 op_size = 32;
656 }
657 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
658 mode = mode_for_size (op_size, MODE_INT, 0);
659 if (mode != GET_MODE (op))
660 op = gen_rtx_SUBREG (mode, op, 0);
661 return op;
662}
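/* Worked example (illustrative): passing a QImode register with *START
   of 5 returns an SImode SUBREG and sets *START to 5 + (32 - 8) = 29,
   since the bit offset is counted from the most significant end of the
   wider mode (compare the SHIFT computation in spu_expand_insv).  */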
663
664void
665spu_expand_extv (rtx ops[], int unsignedp)
666{
9d98604b 667 rtx dst = ops[0], src = ops[1];
644459d0 668 HOST_WIDE_INT width = INTVAL (ops[2]);
669 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 670 HOST_WIDE_INT align_mask;
671 rtx s0, s1, mask, r0;
644459d0 672
9d98604b 673 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 674
9d98604b 675 if (MEM_P (src))
644459d0 676 {
9d98604b 677 /* First, determine if we need 1 TImode load or 2. We need only 1
678 if the bits being extracted do not cross the alignment boundary
679 as determined by the MEM and its address. */
680
681 align_mask = -MEM_ALIGN (src);
682 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 683 {
9d98604b 684 /* Alignment is sufficient for 1 load. */
685 s0 = gen_reg_rtx (TImode);
686 r0 = spu_expand_load (s0, 0, src, start / 8);
687 start &= 7;
688 if (r0)
689 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 690 }
9d98604b 691 else
692 {
693 /* Need 2 loads. */
694 s0 = gen_reg_rtx (TImode);
695 s1 = gen_reg_rtx (TImode);
696 r0 = spu_expand_load (s0, s1, src, start / 8);
697 start &= 7;
698
699 gcc_assert (start + width <= 128);
700 if (r0)
701 {
702 rtx r1 = gen_reg_rtx (SImode);
703 mask = gen_reg_rtx (TImode);
704 emit_move_insn (mask, GEN_INT (-1));
705 emit_insn (gen_rotqby_ti (s0, s0, r0));
706 emit_insn (gen_rotqby_ti (s1, s1, r0));
707 if (GET_CODE (r0) == CONST_INT)
708 r1 = GEN_INT (INTVAL (r0) & 15);
709 else
710 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
711 emit_insn (gen_shlqby_ti (mask, mask, r1));
712 emit_insn (gen_selb (s0, s1, s0, mask));
713 }
714 }
715
716 }
717 else if (GET_CODE (src) == SUBREG)
718 {
719 rtx r = SUBREG_REG (src);
720 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
721 s0 = gen_reg_rtx (TImode);
722 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
723 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
724 else
725 emit_move_insn (s0, src);
726 }
727 else
728 {
729 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
730 s0 = gen_reg_rtx (TImode);
731 emit_move_insn (s0, src);
644459d0 732 }
733
9d98604b 734 /* Now s0 is TImode and contains the bits to extract at start. */
735
736 if (start)
737 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
738
739 if (128 - width)
644459d0 740 {
9d98604b 741 tree c = build_int_cst (NULL_TREE, 128 - width);
742 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 743 }
744
9d98604b 745 emit_move_insn (dst, s0);
644459d0 746}
747
748void
749spu_expand_insv (rtx ops[])
750{
751 HOST_WIDE_INT width = INTVAL (ops[1]);
752 HOST_WIDE_INT start = INTVAL (ops[2]);
753 HOST_WIDE_INT maskbits;
4cbad5bb 754 enum machine_mode dst_mode;
644459d0 755 rtx dst = ops[0], src = ops[3];
4cbad5bb 756 int dst_size;
644459d0 757 rtx mask;
758 rtx shift_reg;
759 int shift;
760
761
762 if (GET_CODE (ops[0]) == MEM)
763 dst = gen_reg_rtx (TImode);
764 else
765 dst = adjust_operand (dst, &start);
766 dst_mode = GET_MODE (dst);
767 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
768
769 if (CONSTANT_P (src))
770 {
771 enum machine_mode m =
772 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
773 src = force_reg (m, convert_to_mode (m, src, 0));
774 }
775 src = adjust_operand (src, 0);
644459d0 776
777 mask = gen_reg_rtx (dst_mode);
778 shift_reg = gen_reg_rtx (dst_mode);
779 shift = dst_size - start - width;
780
781 /* It's not safe to use subreg here because the compiler assumes
782 that the SUBREG_REG is right justified in the SUBREG. */
783 convert_move (shift_reg, src, 1);
784
785 if (shift > 0)
786 {
787 switch (dst_mode)
788 {
789 case SImode:
790 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
791 break;
792 case DImode:
793 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
794 break;
795 case TImode:
796 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
797 break;
798 default:
799 abort ();
800 }
801 }
802 else if (shift < 0)
803 abort ();
804
805 switch (dst_size)
806 {
807 case 32:
808 maskbits = (-1ll << (32 - width - start));
809 if (start)
810 maskbits += (1ll << (32 - start));
811 emit_move_insn (mask, GEN_INT (maskbits));
812 break;
813 case 64:
814 maskbits = (-1ll << (64 - width - start));
815 if (start)
816 maskbits += (1ll << (64 - start));
817 emit_move_insn (mask, GEN_INT (maskbits));
818 break;
819 case 128:
820 {
821 unsigned char arr[16];
822 int i = start / 8;
823 memset (arr, 0, sizeof (arr));
824 arr[i] = 0xff >> (start & 7);
825 for (i++; i <= (start + width - 1) / 8; i++)
826 arr[i] = 0xff;
827 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
828 emit_move_insn (mask, array_to_constant (TImode, arr));
829 }
830 break;
831 default:
832 abort ();
833 }
834 if (GET_CODE (ops[0]) == MEM)
835 {
644459d0 836 rtx low = gen_reg_rtx (SImode);
644459d0 837 rtx rotl = gen_reg_rtx (SImode);
838 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 839 rtx addr;
840 rtx addr0;
841 rtx addr1;
644459d0 842 rtx mem;
843
9d98604b 844 addr = force_reg (Pmode, XEXP (ops[0], 0));
845 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 846 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
847 emit_insn (gen_negsi2 (rotl, low));
848 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
849 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 850 mem = change_address (ops[0], TImode, addr0);
644459d0 851 set_mem_alias_set (mem, 0);
852 emit_move_insn (dst, mem);
853 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 854 if (start + width > MEM_ALIGN (ops[0]))
855 {
856 rtx shl = gen_reg_rtx (SImode);
857 rtx mask1 = gen_reg_rtx (TImode);
858 rtx dst1 = gen_reg_rtx (TImode);
859 rtx mem1;
9d98604b 860 addr1 = plus_constant (addr, 16);
861 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 862 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
863 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 864 mem1 = change_address (ops[0], TImode, addr1);
644459d0 865 set_mem_alias_set (mem1, 0);
866 emit_move_insn (dst1, mem1);
867 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
868 emit_move_insn (mem1, dst1);
869 }
9d98604b 870 emit_move_insn (mem, dst);
644459d0 871 }
872 else
71cd778d 873 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 874}
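/* Summary of the MEM case above: the insert is a read-modify-write of
   the aligned quadword containing the field -- the new bits and the mask
   are rotated into place, selb merges them with the loaded quadword, and
   when the field may extend past the known alignment
   (start + width > MEM_ALIGN) a second quadword is patched the same
   way.  */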
875
876
877int
878spu_expand_block_move (rtx ops[])
879{
880 HOST_WIDE_INT bytes, align, offset;
881 rtx src, dst, sreg, dreg, target;
882 int i;
883 if (GET_CODE (ops[2]) != CONST_INT
884 || GET_CODE (ops[3]) != CONST_INT
48eb4342 885 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 886 return 0;
887
888 bytes = INTVAL (ops[2]);
889 align = INTVAL (ops[3]);
890
891 if (bytes <= 0)
892 return 1;
893
894 dst = ops[0];
895 src = ops[1];
896
897 if (align == 16)
898 {
899 for (offset = 0; offset + 16 <= bytes; offset += 16)
900 {
901 dst = adjust_address (ops[0], V16QImode, offset);
902 src = adjust_address (ops[1], V16QImode, offset);
903 emit_move_insn (dst, src);
904 }
905 if (offset < bytes)
906 {
907 rtx mask;
908 unsigned char arr[16] = { 0 };
909 for (i = 0; i < bytes - offset; i++)
910 arr[i] = 0xff;
911 dst = adjust_address (ops[0], V16QImode, offset);
912 src = adjust_address (ops[1], V16QImode, offset);
913 mask = gen_reg_rtx (V16QImode);
914 sreg = gen_reg_rtx (V16QImode);
915 dreg = gen_reg_rtx (V16QImode);
916 target = gen_reg_rtx (V16QImode);
917 emit_move_insn (mask, array_to_constant (V16QImode, arr));
918 emit_move_insn (dreg, dst);
919 emit_move_insn (sreg, src);
920 emit_insn (gen_selb (target, dreg, sreg, mask));
921 emit_move_insn (dst, target);
922 }
923 return 1;
924 }
925 return 0;
926}
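/* In the aligned path above, whole quadwords are copied with plain
   V16QImode moves; a tail of fewer than 16 bytes is handled by building
   a mask with 0xff for each byte still to copy and using selb, so the
   destination bytes beyond the tail keep their old contents when the
   full quadword is stored back.  */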
927
928enum spu_comp_code
929{ SPU_EQ, SPU_GT, SPU_GTU };
930
5474166e 931int spu_comp_icode[12][3] = {
932 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
933 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
934 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
935 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
936 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
937 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
938 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
939 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
940 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
941 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
942 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
943 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 944};
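/* The table above is indexed by the operand-mode `index' computed in
   spu_emit_branch_or_set (0=QI, 1=HI, 2=SI, 3=DI, 4=TI, 5=SF, 6=DF,
   7=V16QI, 8=V8HI, 9=V4SI, 10=V4SF, 11=V2DF) and then by spu_comp_code
   (SPU_EQ, SPU_GT, SPU_GTU).  A zero entry means there is no direct
   instruction, e.g. unsigned compares on the floating-point modes.  */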
945
946/* Generate a compare for CODE. Return a brand-new rtx that represents
947 the result of the compare. GCC can figure this out too if we don't
948 provide all variations of compares, but GCC always wants to use
949 WORD_MODE, we can generate better code in most cases if we do it
950 ourselves. */
951void
74f4459c 952spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 953{
954 int reverse_compare = 0;
955 int reverse_test = 0;
5d70b918 956 rtx compare_result, eq_result;
957 rtx comp_rtx, eq_rtx;
644459d0 958 enum machine_mode comp_mode;
959 enum machine_mode op_mode;
b9c74b4d 960 enum spu_comp_code scode, eq_code;
961 enum insn_code ior_code;
74f4459c 962 enum rtx_code code = GET_CODE (cmp);
963 rtx op0 = XEXP (cmp, 0);
964 rtx op1 = XEXP (cmp, 1);
644459d0 965 int index;
5d70b918 966 int eq_test = 0;
644459d0 967
74f4459c 968 /* When op1 is a CONST_INT, change (X >= C) to (X > C-1),
644459d0 969 and so on, to keep the constant in operand 1. */
74f4459c 970 if (GET_CODE (op1) == CONST_INT)
644459d0 971 {
74f4459c 972 HOST_WIDE_INT val = INTVAL (op1) - 1;
973 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 974 switch (code)
975 {
976 case GE:
74f4459c 977 op1 = GEN_INT (val);
644459d0 978 code = GT;
979 break;
980 case LT:
74f4459c 981 op1 = GEN_INT (val);
644459d0 982 code = LE;
983 break;
984 case GEU:
74f4459c 985 op1 = GEN_INT (val);
644459d0 986 code = GTU;
987 break;
988 case LTU:
74f4459c 989 op1 = GEN_INT (val);
644459d0 990 code = LEU;
991 break;
992 default:
993 break;
994 }
995 }
996
5d70b918 997 comp_mode = SImode;
74f4459c 998 op_mode = GET_MODE (op0);
5d70b918 999
644459d0 1000 switch (code)
1001 {
1002 case GE:
644459d0 1003 scode = SPU_GT;
07027691 1004 if (HONOR_NANS (op_mode))
5d70b918 1005 {
1006 reverse_compare = 0;
1007 reverse_test = 0;
1008 eq_test = 1;
1009 eq_code = SPU_EQ;
1010 }
1011 else
1012 {
1013 reverse_compare = 1;
1014 reverse_test = 1;
1015 }
644459d0 1016 break;
1017 case LE:
644459d0 1018 scode = SPU_GT;
07027691 1019 if (HONOR_NANS (op_mode))
5d70b918 1020 {
1021 reverse_compare = 1;
1022 reverse_test = 0;
1023 eq_test = 1;
1024 eq_code = SPU_EQ;
1025 }
1026 else
1027 {
1028 reverse_compare = 0;
1029 reverse_test = 1;
1030 }
644459d0 1031 break;
1032 case LT:
1033 reverse_compare = 1;
1034 reverse_test = 0;
1035 scode = SPU_GT;
1036 break;
1037 case GEU:
1038 reverse_compare = 1;
1039 reverse_test = 1;
1040 scode = SPU_GTU;
1041 break;
1042 case LEU:
1043 reverse_compare = 0;
1044 reverse_test = 1;
1045 scode = SPU_GTU;
1046 break;
1047 case LTU:
1048 reverse_compare = 1;
1049 reverse_test = 0;
1050 scode = SPU_GTU;
1051 break;
1052 case NE:
1053 reverse_compare = 0;
1054 reverse_test = 1;
1055 scode = SPU_EQ;
1056 break;
1057
1058 case EQ:
1059 scode = SPU_EQ;
1060 break;
1061 case GT:
1062 scode = SPU_GT;
1063 break;
1064 case GTU:
1065 scode = SPU_GTU;
1066 break;
1067 default:
1068 scode = SPU_EQ;
1069 break;
1070 }
1071
644459d0 1072 switch (op_mode)
1073 {
1074 case QImode:
1075 index = 0;
1076 comp_mode = QImode;
1077 break;
1078 case HImode:
1079 index = 1;
1080 comp_mode = HImode;
1081 break;
1082 case SImode:
1083 index = 2;
1084 break;
1085 case DImode:
1086 index = 3;
1087 break;
1088 case TImode:
1089 index = 4;
1090 break;
1091 case SFmode:
1092 index = 5;
1093 break;
1094 case DFmode:
1095 index = 6;
1096 break;
1097 case V16QImode:
5474166e 1098 index = 7;
1099 comp_mode = op_mode;
1100 break;
644459d0 1101 case V8HImode:
5474166e 1102 index = 8;
1103 comp_mode = op_mode;
1104 break;
644459d0 1105 case V4SImode:
5474166e 1106 index = 9;
1107 comp_mode = op_mode;
1108 break;
644459d0 1109 case V4SFmode:
5474166e 1110 index = 10;
1111 comp_mode = V4SImode;
1112 break;
644459d0 1113 case V2DFmode:
5474166e 1114 index = 11;
1115 comp_mode = V2DImode;
644459d0 1116 break;
5474166e 1117 case V2DImode:
644459d0 1118 default:
1119 abort ();
1120 }
1121
74f4459c 1122 if (GET_MODE (op1) == DFmode
07027691 1123 && (scode != SPU_GT && scode != SPU_EQ))
1124 abort ();
644459d0 1125
74f4459c 1126 if (is_set == 0 && op1 == const0_rtx
1127 && (GET_MODE (op0) == SImode
1128 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1129 {
1130 /* Don't need to set a register with the result when we are
1131 comparing against zero and branching. */
1132 reverse_test = !reverse_test;
74f4459c 1133 compare_result = op0;
644459d0 1134 }
1135 else
1136 {
1137 compare_result = gen_reg_rtx (comp_mode);
1138
1139 if (reverse_compare)
1140 {
74f4459c 1141 rtx t = op1;
1142 op1 = op0;
1143 op0 = t;
644459d0 1144 }
1145
1146 if (spu_comp_icode[index][scode] == 0)
1147 abort ();
1148
1149 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1150 (op0, op_mode))
1151 op0 = force_reg (op_mode, op0);
644459d0 1152 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1153 (op1, op_mode))
1154 op1 = force_reg (op_mode, op1);
644459d0 1155 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1156 op0, op1);
644459d0 1157 if (comp_rtx == 0)
1158 abort ();
1159 emit_insn (comp_rtx);
1160
5d70b918 1161 if (eq_test)
1162 {
1163 eq_result = gen_reg_rtx (comp_mode);
1164 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1165 op0, op1);
5d70b918 1166 if (eq_rtx == 0)
1167 abort ();
1168 emit_insn (eq_rtx);
d6bf3b14 1169 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1170 gcc_assert (ior_code != CODE_FOR_nothing);
1171 emit_insn (GEN_FCN (ior_code)
1172 (compare_result, compare_result, eq_result));
1173 }
644459d0 1174 }
1175
1176 if (is_set == 0)
1177 {
1178 rtx bcomp;
1179 rtx loc_ref;
1180
1181 /* We don't have branch on QI compare insns, so we convert the
1182 QI compare result to a HI result. */
1183 if (comp_mode == QImode)
1184 {
1185 rtx old_res = compare_result;
1186 compare_result = gen_reg_rtx (HImode);
1187 comp_mode = HImode;
1188 emit_insn (gen_extendqihi2 (compare_result, old_res));
1189 }
1190
1191 if (reverse_test)
1192 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1193 else
1194 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1195
74f4459c 1196 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1197 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1198 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1199 loc_ref, pc_rtx)));
1200 }
1201 else if (is_set == 2)
1202 {
74f4459c 1203 rtx target = operands[0];
644459d0 1204 int compare_size = GET_MODE_BITSIZE (comp_mode);
1205 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1206 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1207 rtx select_mask;
1208 rtx op_t = operands[2];
1209 rtx op_f = operands[3];
1210
1211 /* The result of the comparison can be SI, HI or QI mode. Create a
1212 mask based on that result. */
1213 if (target_size > compare_size)
1214 {
1215 select_mask = gen_reg_rtx (mode);
1216 emit_insn (gen_extend_compare (select_mask, compare_result));
1217 }
1218 else if (target_size < compare_size)
1219 select_mask =
1220 gen_rtx_SUBREG (mode, compare_result,
1221 (compare_size - target_size) / BITS_PER_UNIT);
1222 else if (comp_mode != mode)
1223 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1224 else
1225 select_mask = compare_result;
1226
1227 if (GET_MODE (target) != GET_MODE (op_t)
1228 || GET_MODE (target) != GET_MODE (op_f))
1229 abort ();
1230
1231 if (reverse_test)
1232 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1233 else
1234 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1235 }
1236 else
1237 {
74f4459c 1238 rtx target = operands[0];
644459d0 1239 if (reverse_test)
1240 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1241 gen_rtx_NOT (comp_mode, compare_result)));
1242 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1243 emit_insn (gen_extendhisi2 (target, compare_result));
1244 else if (GET_MODE (target) == SImode
1245 && GET_MODE (compare_result) == QImode)
1246 emit_insn (gen_extend_compare (target, compare_result));
1247 else
1248 emit_move_insn (target, compare_result);
1249 }
1250}
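/* spu_emit_branch_or_set is used three ways, keyed by IS_SET: 0 emits a
   conditional branch to the label in operands[3]; 2 emits a conditional
   select of operands[2]/operands[3] into operands[0] via selb; any other
   value stores the (possibly inverted and sign-extended) comparison mask
   itself into operands[0].  */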
1251
1252HOST_WIDE_INT
1253const_double_to_hwint (rtx x)
1254{
1255 HOST_WIDE_INT val;
1256 REAL_VALUE_TYPE rv;
1257 if (GET_MODE (x) == SFmode)
1258 {
1259 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1260 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1261 }
1262 else if (GET_MODE (x) == DFmode)
1263 {
1264 long l[2];
1265 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1266 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1267 val = l[0];
1268 val = (val << 32) | (l[1] & 0xffffffff);
1269 }
1270 else
1271 abort ();
1272 return val;
1273}
1274
1275rtx
1276hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1277{
1278 long tv[2];
1279 REAL_VALUE_TYPE rv;
1280 gcc_assert (mode == SFmode || mode == DFmode);
1281
1282 if (mode == SFmode)
1283 tv[0] = (v << 32) >> 32;
1284 else if (mode == DFmode)
1285 {
1286 tv[1] = (v << 32) >> 32;
1287 tv[0] = v >> 32;
1288 }
1289 real_from_target (&rv, tv, mode);
1290 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1291}
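/* These two helpers are intended to round-trip: const_double_to_hwint
   packs the SFmode image into the low 32 bits of a HOST_WIDE_INT, or the
   two DFmode target words as (l[0] << 32) | l[1]; hwint_to_const_double
   splits the value back the same way before calling real_from_target.  */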
1292
1293void
1294print_operand_address (FILE * file, register rtx addr)
1295{
1296 rtx reg;
1297 rtx offset;
1298
e04cf423 1299 if (GET_CODE (addr) == AND
1300 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1301 && INTVAL (XEXP (addr, 1)) == -16)
1302 addr = XEXP (addr, 0);
1303
644459d0 1304 switch (GET_CODE (addr))
1305 {
1306 case REG:
1307 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1308 break;
1309
1310 case PLUS:
1311 reg = XEXP (addr, 0);
1312 offset = XEXP (addr, 1);
1313 if (GET_CODE (offset) == REG)
1314 {
1315 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1316 reg_names[REGNO (offset)]);
1317 }
1318 else if (GET_CODE (offset) == CONST_INT)
1319 {
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1321 INTVAL (offset), reg_names[REGNO (reg)]);
1322 }
1323 else
1324 abort ();
1325 break;
1326
1327 case CONST:
1328 case LABEL_REF:
1329 case SYMBOL_REF:
1330 case CONST_INT:
1331 output_addr_const (file, addr);
1332 break;
1333
1334 default:
1335 debug_rtx (addr);
1336 abort ();
1337 }
1338}
1339
1340void
1341print_operand (FILE * file, rtx x, int code)
1342{
1343 enum machine_mode mode = GET_MODE (x);
1344 HOST_WIDE_INT val;
1345 unsigned char arr[16];
1346 int xcode = GET_CODE (x);
dea01258 1347 int i, info;
644459d0 1348 if (GET_MODE (x) == VOIDmode)
1349 switch (code)
1350 {
644459d0 1351 case 'L': /* 128 bits, signed */
1352 case 'm': /* 128 bits, signed */
1353 case 'T': /* 128 bits, signed */
1354 case 't': /* 128 bits, signed */
1355 mode = TImode;
1356 break;
644459d0 1357 case 'K': /* 64 bits, signed */
1358 case 'k': /* 64 bits, signed */
1359 case 'D': /* 64 bits, signed */
1360 case 'd': /* 64 bits, signed */
1361 mode = DImode;
1362 break;
644459d0 1363 case 'J': /* 32 bits, signed */
1364 case 'j': /* 32 bits, signed */
1365 case 's': /* 32 bits, signed */
1366 case 'S': /* 32 bits, signed */
1367 mode = SImode;
1368 break;
1369 }
1370 switch (code)
1371 {
1372
1373 case 'j': /* 32 bits, signed */
1374 case 'k': /* 64 bits, signed */
1375 case 'm': /* 128 bits, signed */
1376 if (xcode == CONST_INT
1377 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1378 {
1379 gcc_assert (logical_immediate_p (x, mode));
1380 constant_to_array (mode, x, arr);
1381 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1382 val = trunc_int_for_mode (val, SImode);
1383 switch (which_logical_immediate (val))
1384 {
1385 case SPU_ORI:
1386 break;
1387 case SPU_ORHI:
1388 fprintf (file, "h");
1389 break;
1390 case SPU_ORBI:
1391 fprintf (file, "b");
1392 break;
1393 default:
1394 gcc_unreachable();
1395 }
1396 }
1397 else
1398 gcc_unreachable();
1399 return;
1400
1401 case 'J': /* 32 bits, signed */
1402 case 'K': /* 64 bits, signed */
1403 case 'L': /* 128 bits, signed */
1404 if (xcode == CONST_INT
1405 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1406 {
1407 gcc_assert (logical_immediate_p (x, mode)
1408 || iohl_immediate_p (x, mode));
1409 constant_to_array (mode, x, arr);
1410 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1411 val = trunc_int_for_mode (val, SImode);
1412 switch (which_logical_immediate (val))
1413 {
1414 case SPU_ORI:
1415 case SPU_IOHL:
1416 break;
1417 case SPU_ORHI:
1418 val = trunc_int_for_mode (val, HImode);
1419 break;
1420 case SPU_ORBI:
1421 val = trunc_int_for_mode (val, QImode);
1422 break;
1423 default:
1424 gcc_unreachable();
1425 }
1426 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1427 }
1428 else
1429 gcc_unreachable();
1430 return;
1431
1432 case 't': /* 128 bits, signed */
1433 case 'd': /* 64 bits, signed */
1434 case 's': /* 32 bits, signed */
dea01258 1435 if (CONSTANT_P (x))
644459d0 1436 {
dea01258 1437 enum immediate_class c = classify_immediate (x, mode);
1438 switch (c)
1439 {
1440 case IC_IL1:
1441 constant_to_array (mode, x, arr);
1442 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1443 val = trunc_int_for_mode (val, SImode);
1444 switch (which_immediate_load (val))
1445 {
1446 case SPU_IL:
1447 break;
1448 case SPU_ILA:
1449 fprintf (file, "a");
1450 break;
1451 case SPU_ILH:
1452 fprintf (file, "h");
1453 break;
1454 case SPU_ILHU:
1455 fprintf (file, "hu");
1456 break;
1457 default:
1458 gcc_unreachable ();
1459 }
1460 break;
1461 case IC_CPAT:
1462 constant_to_array (mode, x, arr);
1463 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1464 if (info == 1)
1465 fprintf (file, "b");
1466 else if (info == 2)
1467 fprintf (file, "h");
1468 else if (info == 4)
1469 fprintf (file, "w");
1470 else if (info == 8)
1471 fprintf (file, "d");
1472 break;
1473 case IC_IL1s:
1474 if (xcode == CONST_VECTOR)
1475 {
1476 x = CONST_VECTOR_ELT (x, 0);
1477 xcode = GET_CODE (x);
1478 }
1479 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1480 fprintf (file, "a");
1481 else if (xcode == HIGH)
1482 fprintf (file, "hu");
1483 break;
1484 case IC_FSMBI:
5df189be 1485 case IC_FSMBI2:
dea01258 1486 case IC_IL2:
1487 case IC_IL2s:
1488 case IC_POOL:
1489 abort ();
1490 }
644459d0 1491 }
644459d0 1492 else
1493 gcc_unreachable ();
1494 return;
1495
1496 case 'T': /* 128 bits, signed */
1497 case 'D': /* 64 bits, signed */
1498 case 'S': /* 32 bits, signed */
dea01258 1499 if (CONSTANT_P (x))
644459d0 1500 {
dea01258 1501 enum immediate_class c = classify_immediate (x, mode);
1502 switch (c)
644459d0 1503 {
dea01258 1504 case IC_IL1:
1505 constant_to_array (mode, x, arr);
1506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1507 val = trunc_int_for_mode (val, SImode);
1508 switch (which_immediate_load (val))
1509 {
1510 case SPU_IL:
1511 case SPU_ILA:
1512 break;
1513 case SPU_ILH:
1514 case SPU_ILHU:
1515 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1516 break;
1517 default:
1518 gcc_unreachable ();
1519 }
1520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1521 break;
1522 case IC_FSMBI:
1523 constant_to_array (mode, x, arr);
1524 val = 0;
1525 for (i = 0; i < 16; i++)
1526 {
1527 val <<= 1;
1528 val |= arr[i] & 1;
1529 }
1530 print_operand (file, GEN_INT (val), 0);
1531 break;
1532 case IC_CPAT:
1533 constant_to_array (mode, x, arr);
1534 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1536 break;
dea01258 1537 case IC_IL1s:
dea01258 1538 if (xcode == HIGH)
5df189be 1539 x = XEXP (x, 0);
1540 if (GET_CODE (x) == CONST_VECTOR)
1541 x = CONST_VECTOR_ELT (x, 0);
1542 output_addr_const (file, x);
1543 if (xcode == HIGH)
1544 fprintf (file, "@h");
644459d0 1545 break;
dea01258 1546 case IC_IL2:
1547 case IC_IL2s:
5df189be 1548 case IC_FSMBI2:
dea01258 1549 case IC_POOL:
1550 abort ();
644459d0 1551 }
c8befdb9 1552 }
644459d0 1553 else
1554 gcc_unreachable ();
1555 return;
1556
644459d0 1557 case 'C':
1558 if (xcode == CONST_INT)
1559 {
 1560 /* Only the 4 least significant bits are relevant for generating
1561 control word instructions. */
1562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1563 return;
1564 }
1565 break;
1566
1567 case 'M': /* print code for c*d */
1568 if (GET_CODE (x) == CONST_INT)
1569 switch (INTVAL (x))
1570 {
1571 case 1:
1572 fprintf (file, "b");
1573 break;
1574 case 2:
1575 fprintf (file, "h");
1576 break;
1577 case 4:
1578 fprintf (file, "w");
1579 break;
1580 case 8:
1581 fprintf (file, "d");
1582 break;
1583 default:
1584 gcc_unreachable();
1585 }
1586 else
1587 gcc_unreachable();
1588 return;
1589
1590 case 'N': /* Negate the operand */
1591 if (xcode == CONST_INT)
1592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1593 else if (xcode == CONST_VECTOR)
1594 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1595 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1596 return;
1597
1598 case 'I': /* enable/disable interrupts */
1599 if (xcode == CONST_INT)
1600 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1601 return;
1602
1603 case 'b': /* branch modifiers */
1604 if (xcode == REG)
1605 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1606 else if (COMPARISON_P (x))
1607 fprintf (file, "%s", xcode == NE ? "n" : "");
1608 return;
1609
1610 case 'i': /* indirect call */
1611 if (xcode == MEM)
1612 {
1613 if (GET_CODE (XEXP (x, 0)) == REG)
1614 /* Used in indirect function calls. */
1615 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1616 else
1617 output_address (XEXP (x, 0));
1618 }
1619 return;
1620
1621 case 'p': /* load/store */
1622 if (xcode == MEM)
1623 {
1624 x = XEXP (x, 0);
1625 xcode = GET_CODE (x);
1626 }
e04cf423 1627 if (xcode == AND)
1628 {
1629 x = XEXP (x, 0);
1630 xcode = GET_CODE (x);
1631 }
644459d0 1632 if (xcode == REG)
1633 fprintf (file, "d");
1634 else if (xcode == CONST_INT)
1635 fprintf (file, "a");
1636 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1637 fprintf (file, "r");
1638 else if (xcode == PLUS || xcode == LO_SUM)
1639 {
1640 if (GET_CODE (XEXP (x, 1)) == REG)
1641 fprintf (file, "x");
1642 else
1643 fprintf (file, "d");
1644 }
1645 return;
1646
5df189be 1647 case 'e':
1648 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1649 val &= 0x7;
1650 output_addr_const (file, GEN_INT (val));
1651 return;
1652
1653 case 'f':
1654 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1655 val &= 0x1f;
1656 output_addr_const (file, GEN_INT (val));
1657 return;
1658
1659 case 'g':
1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661 val &= 0x3f;
1662 output_addr_const (file, GEN_INT (val));
1663 return;
1664
1665 case 'h':
1666 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1667 val = (val >> 3) & 0x1f;
1668 output_addr_const (file, GEN_INT (val));
1669 return;
1670
1671 case 'E':
1672 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1673 val = -val;
1674 val &= 0x7;
1675 output_addr_const (file, GEN_INT (val));
1676 return;
1677
1678 case 'F':
1679 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 val = -val;
1681 val &= 0x1f;
1682 output_addr_const (file, GEN_INT (val));
1683 return;
1684
1685 case 'G':
1686 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 val = -val;
1688 val &= 0x3f;
1689 output_addr_const (file, GEN_INT (val));
1690 return;
1691
1692 case 'H':
1693 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1694 val = -(val & -8ll);
1695 val = (val >> 3) & 0x1f;
1696 output_addr_const (file, GEN_INT (val));
1697 return;
1698
56c7bfc2 1699 case 'v':
1700 case 'w':
1701 constant_to_array (mode, x, arr);
1702 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1703 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1704 return;
1705
644459d0 1706 case 0:
1707 if (xcode == REG)
1708 fprintf (file, "%s", reg_names[REGNO (x)]);
1709 else if (xcode == MEM)
1710 output_address (XEXP (x, 0));
1711 else if (xcode == CONST_VECTOR)
dea01258 1712 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1713 else
1714 output_addr_const (file, x);
1715 return;
1716
f6a0d06f 1717 /* unused letters
56c7bfc2 1718 o qr u yz
5df189be 1719 AB OPQR UVWXYZ */
644459d0 1720 default:
1721 output_operand_lossage ("invalid %%xn code");
1722 }
1723 gcc_unreachable ();
1724}
1725
644459d0 1726/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1727 caller saved register. For leaf functions it is more efficient to
1728 use a volatile register because we won't need to save and restore the
1729 pic register. This routine is only valid after register allocation
1730 is completed, so we can pick an unused register. */
1731static rtx
1732get_pic_reg (void)
1733{
1734 rtx pic_reg = pic_offset_table_rtx;
1735 if (!reload_completed && !reload_in_progress)
1736 abort ();
87a95921 1737 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1738 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1739 return pic_reg;
1740}
1741
5df189be 1742/* Split constant addresses to handle cases that are too large.
1743 Add in the pic register when in PIC mode.
1744 Split immediates that require more than 1 instruction. */
dea01258 1745int
1746spu_split_immediate (rtx * ops)
c8befdb9 1747{
dea01258 1748 enum machine_mode mode = GET_MODE (ops[0]);
1749 enum immediate_class c = classify_immediate (ops[1], mode);
1750
1751 switch (c)
c8befdb9 1752 {
dea01258 1753 case IC_IL2:
1754 {
1755 unsigned char arrhi[16];
1756 unsigned char arrlo[16];
98bbec1e 1757 rtx to, temp, hi, lo;
dea01258 1758 int i;
98bbec1e 1759 enum machine_mode imode = mode;
1760 /* We need to do reals as ints because the constant used in the
1761 IOR might not be a legitimate real constant. */
1762 imode = int_mode_for_mode (mode);
dea01258 1763 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1764 if (imode != mode)
1765 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1766 else
1767 to = ops[0];
1768 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1769 for (i = 0; i < 16; i += 4)
1770 {
1771 arrlo[i + 2] = arrhi[i + 2];
1772 arrlo[i + 3] = arrhi[i + 3];
1773 arrlo[i + 0] = arrlo[i + 1] = 0;
1774 arrhi[i + 2] = arrhi[i + 3] = 0;
1775 }
98bbec1e 1776 hi = array_to_constant (imode, arrhi);
1777 lo = array_to_constant (imode, arrlo);
1778 emit_move_insn (temp, hi);
dea01258 1779 emit_insn (gen_rtx_SET
98bbec1e 1780 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1781 return 1;
1782 }
5df189be 1783 case IC_FSMBI2:
1784 {
1785 unsigned char arr_fsmbi[16];
1786 unsigned char arr_andbi[16];
1787 rtx to, reg_fsmbi, reg_and;
1788 int i;
1789 enum machine_mode imode = mode;
1790 /* We need to do reals as ints because the constant used in the
1791 * AND might not be a legitimate real constant. */
1792 imode = int_mode_for_mode (mode);
1793 constant_to_array (mode, ops[1], arr_fsmbi);
1794 if (imode != mode)
1795 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1796 else
1797 to = ops[0];
1798 for (i = 0; i < 16; i++)
1799 if (arr_fsmbi[i] != 0)
1800 {
1801 arr_andbi[0] = arr_fsmbi[i];
1802 arr_fsmbi[i] = 0xff;
1803 }
1804 for (i = 1; i < 16; i++)
1805 arr_andbi[i] = arr_andbi[0];
1806 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1807 reg_and = array_to_constant (imode, arr_andbi);
1808 emit_move_insn (to, reg_fsmbi);
1809 emit_insn (gen_rtx_SET
1810 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1811 return 1;
1812 }
dea01258 1813 case IC_POOL:
1814 if (reload_in_progress || reload_completed)
1815 {
1816 rtx mem = force_const_mem (mode, ops[1]);
1817 if (TARGET_LARGE_MEM)
1818 {
1819 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1820 emit_move_insn (addr, XEXP (mem, 0));
1821 mem = replace_equiv_address (mem, addr);
1822 }
1823 emit_move_insn (ops[0], mem);
1824 return 1;
1825 }
1826 break;
1827 case IC_IL1s:
1828 case IC_IL2s:
1829 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1830 {
1831 if (c == IC_IL2s)
1832 {
5df189be 1833 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1834 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1835 }
1836 else if (flag_pic)
1837 emit_insn (gen_pic (ops[0], ops[1]));
1838 if (flag_pic)
1839 {
1840 rtx pic_reg = get_pic_reg ();
1841 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1842 crtl->uses_pic_offset_table = 1;
dea01258 1843 }
1844 return flag_pic || c == IC_IL2s;
1845 }
1846 break;
1847 case IC_IL1:
1848 case IC_FSMBI:
1849 case IC_CPAT:
1850 break;
c8befdb9 1851 }
dea01258 1852 return 0;
c8befdb9 1853}
1854
644459d0 1855/* SAVING is TRUE when we are generating the actual load and store
1856 instructions for REGNO. When determining the size of the stack
 1857	 needed for saving registers we must allocate enough space for the
 1858	 worst case, because we don't always have the information early enough
 1859	 to avoid allocating it.  But we can at least eliminate the actual loads
1860 and stores during the prologue/epilogue. */
1861static int
1862need_to_save_reg (int regno, int saving)
1863{
3072d30e 1864 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1865 return 1;
1866 if (flag_pic
1867 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1868 && (!saving || crtl->uses_pic_offset_table)
644459d0 1869 && (!saving
3072d30e 1870 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1871 return 1;
1872 return 0;
1873}
1874
1875/* This function is only correct starting with local register
1876 allocation */
1877int
1878spu_saved_regs_size (void)
1879{
1880 int reg_save_size = 0;
1881 int regno;
1882
1883 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1884 if (need_to_save_reg (regno, 0))
1885 reg_save_size += 0x10;
1886 return reg_save_size;
1887}
1888
1889static rtx
1890frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1891{
1892 rtx reg = gen_rtx_REG (V4SImode, regno);
1893 rtx mem =
1894 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1895 return emit_insn (gen_movv4si (mem, reg));
1896}
1897
1898static rtx
1899frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1900{
1901 rtx reg = gen_rtx_REG (V4SImode, regno);
1902 rtx mem =
1903 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1904 return emit_insn (gen_movv4si (reg, mem));
1905}
1906
1907/* This happens after reload, so we need to expand it. */
1908static rtx
1909frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1910{
1911 rtx insn;
1912 if (satisfies_constraint_K (GEN_INT (imm)))
1913 {
1914 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1915 }
1916 else
1917 {
3072d30e 1918 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1919 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1920 if (REGNO (src) == REGNO (scratch))
1921 abort ();
1922 }
644459d0 1923 return insn;
1924}
1925
1926/* Return nonzero if this function is known to have a null epilogue. */
1927
1928int
1929direct_return (void)
1930{
1931 if (reload_completed)
1932 {
1933 if (cfun->static_chain_decl == 0
1934 && (spu_saved_regs_size ()
1935 + get_frame_size ()
abe32cce 1936 + crtl->outgoing_args_size
1937 + crtl->args.pretend_args_size == 0)
644459d0 1938 && current_function_is_leaf)
1939 return 1;
1940 }
1941 return 0;
1942}
1943
1944/*
1945 The stack frame looks like this:
1946 +-------------+
1947 | incoming |
a8e019fa 1948 | args |
1949 AP -> +-------------+
644459d0 1950 | $lr save |
1951 +-------------+
1952 prev SP | back chain |
1953 +-------------+
1954 | var args |
abe32cce 1955 | reg save | crtl->args.pretend_args_size bytes
644459d0 1956 +-------------+
1957 | ... |
1958 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1959 FP -> +-------------+
644459d0 1960 | ... |
a8e019fa 1961 | vars | get_frame_size() bytes
1962 HFP -> +-------------+
644459d0 1963 | ... |
1964 | outgoing |
abe32cce 1965 | args | crtl->outgoing_args_size bytes
644459d0 1966 +-------------+
1967 | $lr of next |
1968 | frame |
1969 +-------------+
a8e019fa 1970 | back chain |
1971 SP -> +-------------+
644459d0 1972
1973*/
1974void
1975spu_expand_prologue (void)
1976{
1977 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1978 HOST_WIDE_INT total_size;
1979 HOST_WIDE_INT saved_regs_size;
1980 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1981 rtx scratch_reg_0, scratch_reg_1;
1982 rtx insn, real;
1983
644459d0 1984 if (flag_pic && optimize == 0)
18d50ae6 1985 crtl->uses_pic_offset_table = 1;
644459d0 1986
1987 if (spu_naked_function_p (current_function_decl))
1988 return;
1989
1990 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1991 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1992
1993 saved_regs_size = spu_saved_regs_size ();
1994 total_size = size + saved_regs_size
abe32cce 1995 + crtl->outgoing_args_size
1996 + crtl->args.pretend_args_size;
644459d0 1997
1998 if (!current_function_is_leaf
18d50ae6 1999 || cfun->calls_alloca || total_size > 0)
644459d0 2000 total_size += STACK_POINTER_OFFSET;
2001
2002 /* Save this first because code after this might use the link
2003 register as a scratch register. */
2004 if (!current_function_is_leaf)
2005 {
2006 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2007 RTX_FRAME_RELATED_P (insn) = 1;
2008 }
2009
2010 if (total_size > 0)
2011 {
abe32cce 2012 offset = -crtl->args.pretend_args_size;
644459d0 2013 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2014 if (need_to_save_reg (regno, 1))
2015 {
2016 offset -= 16;
2017 insn = frame_emit_store (regno, sp_reg, offset);
2018 RTX_FRAME_RELATED_P (insn) = 1;
2019 }
2020 }
2021
18d50ae6 2022 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2023 {
2024 rtx pic_reg = get_pic_reg ();
2025 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2026 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2027 }
2028
2029 if (total_size > 0)
2030 {
2031 if (flag_stack_check)
2032 {
d819917f 2033 /* We compare against total_size-1 because
644459d0 2034 ($sp >= total_size) <=> ($sp > total_size-1) */
2035 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2036 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2037 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2038 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2039 {
2040 emit_move_insn (scratch_v4si, size_v4si);
2041 size_v4si = scratch_v4si;
2042 }
2043 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2044 emit_insn (gen_vec_extractv4si
2045 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2046 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2047 }
2048
2049 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2050 the value of the previous $sp because we save it as the back
2051 chain. */
2052 if (total_size <= 2000)
2053 {
2054 /* In this case we save the back chain first. */
2055 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2056 insn =
2057 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2058 }
644459d0 2059 else
2060 {
2061 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2062 insn =
2063 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2064 }
2065 RTX_FRAME_RELATED_P (insn) = 1;
2066 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2068
2069 if (total_size > 2000)
2070 {
2071 /* Save the back chain ptr */
2072 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2073 }
2074
2075 if (frame_pointer_needed)
2076 {
2077 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2078 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2079 + crtl->outgoing_args_size;
644459d0 2080 /* Set the new frame_pointer */
d8dfeb55 2081 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2082 RTX_FRAME_RELATED_P (insn) = 1;
2083 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2084 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2085 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2086 }
2087 }
2088
a512540d 2089 if (flag_stack_usage)
2090 current_function_static_stack_size = total_size;
644459d0 2091}
2092
2093void
2094spu_expand_epilogue (bool sibcall_p)
2095{
2096 int size = get_frame_size (), offset, regno;
2097 HOST_WIDE_INT saved_regs_size, total_size;
2098 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2099 rtx jump, scratch_reg_0;
2100
644459d0 2101 if (spu_naked_function_p (current_function_decl))
2102 return;
2103
2104 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2105
2106 saved_regs_size = spu_saved_regs_size ();
2107 total_size = size + saved_regs_size
abe32cce 2108 + crtl->outgoing_args_size
2109 + crtl->args.pretend_args_size;
644459d0 2110
2111 if (!current_function_is_leaf
18d50ae6 2112 || cfun->calls_alloca || total_size > 0)
644459d0 2113 total_size += STACK_POINTER_OFFSET;
2114
2115 if (total_size > 0)
2116 {
18d50ae6 2117 if (cfun->calls_alloca)
644459d0 2118 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2119 else
2120 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2121
2122
2123 if (saved_regs_size > 0)
2124 {
abe32cce 2125 offset = -crtl->args.pretend_args_size;
644459d0 2126 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2127 if (need_to_save_reg (regno, 1))
2128 {
2129 offset -= 0x10;
2130 frame_emit_load (regno, sp_reg, offset);
2131 }
2132 }
2133 }
2134
2135 if (!current_function_is_leaf)
2136 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2137
2138 if (!sibcall_p)
2139 {
18b42941 2140 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2141 jump = emit_jump_insn (gen__return ());
2142 emit_barrier_after (jump);
2143 }
2144
644459d0 2145}
2146
2147rtx
2148spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2149{
2150 if (count != 0)
2151 return 0;
2152 /* This is inefficient because it ends up copying to a save-register
2153 which then gets saved even though $lr has already been saved. But
2154 it does generate better code for leaf functions and we don't need
2155 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2156 used for __builtin_return_address anyway, so maybe we don't care if
2157 it's inefficient. */
2158 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2159}
2160\f
2161
2162/* Given VAL, generate a constant appropriate for MODE.
2163 If MODE is a vector mode, every element will be VAL.
2164 For TImode, VAL will be zero extended to 128 bits. */
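/* For example, spu_const (SImode, 5) is simply the CONST_INT 5, while
   spu_const (V4SImode, 1) builds the CONST_VECTOR {1, 1, 1, 1}.  */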
2165rtx
2166spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2167{
2168 rtx inner;
2169 rtvec v;
2170 int units, i;
2171
2172 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2173 || GET_MODE_CLASS (mode) == MODE_FLOAT
2174 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2175 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2176
2177 if (GET_MODE_CLASS (mode) == MODE_INT)
2178 return immed_double_const (val, 0, mode);
2179
2180 /* val is the bit representation of the float */
2181 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2182 return hwint_to_const_double (mode, val);
2183
2184 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2185 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2186 else
2187 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2188
2189 units = GET_MODE_NUNITS (mode);
2190
2191 v = rtvec_alloc (units);
2192
2193 for (i = 0; i < units; ++i)
2194 RTVEC_ELT (v, i) = inner;
2195
2196 return gen_rtx_CONST_VECTOR (mode, v);
2197}
644459d0 2198
5474166e 2199/* Create a MODE vector constant from 4 ints. */
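/* For example, each of A, B, C and D supplies one big-endian word, so
   spu_const_from_ints (V16QImode, 0x00010203, 0x04050607, 0x08090a0b,
   0x0c0d0e0f) yields the byte sequence 0, 1, 2, ..., 15.  */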
2200rtx
2201spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2202{
2203 unsigned char arr[16];
2204 arr[0] = (a >> 24) & 0xff;
2205 arr[1] = (a >> 16) & 0xff;
2206 arr[2] = (a >> 8) & 0xff;
2207 arr[3] = (a >> 0) & 0xff;
2208 arr[4] = (b >> 24) & 0xff;
2209 arr[5] = (b >> 16) & 0xff;
2210 arr[6] = (b >> 8) & 0xff;
2211 arr[7] = (b >> 0) & 0xff;
2212 arr[8] = (c >> 24) & 0xff;
2213 arr[9] = (c >> 16) & 0xff;
2214 arr[10] = (c >> 8) & 0xff;
2215 arr[11] = (c >> 0) & 0xff;
2216 arr[12] = (d >> 24) & 0xff;
2217 arr[13] = (d >> 16) & 0xff;
2218 arr[14] = (d >> 8) & 0xff;
2219 arr[15] = (d >> 0) & 0xff;
2220 return array_to_constant(mode, arr);
2221}
5a976006 2222\f
2223/* branch hint stuff */
5474166e 2224
644459d0 2225/* An array of these is used to propagate hints to predecessor blocks. */
2226struct spu_bb_info
2227{
5a976006 2228 rtx prop_jump; /* propagated from another block */
2229 int bb_index; /* the original block. */
644459d0 2230};
5a976006 2231static struct spu_bb_info *spu_bb_info;
644459d0 2232
5a976006 2233#define STOP_HINT_P(INSN) \
2234 (GET_CODE(INSN) == CALL_INSN \
2235 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2236 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2237
2238/* 1 when RTX is a hinted branch or its target. We keep track of
2239 what has been hinted so the safe-hint code can test it easily. */
2240#define HINTED_P(RTX) \
2241 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2242
2243/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2244#define SCHED_ON_EVEN_P(RTX) \
2245 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2246
2247/* Emit a nop for INSN such that the two will dual issue. This assumes
2248 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2249 We check for TImode to handle a MULTI1 insn which has dual issued its
2250 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2251 ADDR_VEC insns. */
2252static void
2253emit_nop_for_insn (rtx insn)
644459d0 2254{
5a976006 2255 int p;
2256 rtx new_insn;
2257 p = get_pipe (insn);
2258 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2259 new_insn = emit_insn_after (gen_lnop (), insn);
2260 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2261 {
5a976006 2262 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2263 PUT_MODE (new_insn, TImode);
2264 PUT_MODE (insn, VOIDmode);
2265 }
2266 else
2267 new_insn = emit_insn_after (gen_lnop (), insn);
2268 recog_memoized (new_insn);
2fbdf9ef 2269 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2270}
2271
2272/* Insert nops in basic blocks to meet dual issue alignment
2273 requirements. Also make sure hbrp and hint instructions are at least
2274 one cycle apart, possibly inserting a nop. */
2275static void
2276pad_bb(void)
2277{
2278 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2279 int length;
2280 int addr;
2281
2282 /* This sets up INSN_ADDRESSES. */
2283 shorten_branches (get_insns ());
2284
2285 /* Keep track of length added by nops. */
2286 length = 0;
2287
2288 prev_insn = 0;
2289 insn = get_insns ();
2290 if (!active_insn_p (insn))
2291 insn = next_active_insn (insn);
2292 for (; insn; insn = next_insn)
2293 {
2294 next_insn = next_active_insn (insn);
2295 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2296 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2297 {
5a976006 2298 if (hbr_insn)
2299 {
2300 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2301 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2302 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2303 || (a1 - a0 == 4))
2304 {
2305 prev_insn = emit_insn_before (gen_lnop (), insn);
2306 PUT_MODE (prev_insn, GET_MODE (insn));
2307 PUT_MODE (insn, TImode);
2fbdf9ef 2308 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2309 length += 4;
2310 }
2311 }
2312 hbr_insn = insn;
2313 }
2314 if (INSN_CODE (insn) == CODE_FOR_blockage)
2315 {
2316 if (GET_MODE (insn) == TImode)
2317 PUT_MODE (next_insn, TImode);
2318 insn = next_insn;
2319 next_insn = next_active_insn (insn);
2320 }
2321 addr = INSN_ADDRESSES (INSN_UID (insn));
2322 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2323 {
2324 if (((addr + length) & 7) != 0)
2325 {
2326 emit_nop_for_insn (prev_insn);
2327 length += 4;
2328 }
644459d0 2329 }
5a976006 2330 else if (GET_MODE (insn) == TImode
2331 && ((next_insn && GET_MODE (next_insn) != TImode)
2332 || get_attr_type (insn) == TYPE_MULTI0)
2333 && ((addr + length) & 7) != 0)
2334 {
2335 /* prev_insn will always be set because the first insn is
2336 always 8-byte aligned. */
2337 emit_nop_for_insn (prev_insn);
2338 length += 4;
2339 }
2340 prev_insn = insn;
644459d0 2341 }
644459d0 2342}
2343
5a976006 2344\f
2345/* Routines for branch hints. */
2346
644459d0 2347static void
5a976006 2348spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2349 int distance, sbitmap blocks)
644459d0 2350{
5a976006 2351 rtx branch_label = 0;
2352 rtx hint;
2353 rtx insn;
2354 rtx table;
644459d0 2355
2356 if (before == 0 || branch == 0 || target == 0)
2357 return;
2358
5a976006 2359	 /* While scheduling we require hints to be no further than 600 bytes
 2360	 from the branch, so we need to enforce that here too.  */
644459d0 2361 if (distance > 600)
2362 return;
2363
5a976006 2364	 /* If BEFORE is a basic block note, emit the hint after the note.  */
37534923 2365 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2366 before = NEXT_INSN (before);
644459d0 2367
2368 branch_label = gen_label_rtx ();
2369 LABEL_NUSES (branch_label)++;
2370 LABEL_PRESERVE_P (branch_label) = 1;
2371 insn = emit_label_before (branch_label, branch);
2372 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2373 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2374
2375 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2376 recog_memoized (hint);
2fbdf9ef 2377 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2378 HINTED_P (branch) = 1;
644459d0 2379
5a976006 2380 if (GET_CODE (target) == LABEL_REF)
2381 HINTED_P (XEXP (target, 0)) = 1;
2382 else if (tablejump_p (branch, 0, &table))
644459d0 2383 {
5a976006 2384 rtvec vec;
2385 int j;
2386 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2387 vec = XVEC (PATTERN (table), 0);
2388 else
2389 vec = XVEC (PATTERN (table), 1);
2390 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2391 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2392 }
5a976006 2393
2394 if (distance >= 588)
644459d0 2395 {
5a976006 2396 /* Make sure the hint isn't scheduled any earlier than this point,
 2397	 which could make it too far for the branch offset to fit */
2fbdf9ef 2398 insn = emit_insn_before (gen_blockage (), hint);
2399 recog_memoized (insn);
2400 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2401 }
2402 else if (distance <= 8 * 4)
2403 {
2404 /* To guarantee at least 8 insns between the hint and branch we
2405 insert nops. */
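      /* For example, with a distance of 20 bytes (5 insns) the loop below
	 adds three nops, bringing the hint-to-branch separation up to the
	 required 8 insns.  */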
2406 int d;
2407 for (d = distance; d < 8 * 4; d += 4)
2408 {
2409 insn =
2410 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2411 recog_memoized (insn);
2fbdf9ef 2412 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2413 }
2414
2415 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2416 insn = emit_insn_after (gen_blockage (), hint);
2417 recog_memoized (insn);
2418 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2419
2420 /* Make sure any nops inserted aren't scheduled after the call. */
2421 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2422 {
2423 insn = emit_insn_before (gen_blockage (), branch);
2424 recog_memoized (insn);
2425 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2426 }
644459d0 2427 }
644459d0 2428}
2429
2430/* Returns 0 if we don't want a hint for this branch. Otherwise return
2431 the rtx for the branch target. */
2432static rtx
2433get_branch_target (rtx branch)
2434{
2435 if (GET_CODE (branch) == JUMP_INSN)
2436 {
2437 rtx set, src;
2438
2439 /* Return statements */
2440 if (GET_CODE (PATTERN (branch)) == RETURN)
2441 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2442
2443 /* jump table */
2444 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2445 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2446 return 0;
2447
fcc31b99 2448 /* ASM GOTOs. */
604157f6 2449 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2450 return NULL;
2451
644459d0 2452 set = single_set (branch);
2453 src = SET_SRC (set);
2454 if (GET_CODE (SET_DEST (set)) != PC)
2455 abort ();
2456
2457 if (GET_CODE (src) == IF_THEN_ELSE)
2458 {
2459 rtx lab = 0;
2460 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2461 if (note)
2462 {
2463 /* If the more probable case is not a fall through, then
2464 try a branch hint. */
2465 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2466 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2467 && GET_CODE (XEXP (src, 1)) != PC)
2468 lab = XEXP (src, 1);
2469 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2470 && GET_CODE (XEXP (src, 2)) != PC)
2471 lab = XEXP (src, 2);
2472 }
2473 if (lab)
2474 {
2475 if (GET_CODE (lab) == RETURN)
2476 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2477 return lab;
2478 }
2479 return 0;
2480 }
2481
2482 return src;
2483 }
2484 else if (GET_CODE (branch) == CALL_INSN)
2485 {
2486 rtx call;
2487 /* All of our call patterns are in a PARALLEL and the CALL is
2488 the first pattern in the PARALLEL. */
2489 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2490 abort ();
2491 call = XVECEXP (PATTERN (branch), 0, 0);
2492 if (GET_CODE (call) == SET)
2493 call = SET_SRC (call);
2494 if (GET_CODE (call) != CALL)
2495 abort ();
2496 return XEXP (XEXP (call, 0), 0);
2497 }
2498 return 0;
2499}
2500
5a976006 2501/* The special $hbr register is used to prevent the insn scheduler from
2502 moving hbr insns across instructions which invalidate them. It
2503 should only be used in a clobber, and this function searches for
2504 insns which clobber it. */
2505static bool
2506insn_clobbers_hbr (rtx insn)
2507{
2508 if (INSN_P (insn)
2509 && GET_CODE (PATTERN (insn)) == PARALLEL)
2510 {
2511 rtx parallel = PATTERN (insn);
2512 rtx clobber;
2513 int j;
2514 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2515 {
2516 clobber = XVECEXP (parallel, 0, j);
2517 if (GET_CODE (clobber) == CLOBBER
2518 && GET_CODE (XEXP (clobber, 0)) == REG
2519 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2520 return 1;
2521 }
2522 }
2523 return 0;
2524}
2525
2526/* Search up to 32 insns starting at FIRST:
2527 - at any kind of hinted branch, just return
2528 - at any unconditional branch in the first 15 insns, just return
2529 - at a call or indirect branch, after the first 15 insns, force it to
2530 an even address and return
2531 - at any unconditional branch, after the first 15 insns, force it to
2532 an even address.
 2533	 At the end of the search, insert an hbrp within 4 insns of FIRST,
2534 and an hbrp within 16 instructions of FIRST.
2535 */
644459d0 2536static void
5a976006 2537insert_hbrp_for_ilb_runout (rtx first)
644459d0 2538{
5a976006 2539 rtx insn, before_4 = 0, before_16 = 0;
2540 int addr = 0, length, first_addr = -1;
2541 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2542 int insert_lnop_after = 0;
2543 for (insn = first; insn; insn = NEXT_INSN (insn))
2544 if (INSN_P (insn))
2545 {
2546 if (first_addr == -1)
2547 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2548 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2549 length = get_attr_length (insn);
2550
2551 if (before_4 == 0 && addr + length >= 4 * 4)
2552 before_4 = insn;
2553 /* We test for 14 instructions because the first hbrp will add
2554 up to 2 instructions. */
2555 if (before_16 == 0 && addr + length >= 14 * 4)
2556 before_16 = insn;
2557
2558 if (INSN_CODE (insn) == CODE_FOR_hbr)
2559 {
2560 /* Make sure an hbrp is at least 2 cycles away from a hint.
2561 Insert an lnop after the hbrp when necessary. */
2562 if (before_4 == 0 && addr > 0)
2563 {
2564 before_4 = insn;
2565 insert_lnop_after |= 1;
2566 }
2567 else if (before_4 && addr <= 4 * 4)
2568 insert_lnop_after |= 1;
2569 if (before_16 == 0 && addr > 10 * 4)
2570 {
2571 before_16 = insn;
2572 insert_lnop_after |= 2;
2573 }
2574 else if (before_16 && addr <= 14 * 4)
2575 insert_lnop_after |= 2;
2576 }
644459d0 2577
5a976006 2578 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2579 {
2580 if (addr < hbrp_addr0)
2581 hbrp_addr0 = addr;
2582 else if (addr < hbrp_addr1)
2583 hbrp_addr1 = addr;
2584 }
644459d0 2585
5a976006 2586 if (CALL_P (insn) || JUMP_P (insn))
2587 {
2588 if (HINTED_P (insn))
2589 return;
2590
2591 /* Any branch after the first 15 insns should be on an even
2592 address to avoid a special case branch. There might be
2593 some nops and/or hbrps inserted, so we test after 10
2594 insns. */
2595 if (addr > 10 * 4)
2596 SCHED_ON_EVEN_P (insn) = 1;
2597 }
644459d0 2598
5a976006 2599 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2600 return;
2601
2602
2603 if (addr + length >= 32 * 4)
644459d0 2604 {
5a976006 2605 gcc_assert (before_4 && before_16);
2606 if (hbrp_addr0 > 4 * 4)
644459d0 2607 {
5a976006 2608 insn =
2609 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2610 recog_memoized (insn);
2fbdf9ef 2611 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2612 INSN_ADDRESSES_NEW (insn,
2613 INSN_ADDRESSES (INSN_UID (before_4)));
2614 PUT_MODE (insn, GET_MODE (before_4));
2615 PUT_MODE (before_4, TImode);
2616 if (insert_lnop_after & 1)
644459d0 2617 {
5a976006 2618 insn = emit_insn_before (gen_lnop (), before_4);
2619 recog_memoized (insn);
2fbdf9ef 2620 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2621 INSN_ADDRESSES_NEW (insn,
2622 INSN_ADDRESSES (INSN_UID (before_4)));
2623 PUT_MODE (insn, TImode);
644459d0 2624 }
644459d0 2625 }
5a976006 2626 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2627 && hbrp_addr1 > 16 * 4)
644459d0 2628 {
5a976006 2629 insn =
2630 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2631 recog_memoized (insn);
2fbdf9ef 2632 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2633 INSN_ADDRESSES_NEW (insn,
2634 INSN_ADDRESSES (INSN_UID (before_16)));
2635 PUT_MODE (insn, GET_MODE (before_16));
2636 PUT_MODE (before_16, TImode);
2637 if (insert_lnop_after & 2)
644459d0 2638 {
5a976006 2639 insn = emit_insn_before (gen_lnop (), before_16);
2640 recog_memoized (insn);
2fbdf9ef 2641 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2642 INSN_ADDRESSES_NEW (insn,
2643 INSN_ADDRESSES (INSN_UID
2644 (before_16)));
2645 PUT_MODE (insn, TImode);
644459d0 2646 }
2647 }
5a976006 2648 return;
644459d0 2649 }
644459d0 2650 }
5a976006 2651 else if (BARRIER_P (insn))
2652 return;
644459d0 2653
644459d0 2654}
5a976006 2655
2656/* The SPU might hang when it executes 48 inline instructions after a
2657 hinted branch jumps to its hinted target. The beginning of a
2658 function and the return from a call might have been hinted, and must
2659 be handled as well. To prevent a hang we insert 2 hbrps. The first
2660 should be within 6 insns of the branch target. The second should be
2661 within 22 insns of the branch target. When determining if hbrps are
 2662	 necessary, we look for only 32 inline instructions, because up to
 2663	 12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2664 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2665static void
5a976006 2666insert_hbrp (void)
644459d0 2667{
5a976006 2668 rtx insn;
2669 if (TARGET_SAFE_HINTS)
644459d0 2670 {
5a976006 2671 shorten_branches (get_insns ());
2672 /* Insert hbrp at beginning of function */
2673 insn = next_active_insn (get_insns ());
2674 if (insn)
2675 insert_hbrp_for_ilb_runout (insn);
2676 /* Insert hbrp after hinted targets. */
2677 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2678 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2679 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2680 }
644459d0 2681}
2682
5a976006 2683static int in_spu_reorg;
2684
2685/* Insert branch hints. There are no branch optimizations after this
2686 pass, so it's safe to set our branch hints now. */
644459d0 2687static void
5a976006 2688spu_machine_dependent_reorg (void)
644459d0 2689{
5a976006 2690 sbitmap blocks;
2691 basic_block bb;
2692 rtx branch, insn;
2693 rtx branch_target = 0;
2694 int branch_addr = 0, insn_addr, required_dist = 0;
2695 int i;
2696 unsigned int j;
644459d0 2697
5a976006 2698 if (!TARGET_BRANCH_HINTS || optimize == 0)
2699 {
2700 /* We still do it for unoptimized code because an external
2701 function might have hinted a call or return. */
2702 insert_hbrp ();
2703 pad_bb ();
2704 return;
2705 }
644459d0 2706
5a976006 2707 blocks = sbitmap_alloc (last_basic_block);
2708 sbitmap_zero (blocks);
644459d0 2709
5a976006 2710 in_spu_reorg = 1;
2711 compute_bb_for_insn ();
2712
2713 compact_blocks ();
2714
2715 spu_bb_info =
2716 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2717 sizeof (struct spu_bb_info));
2718
2719 /* We need exact insn addresses and lengths. */
2720 shorten_branches (get_insns ());
2721
2722 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2723 {
5a976006 2724 bb = BASIC_BLOCK (i);
2725 branch = 0;
2726 if (spu_bb_info[i].prop_jump)
644459d0 2727 {
5a976006 2728 branch = spu_bb_info[i].prop_jump;
2729 branch_target = get_branch_target (branch);
2730 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2731 required_dist = spu_hint_dist;
2732 }
2733 /* Search from end of a block to beginning. In this loop, find
 2734	 jumps which need a branch hint and emit the hint only when:
2735 - it's an indirect branch and we're at the insn which sets
2736 the register
2737 - we're at an insn that will invalidate the hint. e.g., a
2738 call, another hint insn, inline asm that clobbers $hbr, and
2739 some inlined operations (divmodsi4). Don't consider jumps
2740 because they are only at the end of a block and are
2741 considered when we are deciding whether to propagate
2742 - we're getting too far away from the branch. The hbr insns
2743 only have a signed 10 bit offset
2744 We go back as far as possible so the branch will be considered
2745 for propagation when we get to the beginning of the block. */
2746 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2747 {
2748 if (INSN_P (insn))
2749 {
2750 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2751 if (branch
2752 && ((GET_CODE (branch_target) == REG
2753 && set_of (branch_target, insn) != NULL_RTX)
2754 || insn_clobbers_hbr (insn)
2755 || branch_addr - insn_addr > 600))
2756 {
2757 rtx next = NEXT_INSN (insn);
2758 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2759 if (insn != BB_END (bb)
2760 && branch_addr - next_addr >= required_dist)
2761 {
2762 if (dump_file)
2763 fprintf (dump_file,
2764 "hint for %i in block %i before %i\n",
2765 INSN_UID (branch), bb->index,
2766 INSN_UID (next));
2767 spu_emit_branch_hint (next, branch, branch_target,
2768 branch_addr - next_addr, blocks);
2769 }
2770 branch = 0;
2771 }
2772
2773 /* JUMP_P will only be true at the end of a block. When
2774 branch is already set it means we've previously decided
2775 to propagate a hint for that branch into this block. */
2776 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2777 {
2778 branch = 0;
2779 if ((branch_target = get_branch_target (insn)))
2780 {
2781 branch = insn;
2782 branch_addr = insn_addr;
2783 required_dist = spu_hint_dist;
2784 }
2785 }
2786 }
2787 if (insn == BB_HEAD (bb))
2788 break;
2789 }
2790
2791 if (branch)
2792 {
2793 /* If we haven't emitted a hint for this branch yet, it might
2794 be profitable to emit it in one of the predecessor blocks,
2795 especially for loops. */
2796 rtx bbend;
2797 basic_block prev = 0, prop = 0, prev2 = 0;
2798 int loop_exit = 0, simple_loop = 0;
2799 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2800
2801 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2802 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2803 prev = EDGE_PRED (bb, j)->src;
2804 else
2805 prev2 = EDGE_PRED (bb, j)->src;
2806
2807 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2808 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2809 loop_exit = 1;
2810 else if (EDGE_SUCC (bb, j)->dest == bb)
2811 simple_loop = 1;
2812
2813 /* If this branch is a loop exit then propagate to previous
2814 fallthru block. This catches the cases when it is a simple
2815 loop or when there is an initial branch into the loop. */
2816 if (prev && (loop_exit || simple_loop)
2817 && prev->loop_depth <= bb->loop_depth)
2818 prop = prev;
2819
 2820	 /* If there is only one adjacent predecessor, don't propagate
2821 outside this loop. This loop_depth test isn't perfect, but
2822 I'm not sure the loop_father member is valid at this point. */
2823 else if (prev && single_pred_p (bb)
2824 && prev->loop_depth == bb->loop_depth)
2825 prop = prev;
2826
2827 /* If this is the JOIN block of a simple IF-THEN then
 2828	 propagate the hint to the HEADER block. */
2829 else if (prev && prev2
2830 && EDGE_COUNT (bb->preds) == 2
2831 && EDGE_COUNT (prev->preds) == 1
2832 && EDGE_PRED (prev, 0)->src == prev2
2833 && prev2->loop_depth == bb->loop_depth
2834 && GET_CODE (branch_target) != REG)
2835 prop = prev;
2836
2837 /* Don't propagate when:
2838 - this is a simple loop and the hint would be too far
2839 - this is not a simple loop and there are 16 insns in
2840 this block already
2841 - the predecessor block ends in a branch that will be
2842 hinted
2843 - the predecessor block ends in an insn that invalidates
2844 the hint */
2845 if (prop
2846 && prop->index >= 0
2847 && (bbend = BB_END (prop))
2848 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2849 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2850 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2851 {
2852 if (dump_file)
2853 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2854 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2855 bb->index, prop->index, bb->loop_depth,
2856 INSN_UID (branch), loop_exit, simple_loop,
2857 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2858
2859 spu_bb_info[prop->index].prop_jump = branch;
2860 spu_bb_info[prop->index].bb_index = i;
2861 }
2862 else if (branch_addr - next_addr >= required_dist)
2863 {
2864 if (dump_file)
2865 fprintf (dump_file, "hint for %i in block %i before %i\n",
2866 INSN_UID (branch), bb->index,
2867 INSN_UID (NEXT_INSN (insn)));
2868 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2869 branch_addr - next_addr, blocks);
2870 }
2871 branch = 0;
644459d0 2872 }
644459d0 2873 }
5a976006 2874 free (spu_bb_info);
644459d0 2875
5a976006 2876 if (!sbitmap_empty_p (blocks))
2877 find_many_sub_basic_blocks (blocks);
2878
2879 /* We have to schedule to make sure alignment is ok. */
2880 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2881
2882 /* The hints need to be scheduled, so call it again. */
2883 schedule_insns ();
2fbdf9ef 2884 df_finish_pass (true);
5a976006 2885
2886 insert_hbrp ();
2887
2888 pad_bb ();
2889
8f1d58ad 2890 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2891 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2892 {
2893 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2894	 between its branch label and the branch.  We don't move the
2895 label because GCC expects it at the beginning of the block. */
2896 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2897 rtx label_ref = XVECEXP (unspec, 0, 0);
2898 rtx label = XEXP (label_ref, 0);
2899 rtx branch;
2900 int offset = 0;
2901 for (branch = NEXT_INSN (label);
2902 !JUMP_P (branch) && !CALL_P (branch);
2903 branch = NEXT_INSN (branch))
2904 if (NONJUMP_INSN_P (branch))
2905 offset += get_attr_length (branch);
2906 if (offset > 0)
2907 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2908 }
5a976006 2909
2910 if (spu_flag_var_tracking)
644459d0 2911 {
5a976006 2912 df_analyze ();
2913 timevar_push (TV_VAR_TRACKING);
2914 variable_tracking_main ();
2915 timevar_pop (TV_VAR_TRACKING);
2916 df_finish_pass (false);
644459d0 2917 }
5a976006 2918
2919 free_bb_for_insn ();
2920
2921 in_spu_reorg = 0;
644459d0 2922}
2923\f
2924
2925/* Insn scheduling routines, primarily for dual issue. */
2926static int
2927spu_sched_issue_rate (void)
2928{
2929 return 2;
2930}
2931
2932static int
5a976006 2933uses_ls_unit(rtx insn)
644459d0 2934{
5a976006 2935 rtx set = single_set (insn);
2936 if (set != 0
2937 && (GET_CODE (SET_DEST (set)) == MEM
2938 || GET_CODE (SET_SRC (set)) == MEM))
2939 return 1;
2940 return 0;
644459d0 2941}
2942
2943static int
2944get_pipe (rtx insn)
2945{
2946 enum attr_type t;
2947 /* Handle inline asm */
2948 if (INSN_CODE (insn) == -1)
2949 return -1;
2950 t = get_attr_type (insn);
2951 switch (t)
2952 {
2953 case TYPE_CONVERT:
2954 return -2;
2955 case TYPE_MULTI0:
2956 return -1;
2957
2958 case TYPE_FX2:
2959 case TYPE_FX3:
2960 case TYPE_SPR:
2961 case TYPE_NOP:
2962 case TYPE_FXB:
2963 case TYPE_FPD:
2964 case TYPE_FP6:
2965 case TYPE_FP7:
644459d0 2966 return 0;
2967
2968 case TYPE_LNOP:
2969 case TYPE_SHUF:
2970 case TYPE_LOAD:
2971 case TYPE_STORE:
2972 case TYPE_BR:
2973 case TYPE_MULTI1:
2974 case TYPE_HBR:
5a976006 2975 case TYPE_IPREFETCH:
644459d0 2976 return 1;
2977 default:
2978 abort ();
2979 }
2980}
2981
5a976006 2982
2983/* haifa-sched.c has a static variable that keeps track of the current
2984 cycle. It is passed to spu_sched_reorder, and we record it here for
2985 use by spu_sched_variable_issue. It won't be accurate if the
 2986	 scheduler updates its clock_var between the two calls. */
2987static int clock_var;
2988
2989/* This is used to keep track of insn alignment. Set to 0 at the
2990 beginning of each block and increased by the "length" attr of each
2991 insn scheduled. */
2992static int spu_sched_length;
2993
2994/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2995 ready list appropriately in spu_sched_reorder(). */
2996static int pipe0_clock;
2997static int pipe1_clock;
2998
2999static int prev_clock_var;
3000
3001static int prev_priority;
3002
3003/* The SPU needs to load the next ilb sometime during the execution of
3004 the previous ilb. There is a potential conflict if every cycle has a
3005 load or store. To avoid the conflict we make sure the load/store
3006 unit is free for at least one cycle during the execution of insns in
3007 the previous ilb. */
3008static int spu_ls_first;
3009static int prev_ls_clock;
3010
3011static void
3012spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3013 int max_ready ATTRIBUTE_UNUSED)
3014{
3015 spu_sched_length = 0;
3016}
3017
3018static void
3019spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3020 int max_ready ATTRIBUTE_UNUSED)
3021{
3022 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3023 {
3024 /* When any block might be at least 8-byte aligned, assume they
3025 will all be at least 8-byte aligned to make sure dual issue
3026 works out correctly. */
3027 spu_sched_length = 0;
3028 }
3029 spu_ls_first = INT_MAX;
3030 clock_var = -1;
3031 prev_ls_clock = -1;
3032 pipe0_clock = -1;
3033 pipe1_clock = -1;
3034 prev_clock_var = -1;
3035 prev_priority = -1;
3036}
3037
644459d0 3038static int
5a976006 3039spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3040 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3041{
5a976006 3042 int len;
3043 int p;
644459d0 3044 if (GET_CODE (PATTERN (insn)) == USE
3045 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3046 || (len = get_attr_length (insn)) == 0)
3047 return more;
3048
3049 spu_sched_length += len;
3050
3051 /* Reset on inline asm */
3052 if (INSN_CODE (insn) == -1)
3053 {
3054 spu_ls_first = INT_MAX;
3055 pipe0_clock = -1;
3056 pipe1_clock = -1;
3057 return 0;
3058 }
3059 p = get_pipe (insn);
3060 if (p == 0)
3061 pipe0_clock = clock_var;
3062 else
3063 pipe1_clock = clock_var;
3064
3065 if (in_spu_reorg)
3066 {
3067 if (clock_var - prev_ls_clock > 1
3068 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3069 spu_ls_first = INT_MAX;
3070 if (uses_ls_unit (insn))
3071 {
3072 if (spu_ls_first == INT_MAX)
3073 spu_ls_first = spu_sched_length;
3074 prev_ls_clock = clock_var;
3075 }
3076
3077 /* The scheduler hasn't inserted the nop, but we will later on.
3078 Include those nops in spu_sched_length. */
3079 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3080 spu_sched_length += 4;
3081 prev_clock_var = clock_var;
3082
3083 /* more is -1 when called from spu_sched_reorder for new insns
3084 that don't have INSN_PRIORITY */
3085 if (more >= 0)
3086 prev_priority = INSN_PRIORITY (insn);
3087 }
3088
 3089	 /* Always try issuing more insns. spu_sched_reorder will decide
3090 when the cycle should be advanced. */
3091 return 1;
3092}
3093
3094/* This function is called for both TARGET_SCHED_REORDER and
3095 TARGET_SCHED_REORDER2. */
3096static int
3097spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3098 rtx *ready, int *nreadyp, int clock)
3099{
3100 int i, nready = *nreadyp;
3101 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3102 rtx insn;
3103
3104 clock_var = clock;
3105
3106 if (nready <= 0 || pipe1_clock >= clock)
3107 return 0;
3108
3109 /* Find any rtl insns that don't generate assembly insns and schedule
3110 them first. */
3111 for (i = nready - 1; i >= 0; i--)
3112 {
3113 insn = ready[i];
3114 if (INSN_CODE (insn) == -1
3115 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3116 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3117 {
3118 ready[i] = ready[nready - 1];
3119 ready[nready - 1] = insn;
3120 return 1;
3121 }
3122 }
3123
3124 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3125 for (i = 0; i < nready; i++)
3126 if (INSN_CODE (ready[i]) != -1)
3127 {
3128 insn = ready[i];
3129 switch (get_attr_type (insn))
3130 {
3131 default:
3132 case TYPE_MULTI0:
3133 case TYPE_CONVERT:
3134 case TYPE_FX2:
3135 case TYPE_FX3:
3136 case TYPE_SPR:
3137 case TYPE_NOP:
3138 case TYPE_FXB:
3139 case TYPE_FPD:
3140 case TYPE_FP6:
3141 case TYPE_FP7:
3142 pipe_0 = i;
3143 break;
3144 case TYPE_LOAD:
3145 case TYPE_STORE:
3146 pipe_ls = i;
3147 case TYPE_LNOP:
3148 case TYPE_SHUF:
3149 case TYPE_BR:
3150 case TYPE_MULTI1:
3151 case TYPE_HBR:
3152 pipe_1 = i;
3153 break;
3154 case TYPE_IPREFETCH:
3155 pipe_hbrp = i;
3156 break;
3157 }
3158 }
3159
3160 /* In the first scheduling phase, schedule loads and stores together
3161 to increase the chance they will get merged during postreload CSE. */
3162 if (!reload_completed && pipe_ls >= 0)
3163 {
3164 insn = ready[pipe_ls];
3165 ready[pipe_ls] = ready[nready - 1];
3166 ready[nready - 1] = insn;
3167 return 1;
3168 }
3169
3170 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3171 if (pipe_hbrp >= 0)
3172 pipe_1 = pipe_hbrp;
3173
3174 /* When we have loads/stores in every cycle of the last 15 insns and
3175 we are about to schedule another load/store, emit an hbrp insn
3176 instead. */
3177 if (in_spu_reorg
3178 && spu_sched_length - spu_ls_first >= 4 * 15
3179 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3180 {
3181 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3182 recog_memoized (insn);
3183 if (pipe0_clock < clock)
3184 PUT_MODE (insn, TImode);
3185 spu_sched_variable_issue (file, verbose, insn, -1);
3186 return 0;
3187 }
3188
3189 /* In general, we want to emit nops to increase dual issue, but dual
3190 issue isn't faster when one of the insns could be scheduled later
 3191	 without affecting the critical path. We look at INSN_PRIORITY to
3192 make a good guess, but it isn't perfect so -mdual-nops=n can be
 3193	 used to tune it. */
3194 if (in_spu_reorg && spu_dual_nops < 10)
3195 {
 3196	 /* When we are at an even address and we are not issuing nops to
3197 improve scheduling then we need to advance the cycle. */
3198 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3199 && (spu_dual_nops == 0
3200 || (pipe_1 != -1
3201 && prev_priority >
3202 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3203 return 0;
3204
3205 /* When at an odd address, schedule the highest priority insn
3206 without considering pipeline. */
3207 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3208 && (spu_dual_nops == 0
3209 || (prev_priority >
3210 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3211 return 1;
3212 }
3213
3214
 3215	 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3216 pipe0 insn in the ready list, schedule it. */
3217 if (pipe0_clock < clock && pipe_0 >= 0)
3218 schedule_i = pipe_0;
3219
3220 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3221 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3222 else
3223 schedule_i = pipe_1;
3224
3225 if (schedule_i > -1)
3226 {
3227 insn = ready[schedule_i];
3228 ready[schedule_i] = ready[nready - 1];
3229 ready[nready - 1] = insn;
3230 return 1;
3231 }
3232 return 0;
644459d0 3233}
3234
3235/* INSN is dependent on DEP_INSN. */
3236static int
5a976006 3237spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3238{
5a976006 3239 rtx set;
3240
3241 /* The blockage pattern is used to prevent instructions from being
3242 moved across it and has no cost. */
3243 if (INSN_CODE (insn) == CODE_FOR_blockage
3244 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3245 return 0;
3246
9d98604b 3247 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3248 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3249 return 0;
3250
3251 /* Make sure hbrps are spread out. */
3252 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3253 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3254 return 8;
3255
3256 /* Make sure hints and hbrps are 2 cycles apart. */
3257 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3258 || INSN_CODE (insn) == CODE_FOR_hbr)
3259 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3260 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3261 return 2;
3262
3263 /* An hbrp has no real dependency on other insns. */
3264 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3265 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3266 return 0;
3267
3268 /* Assuming that it is unlikely an argument register will be used in
3269 the first cycle of the called function, we reduce the cost for
3270 slightly better scheduling of dep_insn. When not hinted, the
3271 mispredicted branch would hide the cost as well. */
3272 if (CALL_P (insn))
3273 {
3274 rtx target = get_branch_target (insn);
3275 if (GET_CODE (target) != REG || !set_of (target, insn))
3276 return cost - 2;
3277 return cost;
3278 }
3279
3280 /* And when returning from a function, let's assume the return values
3281 are completed sooner too. */
3282 if (CALL_P (dep_insn))
644459d0 3283 return cost - 2;
5a976006 3284
 3285	 /* Make sure an instruction that loads from the back chain is scheduled
3286 away from the return instruction so a hint is more likely to get
3287 issued. */
3288 if (INSN_CODE (insn) == CODE_FOR__return
3289 && (set = single_set (dep_insn))
3290 && GET_CODE (SET_DEST (set)) == REG
3291 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3292 return 20;
3293
644459d0 3294 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3295 scheduler makes every insn in a block anti-dependent on the final
3296 jump_insn. We adjust here so higher cost insns will get scheduled
3297 earlier. */
5a976006 3298 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3299 return insn_cost (dep_insn) - 3;
5a976006 3300
644459d0 3301 return cost;
3302}
3303\f
3304/* Create a CONST_DOUBLE from a string. */
3305struct rtx_def *
3306spu_float_const (const char *string, enum machine_mode mode)
3307{
3308 REAL_VALUE_TYPE value;
3309 value = REAL_VALUE_ATOF (string, mode);
3310 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3311}
3312
644459d0 3313int
3314spu_constant_address_p (rtx x)
3315{
3316 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3317 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3318 || GET_CODE (x) == HIGH);
3319}
3320
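/* For example, 0x7fff can be loaded with il (SPU_IL), 0x20000 with ila
   (SPU_ILA), 0x12341234 with ilh (SPU_ILH) and 0x43210000 with ilhu
   (SPU_ILHU); a value like 0x12345678 matches none and yields SPU_NONE.  */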
3321static enum spu_immediate
3322which_immediate_load (HOST_WIDE_INT val)
3323{
3324 gcc_assert (val == trunc_int_for_mode (val, SImode));
3325
3326 if (val >= -0x8000 && val <= 0x7fff)
3327 return SPU_IL;
3328 if (val >= 0 && val <= 0x3ffff)
3329 return SPU_ILA;
3330 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3331 return SPU_ILH;
3332 if ((val & 0xffff) == 0)
3333 return SPU_ILHU;
3334
3335 return SPU_NONE;
3336}
3337
dea01258 3338/* Return true when OP can be loaded by one of the il instructions, or
 3339	 before epilogue_completed is set and OP can be loaded using ilhu and iohl. */
644459d0 3340int
3341immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3342{
3343 if (CONSTANT_P (op))
3344 {
3345 enum immediate_class c = classify_immediate (op, mode);
5df189be 3346 return c == IC_IL1 || c == IC_IL1s
3072d30e 3347 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3348 }
3349 return 0;
3350}
3351
 3352	/* Return true if the first SIZE bytes of ARR form a constant that can be
 3353	 generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART are
 3354	 set to the run length and start offset that select which to use. */
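/* For instance, { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 } is the
   control pattern for inserting a word at byte offset 4, so cpat_info
   accepts it and sets *PRUN = 4 and *PSTART = 4.  */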
3355static int
3356cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3357{
3358 int cpat, run, i, start;
3359 cpat = 1;
3360 run = 0;
3361 start = -1;
3362 for (i = 0; i < size && cpat; i++)
3363 if (arr[i] != i+16)
3364 {
3365 if (!run)
3366 {
3367 start = i;
3368 if (arr[i] == 3)
3369 run = 1;
3370 else if (arr[i] == 2 && arr[i+1] == 3)
3371 run = 2;
3372 else if (arr[i] == 0)
3373 {
3374 while (arr[i+run] == run && i+run < 16)
3375 run++;
3376 if (run != 4 && run != 8)
3377 cpat = 0;
3378 }
3379 else
3380 cpat = 0;
3381 if ((i & (run-1)) != 0)
3382 cpat = 0;
3383 i += run;
3384 }
3385 else
3386 cpat = 0;
3387 }
b01a6dc3 3388 if (cpat && (run || size < 16))
dea01258 3389 {
3390 if (run == 0)
3391 run = 1;
3392 if (prun)
3393 *prun = run;
3394 if (pstart)
3395 *pstart = start == -1 ? 16-run : start;
3396 return 1;
3397 }
3398 return 0;
3399}
3400
3401/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3402 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3403static enum immediate_class
3404classify_immediate (rtx op, enum machine_mode mode)
644459d0 3405{
3406 HOST_WIDE_INT val;
3407 unsigned char arr[16];
5df189be 3408 int i, j, repeated, fsmbi, repeat;
dea01258 3409
3410 gcc_assert (CONSTANT_P (op));
3411
644459d0 3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3414
dea01258 3415 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3416 if (!flag_pic
3417 && mode == V4SImode
dea01258 3418 && GET_CODE (op) == CONST_VECTOR
3419 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3420 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3421 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3422 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3423 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3424 op = CONST_VECTOR_ELT (op, 0);
644459d0 3425
dea01258 3426 switch (GET_CODE (op))
3427 {
3428 case SYMBOL_REF:
3429 case LABEL_REF:
3430 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3431
dea01258 3432 case CONST:
0cfc65d4 3433 /* We can never know if the resulting address fits in 18 bits and can be
3434 loaded with ila. For now, assume the address will not overflow if
3435 the displacement is "small" (fits 'K' constraint). */
3436 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3437 {
3438 rtx sym = XEXP (XEXP (op, 0), 0);
3439 rtx cst = XEXP (XEXP (op, 0), 1);
3440
3441 if (GET_CODE (sym) == SYMBOL_REF
3442 && GET_CODE (cst) == CONST_INT
3443 && satisfies_constraint_K (cst))
3444 return IC_IL1s;
3445 }
3446 return IC_IL2s;
644459d0 3447
dea01258 3448 case HIGH:
3449 return IC_IL1s;
3450
3451 case CONST_VECTOR:
3452 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3453 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3454 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3455 return IC_POOL;
3456 /* Fall through. */
3457
3458 case CONST_INT:
3459 case CONST_DOUBLE:
3460 constant_to_array (mode, op, arr);
644459d0 3461
dea01258 3462 /* Check that each 4-byte slot is identical. */
3463 repeated = 1;
3464 for (i = 4; i < 16; i += 4)
3465 for (j = 0; j < 4; j++)
3466 if (arr[j] != arr[i + j])
3467 repeated = 0;
3468
3469 if (repeated)
3470 {
3471 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3472 val = trunc_int_for_mode (val, SImode);
3473
3474 if (which_immediate_load (val) != SPU_NONE)
3475 return IC_IL1;
3476 }
3477
3478 /* Any mode of 2 bytes or smaller can be loaded with an il
3479 instruction. */
3480 gcc_assert (GET_MODE_SIZE (mode) > 2);
3481
3482 fsmbi = 1;
5df189be 3483 repeat = 0;
dea01258 3484 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3485 if (arr[i] != 0 && repeat == 0)
3486 repeat = arr[i];
3487 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3488 fsmbi = 0;
3489 if (fsmbi)
5df189be 3490 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3491
3492 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3493 return IC_CPAT;
3494
3495 if (repeated)
3496 return IC_IL2;
3497
3498 return IC_POOL;
3499 default:
3500 break;
3501 }
3502 gcc_unreachable ();
644459d0 3503}
3504
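/* For example, 0x1ff maps to SPU_ORI, 0xffff to SPU_IOHL, the halfword
   splat 0x00050005 to SPU_ORHI, and the byte splat 0x03030303 to
   SPU_ORBI.  */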
3505static enum spu_immediate
3506which_logical_immediate (HOST_WIDE_INT val)
3507{
3508 gcc_assert (val == trunc_int_for_mode (val, SImode));
3509
3510 if (val >= -0x200 && val <= 0x1ff)
3511 return SPU_ORI;
3512 if (val >= 0 && val <= 0xffff)
3513 return SPU_IOHL;
3514 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3515 {
3516 val = trunc_int_for_mode (val, HImode);
3517 if (val >= -0x200 && val <= 0x1ff)
3518 return SPU_ORHI;
3519 if ((val & 0xff) == ((val >> 8) & 0xff))
3520 {
3521 val = trunc_int_for_mode (val, QImode);
3522 if (val >= -0x200 && val <= 0x1ff)
3523 return SPU_ORBI;
3524 }
3525 }
3526 return SPU_NONE;
3527}
3528
5df189be 3529/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3530 CONST_DOUBLEs. */
3531static int
3532const_vector_immediate_p (rtx x)
3533{
3534 int i;
3535 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3536 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3537 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3538 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3539 return 0;
3540 return 1;
3541}
3542
644459d0 3543int
3544logical_immediate_p (rtx op, enum machine_mode mode)
3545{
3546 HOST_WIDE_INT val;
3547 unsigned char arr[16];
3548 int i, j;
3549
3550 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3551 || GET_CODE (op) == CONST_VECTOR);
3552
5df189be 3553 if (GET_CODE (op) == CONST_VECTOR
3554 && !const_vector_immediate_p (op))
3555 return 0;
3556
644459d0 3557 if (GET_MODE (op) != VOIDmode)
3558 mode = GET_MODE (op);
3559
3560 constant_to_array (mode, op, arr);
3561
3562 /* Check that bytes are repeated. */
3563 for (i = 4; i < 16; i += 4)
3564 for (j = 0; j < 4; j++)
3565 if (arr[j] != arr[i + j])
3566 return 0;
3567
3568 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3569 val = trunc_int_for_mode (val, SImode);
3570
3571 i = which_logical_immediate (val);
3572 return i != SPU_NONE && i != SPU_IOHL;
3573}
3574
3575int
3576iohl_immediate_p (rtx op, enum machine_mode mode)
3577{
3578 HOST_WIDE_INT val;
3579 unsigned char arr[16];
3580 int i, j;
3581
3582 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3583 || GET_CODE (op) == CONST_VECTOR);
3584
5df189be 3585 if (GET_CODE (op) == CONST_VECTOR
3586 && !const_vector_immediate_p (op))
3587 return 0;
3588
644459d0 3589 if (GET_MODE (op) != VOIDmode)
3590 mode = GET_MODE (op);
3591
3592 constant_to_array (mode, op, arr);
3593
3594 /* Check that bytes are repeated. */
3595 for (i = 4; i < 16; i += 4)
3596 for (j = 0; j < 4; j++)
3597 if (arr[j] != arr[i + j])
3598 return 0;
3599
3600 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3601 val = trunc_int_for_mode (val, SImode);
3602
3603 return val >= 0 && val <= 0xffff;
3604}
3605
3606int
3607arith_immediate_p (rtx op, enum machine_mode mode,
3608 HOST_WIDE_INT low, HOST_WIDE_INT high)
3609{
3610 HOST_WIDE_INT val;
3611 unsigned char arr[16];
3612 int bytes, i, j;
3613
3614 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3615 || GET_CODE (op) == CONST_VECTOR);
3616
5df189be 3617 if (GET_CODE (op) == CONST_VECTOR
3618 && !const_vector_immediate_p (op))
3619 return 0;
3620
644459d0 3621 if (GET_MODE (op) != VOIDmode)
3622 mode = GET_MODE (op);
3623
3624 constant_to_array (mode, op, arr);
3625
3626 if (VECTOR_MODE_P (mode))
3627 mode = GET_MODE_INNER (mode);
3628
3629 bytes = GET_MODE_SIZE (mode);
3630 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3631
3632 /* Check that bytes are repeated. */
3633 for (i = bytes; i < 16; i += bytes)
3634 for (j = 0; j < bytes; j++)
3635 if (arr[j] != arr[i + j])
3636 return 0;
3637
3638 val = arr[0];
3639 for (j = 1; j < bytes; j++)
3640 val = (val << 8) | arr[j];
3641
3642 val = trunc_int_for_mode (val, mode);
3643
3644 return val >= low && val <= high;
3645}
3646
56c7bfc2 3647	/* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
 3648	 OP is 2^scale with LOW <= scale <= HIGH.  When OP is a vector,
 3649	 all entries must be the same. */
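/* For instance, the SFmode constant 2.0 has the bit pattern 0x40000000:
   its mantissa bits are zero and its stored exponent is 128, so the scale
   is 128 - 127 = 1 and it is accepted whenever LOW <= 1 <= HIGH.  */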
3650bool
3651exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3652{
3653 enum machine_mode int_mode;
3654 HOST_WIDE_INT val;
3655 unsigned char arr[16];
3656 int bytes, i, j;
3657
3658 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3659 || GET_CODE (op) == CONST_VECTOR);
3660
3661 if (GET_CODE (op) == CONST_VECTOR
3662 && !const_vector_immediate_p (op))
3663 return 0;
3664
3665 if (GET_MODE (op) != VOIDmode)
3666 mode = GET_MODE (op);
3667
3668 constant_to_array (mode, op, arr);
3669
3670 if (VECTOR_MODE_P (mode))
3671 mode = GET_MODE_INNER (mode);
3672
3673 bytes = GET_MODE_SIZE (mode);
3674 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3675
3676 /* Check that bytes are repeated. */
3677 for (i = bytes; i < 16; i += bytes)
3678 for (j = 0; j < bytes; j++)
3679 if (arr[j] != arr[i + j])
3680 return 0;
3681
3682 val = arr[0];
3683 for (j = 1; j < bytes; j++)
3684 val = (val << 8) | arr[j];
3685
3686 val = trunc_int_for_mode (val, int_mode);
3687
3688 /* Currently, we only handle SFmode */
3689 gcc_assert (mode == SFmode);
3690 if (mode == SFmode)
3691 {
3692 int exp = (val >> 23) - 127;
3693 return val > 0 && (val & 0x007fffff) == 0
3694 && exp >= low && exp <= high;
3695 }
3696 return FALSE;
3697}
3698
6cf5579e 3699/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3700
3701static int
3702ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3703{
3704 rtx x = *px;
3705 tree decl;
3706
3707 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3708 {
3709 rtx plus = XEXP (x, 0);
3710 rtx op0 = XEXP (plus, 0);
3711 rtx op1 = XEXP (plus, 1);
3712 if (GET_CODE (op1) == CONST_INT)
3713 x = op0;
3714 }
3715
3716 return (GET_CODE (x) == SYMBOL_REF
3717 && (decl = SYMBOL_REF_DECL (x)) != 0
3718 && TREE_CODE (decl) == VAR_DECL
3719 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3720}
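
/* For illustration: the callback above matches both (symbol_ref "x") and
   (const (plus (symbol_ref "x") (const_int 16))) whenever the declaration
   behind "x" lives in the __ea address space, e.g. "__ea int x;".  */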
3721
644459d0 3722/* We accept:
5b865faf 3723 - any 32-bit constant (SImode, SFmode)
644459d0 3724 - any constant that can be generated with fsmbi (any mode)
5b865faf 3725 - a 64-bit constant where the high and low bits are identical
644459d0 3726 (DImode, DFmode)
5b865faf 3727 - a 128-bit constant where the four 32-bit words match. */
644459d0 3728int
3729spu_legitimate_constant_p (rtx x)
3730{
5df189be 3731 if (GET_CODE (x) == HIGH)
3732 x = XEXP (x, 0);
6cf5579e 3733
3734 /* Reject any __ea qualified reference. These can't appear in
 3735     instructions; they must be forced to the constant pool instead. */
3736 if (for_each_rtx (&x, ea_symbol_ref, 0))
3737 return 0;
3738
644459d0 3739 /* V4SI with all identical symbols is valid. */
5df189be 3740 if (!flag_pic
3741 && GET_MODE (x) == V4SImode
644459d0 3742 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3743 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3744 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3745 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3746 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3747 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3748
5df189be 3749 if (GET_CODE (x) == CONST_VECTOR
3750 && !const_vector_immediate_p (x))
3751 return 0;
644459d0 3752 return 1;
3753}
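
/* Example (illustrative): a V4SI constant whose four elements are the same
   SYMBOL_REF is accepted above when not compiling PIC; a V4SI mixing two
   different symbols is rejected, so the expanders must materialize it some
   other way (typically from the constant pool).  */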
3754
 3755/* Valid addresses are:
3756 - symbol_ref, label_ref, const
3757 - reg
9d98604b 3758 - reg + const_int, where const_int is 16 byte aligned
644459d0 3759 - reg + reg, alignment doesn't matter
3760 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3761 ignore the 4 least significant bits of the const. We only care about
3762 16 byte modes because the expand phase will change all smaller MEM
3763 references to TImode. */
3764static bool
3765spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3766 rtx x, bool reg_ok_strict)
644459d0 3767{
9d98604b 3768 int aligned = GET_MODE_SIZE (mode) >= 16;
3769 if (aligned
3770 && GET_CODE (x) == AND
644459d0 3771 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3772 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3773 x = XEXP (x, 0);
3774 switch (GET_CODE (x))
3775 {
644459d0 3776 case LABEL_REF:
6cf5579e 3777 return !TARGET_LARGE_MEM;
3778
9d98604b 3779 case SYMBOL_REF:
644459d0 3780 case CONST:
6cf5579e 3781 /* Keep __ea references until reload so that spu_expand_mov can see them
3782 in MEMs. */
3783 if (ea_symbol_ref (&x, 0))
3784 return !reload_in_progress && !reload_completed;
9d98604b 3785 return !TARGET_LARGE_MEM;
644459d0 3786
3787 case CONST_INT:
3788 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3789
3790 case SUBREG:
3791 x = XEXP (x, 0);
9d98604b 3792 if (REG_P (x))
3793 return 0;
644459d0 3794
3795 case REG:
3796 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3797
3798 case PLUS:
3799 case LO_SUM:
3800 {
3801 rtx op0 = XEXP (x, 0);
3802 rtx op1 = XEXP (x, 1);
3803 if (GET_CODE (op0) == SUBREG)
3804 op0 = XEXP (op0, 0);
3805 if (GET_CODE (op1) == SUBREG)
3806 op1 = XEXP (op1, 0);
644459d0 3807 if (GET_CODE (op0) == REG
3808 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3809 && GET_CODE (op1) == CONST_INT
3810 && INTVAL (op1) >= -0x2000
3811 && INTVAL (op1) <= 0x1fff
9d98604b 3812 && (!aligned || (INTVAL (op1) & 15) == 0))
3813 return TRUE;
644459d0 3814 if (GET_CODE (op0) == REG
3815 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3816 && GET_CODE (op1) == REG
3817 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3818 return TRUE;
644459d0 3819 }
3820 break;
3821
3822 default:
3823 break;
3824 }
9d98604b 3825 return FALSE;
644459d0 3826}
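
/* Illustrative application of the rules above: for a 16-byte (TImode)
   access, (plus (reg) (const_int 32)) is accepted while
   (plus (reg) (const_int 20)) is not, because lqd/stqd would silently drop
   the low four bits of the offset; for an SImode access both offsets are
   fine, since such references are widened to TImode later anyway.  */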
3827
6cf5579e 3828/* Like spu_legitimate_address_p, except with named addresses. */
3829static bool
3830spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3831 bool reg_ok_strict, addr_space_t as)
3832{
3833 if (as == ADDR_SPACE_EA)
3834 return (REG_P (x) && (GET_MODE (x) == EAmode));
3835
3836 else if (as != ADDR_SPACE_GENERIC)
3837 gcc_unreachable ();
3838
3839 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3840}
3841
644459d0 3842/* When the address is reg + const_int, force the const_int into a
fa7637bd 3843 register. */
644459d0 3844rtx
3845spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3846 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3847{
3848 rtx op0, op1;
3849 /* Make sure both operands are registers. */
3850 if (GET_CODE (x) == PLUS)
3851 {
3852 op0 = XEXP (x, 0);
3853 op1 = XEXP (x, 1);
3854 if (ALIGNED_SYMBOL_REF_P (op0))
3855 {
3856 op0 = force_reg (Pmode, op0);
3857 mark_reg_pointer (op0, 128);
3858 }
3859 else if (GET_CODE (op0) != REG)
3860 op0 = force_reg (Pmode, op0);
3861 if (ALIGNED_SYMBOL_REF_P (op1))
3862 {
3863 op1 = force_reg (Pmode, op1);
3864 mark_reg_pointer (op1, 128);
3865 }
3866 else if (GET_CODE (op1) != REG)
3867 op1 = force_reg (Pmode, op1);
3868 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3869 }
41e3a0c7 3870 return x;
644459d0 3871}
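
/* For example (illustrative): an address like
   (plus (symbol_ref "buf") (reg)) where "buf" is known to be 16-byte
   aligned becomes a reg+reg sum, with the register holding &buf marked via
   mark_reg_pointer as 128-bit aligned, so reg_aligned_for_addr can later
   prove that the base needs no extra rotate.  */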
3872
6cf5579e 3873/* Like spu_legitimate_address, except with named address support. */
3874static rtx
3875spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3876 addr_space_t as)
3877{
3878 if (as != ADDR_SPACE_GENERIC)
3879 return x;
3880
3881 return spu_legitimize_address (x, oldx, mode);
3882}
3883
644459d0 3884/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3885 struct attribute_spec.handler. */
3886static tree
3887spu_handle_fndecl_attribute (tree * node,
3888 tree name,
3889 tree args ATTRIBUTE_UNUSED,
3890 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3891{
3892 if (TREE_CODE (*node) != FUNCTION_DECL)
3893 {
67a779df 3894 warning (0, "%qE attribute only applies to functions",
3895 name);
644459d0 3896 *no_add_attrs = true;
3897 }
3898
3899 return NULL_TREE;
3900}
3901
3902/* Handle the "vector" attribute. */
3903static tree
3904spu_handle_vector_attribute (tree * node, tree name,
3905 tree args ATTRIBUTE_UNUSED,
3906 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3907{
3908 tree type = *node, result = NULL_TREE;
3909 enum machine_mode mode;
3910 int unsigned_p;
3911
3912 while (POINTER_TYPE_P (type)
3913 || TREE_CODE (type) == FUNCTION_TYPE
3914 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3915 type = TREE_TYPE (type);
3916
3917 mode = TYPE_MODE (type);
3918
3919 unsigned_p = TYPE_UNSIGNED (type);
3920 switch (mode)
3921 {
3922 case DImode:
3923 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3924 break;
3925 case SImode:
3926 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3927 break;
3928 case HImode:
3929 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3930 break;
3931 case QImode:
3932 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3933 break;
3934 case SFmode:
3935 result = V4SF_type_node;
3936 break;
3937 case DFmode:
3938 result = V2DF_type_node;
3939 break;
3940 default:
3941 break;
3942 }
3943
3944 /* Propagate qualifiers attached to the element type
3945 onto the vector type. */
3946 if (result && result != type && TYPE_QUALS (type))
3947 result = build_qualified_type (result, TYPE_QUALS (type));
3948
3949 *no_add_attrs = true; /* No need to hang on to the attribute. */
3950
3951 if (!result)
67a779df 3952 warning (0, "%qE attribute ignored", name);
644459d0 3953 else
d991e6e8 3954 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3955
3956 return NULL_TREE;
3957}
3958
f2b32076 3959/* Return nonzero if FUNC is a naked function. */
644459d0 3960static int
3961spu_naked_function_p (tree func)
3962{
3963 tree a;
3964
3965 if (TREE_CODE (func) != FUNCTION_DECL)
3966 abort ();
3967
3968 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3969 return a != NULL_TREE;
3970}
3971
3972int
3973spu_initial_elimination_offset (int from, int to)
3974{
3975 int saved_regs_size = spu_saved_regs_size ();
3976 int sp_offset = 0;
abe32cce 3977 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3978 || get_frame_size () || saved_regs_size)
3979 sp_offset = STACK_POINTER_OFFSET;
3980 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3981 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3982 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3983 return get_frame_size ();
644459d0 3984 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3985 return sp_offset + crtl->outgoing_args_size
644459d0 3986 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3987 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3988 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3989 else
3990 gcc_unreachable ();
644459d0 3991}
3992
3993rtx
fb80456a 3994spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3995{
3996 enum machine_mode mode = TYPE_MODE (type);
3997 int byte_size = ((mode == BLKmode)
3998 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3999
4000 /* Make sure small structs are left justified in a register. */
4001 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4002 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4003 {
4004 enum machine_mode smode;
4005 rtvec v;
4006 int i;
4007 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4008 int n = byte_size / UNITS_PER_WORD;
4009 v = rtvec_alloc (nregs);
4010 for (i = 0; i < n; i++)
4011 {
4012 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4013 gen_rtx_REG (TImode,
4014 FIRST_RETURN_REGNUM
4015 + i),
4016 GEN_INT (UNITS_PER_WORD * i));
4017 byte_size -= UNITS_PER_WORD;
4018 }
4019
4020 if (n < nregs)
4021 {
4022 if (byte_size < 4)
4023 byte_size = 4;
4024 smode =
4025 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4026 RTVEC_ELT (v, n) =
4027 gen_rtx_EXPR_LIST (VOIDmode,
4028 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4029 GEN_INT (UNITS_PER_WORD * n));
4030 }
4031 return gen_rtx_PARALLEL (mode, v);
4032 }
4033 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4034}
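
/* Example (illustrative): returning a 20-byte struct builds a PARALLEL of a
   TImode piece in the first return register at offset 0 and an SImode piece
   in the next register at offset 16, keeping the aggregate left-justified
   as described above.  */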
4035
ee9034d4 4036static rtx
4037spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4038 enum machine_mode mode,
ee9034d4 4039 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4040{
4041 int byte_size;
4042
a08c5dd0 4043 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4044 return 0;
4045
4046 byte_size = ((mode == BLKmode)
4047 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4048
4049 /* The ABI does not allow parameters to be passed partially in
4050 reg and partially in stack. */
a08c5dd0 4051 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4052 return 0;
4053
4054 /* Make sure small structs are left justified in a register. */
4055 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4056 && byte_size < UNITS_PER_WORD && byte_size > 0)
4057 {
4058 enum machine_mode smode;
4059 rtx gr_reg;
4060 if (byte_size < 4)
4061 byte_size = 4;
4062 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4063 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4064 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4065 const0_rtx);
4066 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4067 }
4068 else
a08c5dd0 4069 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4070}
4071
ee9034d4 4072static void
4073spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4074 const_tree type, bool named ATTRIBUTE_UNUSED)
4075{
4076 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4077 ? 1
4078 : mode == BLKmode
4079 ? ((int_size_in_bytes (type) + 15) / 16)
4080 : mode == VOIDmode
4081 ? 1
4082 : HARD_REGNO_NREGS (cum, mode));
4083}
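
/* For instance (illustrative): a 40-byte BLKmode struct advances *cum by
   (40 + 15) / 16 = 3 argument registers, while a scalar that fits in a
   single register advances it by 1.  */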
4084
644459d0 4085/* Variable sized types are passed by reference. */
4086static bool
4087spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4088 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4089 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4090{
4091 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4092}
4093\f
4094
4095/* Var args. */
4096
4097/* Create and return the va_list datatype.
4098
4099 On SPU, va_list is an array type equivalent to
4100
4101 typedef struct __va_list_tag
4102 {
4103 void *__args __attribute__((__aligned(16)));
4104 void *__skip __attribute__((__aligned(16)));
4105
4106 } va_list[1];
4107
fa7637bd 4108 where __args points to the arg that will be returned by the next
644459d0 4109 va_arg(), and __skip points to the previous stack frame such that
4110 when __args == __skip we should advance __args by 32 bytes. */
4111static tree
4112spu_build_builtin_va_list (void)
4113{
4114 tree f_args, f_skip, record, type_decl;
4115 bool owp;
4116
4117 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4118
4119 type_decl =
54e46243 4120 build_decl (BUILTINS_LOCATION,
4121 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4122
54e46243 4123 f_args = build_decl (BUILTINS_LOCATION,
4124 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4125 f_skip = build_decl (BUILTINS_LOCATION,
4126 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4127
4128 DECL_FIELD_CONTEXT (f_args) = record;
4129 DECL_ALIGN (f_args) = 128;
4130 DECL_USER_ALIGN (f_args) = 1;
4131
4132 DECL_FIELD_CONTEXT (f_skip) = record;
4133 DECL_ALIGN (f_skip) = 128;
4134 DECL_USER_ALIGN (f_skip) = 1;
4135
bc907808 4136 TYPE_STUB_DECL (record) = type_decl;
644459d0 4137 TYPE_NAME (record) = type_decl;
4138 TYPE_FIELDS (record) = f_args;
1767a056 4139 DECL_CHAIN (f_args) = f_skip;
644459d0 4140
 4141  /* We know this is being padded and we want it that way.  It is an
 4142     internal type, so hide the warnings from the user. */
4143 owp = warn_padded;
4144 warn_padded = false;
4145
4146 layout_type (record);
4147
4148 warn_padded = owp;
4149
4150 /* The correct type is an array type of one element. */
4151 return build_array_type (record, build_index_type (size_zero_node));
4152}
4153
4154/* Implement va_start by filling the va_list structure VALIST.
4155 NEXTARG points to the first anonymous stack argument.
4156
4157 The following global variables are used to initialize
4158 the va_list structure:
4159
abe32cce 4160 crtl->args.info;
644459d0 4161 the CUMULATIVE_ARGS for this function
4162
abe32cce 4163 crtl->args.arg_offset_rtx:
644459d0 4164 holds the offset of the first anonymous stack argument
4165 (relative to the virtual arg pointer). */
4166
8a58ed0a 4167static void
644459d0 4168spu_va_start (tree valist, rtx nextarg)
4169{
4170 tree f_args, f_skip;
4171 tree args, skip, t;
4172
4173 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4174 f_skip = DECL_CHAIN (f_args);
644459d0 4175
170efcd4 4176 valist = build_simple_mem_ref (valist);
644459d0 4177 args =
4178 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4179 skip =
4180 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4181
4182 /* Find the __args area. */
4183 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4184 if (crtl->args.pretend_args_size > 0)
0de36bdb 4185 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4186 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4187 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4188 TREE_SIDE_EFFECTS (t) = 1;
4189 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4190
4191 /* Find the __skip area. */
4192 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4193 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4194 size_int (crtl->args.pretend_args_size
0de36bdb 4195 - STACK_POINTER_OFFSET));
75a70cf9 4196 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4197 TREE_SIDE_EFFECTS (t) = 1;
4198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4199}
4200
4201/* Gimplify va_arg by updating the va_list structure
4202 VALIST as required to retrieve an argument of type
4203 TYPE, and returning that argument.
4204
4205 ret = va_arg(VALIST, TYPE);
4206
4207 generates code equivalent to:
4208
4209 paddedsize = (sizeof(TYPE) + 15) & -16;
4210 if (VALIST.__args + paddedsize > VALIST.__skip
4211 && VALIST.__args <= VALIST.__skip)
4212 addr = VALIST.__skip + 32;
4213 else
4214 addr = VALIST.__args;
4215 VALIST.__args = addr + paddedsize;
4216 ret = *(TYPE *)addr;
4217 */
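/* Illustrative walk-through: successive va_arg calls normally just advance
   __args by the 16-byte-padded size of each argument; the extra 32-byte hop
   is taken only when the next argument would cross __skip, matching the
   "addr = VALIST.__skip + 32" branch in the pseudo-code above.  */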
4218static tree
75a70cf9 4219spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4220 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4221{
4222 tree f_args, f_skip;
4223 tree args, skip;
4224 HOST_WIDE_INT size, rsize;
4225 tree paddedsize, addr, tmp;
4226 bool pass_by_reference_p;
4227
4228 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4229 f_skip = DECL_CHAIN (f_args);
644459d0 4230
182cf5a9 4231 valist = build_simple_mem_ref (valist);
644459d0 4232 args =
4233 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4234 skip =
4235 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4236
4237 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4238
4239 /* if an object is dynamically sized, a pointer to it is passed
4240 instead of the object itself. */
4241 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4242 false);
4243 if (pass_by_reference_p)
4244 type = build_pointer_type (type);
4245 size = int_size_in_bytes (type);
4246 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4247
4248 /* build conditional expression to calculate addr. The expression
4249 will be gimplified later. */
0de36bdb 4250 paddedsize = size_int (rsize);
75a70cf9 4251 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4252 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4253 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4254 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4255 unshare_expr (skip)));
644459d0 4256
4257 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4258 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4259 size_int (32)), unshare_expr (args));
644459d0 4260
75a70cf9 4261 gimplify_assign (addr, tmp, pre_p);
644459d0 4262
4263 /* update VALIST.__args */
0de36bdb 4264 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4265 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4266
8115f0af 4267 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4268 addr);
644459d0 4269
4270 if (pass_by_reference_p)
4271 addr = build_va_arg_indirect_ref (addr);
4272
4273 return build_va_arg_indirect_ref (addr);
4274}
4275
4276/* Save parameter registers starting with the register that corresponds
 4277   to the first unnamed parameter.  If the first unnamed parameter is
4278 in the stack then save no registers. Set pretend_args_size to the
4279 amount of space needed to save the registers. */
4280void
4281spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4282 tree type, int *pretend_size, int no_rtl)
4283{
4284 if (!no_rtl)
4285 {
4286 rtx tmp;
4287 int regno;
4288 int offset;
4289 int ncum = *cum;
4290
4291 /* cum currently points to the last named argument, we want to
4292 start at the next argument. */
ee9034d4 4293 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4294
4295 offset = -STACK_POINTER_OFFSET;
4296 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4297 {
4298 tmp = gen_frame_mem (V4SImode,
4299 plus_constant (virtual_incoming_args_rtx,
4300 offset));
4301 emit_move_insn (tmp,
4302 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4303 offset += 16;
4304 }
4305 *pretend_size = offset + STACK_POINTER_OFFSET;
4306 }
4307}
4308\f
b2d7ede1 4309static void
644459d0 4310spu_conditional_register_usage (void)
4311{
4312 if (flag_pic)
4313 {
4314 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4315 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4316 }
644459d0 4317}
4318
9d98604b 4319/* This is called whenever we need to know whether a register used in an
 4320   address is known to be 16-byte (128-bit) aligned. */
644459d0 4321static int
9d98604b 4322reg_aligned_for_addr (rtx x)
644459d0 4323{
9d98604b 4324 int regno =
4325 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4326 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4327}
4328
69ced2d6 4329/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4330 into its SYMBOL_REF_FLAGS. */
4331static void
4332spu_encode_section_info (tree decl, rtx rtl, int first)
4333{
4334 default_encode_section_info (decl, rtl, first);
4335
4336 /* If a variable has a forced alignment to < 16 bytes, mark it with
4337 SYMBOL_FLAG_ALIGN1. */
4338 if (TREE_CODE (decl) == VAR_DECL
4339 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4340 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4341}
4342
644459d0 4343/* Return TRUE if we are certain the mem refers to a complete object
4344 which is both 16-byte aligned and padded to a 16-byte boundary. This
4345 would make it safe to store with a single instruction.
4346 We guarantee the alignment and padding for static objects by aligning
4347 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4348 FIXME: We currently cannot guarantee this for objects on the stack
4349 because assign_parm_setup_stack calls assign_stack_local with the
4350 alignment of the parameter mode and in that case the alignment never
4351 gets adjusted by LOCAL_ALIGNMENT. */
4352static int
4353store_with_one_insn_p (rtx mem)
4354{
9d98604b 4355 enum machine_mode mode = GET_MODE (mem);
644459d0 4356 rtx addr = XEXP (mem, 0);
9d98604b 4357 if (mode == BLKmode)
644459d0 4358 return 0;
9d98604b 4359 if (GET_MODE_SIZE (mode) >= 16)
4360 return 1;
644459d0 4361 /* Only static objects. */
4362 if (GET_CODE (addr) == SYMBOL_REF)
4363 {
4364 /* We use the associated declaration to make sure the access is
fa7637bd 4365 referring to the whole object.
644459d0 4366	 We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4367 if it is necessary. Will there be cases where one exists, and
4368 the other does not? Will there be cases where both exist, but
4369 have different types? */
4370 tree decl = MEM_EXPR (mem);
4371 if (decl
4372 && TREE_CODE (decl) == VAR_DECL
4373 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4374 return 1;
4375 decl = SYMBOL_REF_DECL (addr);
4376 if (decl
4377 && TREE_CODE (decl) == VAR_DECL
4378 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4379 return 1;
4380 }
4381 return 0;
4382}
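
/* Example (illustrative): a store to "static int counter;" qualifies, since
   static objects are 16-byte aligned and padded as described above, so a
   single stqd suffices; a 4-byte store through an arbitrary pointer does
   not, and goes through the read-modify-write sequence in spu_split_store
   below.  */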
4383
9d98604b 4384/* Return 1 when the address is not valid for a simple load and store as
4385 required by the '_mov*' patterns. We could make this less strict
4386 for loads, but we prefer mem's to look the same so they are more
4387 likely to be merged. */
4388static int
4389address_needs_split (rtx mem)
4390{
4391 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4392 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4393 || !(store_with_one_insn_p (mem)
4394 || mem_is_padded_component_ref (mem))))
4395 return 1;
4396
4397 return 0;
4398}
4399
6cf5579e 4400static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4401static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4402static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4403
4404/* MEM is known to be an __ea qualified memory access. Emit a call to
4405 fetch the ppu memory to local store, and return its address in local
4406 store. */
4407
4408static void
4409ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4410{
4411 if (is_store)
4412 {
4413 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4414 if (!cache_fetch_dirty)
4415 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4416 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4417 2, ea_addr, EAmode, ndirty, SImode);
4418 }
4419 else
4420 {
4421 if (!cache_fetch)
4422 cache_fetch = init_one_libfunc ("__cache_fetch");
4423 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4424 1, ea_addr, EAmode);
4425 }
4426}
4427
4428/* Like ea_load_store, but do the cache tag comparison and, for stores,
4429 dirty bit marking, inline.
4430
4431 The cache control data structure is an array of
4432
4433 struct __cache_tag_array
4434 {
4435 unsigned int tag_lo[4];
4436 unsigned int tag_hi[4];
4437 void *data_pointer[4];
4438 int reserved[4];
4439 vector unsigned short dirty_bits[4];
4440 } */
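/* Worked example (illustrative, assuming __cache_tag_array_size == 0x1000):
   for ea_addr 0x12345 the code below computes
   tag_index = 0x12345 & (0x1000 - 128) = 0x300,
   block_off = 0x12345 & 127 = 0x45, and compares the tag
   0x12345 & -128 = 0x12300 against the four ways of that set.  */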
4441
4442static void
4443ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4444{
4445 rtx ea_addr_si;
4446 HOST_WIDE_INT v;
4447 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4448 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4449 rtx index_mask = gen_reg_rtx (SImode);
4450 rtx tag_arr = gen_reg_rtx (Pmode);
4451 rtx splat_mask = gen_reg_rtx (TImode);
4452 rtx splat = gen_reg_rtx (V4SImode);
4453 rtx splat_hi = NULL_RTX;
4454 rtx tag_index = gen_reg_rtx (Pmode);
4455 rtx block_off = gen_reg_rtx (SImode);
4456 rtx tag_addr = gen_reg_rtx (Pmode);
4457 rtx tag = gen_reg_rtx (V4SImode);
4458 rtx cache_tag = gen_reg_rtx (V4SImode);
4459 rtx cache_tag_hi = NULL_RTX;
4460 rtx cache_ptrs = gen_reg_rtx (TImode);
4461 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4462 rtx tag_equal = gen_reg_rtx (V4SImode);
4463 rtx tag_equal_hi = NULL_RTX;
4464 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4465 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4466 rtx eq_index = gen_reg_rtx (SImode);
4467 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4468
4469 if (spu_ea_model != 32)
4470 {
4471 splat_hi = gen_reg_rtx (V4SImode);
4472 cache_tag_hi = gen_reg_rtx (V4SImode);
4473 tag_equal_hi = gen_reg_rtx (V4SImode);
4474 }
4475
4476 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4477 emit_move_insn (tag_arr, tag_arr_sym);
4478 v = 0x0001020300010203LL;
4479 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4480 ea_addr_si = ea_addr;
4481 if (spu_ea_model != 32)
4482 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4483
4484 /* tag_index = ea_addr & (tag_array_size - 128) */
4485 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4486
4487 /* splat ea_addr to all 4 slots. */
4488 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4489 /* Similarly for high 32 bits of ea_addr. */
4490 if (spu_ea_model != 32)
4491 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4492
4493 /* block_off = ea_addr & 127 */
4494 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4495
4496 /* tag_addr = tag_arr + tag_index */
4497 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4498
4499 /* Read cache tags. */
4500 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4501 if (spu_ea_model != 32)
4502 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4503 plus_constant (tag_addr, 16)));
4504
4505 /* tag = ea_addr & -128 */
4506 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4507
4508 /* Read all four cache data pointers. */
4509 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4510 plus_constant (tag_addr, 32)));
4511
4512 /* Compare tags. */
4513 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4514 if (spu_ea_model != 32)
4515 {
4516 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4517 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4518 }
4519
4520 /* At most one of the tags compare equal, so tag_equal has one
4521 32-bit slot set to all 1's, with the other slots all zero.
 4522      gbb picks off the low bit of each byte in the 128-bit registers,
4523 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4524 we have a hit. */
4525 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4526 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4527
4528 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4529 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4530
 4531  /* This allows us to rotate the corresponding cache data pointer into
 4532     slot 0 (rotating by eq_index mod 16 bytes). */
4533 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4534 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4535
4536 /* Add block offset to form final data address. */
4537 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4538
 4539  /* Branch to the hit code if one of the tags matched. */
4540 hit_label = gen_label_rtx ();
4541 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4542 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4543 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4544 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4545 hit_ref, pc_rtx)));
4546 /* Say that this branch is very likely to happen. */
4547 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4548 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4549
4550 ea_load_store (mem, is_store, ea_addr, data_addr);
4551 cont_label = gen_label_rtx ();
4552 emit_jump_insn (gen_jump (cont_label));
4553 emit_barrier ();
4554
4555 emit_label (hit_label);
4556
4557 if (is_store)
4558 {
4559 HOST_WIDE_INT v_hi;
4560 rtx dirty_bits = gen_reg_rtx (TImode);
4561 rtx dirty_off = gen_reg_rtx (SImode);
4562 rtx dirty_128 = gen_reg_rtx (TImode);
4563 rtx neg_block_off = gen_reg_rtx (SImode);
4564
4565 /* Set up mask with one dirty bit per byte of the mem we are
4566 writing, starting from top bit. */
4567 v_hi = v = -1;
4568 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4569 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4570 {
4571 v_hi = v;
4572 v = 0;
4573 }
4574 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4575
4576 /* Form index into cache dirty_bits. eq_index is one of
4577 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4578 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4579 offset to each of the four dirty_bits elements. */
4580 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4581
4582 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4583
4584 /* Rotate bit mask to proper bit. */
4585 emit_insn (gen_negsi2 (neg_block_off, block_off));
4586 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4587 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4588
4589 /* Or in the new dirty bits. */
4590 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4591
4592 /* Store. */
4593 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4594 }
4595
4596 emit_label (cont_label);
4597}
4598
4599static rtx
4600expand_ea_mem (rtx mem, bool is_store)
4601{
4602 rtx ea_addr;
4603 rtx data_addr = gen_reg_rtx (Pmode);
4604 rtx new_mem;
4605
4606 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4607 if (optimize_size || optimize == 0)
4608 ea_load_store (mem, is_store, ea_addr, data_addr);
4609 else
4610 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4611
4612 if (ea_alias_set == -1)
4613 ea_alias_set = new_alias_set ();
4614
4615 /* We generate a new MEM RTX to refer to the copy of the data
4616 in the cache. We do not copy memory attributes (except the
4617 alignment) from the original MEM, as they may no longer apply
4618 to the cache copy. */
4619 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4620 set_mem_alias_set (new_mem, ea_alias_set);
4621 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4622
4623 return new_mem;
4624}
4625
644459d0 4626int
4627spu_expand_mov (rtx * ops, enum machine_mode mode)
4628{
4629 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4630 {
4631 /* Perform the move in the destination SUBREG's inner mode. */
4632 ops[0] = SUBREG_REG (ops[0]);
4633 mode = GET_MODE (ops[0]);
4634 ops[1] = gen_lowpart_common (mode, ops[1]);
4635 gcc_assert (ops[1]);
4636 }
644459d0 4637
4638 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4639 {
4640 rtx from = SUBREG_REG (ops[1]);
8d72495d 4641 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4642
4643 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4644 && GET_MODE_CLASS (imode) == MODE_INT
4645 && subreg_lowpart_p (ops[1]));
4646
4647 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4648 imode = SImode;
4649 if (imode != GET_MODE (from))
4650 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4651
4652 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4653 {
d6bf3b14 4654 enum insn_code icode = convert_optab_handler (trunc_optab,
4655 mode, imode);
644459d0 4656 emit_insn (GEN_FCN (icode) (ops[0], from));
4657 }
4658 else
4659 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4660 return 1;
4661 }
4662
4663 /* At least one of the operands needs to be a register. */
4664 if ((reload_in_progress | reload_completed) == 0
4665 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4666 {
4667 rtx temp = force_reg (mode, ops[1]);
4668 emit_move_insn (ops[0], temp);
4669 return 1;
4670 }
4671 if (reload_in_progress || reload_completed)
4672 {
dea01258 4673 if (CONSTANT_P (ops[1]))
4674 return spu_split_immediate (ops);
644459d0 4675 return 0;
4676 }
9d98604b 4677
4678 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4679 extend them. */
4680 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4681 {
9d98604b 4682 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4683 if (val != INTVAL (ops[1]))
644459d0 4684 {
9d98604b 4685 emit_move_insn (ops[0], GEN_INT (val));
4686 return 1;
644459d0 4687 }
4688 }
9d98604b 4689 if (MEM_P (ops[0]))
6cf5579e 4690 {
4691 if (MEM_ADDR_SPACE (ops[0]))
4692 ops[0] = expand_ea_mem (ops[0], true);
4693 return spu_split_store (ops);
4694 }
9d98604b 4695 if (MEM_P (ops[1]))
6cf5579e 4696 {
4697 if (MEM_ADDR_SPACE (ops[1]))
4698 ops[1] = expand_ea_mem (ops[1], false);
4699 return spu_split_load (ops);
4700 }
9d98604b 4701
644459d0 4702 return 0;
4703}
4704
9d98604b 4705static void
4706spu_convert_move (rtx dst, rtx src)
644459d0 4707{
9d98604b 4708 enum machine_mode mode = GET_MODE (dst);
4709 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4710 rtx reg;
4711 gcc_assert (GET_MODE (src) == TImode);
4712 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4713 emit_insn (gen_rtx_SET (VOIDmode, reg,
4714 gen_rtx_TRUNCATE (int_mode,
4715 gen_rtx_LSHIFTRT (TImode, src,
4716 GEN_INT (int_mode == DImode ? 64 : 96)))));
4717 if (int_mode != mode)
4718 {
4719 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4720 emit_move_insn (dst, reg);
4721 }
4722}
644459d0 4723
9d98604b 4724/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4725 the address from SRC and SRC+16. Return a REG or CONST_INT that
4726 specifies how many bytes to rotate the loaded registers, plus any
4727 extra from EXTRA_ROTQBY. The address and rotate amounts are
4728 normalized to improve merging of loads and rotate computations. */
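/* Illustrative example: an SImode load from (plus (reg) (const_int 6)) with
   a 16-byte-aligned base register performs one aligned TImode load from the
   base (lqd ignores the low offset bits anyway) and returns 6 plus
   EXTRA_ROTQBY as the rotate amount, which the caller then feeds to a
   rotqby to bring the wanted word into the preferred slot.  */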
4729static rtx
4730spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4731{
4732 rtx addr = XEXP (src, 0);
4733 rtx p0, p1, rot, addr0, addr1;
4734 int rot_amt;
644459d0 4735
4736 rot = 0;
4737 rot_amt = 0;
9d98604b 4738
4739 if (MEM_ALIGN (src) >= 128)
4740 /* Address is already aligned; simply perform a TImode load. */ ;
4741 else if (GET_CODE (addr) == PLUS)
644459d0 4742 {
4743 /* 8 cases:
4744 aligned reg + aligned reg => lqx
4745 aligned reg + unaligned reg => lqx, rotqby
4746 aligned reg + aligned const => lqd
4747 aligned reg + unaligned const => lqd, rotqbyi
4748 unaligned reg + aligned reg => lqx, rotqby
4749 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4750 unaligned reg + aligned const => lqd, rotqby
4751 unaligned reg + unaligned const -> not allowed by legitimate address
4752 */
4753 p0 = XEXP (addr, 0);
4754 p1 = XEXP (addr, 1);
9d98604b 4755 if (!reg_aligned_for_addr (p0))
644459d0 4756 {
9d98604b 4757 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4758 {
9d98604b 4759 rot = gen_reg_rtx (SImode);
4760 emit_insn (gen_addsi3 (rot, p0, p1));
4761 }
4762 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4763 {
4764 if (INTVAL (p1) > 0
4765 && REG_POINTER (p0)
4766 && INTVAL (p1) * BITS_PER_UNIT
4767 < REGNO_POINTER_ALIGN (REGNO (p0)))
4768 {
4769 rot = gen_reg_rtx (SImode);
4770 emit_insn (gen_addsi3 (rot, p0, p1));
4771 addr = p0;
4772 }
4773 else
4774 {
4775 rtx x = gen_reg_rtx (SImode);
4776 emit_move_insn (x, p1);
4777 if (!spu_arith_operand (p1, SImode))
4778 p1 = x;
4779 rot = gen_reg_rtx (SImode);
4780 emit_insn (gen_addsi3 (rot, p0, p1));
4781 addr = gen_rtx_PLUS (Pmode, p0, x);
4782 }
644459d0 4783 }
4784 else
4785 rot = p0;
4786 }
4787 else
4788 {
4789 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4790 {
4791 rot_amt = INTVAL (p1) & 15;
9d98604b 4792 if (INTVAL (p1) & -16)
4793 {
4794 p1 = GEN_INT (INTVAL (p1) & -16);
4795 addr = gen_rtx_PLUS (SImode, p0, p1);
4796 }
4797 else
4798 addr = p0;
644459d0 4799 }
9d98604b 4800 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4801 rot = p1;
4802 }
4803 }
9d98604b 4804 else if (REG_P (addr))
644459d0 4805 {
9d98604b 4806 if (!reg_aligned_for_addr (addr))
644459d0 4807 rot = addr;
4808 }
4809 else if (GET_CODE (addr) == CONST)
4810 {
4811 if (GET_CODE (XEXP (addr, 0)) == PLUS
4812 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4813 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4814 {
4815 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4816 if (rot_amt & -16)
4817 addr = gen_rtx_CONST (Pmode,
4818 gen_rtx_PLUS (Pmode,
4819 XEXP (XEXP (addr, 0), 0),
4820 GEN_INT (rot_amt & -16)));
4821 else
4822 addr = XEXP (XEXP (addr, 0), 0);
4823 }
4824 else
9d98604b 4825 {
4826 rot = gen_reg_rtx (Pmode);
4827 emit_move_insn (rot, addr);
4828 }
644459d0 4829 }
4830 else if (GET_CODE (addr) == CONST_INT)
4831 {
4832 rot_amt = INTVAL (addr);
4833 addr = GEN_INT (rot_amt & -16);
4834 }
4835 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4836 {
4837 rot = gen_reg_rtx (Pmode);
4838 emit_move_insn (rot, addr);
4839 }
644459d0 4840
9d98604b 4841 rot_amt += extra_rotby;
644459d0 4842
4843 rot_amt &= 15;
4844
4845 if (rot && rot_amt)
4846 {
9d98604b 4847 rtx x = gen_reg_rtx (SImode);
4848 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4849 rot = x;
644459d0 4850 rot_amt = 0;
4851 }
9d98604b 4852 if (!rot && rot_amt)
4853 rot = GEN_INT (rot_amt);
4854
4855 addr0 = copy_rtx (addr);
4856 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4857 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4858
4859 if (dst1)
4860 {
4861 addr1 = plus_constant (copy_rtx (addr), 16);
4862 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4863 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4864 }
644459d0 4865
9d98604b 4866 return rot;
4867}
4868
4869int
4870spu_split_load (rtx * ops)
4871{
4872 enum machine_mode mode = GET_MODE (ops[0]);
4873 rtx addr, load, rot;
4874 int rot_amt;
644459d0 4875
9d98604b 4876 if (GET_MODE_SIZE (mode) >= 16)
4877 return 0;
644459d0 4878
9d98604b 4879 addr = XEXP (ops[1], 0);
4880 gcc_assert (GET_CODE (addr) != AND);
4881
4882 if (!address_needs_split (ops[1]))
4883 {
4884 ops[1] = change_address (ops[1], TImode, addr);
4885 load = gen_reg_rtx (TImode);
4886 emit_insn (gen__movti (load, ops[1]));
4887 spu_convert_move (ops[0], load);
4888 return 1;
4889 }
4890
4891 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4892
4893 load = gen_reg_rtx (TImode);
4894 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4895
4896 if (rot)
4897 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4898
9d98604b 4899 spu_convert_move (ops[0], load);
4900 return 1;
644459d0 4901}
4902
9d98604b 4903int
644459d0 4904spu_split_store (rtx * ops)
4905{
4906 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4907 rtx reg;
644459d0 4908 rtx addr, p0, p1, p1_lo, smem;
4909 int aform;
4910 int scalar;
4911
9d98604b 4912 if (GET_MODE_SIZE (mode) >= 16)
4913 return 0;
4914
644459d0 4915 addr = XEXP (ops[0], 0);
9d98604b 4916 gcc_assert (GET_CODE (addr) != AND);
4917
4918 if (!address_needs_split (ops[0]))
4919 {
4920 reg = gen_reg_rtx (TImode);
4921 emit_insn (gen_spu_convert (reg, ops[1]));
4922 ops[0] = change_address (ops[0], TImode, addr);
4923 emit_move_insn (ops[0], reg);
4924 return 1;
4925 }
644459d0 4926
4927 if (GET_CODE (addr) == PLUS)
4928 {
4929 /* 8 cases:
4930 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4931 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4932 aligned reg + aligned const => lqd, c?d, shuf, stqx
4933 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4934 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4935 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4936 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4937 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4938 */
4939 aform = 0;
4940 p0 = XEXP (addr, 0);
4941 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4942 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4943 {
4944 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4945 if (reg_aligned_for_addr (p0))
4946 {
4947 p1 = GEN_INT (INTVAL (p1) & -16);
4948 if (p1 == const0_rtx)
4949 addr = p0;
4950 else
4951 addr = gen_rtx_PLUS (SImode, p0, p1);
4952 }
4953 else
4954 {
4955 rtx x = gen_reg_rtx (SImode);
4956 emit_move_insn (x, p1);
4957 addr = gen_rtx_PLUS (SImode, p0, x);
4958 }
644459d0 4959 }
4960 }
9d98604b 4961 else if (REG_P (addr))
644459d0 4962 {
4963 aform = 0;
4964 p0 = addr;
4965 p1 = p1_lo = const0_rtx;
4966 }
4967 else
4968 {
4969 aform = 1;
4970 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4971 p1 = 0; /* aform doesn't use p1 */
4972 p1_lo = addr;
4973 if (ALIGNED_SYMBOL_REF_P (addr))
4974 p1_lo = const0_rtx;
9d98604b 4975 else if (GET_CODE (addr) == CONST
4976 && GET_CODE (XEXP (addr, 0)) == PLUS
4977 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4978 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4979 {
9d98604b 4980 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4981 if ((v & -16) != 0)
4982 addr = gen_rtx_CONST (Pmode,
4983 gen_rtx_PLUS (Pmode,
4984 XEXP (XEXP (addr, 0), 0),
4985 GEN_INT (v & -16)));
4986 else
4987 addr = XEXP (XEXP (addr, 0), 0);
4988 p1_lo = GEN_INT (v & 15);
644459d0 4989 }
4990 else if (GET_CODE (addr) == CONST_INT)
4991 {
4992 p1_lo = GEN_INT (INTVAL (addr) & 15);
4993 addr = GEN_INT (INTVAL (addr) & -16);
4994 }
9d98604b 4995 else
4996 {
4997 p1_lo = gen_reg_rtx (SImode);
4998 emit_move_insn (p1_lo, addr);
4999 }
644459d0 5000 }
5001
4cbad5bb 5002 gcc_assert (aform == 0 || aform == 1);
9d98604b 5003 reg = gen_reg_rtx (TImode);
e04cf423 5004
644459d0 5005 scalar = store_with_one_insn_p (ops[0]);
5006 if (!scalar)
5007 {
 5008      /* We could copy the flags from the ops[0] MEM to mem here.
5009 We don't because we want this load to be optimized away if
5010 possible, and copying the flags will prevent that in certain
5011 cases, e.g. consider the volatile flag. */
5012
9d98604b 5013 rtx pat = gen_reg_rtx (TImode);
e04cf423 5014 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5015 set_mem_alias_set (lmem, 0);
5016 emit_insn (gen_movti (reg, lmem));
644459d0 5017
9d98604b 5018 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5019 p0 = stack_pointer_rtx;
5020 if (!p1_lo)
5021 p1_lo = const0_rtx;
5022
5023 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5024 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5025 }
644459d0 5026 else
5027 {
5028 if (GET_CODE (ops[1]) == REG)
5029 emit_insn (gen_spu_convert (reg, ops[1]));
5030 else if (GET_CODE (ops[1]) == SUBREG)
5031 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5032 else
5033 abort ();
5034 }
5035
5036 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5037 emit_insn (gen_ashlti3
5038 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5039
9d98604b 5040 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5041 /* We can't use the previous alias set because the memory has changed
5042 size and can potentially overlap objects of other types. */
5043 set_mem_alias_set (smem, 0);
5044
e04cf423 5045 emit_insn (gen_movti (smem, reg));
9d98604b 5046 return 1;
644459d0 5047}
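
/* Putting it together (illustrative): storing one byte through an unaligned
   pointer expands to roughly lqd (load the enclosing quadword), cbd (build
   the insertion control from the address), shufb (merge the byte in) and a
   final quadword store, the same load/insert/store shape listed for the
   reg+offset cases above.  */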
5048
 5049/* Return TRUE if X is a MEM which is a struct member reference
5050 and the member can safely be loaded and stored with a single
5051 instruction because it is padded. */
5052static int
5053mem_is_padded_component_ref (rtx x)
5054{
5055 tree t = MEM_EXPR (x);
5056 tree r;
5057 if (!t || TREE_CODE (t) != COMPONENT_REF)
5058 return 0;
5059 t = TREE_OPERAND (t, 1);
5060 if (!t || TREE_CODE (t) != FIELD_DECL
5061 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5062 return 0;
5063 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5064 r = DECL_FIELD_CONTEXT (t);
5065 if (!r || TREE_CODE (r) != RECORD_TYPE)
5066 return 0;
5067 /* Make sure they are the same mode */
5068 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5069 return 0;
 5070   /* If there are no following fields, then the field's alignment ensures
fa7637bd 5071      the structure is padded out to that alignment, which means this
 5072      field is padded too. */
644459d0 5073 if (TREE_CHAIN (t) == 0)
5074 return 1;
5075 /* If the following field is also aligned then this field will be
5076 padded. */
5077 t = TREE_CHAIN (t);
5078 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5079 return 1;
5080 return 0;
5081}
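
/* Example (illustrative): for
   "struct { int a __attribute__ ((aligned (16)));
             char b __attribute__ ((aligned (16))); } s;"
   a reference to s.a qualifies: the field is 16-byte aligned and so is the
   following field, so the bytes after a are guaranteed padding.  A field
   with only the default 4-byte alignment never qualifies.  */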
5082
c7b91b14 5083/* Parse the -mfixed-range= option string. */
5084static void
5085fix_range (const char *const_str)
5086{
5087 int i, first, last;
5088 char *str, *dash, *comma;
5089
 5090  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5091 REG2 are either register names or register numbers. The effect
5092 of this option is to mark the registers in the range from REG1 to
5093 REG2 as ``fixed'' so they won't be used by the compiler. */
5094
5095 i = strlen (const_str);
5096 str = (char *) alloca (i + 1);
5097 memcpy (str, const_str, i + 1);
5098
5099 while (1)
5100 {
5101 dash = strchr (str, '-');
5102 if (!dash)
5103 {
5104 warning (0, "value of -mfixed-range must have form REG1-REG2");
5105 return;
5106 }
5107 *dash = '\0';
5108 comma = strchr (dash + 1, ',');
5109 if (comma)
5110 *comma = '\0';
5111
5112 first = decode_reg_name (str);
5113 if (first < 0)
5114 {
5115 warning (0, "unknown register name: %s", str);
5116 return;
5117 }
5118
5119 last = decode_reg_name (dash + 1);
5120 if (last < 0)
5121 {
5122 warning (0, "unknown register name: %s", dash + 1);
5123 return;
5124 }
5125
5126 *dash = '-';
5127
5128 if (first > last)
5129 {
5130 warning (0, "%s-%s is an empty range", str, dash + 1);
5131 return;
5132 }
5133
5134 for (i = first; i <= last; ++i)
5135 fixed_regs[i] = call_used_regs[i] = 1;
5136
5137 if (!comma)
5138 break;
5139
5140 *comma = ',';
5141 str = comma + 1;
5142 }
5143}
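
/* For example (illustrative): -mfixed-range=76-79 marks registers 76
   through 79 as fixed, and several ranges may be given separated by commas,
   e.g. -mfixed-range=76-79,110-127.  */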
5144
644459d0 5145/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5146 can be generated using the fsmbi instruction. */
5147int
5148fsmbi_const_p (rtx x)
5149{
dea01258 5150 if (CONSTANT_P (x))
5151 {
5df189be 5152 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5153 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5154 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5155 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5156 }
5157 return 0;
5158}
5159
5160/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5161 can be generated using the cbd, chd, cwd or cdd instruction. */
5162int
5163cpat_const_p (rtx x, enum machine_mode mode)
5164{
5165 if (CONSTANT_P (x))
5166 {
5167 enum immediate_class c = classify_immediate (x, mode);
5168 return c == IC_CPAT;
5169 }
5170 return 0;
5171}
644459d0 5172
dea01258 5173rtx
5174gen_cpat_const (rtx * ops)
5175{
5176 unsigned char dst[16];
5177 int i, offset, shift, isize;
5178 if (GET_CODE (ops[3]) != CONST_INT
5179 || GET_CODE (ops[2]) != CONST_INT
5180 || (GET_CODE (ops[1]) != CONST_INT
5181 && GET_CODE (ops[1]) != REG))
5182 return 0;
5183 if (GET_CODE (ops[1]) == REG
5184 && (!REG_POINTER (ops[1])
5185 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5186 return 0;
644459d0 5187
5188 for (i = 0; i < 16; i++)
dea01258 5189 dst[i] = i + 16;
5190 isize = INTVAL (ops[3]);
5191 if (isize == 1)
5192 shift = 3;
5193 else if (isize == 2)
5194 shift = 2;
5195 else
5196 shift = 0;
5197 offset = (INTVAL (ops[2]) +
5198 (GET_CODE (ops[1]) ==
5199 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5200 for (i = 0; i < isize; i++)
5201 dst[offset + i] = i + shift;
5202 return array_to_constant (TImode, dst);
644459d0 5203}
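
/* Worked example (illustrative): for a 4-byte insert at offset 8 the
   routine above builds the shuffle pattern
   10 11 12 13 14 15 16 17 00 01 02 03 1c 1d 1e 1f,
   i.e. the identity selectors 0x10..0x1f with the four target positions
   replaced by 00..03 for the new value, matching what a cwd instruction
   would generate.  */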
5204
5205/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5206 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5207 than 16 bytes, the value is repeated across the rest of the array. */
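/* Worked example (illustrative): constant_to_array (SImode,
   GEN_INT (0x12345678), arr) stores the big-endian bytes 12 34 56 78 and
   splats them across all sixteen entries of ARR.  */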
5208void
5209constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5210{
5211 HOST_WIDE_INT val;
5212 int i, j, first;
5213
5214 memset (arr, 0, 16);
5215 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5216 if (GET_CODE (x) == CONST_INT
5217 || (GET_CODE (x) == CONST_DOUBLE
5218 && (mode == SFmode || mode == DFmode)))
5219 {
5220 gcc_assert (mode != VOIDmode && mode != BLKmode);
5221
5222 if (GET_CODE (x) == CONST_DOUBLE)
5223 val = const_double_to_hwint (x);
5224 else
5225 val = INTVAL (x);
5226 first = GET_MODE_SIZE (mode) - 1;
5227 for (i = first; i >= 0; i--)
5228 {
5229 arr[i] = val & 0xff;
5230 val >>= 8;
5231 }
5232 /* Splat the constant across the whole array. */
5233 for (j = 0, i = first + 1; i < 16; i++)
5234 {
5235 arr[i] = arr[j];
5236 j = (j == first) ? 0 : j + 1;
5237 }
5238 }
5239 else if (GET_CODE (x) == CONST_DOUBLE)
5240 {
5241 val = CONST_DOUBLE_LOW (x);
5242 for (i = 15; i >= 8; i--)
5243 {
5244 arr[i] = val & 0xff;
5245 val >>= 8;
5246 }
5247 val = CONST_DOUBLE_HIGH (x);
5248 for (i = 7; i >= 0; i--)
5249 {
5250 arr[i] = val & 0xff;
5251 val >>= 8;
5252 }
5253 }
5254 else if (GET_CODE (x) == CONST_VECTOR)
5255 {
5256 int units;
5257 rtx elt;
5258 mode = GET_MODE_INNER (mode);
5259 units = CONST_VECTOR_NUNITS (x);
5260 for (i = 0; i < units; i++)
5261 {
5262 elt = CONST_VECTOR_ELT (x, i);
5263 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5264 {
5265 if (GET_CODE (elt) == CONST_DOUBLE)
5266 val = const_double_to_hwint (elt);
5267 else
5268 val = INTVAL (elt);
5269 first = GET_MODE_SIZE (mode) - 1;
5270 if (first + i * GET_MODE_SIZE (mode) > 16)
5271 abort ();
5272 for (j = first; j >= 0; j--)
5273 {
5274 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5275 val >>= 8;
5276 }
5277 }
5278 }
5279 }
5280 else
5281 gcc_unreachable();
5282}
5283
5284/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5285 smaller than 16 bytes, use the bytes that would represent that value
5286 in a register, e.g., for QImode return the value of arr[3]. */
5287rtx
e96f2783 5288array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5289{
5290 enum machine_mode inner_mode;
5291 rtvec v;
5292 int units, size, i, j, k;
5293 HOST_WIDE_INT val;
5294
5295 if (GET_MODE_CLASS (mode) == MODE_INT
5296 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5297 {
5298 j = GET_MODE_SIZE (mode);
5299 i = j < 4 ? 4 - j : 0;
5300 for (val = 0; i < j; i++)
5301 val = (val << 8) | arr[i];
5302 val = trunc_int_for_mode (val, mode);
5303 return GEN_INT (val);
5304 }
5305
5306 if (mode == TImode)
5307 {
5308 HOST_WIDE_INT high;
5309 for (i = high = 0; i < 8; i++)
5310 high = (high << 8) | arr[i];
5311 for (i = 8, val = 0; i < 16; i++)
5312 val = (val << 8) | arr[i];
5313 return immed_double_const (val, high, TImode);
5314 }
5315 if (mode == SFmode)
5316 {
5317 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5318 val = trunc_int_for_mode (val, SImode);
171b6d22 5319 return hwint_to_const_double (SFmode, val);
644459d0 5320 }
5321 if (mode == DFmode)
5322 {
1f915911 5323 for (i = 0, val = 0; i < 8; i++)
5324 val = (val << 8) | arr[i];
171b6d22 5325 return hwint_to_const_double (DFmode, val);
644459d0 5326 }
5327
5328 if (!VECTOR_MODE_P (mode))
5329 abort ();
5330
5331 units = GET_MODE_NUNITS (mode);
5332 size = GET_MODE_UNIT_SIZE (mode);
5333 inner_mode = GET_MODE_INNER (mode);
5334 v = rtvec_alloc (units);
5335
5336 for (k = i = 0; i < units; ++i)
5337 {
5338 val = 0;
5339 for (j = 0; j < size; j++, k++)
5340 val = (val << 8) | arr[k];
5341
5342 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5343 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5344 else
5345 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5346 }
5347 if (k > 16)
5348 abort ();
5349
5350 return gen_rtx_CONST_VECTOR (mode, v);
5351}
5352
5353static void
5354reloc_diagnostic (rtx x)
5355{
712d2297 5356 tree decl = 0;
644459d0 5357 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5358 return;
5359
5360 if (GET_CODE (x) == SYMBOL_REF)
5361 decl = SYMBOL_REF_DECL (x);
5362 else if (GET_CODE (x) == CONST
5363 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5364 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5365
5366 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5367 if (decl && !DECL_P (decl))
5368 decl = 0;
5369
644459d0 5370 /* The decl could be a string constant. */
5371 if (decl && DECL_P (decl))
712d2297 5372 {
5373 location_t loc;
5374 /* We use last_assemble_variable_decl to get line information. It's
5375 not always going to be right and might not even be close, but will
5376 be right for the more common cases. */
5377 if (!last_assemble_variable_decl || in_section == ctors_section)
5378 loc = DECL_SOURCE_LOCATION (decl);
5379 else
5380 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5381
712d2297 5382 if (TARGET_WARN_RELOC)
5383 warning_at (loc, 0,
5384 "creating run-time relocation for %qD", decl);
5385 else
5386 error_at (loc,
5387 "creating run-time relocation for %qD", decl);
5388 }
5389 else
5390 {
5391 if (TARGET_WARN_RELOC)
5392 warning_at (input_location, 0, "creating run-time relocation");
5393 else
5394 error_at (input_location, "creating run-time relocation");
5395 }
644459d0 5396}
5397
5398/* Hook into assemble_integer so we can generate an error for run-time
5399 relocations. The SPU ABI disallows them. */
5400static bool
5401spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5402{
5403 /* By default run-time relocations aren't supported, but we allow them
5404 in case users support it in their own run-time loader. And we provide
5405 a warning for those users that don't. */
5406 if ((GET_CODE (x) == SYMBOL_REF)
5407 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5408 reloc_diagnostic (x);
5409
5410 return default_assemble_integer (x, size, aligned_p);
5411}
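
/* Example (illustrative): with -fpic, emitting the initializer of
   "int *p = &x;" as data would need a run-time relocation for &x, so the
   SYMBOL_REF reaches reloc_diagnostic above and is reported as a warning
   or an error depending on TARGET_WARN_RELOC / TARGET_ERROR_RELOC.  */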
5412
5413static void
5414spu_asm_globalize_label (FILE * file, const char *name)
5415{
5416 fputs ("\t.global\t", file);
5417 assemble_name (file, name);
5418 fputs ("\n", file);
5419}
5420
5421static bool
f529eb25 5422spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5423 bool speed ATTRIBUTE_UNUSED)
644459d0 5424{
5425 enum machine_mode mode = GET_MODE (x);
5426 int cost = COSTS_N_INSNS (2);
5427
5428 /* Folding to a CONST_VECTOR will use extra space but there might
5429 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5430 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5431 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5432 because this cost will only be compared against a single insn.
5433 if (code == CONST_VECTOR)
5434 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5435 */
5436
5437 /* Use defaults for float operations. Not accurate but good enough. */
5438 if (mode == DFmode)
5439 {
5440 *total = COSTS_N_INSNS (13);
5441 return true;
5442 }
5443 if (mode == SFmode)
5444 {
5445 *total = COSTS_N_INSNS (6);
5446 return true;
5447 }
5448 switch (code)
5449 {
5450 case CONST_INT:
5451 if (satisfies_constraint_K (x))
5452 *total = 0;
5453 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5454 *total = COSTS_N_INSNS (1);
5455 else
5456 *total = COSTS_N_INSNS (3);
5457 return true;
5458
5459 case CONST:
5460 *total = COSTS_N_INSNS (3);
5461 return true;
5462
5463 case LABEL_REF:
5464 case SYMBOL_REF:
5465 *total = COSTS_N_INSNS (0);
5466 return true;
5467
5468 case CONST_DOUBLE:
5469 *total = COSTS_N_INSNS (5);
5470 return true;
5471
5472 case FLOAT_EXTEND:
5473 case FLOAT_TRUNCATE:
5474 case FLOAT:
5475 case UNSIGNED_FLOAT:
5476 case FIX:
5477 case UNSIGNED_FIX:
5478 *total = COSTS_N_INSNS (7);
5479 return true;
5480
5481 case PLUS:
5482 if (mode == TImode)
5483 {
5484 *total = COSTS_N_INSNS (9);
5485 return true;
5486 }
5487 break;
5488
5489 case MULT:
 5490      cost = (GET_CODE (XEXP (x, 0)) == REG
 5491              ? COSTS_N_INSNS (12)
 5492              : COSTS_N_INSNS (7));
5493 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5494 {
5495 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5496 {
5497 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5498 cost = COSTS_N_INSNS (14);
5499 if ((val & 0xffff) == 0)
5500 cost = COSTS_N_INSNS (9);
5501 else if (val > 0 && val < 0x10000)
5502 cost = COSTS_N_INSNS (11);
5503 }
5504 }
5505 *total = cost;
5506 return true;
5507 case DIV:
5508 case UDIV:
5509 case MOD:
5510 case UMOD:
5511 *total = COSTS_N_INSNS (20);
5512 return true;
5513 case ROTATE:
5514 case ROTATERT:
5515 case ASHIFT:
5516 case ASHIFTRT:
5517 case LSHIFTRT:
5518 *total = COSTS_N_INSNS (4);
5519 return true;
5520 case UNSPEC:
5521 if (XINT (x, 1) == UNSPEC_CONVERT)
5522 *total = COSTS_N_INSNS (0);
5523 else
5524 *total = COSTS_N_INSNS (4);
5525 return true;
5526 }
5527 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
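  /* For example, a generic DImode operation with the default cost of
     COSTS_N_INSNS (2) is scaled by (8 / 4) * (8 / 4) = 4 to
     COSTS_N_INSNS (8), and a TImode one by (16 / 4) * (16 / 4) = 16.  */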
5528 if (GET_MODE_CLASS (mode) == MODE_INT
5529 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5530 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5531 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5532 *total = cost;
5533 return true;
5534}
5535
1bd43494 5536static enum machine_mode
5537spu_unwind_word_mode (void)
644459d0 5538{
1bd43494 5539 return SImode;
644459d0 5540}
5541
5542/* Decide whether we can make a sibling call to a function. DECL is the
5543 declaration of the function being targeted by the call and EXP is the
5544 CALL_EXPR representing the call. */
5545static bool
5546spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5547{
5548 return decl && !TARGET_LARGE_MEM;
5549}
5550
 5551 /* We need to correctly update the back chain pointer and the Available
 5552    Stack Size (which is in the second slot of the sp register).  */
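/* Roughly, the expansion below does, in vector terms,

       chain = *(vec) $sp;           save the back chain word
       $sp   = $sp - splat (size);   slot 0 ($sp) and slot 1 (the
                                     Available Stack Size) both drop
       *(vec) $sp = chain;           store the back chain at the new top

   so a single vector subtract updates both the stack pointer and the
   Available Stack Size.  */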
5553void
5554spu_allocate_stack (rtx op0, rtx op1)
5555{
5556 HOST_WIDE_INT v;
5557 rtx chain = gen_reg_rtx (V4SImode);
5558 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5559 rtx sp = gen_reg_rtx (V4SImode);
5560 rtx splatted = gen_reg_rtx (V4SImode);
5561 rtx pat = gen_reg_rtx (TImode);
5562
 5563  /* Copy the back chain so we can save it back again.  */
5564 emit_move_insn (chain, stack_bot);
5565
5566 op1 = force_reg (SImode, op1);
5567
5568 v = 0x1020300010203ll;
5569 emit_move_insn (pat, immed_double_const (v, v, TImode));
5570 emit_insn (gen_shufb (splatted, op1, op1, pat));
5571
5572 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5573 emit_insn (gen_subv4si3 (sp, sp, splatted));
5574
5575 if (flag_stack_check)
5576 {
5577 rtx avail = gen_reg_rtx(SImode);
5578 rtx result = gen_reg_rtx(SImode);
5579 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5580 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5581 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5582 }
5583
5584 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5585
5586 emit_move_insn (stack_bot, chain);
5587
5588 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5589}
5590
5591void
5592spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5593{
5594 static unsigned char arr[16] =
5595 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
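  /* This shuffle pattern repeats bytes 0-3 of the first source, i.e. it
     splats the preferred-slot SImode word into all four slots of a
     V4SImode result.  */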
5596 rtx temp = gen_reg_rtx (SImode);
5597 rtx temp2 = gen_reg_rtx (SImode);
5598 rtx temp3 = gen_reg_rtx (V4SImode);
5599 rtx temp4 = gen_reg_rtx (V4SImode);
5600 rtx pat = gen_reg_rtx (TImode);
5601 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5602
5603 /* Restore the backchain from the first word, sp from the second. */
5604 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5605 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5606
5607 emit_move_insn (pat, array_to_constant (TImode, arr));
5608
5609 /* Compute Available Stack Size for sp */
5610 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5611 emit_insn (gen_shufb (temp3, temp, temp, pat));
5612
5613 /* Compute Available Stack Size for back chain */
5614 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5615 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5616 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5617
5618 emit_insn (gen_addv4si3 (sp, sp, temp3));
5619 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5620}
5621
5622static void
5623spu_init_libfuncs (void)
5624{
5625 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5626 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5627 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5628 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5629 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5630 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5631 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5632 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5633 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5634 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5635 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5636
5637 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5638 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5639
5640 set_optab_libfunc (smul_optab, TImode, "__multi3");
5641 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5642 set_optab_libfunc (smod_optab, TImode, "__modti3");
5643 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5644 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5645 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5646}
5647
5648/* Make a subreg, stripping any existing subreg. We could possibly just
5649 call simplify_subreg, but in this case we know what we want. */
5650rtx
5651spu_gen_subreg (enum machine_mode mode, rtx x)
5652{
5653 if (GET_CODE (x) == SUBREG)
5654 x = SUBREG_REG (x);
5655 if (GET_MODE (x) == mode)
5656 return x;
5657 return gen_rtx_SUBREG (mode, x, 0);
5658}
5659
5660static bool
fb80456a 5661spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5662{
5663 return (TYPE_MODE (type) == BLKmode
5664 && ((type) == 0
5665 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5666 || int_size_in_bytes (type) >
5667 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5668}
5669\f
5670/* Create the built-in types and functions */
5671
c2233b46 5672enum spu_function_code
5673{
5674#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5675#include "spu-builtins.def"
5676#undef DEF_BUILTIN
5677 NUM_SPU_BUILTINS
5678};
5679
5680extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5681
644459d0 5682struct spu_builtin_description spu_builtins[] = {
5683#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5684 {fcode, icode, name, type, params},
644459d0 5685#include "spu-builtins.def"
5686#undef DEF_BUILTIN
5687};
5688
0c5c4d59 5689static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5690
5691/* Returns the spu builtin decl for CODE. */
e6925042 5692
5693static tree
5694spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5695{
5696 if (code >= NUM_SPU_BUILTINS)
5697 return error_mark_node;
5698
0c5c4d59 5699 return spu_builtin_decls[code];
e6925042 5700}
5701
5702
644459d0 5703static void
5704spu_init_builtins (void)
5705{
5706 struct spu_builtin_description *d;
5707 unsigned int i;
5708
5709 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5710 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5711 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5712 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5713 V4SF_type_node = build_vector_type (float_type_node, 4);
5714 V2DF_type_node = build_vector_type (double_type_node, 2);
5715
5716 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5717 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5718 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5719 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5720
c4ecce0c 5721 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5722
5723 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5724 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5725 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5726 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5727 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5728 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5729 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5730 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5731 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5732 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5733 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5734 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5735
5736 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5737 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5738 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5739 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5740 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5741 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5742 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5744
5745 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5746 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5747
5748 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5749
5750 spu_builtin_types[SPU_BTI_PTR] =
5751 build_pointer_type (build_qualified_type
5752 (void_type_node,
5753 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5754
5755 /* For each builtin we build a new prototype. The tree code will make
5756 sure nodes are shared. */
5757 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5758 {
5759 tree p;
5760 char name[64]; /* build_function will make a copy. */
5761 int parm;
5762
5763 if (d->name == 0)
5764 continue;
5765
5dfbd18f 5766 /* Find last parm. */
644459d0 5767 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5768 ;
644459d0 5769
5770 p = void_list_node;
5771 while (parm > 1)
5772 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5773
5774 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5775
5776 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5777 spu_builtin_decls[i] =
3726fe5e 5778 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5779 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5780 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5781
5782 /* These builtins don't throw. */
0c5c4d59 5783 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5784 }
5785}
5786
cf31d486 5787void
5788spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5789{
5790 static unsigned char arr[16] =
5791 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5792
5793 rtx temp = gen_reg_rtx (Pmode);
5794 rtx temp2 = gen_reg_rtx (V4SImode);
5795 rtx temp3 = gen_reg_rtx (V4SImode);
5796 rtx pat = gen_reg_rtx (TImode);
5797 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5798
5799 emit_move_insn (pat, array_to_constant (TImode, arr));
5800
5801 /* Restore the sp. */
5802 emit_move_insn (temp, op1);
5803 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5804
5805 /* Compute available stack size for sp. */
5806 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5807 emit_insn (gen_shufb (temp3, temp, temp, pat));
5808
5809 emit_insn (gen_addv4si3 (sp, sp, temp3));
5810 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5811}
5812
644459d0 5813int
5814spu_safe_dma (HOST_WIDE_INT channel)
5815{
006e4b96 5816 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5817}
5818
5819void
5820spu_builtin_splats (rtx ops[])
5821{
5822 enum machine_mode mode = GET_MODE (ops[0]);
5823 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5824 {
5825 unsigned char arr[16];
5826 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5827 emit_move_insn (ops[0], array_to_constant (mode, arr));
5828 }
644459d0 5829 else
5830 {
5831 rtx reg = gen_reg_rtx (TImode);
5832 rtx shuf;
5833 if (GET_CODE (ops[1]) != REG
5834 && GET_CODE (ops[1]) != SUBREG)
5835 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
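      /* Each shuffle constant below repeats the byte indices of the
         element's preferred slot (e.g. bytes 0-3 for SImode/SFmode,
         bytes 2-3 for HImode), so the shufb broadcasts the scalar in
         ops[1] into every element of the result.  */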
5836 switch (mode)
5837 {
5838 case V2DImode:
5839 case V2DFmode:
5840 shuf =
5841 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5842 TImode);
5843 break;
5844 case V4SImode:
5845 case V4SFmode:
5846 shuf =
5847 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5848 TImode);
5849 break;
5850 case V8HImode:
5851 shuf =
5852 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5853 TImode);
5854 break;
5855 case V16QImode:
5856 shuf =
5857 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5858 TImode);
5859 break;
5860 default:
5861 abort ();
5862 }
5863 emit_move_insn (reg, shuf);
5864 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5865 }
5866}
5867
5868void
5869spu_builtin_extract (rtx ops[])
5870{
5871 enum machine_mode mode;
5872 rtx rot, from, tmp;
5873
5874 mode = GET_MODE (ops[1]);
5875
5876 if (GET_CODE (ops[2]) == CONST_INT)
5877 {
5878 switch (mode)
5879 {
5880 case V16QImode:
5881 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5882 break;
5883 case V8HImode:
5884 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5885 break;
5886 case V4SFmode:
5887 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5888 break;
5889 case V4SImode:
5890 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5891 break;
5892 case V2DImode:
5893 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5894 break;
5895 case V2DFmode:
5896 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5897 break;
5898 default:
5899 abort ();
5900 }
5901 return;
5902 }
5903
5904 from = spu_gen_subreg (TImode, ops[1]);
5905 rot = gen_reg_rtx (TImode);
5906 tmp = gen_reg_rtx (SImode);
5907
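  /* For a variable index, compute the byte rotate count that brings
     element ops[2] into the preferred slot (e.g. 4 * index for SImode,
     index - 3 for QImode), rotate the whole quadword, and let
     spu_convert pull the scalar out of the preferred slot.  */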
5908 switch (mode)
5909 {
5910 case V16QImode:
5911 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5912 break;
5913 case V8HImode:
5914 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5915 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5916 break;
5917 case V4SFmode:
5918 case V4SImode:
5919 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5920 break;
5921 case V2DImode:
5922 case V2DFmode:
5923 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5924 break;
5925 default:
5926 abort ();
5927 }
5928 emit_insn (gen_rotqby_ti (rot, from, tmp));
5929
5930 emit_insn (gen_spu_convert (ops[0], rot));
5931}
5932
5933void
5934spu_builtin_insert (rtx ops[])
5935{
5936 enum machine_mode mode = GET_MODE (ops[0]);
5937 enum machine_mode imode = GET_MODE_INNER (mode);
5938 rtx mask = gen_reg_rtx (TImode);
5939 rtx offset;
5940
5941 if (GET_CODE (ops[3]) == CONST_INT)
5942 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5943 else
5944 {
5945 offset = gen_reg_rtx (SImode);
5946 emit_insn (gen_mulsi3
5947 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5948 }
5949 emit_insn (gen_cpat
5950 (mask, stack_pointer_rtx, offset,
5951 GEN_INT (GET_MODE_SIZE (imode))));
5952 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5953}
5954
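/* A rough reading of the expansion below: the inverse of
   spu_builtin_extract.  The scalar in ops[1] is copied into a quadword
   and rotated so that it lands in element ops[2] of the result vector;
   the other element positions are left unspecified.  */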
5955void
5956spu_builtin_promote (rtx ops[])
5957{
5958 enum machine_mode mode, imode;
5959 rtx rot, from, offset;
5960 HOST_WIDE_INT pos;
5961
5962 mode = GET_MODE (ops[0]);
5963 imode = GET_MODE_INNER (mode);
5964
5965 from = gen_reg_rtx (TImode);
5966 rot = spu_gen_subreg (TImode, ops[0]);
5967
5968 emit_insn (gen_spu_convert (from, ops[1]));
5969
5970 if (GET_CODE (ops[2]) == CONST_INT)
5971 {
5972 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5973 if (GET_MODE_SIZE (imode) < 4)
5974 pos += 4 - GET_MODE_SIZE (imode);
5975 offset = GEN_INT (pos & 15);
5976 }
5977 else
5978 {
5979 offset = gen_reg_rtx (SImode);
5980 switch (mode)
5981 {
5982 case V16QImode:
5983 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5984 break;
5985 case V8HImode:
5986 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5987 emit_insn (gen_addsi3 (offset, offset, offset));
5988 break;
5989 case V4SFmode:
5990 case V4SImode:
5991 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5992 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5993 break;
5994 case V2DImode:
5995 case V2DFmode:
5996 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5997 break;
5998 default:
5999 abort ();
6000 }
6001 }
6002 emit_insn (gen_rotqby_ti (rot, from, offset));
6003}
6004
e96f2783 6005static void
6006spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6007{
e96f2783 6008 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6009 rtx shuf = gen_reg_rtx (V4SImode);
6010 rtx insn = gen_reg_rtx (V4SImode);
6011 rtx shufc;
6012 rtx insnc;
6013 rtx mem;
6014
6015 fnaddr = force_reg (SImode, fnaddr);
6016 cxt = force_reg (SImode, cxt);
6017
6018 if (TARGET_LARGE_MEM)
6019 {
6020 rtx rotl = gen_reg_rtx (V4SImode);
6021 rtx mask = gen_reg_rtx (V4SImode);
6022 rtx bi = gen_reg_rtx (SImode);
e96f2783 6023 static unsigned char const shufa[16] = {
644459d0 6024 2, 3, 0, 1, 18, 19, 16, 17,
6025 0, 1, 2, 3, 16, 17, 18, 19
6026 };
e96f2783 6027 static unsigned char const insna[16] = {
644459d0 6028 0x41, 0, 0, 79,
6029 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6030 0x60, 0x80, 0, 79,
6031 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6032 };
6033
6034 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6035 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6036
6037 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6038 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6039 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6040 emit_insn (gen_selb (insn, insnc, rotl, mask));
6041
e96f2783 6042 mem = adjust_address (m_tramp, V4SImode, 0);
6043 emit_move_insn (mem, insn);
644459d0 6044
6045 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6046 mem = adjust_address (m_tramp, Pmode, 16);
6047 emit_move_insn (mem, bi);
644459d0 6048 }
6049 else
6050 {
6051 rtx scxt = gen_reg_rtx (SImode);
6052 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6053 static unsigned char const insna[16] = {
644459d0 6054 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6055 0x30, 0, 0, 0,
6056 0, 0, 0, 0,
6057 0, 0, 0, 0
6058 };
6059
6060 shufc = gen_reg_rtx (TImode);
6061 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6062
 6063      /* By or'ing all of cxt with the ila opcode we are assuming cxt
 6064         fits in 18 bits and the last 4 bits are zeros.  This will be
 6065         true if the stack pointer is initialized to 0x3fff0 at program
 6066         start; otherwise the ila instruction will be garbage.  */
6067
6068 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6069 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6070 emit_insn (gen_cpat
6071 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6072 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6073 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6074
e96f2783 6075 mem = adjust_address (m_tramp, V4SImode, 0);
6076 emit_move_insn (mem, insn);
644459d0 6077 }
6078 emit_insn (gen_sync ());
6079}
6080
6081void
6082spu_expand_sign_extend (rtx ops[])
6083{
6084 unsigned char arr[16];
6085 rtx pat = gen_reg_rtx (TImode);
6086 rtx sign, c;
6087 int i, last;
6088 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
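  /* shufb control bytes 0x00-0x0f select bytes from the first source and
     0x10-0x1f from the second; the patterns built below fill the high
     bytes of the result with copies of the sign byte and keep the
     original low-order bytes at the end.  */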
6089 if (GET_MODE (ops[1]) == QImode)
6090 {
6091 sign = gen_reg_rtx (HImode);
6092 emit_insn (gen_extendqihi2 (sign, ops[1]));
6093 for (i = 0; i < 16; i++)
6094 arr[i] = 0x12;
6095 arr[last] = 0x13;
6096 }
6097 else
6098 {
6099 for (i = 0; i < 16; i++)
6100 arr[i] = 0x10;
6101 switch (GET_MODE (ops[1]))
6102 {
6103 case HImode:
6104 sign = gen_reg_rtx (SImode);
6105 emit_insn (gen_extendhisi2 (sign, ops[1]));
6106 arr[last] = 0x03;
6107 arr[last - 1] = 0x02;
6108 break;
6109 case SImode:
6110 sign = gen_reg_rtx (SImode);
6111 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6112 for (i = 0; i < 4; i++)
6113 arr[last - i] = 3 - i;
6114 break;
6115 case DImode:
6116 sign = gen_reg_rtx (SImode);
6117 c = gen_reg_rtx (SImode);
6118 emit_insn (gen_spu_convert (c, ops[1]));
6119 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6120 for (i = 0; i < 8; i++)
6121 arr[last - i] = 7 - i;
6122 break;
6123 default:
6124 abort ();
6125 }
6126 }
6127 emit_move_insn (pat, array_to_constant (TImode, arr));
6128 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6129}
6130
 6131 /* Expand vector initialization.  If there are any constant parts,
 6132    load the constant parts first, then load any non-constant parts.  */
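/* For example, initializing a V4SImode vector with { x, 1, 2, 3 }, where
   x is not constant, first loads the constant vector { 1, 1, 2, 3 } (the
   variable slot is filled with the first constant so the recursive call
   can still use a splat when possible) and then inserts x into
   element 0.  */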
6133void
6134spu_expand_vector_init (rtx target, rtx vals)
6135{
6136 enum machine_mode mode = GET_MODE (target);
6137 int n_elts = GET_MODE_NUNITS (mode);
6138 int n_var = 0;
6139 bool all_same = true;
790c536c 6140 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6141 int i;
6142
6143 first = XVECEXP (vals, 0, 0);
6144 for (i = 0; i < n_elts; ++i)
6145 {
6146 x = XVECEXP (vals, 0, i);
e442af0b 6147 if (!(CONST_INT_P (x)
6148 || GET_CODE (x) == CONST_DOUBLE
6149 || GET_CODE (x) == CONST_FIXED))
644459d0 6150 ++n_var;
6151 else
6152 {
6153 if (first_constant == NULL_RTX)
6154 first_constant = x;
6155 }
6156 if (i > 0 && !rtx_equal_p (x, first))
6157 all_same = false;
6158 }
6159
 6160   /* If all elements are the same, use splats to repeat elements.  */
6161 if (all_same)
6162 {
6163 if (!CONSTANT_P (first)
6164 && !register_operand (first, GET_MODE (x)))
6165 first = force_reg (GET_MODE (first), first);
6166 emit_insn (gen_spu_splats (target, first));
6167 return;
6168 }
6169
 6170   /* Load constant parts.  */
6171 if (n_var != n_elts)
6172 {
6173 if (n_var == 0)
6174 {
6175 emit_move_insn (target,
6176 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6177 }
6178 else
6179 {
6180 rtx constant_parts_rtx = copy_rtx (vals);
6181
6182 gcc_assert (first_constant != NULL_RTX);
 6183          /* Fill empty slots with the first constant; this increases
 6184             our chance of using splats in the recursive call below.  */
6185 for (i = 0; i < n_elts; ++i)
e442af0b 6186 {
6187 x = XVECEXP (constant_parts_rtx, 0, i);
6188 if (!(CONST_INT_P (x)
6189 || GET_CODE (x) == CONST_DOUBLE
6190 || GET_CODE (x) == CONST_FIXED))
6191 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6192 }
644459d0 6193
6194 spu_expand_vector_init (target, constant_parts_rtx);
6195 }
6196 }
6197
 6198   /* Load variable parts.  */
6199 if (n_var != 0)
6200 {
6201 rtx insert_operands[4];
6202
6203 insert_operands[0] = target;
6204 insert_operands[2] = target;
6205 for (i = 0; i < n_elts; ++i)
6206 {
6207 x = XVECEXP (vals, 0, i);
e442af0b 6208 if (!(CONST_INT_P (x)
6209 || GET_CODE (x) == CONST_DOUBLE
6210 || GET_CODE (x) == CONST_FIXED))
644459d0 6211 {
6212 if (!register_operand (x, GET_MODE (x)))
6213 x = force_reg (GET_MODE (x), x);
6214 insert_operands[1] = x;
6215 insert_operands[3] = GEN_INT (i);
6216 spu_builtin_insert (insert_operands);
6217 }
6218 }
6219 }
6220}
6352eedf 6221
5474166e 6222/* Return the insn index of the vector compare instruction for the given
 6223   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6224
6225static int
6226get_vec_cmp_insn (enum rtx_code code,
6227 enum machine_mode dest_mode,
6228 enum machine_mode op_mode)
6229
6230{
6231 switch (code)
6232 {
6233 case EQ:
6234 if (dest_mode == V16QImode && op_mode == V16QImode)
6235 return CODE_FOR_ceq_v16qi;
6236 if (dest_mode == V8HImode && op_mode == V8HImode)
6237 return CODE_FOR_ceq_v8hi;
6238 if (dest_mode == V4SImode && op_mode == V4SImode)
6239 return CODE_FOR_ceq_v4si;
6240 if (dest_mode == V4SImode && op_mode == V4SFmode)
6241 return CODE_FOR_ceq_v4sf;
6242 if (dest_mode == V2DImode && op_mode == V2DFmode)
6243 return CODE_FOR_ceq_v2df;
6244 break;
6245 case GT:
6246 if (dest_mode == V16QImode && op_mode == V16QImode)
6247 return CODE_FOR_cgt_v16qi;
6248 if (dest_mode == V8HImode && op_mode == V8HImode)
6249 return CODE_FOR_cgt_v8hi;
6250 if (dest_mode == V4SImode && op_mode == V4SImode)
6251 return CODE_FOR_cgt_v4si;
6252 if (dest_mode == V4SImode && op_mode == V4SFmode)
6253 return CODE_FOR_cgt_v4sf;
6254 if (dest_mode == V2DImode && op_mode == V2DFmode)
6255 return CODE_FOR_cgt_v2df;
6256 break;
6257 case GTU:
6258 if (dest_mode == V16QImode && op_mode == V16QImode)
6259 return CODE_FOR_clgt_v16qi;
6260 if (dest_mode == V8HImode && op_mode == V8HImode)
6261 return CODE_FOR_clgt_v8hi;
6262 if (dest_mode == V4SImode && op_mode == V4SImode)
6263 return CODE_FOR_clgt_v4si;
6264 break;
6265 default:
6266 break;
6267 }
6268 return -1;
6269}
6270
6271/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6272 DMODE is expected destination mode. This is a recursive function. */
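/* Codes with no direct SPU compare pattern are synthesized below: LT/LTU
   become GT/GTU with the operands swapped, NE is the one's complement of
   EQ, and GE/GEU/LE/LEU are built as (GT | EQ), (GTU | EQ), (LT | EQ)
   and (LTU | EQ) respectively.  */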
6273
6274static rtx
6275spu_emit_vector_compare (enum rtx_code rcode,
6276 rtx op0, rtx op1,
6277 enum machine_mode dmode)
6278{
6279 int vec_cmp_insn;
6280 rtx mask;
6281 enum machine_mode dest_mode;
6282 enum machine_mode op_mode = GET_MODE (op1);
6283
6284 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6285
 6286  /* Floating point vector compare instructions use a V4SImode destination.
 6287     Double floating point vector compare instructions use a V2DImode
 6288     destination.  Move the result to the appropriate mode later.  */
6289 if (dmode == V4SFmode)
6290 dest_mode = V4SImode;
6291 else if (dmode == V2DFmode)
6292 dest_mode = V2DImode;
6293 else
6294 dest_mode = dmode;
6295
6296 mask = gen_reg_rtx (dest_mode);
6297 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6298
6299 if (vec_cmp_insn == -1)
6300 {
6301 bool swap_operands = false;
6302 bool try_again = false;
6303 switch (rcode)
6304 {
6305 case LT:
6306 rcode = GT;
6307 swap_operands = true;
6308 try_again = true;
6309 break;
6310 case LTU:
6311 rcode = GTU;
6312 swap_operands = true;
6313 try_again = true;
6314 break;
6315 case NE:
6316 /* Treat A != B as ~(A==B). */
6317 {
6318 enum insn_code nor_code;
6319 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6320 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6321 gcc_assert (nor_code != CODE_FOR_nothing);
6322 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6323 if (dmode != dest_mode)
6324 {
6325 rtx temp = gen_reg_rtx (dest_mode);
6326 convert_move (temp, mask, 0);
6327 return temp;
6328 }
6329 return mask;
6330 }
6331 break;
6332 case GE:
6333 case GEU:
6334 case LE:
6335 case LEU:
6336 /* Try GT/GTU/LT/LTU OR EQ */
6337 {
6338 rtx c_rtx, eq_rtx;
6339 enum insn_code ior_code;
6340 enum rtx_code new_code;
6341
6342 switch (rcode)
6343 {
6344 case GE: new_code = GT; break;
6345 case GEU: new_code = GTU; break;
6346 case LE: new_code = LT; break;
6347 case LEU: new_code = LTU; break;
6348 default:
6349 gcc_unreachable ();
6350 }
6351
6352 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6353 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6354
d6bf3b14 6355 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6356 gcc_assert (ior_code != CODE_FOR_nothing);
6357 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6358 if (dmode != dest_mode)
6359 {
6360 rtx temp = gen_reg_rtx (dest_mode);
6361 convert_move (temp, mask, 0);
6362 return temp;
6363 }
6364 return mask;
6365 }
6366 break;
6367 default:
6368 gcc_unreachable ();
6369 }
6370
6371 /* You only get two chances. */
6372 if (try_again)
6373 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6374
6375 gcc_assert (vec_cmp_insn != -1);
6376
6377 if (swap_operands)
6378 {
6379 rtx tmp;
6380 tmp = op0;
6381 op0 = op1;
6382 op1 = tmp;
6383 }
6384 }
6385
6386 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6387 if (dmode != dest_mode)
6388 {
6389 rtx temp = gen_reg_rtx (dest_mode);
6390 convert_move (temp, mask, 0);
6391 return temp;
6392 }
6393 return mask;
6394}
6395
6396
 6397 /* Emit a vector conditional expression.
 6398    DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
 6399    CC_OP0 and CC_OP1 are the two operands of the relational operation COND.  */
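/* Note the operand order of the selb below: where the comparison mask is
   all ones the result takes its bits from OP1, otherwise from OP2, which
   matches the (cond ? op1 : op2) semantics of VEC_COND_EXPR.  */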
6400
6401int
6402spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6403 rtx cond, rtx cc_op0, rtx cc_op1)
6404{
6405 enum machine_mode dest_mode = GET_MODE (dest);
6406 enum rtx_code rcode = GET_CODE (cond);
6407 rtx mask;
6408
 6409  /* Get the vector mask for the given relational operation.  */
6410 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6411
6412 emit_insn(gen_selb (dest, op2, op1, mask));
6413
6414 return 1;
6415}
6416
6352eedf 6417static rtx
6418spu_force_reg (enum machine_mode mode, rtx op)
6419{
6420 rtx x, r;
6421 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6422 {
6423 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6424 || GET_MODE (op) == BLKmode)
6425 return force_reg (mode, convert_to_mode (mode, op, 0));
6426 abort ();
6427 }
6428
6429 r = force_reg (GET_MODE (op), op);
6430 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6431 {
6432 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6433 if (x)
6434 return x;
6435 }
6436
6437 x = gen_reg_rtx (mode);
6438 emit_insn (gen_spu_convert (x, r));
6439 return x;
6440}
6441
6442static void
6443spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6444{
6445 HOST_WIDE_INT v = 0;
6446 int lsbits;
6447 /* Check the range of immediate operands. */
6448 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6449 {
6450 int range = p - SPU_BTI_7;
5df189be 6451
6452 if (!CONSTANT_P (op))
bf776685 6453 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6454 d->name,
6455 spu_builtin_range[range].low, spu_builtin_range[range].high);
6456
6457 if (GET_CODE (op) == CONST
6458 && (GET_CODE (XEXP (op, 0)) == PLUS
6459 || GET_CODE (XEXP (op, 0)) == MINUS))
6460 {
6461 v = INTVAL (XEXP (XEXP (op, 0), 1));
6462 op = XEXP (XEXP (op, 0), 0);
6463 }
6464 else if (GET_CODE (op) == CONST_INT)
6465 v = INTVAL (op);
5df189be 6466 else if (GET_CODE (op) == CONST_VECTOR
6467 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6468 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6469
6470 /* The default for v is 0 which is valid in every range. */
6471 if (v < spu_builtin_range[range].low
6472 || v > spu_builtin_range[range].high)
bf776685 6473 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6474 d->name,
6475 spu_builtin_range[range].low, spu_builtin_range[range].high,
6476 v);
6352eedf 6477
6478 switch (p)
6479 {
6480 case SPU_BTI_S10_4:
6481 lsbits = 4;
6482 break;
6483 case SPU_BTI_U16_2:
 6484          /* This is only used by lqa and stqa.  Even though the insns
 6485             encode 16 bits of the address (all but the 2 least
 6486             significant), only 14 bits are used because the address is
 6487             masked to be 16-byte aligned.  */
6488 lsbits = 4;
6489 break;
6490 case SPU_BTI_S16_2:
6491 /* This is used for lqr and stqr. */
6492 lsbits = 2;
6493 break;
6494 default:
6495 lsbits = 0;
6496 }
6497
6498 if (GET_CODE (op) == LABEL_REF
6499 || (GET_CODE (op) == SYMBOL_REF
6500 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6501 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6502 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6503 d->name);
6504 }
6505}
6506
6507
70ca06f8 6508static int
5df189be 6509expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6510 rtx target, rtx ops[])
6511{
bc620c5c 6512 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6513 int i = 0, a;
6352eedf 6514
6515 /* Expand the arguments into rtl. */
6516
6517 if (d->parm[0] != SPU_BTI_VOID)
6518 ops[i++] = target;
6519
70ca06f8 6520 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6521 {
5df189be 6522 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6523 if (arg == 0)
6524 abort ();
b9c74b4d 6525 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6526 }
70ca06f8 6527
6528 /* The insn pattern may have additional operands (SCRATCH).
6529 Return the number of actual non-SCRATCH operands. */
6530 gcc_assert (i <= insn_data[icode].n_operands);
6531 return i;
6352eedf 6532}
6533
6534static rtx
6535spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6536 tree exp, rtx target)
6352eedf 6537{
6538 rtx pat;
6539 rtx ops[8];
bc620c5c 6540 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6541 enum machine_mode mode, tmode;
6542 int i, p;
70ca06f8 6543 int n_operands;
6352eedf 6544 tree return_type;
6545
6546 /* Set up ops[] with values from arglist. */
70ca06f8 6547 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6548
6549 /* Handle the target operand which must be operand 0. */
6550 i = 0;
6551 if (d->parm[0] != SPU_BTI_VOID)
6552 {
6553
 6554      /* We prefer the mode specified for the match_operand; otherwise
 6555         use the mode from the builtin function prototype.  */
6556 tmode = insn_data[d->icode].operand[0].mode;
6557 if (tmode == VOIDmode)
6558 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6559
 6560      /* Try to use target because not using it can lead to extra copies,
 6561         and when we are using all of the registers extra copies lead
 6562         to extra spills.  */
6563 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6564 ops[0] = target;
6565 else
6566 target = ops[0] = gen_reg_rtx (tmode);
6567
6568 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6569 abort ();
6570
6571 i++;
6572 }
6573
a76866d3 6574 if (d->fcode == SPU_MASK_FOR_LOAD)
6575 {
6576 enum machine_mode mode = insn_data[icode].operand[1].mode;
6577 tree arg;
6578 rtx addr, op, pat;
6579
6580 /* get addr */
5df189be 6581 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6582 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6583 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6584 addr = memory_address (mode, op);
6585
6586 /* negate addr */
6587 op = gen_reg_rtx (GET_MODE (addr));
6588 emit_insn (gen_rtx_SET (VOIDmode, op,
6589 gen_rtx_NEG (GET_MODE (addr), addr)));
6590 op = gen_rtx_MEM (mode, op);
6591
6592 pat = GEN_FCN (icode) (target, op);
6593 if (!pat)
6594 return 0;
6595 emit_insn (pat);
6596 return target;
6597 }
6598
6352eedf 6599 /* Ignore align_hint, but still expand it's args in case they have
6600 side effects. */
6601 if (icode == CODE_FOR_spu_align_hint)
6602 return 0;
6603
6604 /* Handle the rest of the operands. */
70ca06f8 6605 for (p = 1; i < n_operands; i++, p++)
6352eedf 6606 {
6607 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6608 mode = insn_data[d->icode].operand[i].mode;
6609 else
6610 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6611
6612 /* mode can be VOIDmode here for labels */
6613
6614 /* For specific intrinsics with an immediate operand, e.g.,
6615 si_ai(), we sometimes need to convert the scalar argument to a
6616 vector argument by splatting the scalar. */
6617 if (VECTOR_MODE_P (mode)
6618 && (GET_CODE (ops[i]) == CONST_INT
6619 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6620 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6621 {
6622 if (GET_CODE (ops[i]) == CONST_INT)
6623 ops[i] = spu_const (mode, INTVAL (ops[i]));
6624 else
6625 {
6626 rtx reg = gen_reg_rtx (mode);
6627 enum machine_mode imode = GET_MODE_INNER (mode);
6628 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6629 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6630 if (imode != GET_MODE (ops[i]))
6631 ops[i] = convert_to_mode (imode, ops[i],
6632 TYPE_UNSIGNED (spu_builtin_types
6633 [d->parm[i]]));
6634 emit_insn (gen_spu_splats (reg, ops[i]));
6635 ops[i] = reg;
6636 }
6637 }
6638
5df189be 6639 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6640
6352eedf 6641 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6642 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6643 }
6644
70ca06f8 6645 switch (n_operands)
6352eedf 6646 {
6647 case 0:
6648 pat = GEN_FCN (icode) (0);
6649 break;
6650 case 1:
6651 pat = GEN_FCN (icode) (ops[0]);
6652 break;
6653 case 2:
6654 pat = GEN_FCN (icode) (ops[0], ops[1]);
6655 break;
6656 case 3:
6657 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6658 break;
6659 case 4:
6660 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6661 break;
6662 case 5:
6663 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6664 break;
6665 case 6:
6666 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6667 break;
6668 default:
6669 abort ();
6670 }
6671
6672 if (!pat)
6673 abort ();
6674
6675 if (d->type == B_CALL || d->type == B_BISLED)
6676 emit_call_insn (pat);
6677 else if (d->type == B_JUMP)
6678 {
6679 emit_jump_insn (pat);
6680 emit_barrier ();
6681 }
6682 else
6683 emit_insn (pat);
6684
6685 return_type = spu_builtin_types[d->parm[0]];
6686 if (d->parm[0] != SPU_BTI_VOID
6687 && GET_MODE (target) != TYPE_MODE (return_type))
6688 {
 6689      /* target is the return value.  It should always have the mode of
 6690         the builtin function prototype.  */
6691 target = spu_force_reg (TYPE_MODE (return_type), target);
6692 }
6693
6694 return target;
6695}
6696
6697rtx
6698spu_expand_builtin (tree exp,
6699 rtx target,
6700 rtx subtarget ATTRIBUTE_UNUSED,
6701 enum machine_mode mode ATTRIBUTE_UNUSED,
6702 int ignore ATTRIBUTE_UNUSED)
6703{
5df189be 6704 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6705 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6706 struct spu_builtin_description *d;
6707
6708 if (fcode < NUM_SPU_BUILTINS)
6709 {
6710 d = &spu_builtins[fcode];
6711
5df189be 6712 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6713 }
6714 abort ();
6715}
6716
e99f512d 6717/* Implement targetm.vectorize.builtin_mul_widen_even. */
6718static tree
6719spu_builtin_mul_widen_even (tree type)
6720{
e99f512d 6721 switch (TYPE_MODE (type))
6722 {
6723 case V8HImode:
6724 if (TYPE_UNSIGNED (type))
0c5c4d59 6725 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6726 else
0c5c4d59 6727 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6728 break;
6729 default:
6730 return NULL_TREE;
6731 }
6732}
6733
6734/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6735static tree
6736spu_builtin_mul_widen_odd (tree type)
6737{
6738 switch (TYPE_MODE (type))
6739 {
6740 case V8HImode:
6741 if (TYPE_UNSIGNED (type))
0c5c4d59 6742 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6743 else
0c5c4d59 6744 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6745 break;
6746 default:
6747 return NULL_TREE;
6748 }
6749}
6750
a76866d3 6751/* Implement targetm.vectorize.builtin_mask_for_load. */
6752static tree
6753spu_builtin_mask_for_load (void)
6754{
0c5c4d59 6755 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6756}
5df189be 6757
a28df51d 6758/* Implement targetm.vectorize.builtin_vectorization_cost. */
6759static int
0822b158 6760spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6761 tree vectype ATTRIBUTE_UNUSED,
6762 int misalign ATTRIBUTE_UNUSED)
559093aa 6763{
6764 switch (type_of_cost)
6765 {
6766 case scalar_stmt:
6767 case vector_stmt:
6768 case vector_load:
6769 case vector_store:
6770 case vec_to_scalar:
6771 case scalar_to_vec:
6772 case cond_branch_not_taken:
6773 case vec_perm:
6774 return 1;
6775
6776 case scalar_store:
6777 return 10;
6778
6779 case scalar_load:
6780 /* Load + rotate. */
6781 return 2;
6782
6783 case unaligned_load:
6784 return 2;
6785
6786 case cond_branch_taken:
6787 return 6;
6788
6789 default:
6790 gcc_unreachable ();
6791 }
a28df51d 6792}
6793
0e87db76 6794/* Return true iff a data reference of type TYPE can reach vector alignment
 6795   (16) after applying N iterations.  This routine does not determine how
 6796   many iterations are required to reach the desired alignment.  */
6797
6798static bool
a9f1838b 6799spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6800{
6801 if (is_packed)
6802 return false;
6803
6804 /* All other types are naturally aligned. */
6805 return true;
6806}
6807
a0515226 6808/* Implement targetm.vectorize.builtin_vec_perm. */
6809tree
6810spu_builtin_vec_perm (tree type, tree *mask_element_type)
6811{
a0515226 6812 *mask_element_type = unsigned_char_type_node;
6813
6814 switch (TYPE_MODE (type))
6815 {
6816 case V16QImode:
6817 if (TYPE_UNSIGNED (type))
0c5c4d59 6818 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6819 else
0c5c4d59 6820 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6821
6822 case V8HImode:
6823 if (TYPE_UNSIGNED (type))
0c5c4d59 6824 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6825 else
0c5c4d59 6826 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6827
6828 case V4SImode:
6829 if (TYPE_UNSIGNED (type))
0c5c4d59 6830 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6831 else
0c5c4d59 6832 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6833
6834 case V2DImode:
6835 if (TYPE_UNSIGNED (type))
0c5c4d59 6836 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6837 else
0c5c4d59 6838 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6839
6840 case V4SFmode:
0c5c4d59 6841 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6842
6843 case V2DFmode:
0c5c4d59 6844 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6845
6846 default:
6847 return NULL_TREE;
6848 }
a0515226 6849}
6850
6cf5579e 6851/* Return the appropriate mode for a named address pointer. */
6852static enum machine_mode
6853spu_addr_space_pointer_mode (addr_space_t addrspace)
6854{
6855 switch (addrspace)
6856 {
6857 case ADDR_SPACE_GENERIC:
6858 return ptr_mode;
6859 case ADDR_SPACE_EA:
6860 return EAmode;
6861 default:
6862 gcc_unreachable ();
6863 }
6864}
6865
6866/* Return the appropriate mode for a named address address. */
6867static enum machine_mode
6868spu_addr_space_address_mode (addr_space_t addrspace)
6869{
6870 switch (addrspace)
6871 {
6872 case ADDR_SPACE_GENERIC:
6873 return Pmode;
6874 case ADDR_SPACE_EA:
6875 return EAmode;
6876 default:
6877 gcc_unreachable ();
6878 }
6879}
6880
6881/* Determine if one named address space is a subset of another. */
6882
6883static bool
6884spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6885{
6886 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6887 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6888
6889 if (subset == superset)
6890 return true;
6891
6892 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6893 being subsets but instead as disjoint address spaces. */
6894 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6895 return false;
6896
6897 else
6898 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6899}
6900
6901/* Convert from one address space to another. */
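/* A rough summary of the two directions handled below: an __ea pointer
   and a generic (local-store) pointer are assumed to differ by the
   run-time value of __ea_local_store, so converting subtracts or adds
   that base; the conditional move keeps a NULL pointer NULL in either
   direction.  */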
6902static rtx
6903spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6904{
6905 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6906 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6907
6908 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6909 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6910
6911 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6912 {
6913 rtx result, ls;
6914
6915 ls = gen_const_mem (DImode,
6916 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6917 set_mem_align (ls, 128);
6918
6919 result = gen_reg_rtx (Pmode);
6920 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6921 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6922 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6923 ls, const0_rtx, Pmode, 1);
6924
6925 emit_insn (gen_subsi3 (result, op, ls));
6926
6927 return result;
6928 }
6929
6930 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6931 {
6932 rtx result, ls;
6933
6934 ls = gen_const_mem (DImode,
6935 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6936 set_mem_align (ls, 128);
6937
6938 result = gen_reg_rtx (EAmode);
6939 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6940 op = force_reg (Pmode, op);
6941 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6942 ls, const0_rtx, EAmode, 1);
6943 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6944
6945 if (EAmode == SImode)
6946 emit_insn (gen_addsi3 (result, op, ls));
6947 else
6948 emit_insn (gen_adddi3 (result, op, ls));
6949
6950 return result;
6951 }
6952
6953 else
6954 gcc_unreachable ();
6955}
6956
6957
d52fd16a 6958/* Count the total number of instructions in each pipe and return the
 6959   maximum, which is used as the Minimum Iteration Interval (MII)
 6960   in the modulo scheduler.  get_pipe() returns -2, -1, 0, or 1;
 6961   -2 marks instructions that can go in either pipe0 or pipe1.  */
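/* For instance, with 2 either-pipe, 3 pipe0 and 1 pipe1 instructions the
   result is MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */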
6962static int
6963spu_sms_res_mii (struct ddg *g)
6964{
6965 int i;
6966 unsigned t[4] = {0, 0, 0, 0};
6967
6968 for (i = 0; i < g->num_nodes; i++)
6969 {
6970 rtx insn = g->nodes[i].insn;
6971 int p = get_pipe (insn) + 2;
6972
1e944a0b 6973 gcc_assert (p >= 0);
6974 gcc_assert (p < 4);
d52fd16a 6975
6976 t[p]++;
6977 if (dump_file && INSN_P (insn))
6978 fprintf (dump_file, "i%d %s %d %d\n",
6979 INSN_UID (insn),
6980 insn_data[INSN_CODE(insn)].name,
6981 p, t[p]);
6982 }
6983 if (dump_file)
6984 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6985
6986 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6987}
6988
6989
5df189be 6990void
6991spu_init_expanders (void)
9d98604b 6992{
5df189be 6993 if (cfun)
9d98604b 6994 {
6995 rtx r0, r1;
 6996      /* The hard frame pointer (HARD_FRAME_POINTER_REGNUM) is only 128-bit
 6997         aligned when frame_pointer_needed is true.  We don't know that
 6998         until we're expanding the prologue.  */
6999 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7000
7001 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7002 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7003 to be treated as aligned, so generate them here. */
7004 r0 = gen_reg_rtx (SImode);
7005 r1 = gen_reg_rtx (SImode);
7006 mark_reg_pointer (r0, 128);
7007 mark_reg_pointer (r1, 128);
7008 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7009 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7010 }
ea32e033 7011}
7012
7013static enum machine_mode
7014spu_libgcc_cmp_return_mode (void)
7015{
7016
 7017/* For SPU, word mode is TImode, so it is better to use SImode
 7018   for compare returns.  */
7019 return SImode;
7020}
7021
7022static enum machine_mode
7023spu_libgcc_shift_count_mode (void)
7024{
 7025/* For SPU, word mode is TImode, so it is better to use SImode
 7026   for shift counts.  */
7027 return SImode;
7028}
5a976006 7029
7030/* An early place to adjust some flags after GCC has finished processing
 7031   them.  */
7032static void
7033asm_file_start (void)
7034{
7035 /* Variable tracking should be run after all optimizations which
0ef14db8 7036 change order of insns. It also needs a valid CFG. Therefore,
7037 *if* we make nontrivial changes in machine-dependent reorg,
7038 run variable tracking after those. However, if we do not run
7039 our machine-dependent reorg pass, we must still run the normal
7040 variable tracking pass (or else we will ICE in final since
7041 debug insns have not been removed). */
7042 if (TARGET_BRANCH_HINTS && optimize)
7043 {
7044 spu_flag_var_tracking = flag_var_tracking;
7045 flag_var_tracking = 0;
7046 }
5a976006 7047
7048 default_file_start ();
7049}
7050
a08dfd55 7051/* Implement targetm.section_type_flags. */
7052static unsigned int
7053spu_section_type_flags (tree decl, const char *name, int reloc)
7054{
7055 /* .toe needs to have type @nobits. */
7056 if (strcmp (name, ".toe") == 0)
7057 return SECTION_BSS;
6cf5579e 7058 /* Don't load _ea into the current address space. */
7059 if (strcmp (name, "._ea") == 0)
7060 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7061 return default_section_type_flags (decl, name, reloc);
7062}
c2233b46 7063
6cf5579e 7064/* Implement targetm.select_section. */
7065static section *
7066spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7067{
7068 /* Variables and constants defined in the __ea address space
7069 go into a special section named "._ea". */
7070 if (TREE_TYPE (decl) != error_mark_node
7071 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7072 {
7073 /* We might get called with string constants, but get_named_section
7074 doesn't like them as they are not DECLs. Also, we need to set
7075 flags in that case. */
7076 if (!DECL_P (decl))
7077 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7078
7079 return get_named_section (decl, "._ea", reloc);
7080 }
7081
7082 return default_elf_select_section (decl, reloc, align);
7083}
7084
7085/* Implement targetm.unique_section. */
7086static void
7087spu_unique_section (tree decl, int reloc)
7088{
7089 /* We don't support unique section names in the __ea address
7090 space for now. */
7091 if (TREE_TYPE (decl) != error_mark_node
7092 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7093 return;
7094
7095 default_unique_section (decl, reloc);
7096}
7097
56c7bfc2 7098/* Generate a constant or register which contains 2^SCALE. We assume
7099 the result is valid for MODE. Currently, MODE must be V4SFmode and
7100 SCALE must be SImode. */
7101rtx
7102spu_gen_exp2 (enum machine_mode mode, rtx scale)
7103{
7104 gcc_assert (mode == V4SFmode);
7105 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7106 if (GET_CODE (scale) != CONST_INT)
7107 {
7108 /* unsigned int exp = (127 + scale) << 23;
7109 __vector float m = (__vector float) spu_splats (exp); */
7110 rtx reg = force_reg (SImode, scale);
7111 rtx exp = gen_reg_rtx (SImode);
7112 rtx mul = gen_reg_rtx (mode);
7113 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7114 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7115 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7116 return mul;
7117 }
7118 else
7119 {
7120 HOST_WIDE_INT exp = 127 + INTVAL (scale);
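      /* Build the big-endian bytes of the IEEE single-precision value
         2^SCALE, i.e. the bit pattern (127 + SCALE) << 23: byte 0 holds
         exp >> 1 and byte 1 holds the remaining low bit of exp in its
         most significant bit position.  */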
7121 unsigned char arr[16];
7122 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7123 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7124 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7125 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7126 return array_to_constant (mode, arr);
7127 }
7128}
7129
9d98604b 7130/* After reload, just change the convert into a move instruction
7131 or a dead instruction. */
7132void
7133spu_split_convert (rtx ops[])
7134{
7135 if (REGNO (ops[0]) == REGNO (ops[1]))
7136 emit_note (NOTE_INSN_DELETED);
7137 else
7138 {
7139 /* Use TImode always as this might help hard reg copyprop. */
7140 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7141 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7142 emit_insn (gen_move_insn (op0, op1));
7143 }
7144}
7145
b3878a6c 7146void
4cbad5bb 7147spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7148{
7149 fprintf (file, "# profile\n");
7150 fprintf (file, "brsl $75, _mcount\n");
7151}
7152
c2233b46 7153#include "gt-spu.h"