gcc/config/spu/spu.c
7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
38#include "toplev.h"
39#include "ggc.h"
40#include "hashtab.h"
41#include "tm_p.h"
42#include "target.h"
43#include "target-def.h"
44#include "langhooks.h"
45#include "reload.h"
46#include "cfglayout.h"
47#include "sched-int.h"
48#include "params.h"
49#include "assert.h"
644459d0 50#include "machmode.h"
75a70cf9 51#include "gimple.h"
644459d0 52#include "tm-constrs.h"
d52fd16a 53#include "ddg.h"
5a976006 54#include "sbitmap.h"
55#include "timevar.h"
56#include "df.h"
6352eedf 57
58/* Builtin types, data and prototypes. */
c2233b46 59
60enum spu_builtin_type_index
61{
62 SPU_BTI_END_OF_PARAMS,
63
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
75
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
78
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
92
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
98
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
103
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
106
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
109
110 SPU_BTI_MAX
111};
112
113#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
123
124static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
125
6352eedf 126struct spu_builtin_range
127{
128 int low, high;
129};
130
131static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
144};
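/* For instance, SPU_BTI_U7 above is [0, 0x7f], the values that fit in a
   7-bit unsigned immediate field, and SPU_BTI_S10 is [-0x200, 0x1ff], a
   10-bit signed field.  */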
145
644459d0 146\f
147/* Target specific attribute specifications. */
148char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
149
150/* Prototypes and external defs. */
151static void spu_init_builtins (void);
e6925042 152static tree spu_builtin_decl (unsigned, bool);
644459d0 153static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 155static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 156static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
157 bool, addr_space_t);
644459d0 158static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
159static rtx get_pic_reg (void);
160static int need_to_save_reg (int regno, int saving);
161static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
162static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
163static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
164 rtx scratch);
165static void emit_nop_for_insn (rtx insn);
166static bool insn_clobbers_hbr (rtx insn);
167static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 168 int distance, sbitmap blocks);
5474166e 169static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
170 enum machine_mode dmode);
644459d0 171static rtx get_branch_target (rtx branch);
644459d0 172static void spu_machine_dependent_reorg (void);
173static int spu_sched_issue_rate (void);
174static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
175 int can_issue_more);
176static int get_pipe (rtx insn);
644459d0 177static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 178static void spu_sched_init_global (FILE *, int, int);
179static void spu_sched_init (FILE *, int, int);
180static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 181static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
182 int flags,
183 unsigned char *no_add_attrs);
184static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
185 int flags,
186 unsigned char *no_add_attrs);
187static int spu_naked_function_p (tree func);
fb80456a 188static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
189 const_tree type, unsigned char named);
644459d0 190static tree spu_build_builtin_va_list (void);
8a58ed0a 191static void spu_va_start (tree, rtx);
75a70cf9 192static tree spu_gimplify_va_arg_expr (tree valist, tree type,
193 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 194static int store_with_one_insn_p (rtx mem);
644459d0 195static int mem_is_padded_component_ref (rtx x);
9d98604b 196static int reg_aligned_for_addr (rtx x);
644459d0 197static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
198static void spu_asm_globalize_label (FILE * file, const char *name);
199static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 200 int *total, bool speed);
644459d0 201static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
202static void spu_init_libfuncs (void);
fb80456a 203static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 204static void fix_range (const char *);
69ced2d6 205static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 206static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 207static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
208 addr_space_t);
e99f512d 209static tree spu_builtin_mul_widen_even (tree);
210static tree spu_builtin_mul_widen_odd (tree);
a76866d3 211static tree spu_builtin_mask_for_load (void);
a28df51d 212static int spu_builtin_vectorization_cost (bool);
a9f1838b 213static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 214static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 215static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
216static enum machine_mode spu_addr_space_address_mode (addr_space_t);
217static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
218static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 219static int spu_sms_res_mii (struct ddg *g);
5a976006 220static void asm_file_start (void);
a08dfd55 221static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 222static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
223static void spu_unique_section (tree, int);
9d98604b 224static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 225static void spu_trampoline_init (rtx, tree, rtx);
644459d0 226
227extern const char *reg_names[];
644459d0 228
5474166e 229/* Which instruction set architecture to use. */
230int spu_arch;
231/* Which cpu are we tuning for. */
232int spu_tune;
233
5a976006 234/* The hardware requires 8 insns between a hint and the branch it
 235 affects. This variable describes how many rtl instructions the
 236 compiler needs to see before inserting a hint, and then the compiler
 237 will insert enough nops to make it at least 8 insns. The default is
 238 for the compiler to allow up to 2 nops to be emitted. The nops are
239 inserted in pairs, so we round down. */
240int spu_hint_dist = (8*4) - (2*4);
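/* Worked example of the default above: 8 insns * 4 bytes = 32 bytes must
   separate a hint from its branch; allowing 2 nops (8 bytes) of padding
   leaves spu_hint_dist = 24 bytes, i.e. at least 6 real insns must be
   seen before a hint is emitted.  */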
241
242/* Determines whether we run variable tracking in machine dependent
243 reorganization. */
244static int spu_flag_var_tracking;
245
644459d0 246enum spu_immediate {
247 SPU_NONE,
248 SPU_IL,
249 SPU_ILA,
250 SPU_ILH,
251 SPU_ILHU,
252 SPU_ORI,
253 SPU_ORHI,
254 SPU_ORBI,
99369027 255 SPU_IOHL
644459d0 256};
dea01258 257enum immediate_class
258{
259 IC_POOL, /* constant pool */
260 IC_IL1, /* one il* instruction */
261 IC_IL2, /* both ilhu and iohl instructions */
262 IC_IL1s, /* one il* instruction */
263 IC_IL2s, /* both ilhu and iohl instructions */
264 IC_FSMBI, /* the fsmbi instruction */
265 IC_CPAT, /* one of the c*d instructions */
5df189be 266 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 267};
644459d0 268
269static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
270static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 271static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
272static enum immediate_class classify_immediate (rtx op,
273 enum machine_mode mode);
644459d0 274
1bd43494 275static enum machine_mode spu_unwind_word_mode (void);
276
ea32e033 277static enum machine_mode
278spu_libgcc_cmp_return_mode (void);
279
280static enum machine_mode
281spu_libgcc_shift_count_mode (void);
6cf5579e 282
283/* Pointer mode for __ea references. */
284#define EAmode (spu_ea_model != 32 ? DImode : SImode)
285
ef51d1e3 286\f
287/* Table of machine attributes. */
288static const struct attribute_spec spu_attribute_table[] =
289{
290 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
291 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
292 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
293 { NULL, 0, 0, false, false, false, NULL }
294};
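/* For example, a function declared with __attribute__ ((naked)) receives
   no prologue code; see the early return via spu_naked_function_p in
   spu_expand_prologue below.  */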
644459d0 295\f
296/* TARGET overrides. */
297
6cf5579e 298#undef TARGET_ADDR_SPACE_POINTER_MODE
299#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
300
301#undef TARGET_ADDR_SPACE_ADDRESS_MODE
302#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
303
304#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
305#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
306 spu_addr_space_legitimate_address_p
307
308#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
309#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
310
311#undef TARGET_ADDR_SPACE_SUBSET_P
312#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
313
314#undef TARGET_ADDR_SPACE_CONVERT
315#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
316
644459d0 317#undef TARGET_INIT_BUILTINS
318#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 319#undef TARGET_BUILTIN_DECL
320#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 321
644459d0 322#undef TARGET_EXPAND_BUILTIN
323#define TARGET_EXPAND_BUILTIN spu_expand_builtin
324
1bd43494 325#undef TARGET_UNWIND_WORD_MODE
326#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 327
41e3a0c7 328#undef TARGET_LEGITIMIZE_ADDRESS
329#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
330
6cf5579e 331/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
332 and .quad for the debugger. When it is known that the assembler is fixed,
333 these can be removed. */
334#undef TARGET_ASM_UNALIGNED_SI_OP
335#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
336
337#undef TARGET_ASM_ALIGNED_DI_OP
338#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
339
644459d0 340/* The .8byte directive doesn't seem to work well for a 32 bit
341 architecture. */
342#undef TARGET_ASM_UNALIGNED_DI_OP
343#define TARGET_ASM_UNALIGNED_DI_OP NULL
344
345#undef TARGET_RTX_COSTS
346#define TARGET_RTX_COSTS spu_rtx_costs
347
348#undef TARGET_ADDRESS_COST
f529eb25 349#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 350
351#undef TARGET_SCHED_ISSUE_RATE
352#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
353
5a976006 354#undef TARGET_SCHED_INIT_GLOBAL
355#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
356
357#undef TARGET_SCHED_INIT
358#define TARGET_SCHED_INIT spu_sched_init
359
644459d0 360#undef TARGET_SCHED_VARIABLE_ISSUE
361#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
362
5a976006 363#undef TARGET_SCHED_REORDER
364#define TARGET_SCHED_REORDER spu_sched_reorder
365
366#undef TARGET_SCHED_REORDER2
367#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 368
369#undef TARGET_SCHED_ADJUST_COST
370#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
371
644459d0 372#undef TARGET_ATTRIBUTE_TABLE
373#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
374
375#undef TARGET_ASM_INTEGER
376#define TARGET_ASM_INTEGER spu_assemble_integer
377
378#undef TARGET_SCALAR_MODE_SUPPORTED_P
379#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
380
381#undef TARGET_VECTOR_MODE_SUPPORTED_P
382#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
383
384#undef TARGET_FUNCTION_OK_FOR_SIBCALL
385#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
386
387#undef TARGET_ASM_GLOBALIZE_LABEL
388#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
389
390#undef TARGET_PASS_BY_REFERENCE
391#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
392
393#undef TARGET_MUST_PASS_IN_STACK
394#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
395
396#undef TARGET_BUILD_BUILTIN_VA_LIST
397#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
398
8a58ed0a 399#undef TARGET_EXPAND_BUILTIN_VA_START
400#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
401
644459d0 402#undef TARGET_SETUP_INCOMING_VARARGS
403#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
404
405#undef TARGET_MACHINE_DEPENDENT_REORG
406#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
407
408#undef TARGET_GIMPLIFY_VA_ARG_EXPR
409#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
410
411#undef TARGET_DEFAULT_TARGET_FLAGS
412#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
413
414#undef TARGET_INIT_LIBFUNCS
415#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
416
417#undef TARGET_RETURN_IN_MEMORY
418#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
419
69ced2d6 420#undef TARGET_ENCODE_SECTION_INFO
421#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
422
e99f512d 423#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
424#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
425
426#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
427#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
428
a76866d3 429#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
430#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
431
a28df51d 432#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
433#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
434
0e87db76 435#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
436#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
437
a0515226 438#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
439#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
440
ea32e033 441#undef TARGET_LIBGCC_CMP_RETURN_MODE
442#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
443
444#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
445#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
446
d52fd16a 447#undef TARGET_SCHED_SMS_RES_MII
448#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
449
5a976006 450#undef TARGET_ASM_FILE_START
451#define TARGET_ASM_FILE_START asm_file_start
452
a08dfd55 453#undef TARGET_SECTION_TYPE_FLAGS
454#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
455
6cf5579e 456#undef TARGET_ASM_SELECT_SECTION
457#define TARGET_ASM_SELECT_SECTION spu_select_section
458
459#undef TARGET_ASM_UNIQUE_SECTION
460#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
461
fd50b071 462#undef TARGET_LEGITIMATE_ADDRESS_P
463#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
464
e96f2783 465#undef TARGET_TRAMPOLINE_INIT
466#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
467
644459d0 468struct gcc_target targetm = TARGET_INITIALIZER;
469
5df189be 470void
471spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
472{
5df189be 473 /* Override some of the default param values. With so many registers
474 larger values are better for these params. */
475 MAX_PENDING_LIST_LENGTH = 128;
476
477 /* With so many registers this is better on by default. */
478 flag_rename_registers = 1;
479}
480
644459d0 481/* Sometimes certain combinations of command options do not make sense
482 on a particular target machine. You can define a macro
483 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
484 executed once just after all the command options have been parsed. */
485void
486spu_override_options (void)
487{
14d408d9 488 /* Small loops will be completely unrolled at -O3. For SPU it is more important
489 to keep code small by default. */
490 if (!flag_unroll_loops && !flag_peel_loops
491 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
492 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
493
644459d0 494 flag_omit_frame_pointer = 1;
495
5a976006 496 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 497 if (align_functions < 8)
498 align_functions = 8;
c7b91b14 499
5a976006 500 spu_hint_dist = 8*4 - spu_max_nops*4;
501 if (spu_hint_dist < 0)
502 spu_hint_dist = 0;
503
c7b91b14 504 if (spu_fixed_range_string)
505 fix_range (spu_fixed_range_string);
5474166e 506
507 /* Determine processor architectural level. */
508 if (spu_arch_string)
509 {
510 if (strcmp (&spu_arch_string[0], "cell") == 0)
511 spu_arch = PROCESSOR_CELL;
512 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
513 spu_arch = PROCESSOR_CELLEDP;
514 else
515 error ("Unknown architecture '%s'", &spu_arch_string[0]);
516 }
517
518 /* Determine processor to tune for. */
519 if (spu_tune_string)
520 {
521 if (strcmp (&spu_tune_string[0], "cell") == 0)
522 spu_tune = PROCESSOR_CELL;
523 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
524 spu_tune = PROCESSOR_CELLEDP;
525 else
526 error ("Unknown architecture '%s'", &spu_tune_string[0]);
527 }
98bbec1e 528
13684256 529 /* Change defaults according to the processor architecture. */
530 if (spu_arch == PROCESSOR_CELLEDP)
531 {
532 /* If no command line option has been otherwise specified, change
533 the default to -mno-safe-hints on celledp -- only the original
534 Cell/B.E. processors require this workaround. */
535 if (!(target_flags_explicit & MASK_SAFE_HINTS))
536 target_flags &= ~MASK_SAFE_HINTS;
537 }
538
98bbec1e 539 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 540}
541\f
542/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
543 struct attribute_spec.handler. */
544
644459d0 545/* True if MODE is valid for the target. By "valid", we mean able to
546 be manipulated in non-trivial ways. In particular, this means all
547 the arithmetic is supported. */
548static bool
549spu_scalar_mode_supported_p (enum machine_mode mode)
550{
551 switch (mode)
552 {
553 case QImode:
554 case HImode:
555 case SImode:
556 case SFmode:
557 case DImode:
558 case TImode:
559 case DFmode:
560 return true;
561
562 default:
563 return false;
564 }
565}
566
567/* Similarly for vector modes. "Supported" here is less strict. At
 568 least some operations are supported; callers need to check optabs or builtins
569 for further details. */
570static bool
571spu_vector_mode_supported_p (enum machine_mode mode)
572{
573 switch (mode)
574 {
575 case V16QImode:
576 case V8HImode:
577 case V4SImode:
578 case V2DImode:
579 case V4SFmode:
580 case V2DFmode:
581 return true;
582
583 default:
584 return false;
585 }
586}
587
588/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
589 least significant bytes of the outer mode. This function returns
 590 TRUE for the SUBREGs where this is correct. */
591int
592valid_subreg (rtx op)
593{
594 enum machine_mode om = GET_MODE (op);
595 enum machine_mode im = GET_MODE (SUBREG_REG (op));
596 return om != VOIDmode && im != VOIDmode
597 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 598 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
599 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 600}
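/* For example, (subreg:SI (reg:QI)) is accepted here (both modes are at
   most 4 bytes wide), while the paradoxical (subreg:TI (reg:SI)) is
   rejected (4 bytes vs. 16).  */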
601
 602/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 603 and adjust the start offset. */
644459d0 604static rtx
605adjust_operand (rtx op, HOST_WIDE_INT * start)
606{
607 enum machine_mode mode;
608 int op_size;
38aca5eb 609 /* Strip any paradoxical SUBREG. */
610 if (GET_CODE (op) == SUBREG
611 && (GET_MODE_BITSIZE (GET_MODE (op))
612 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 613 {
614 if (start)
615 *start -=
616 GET_MODE_BITSIZE (GET_MODE (op)) -
617 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
618 op = SUBREG_REG (op);
619 }
 620 /* If it is smaller than SI, ensure it gets widened via a SUBREG. */
621 op_size = GET_MODE_BITSIZE (GET_MODE (op));
622 if (op_size < 32)
623 {
624 if (start)
625 *start += 32 - op_size;
626 op_size = 32;
627 }
628 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
629 mode = mode_for_size (op_size, MODE_INT, 0);
630 if (mode != GET_MODE (op))
631 op = gen_rtx_SUBREG (mode, op, 0);
632 return op;
633}
634
635void
636spu_expand_extv (rtx ops[], int unsignedp)
637{
9d98604b 638 rtx dst = ops[0], src = ops[1];
644459d0 639 HOST_WIDE_INT width = INTVAL (ops[2]);
640 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 641 HOST_WIDE_INT align_mask;
642 rtx s0, s1, mask, r0;
644459d0 643
9d98604b 644 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 645
9d98604b 646 if (MEM_P (src))
644459d0 647 {
9d98604b 648 /* First, determine if we need 1 TImode load or 2. We need only 1
649 if the bits being extracted do not cross the alignment boundary
650 as determined by the MEM and its address. */
651
652 align_mask = -MEM_ALIGN (src);
653 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 654 {
9d98604b 655 /* Alignment is sufficient for 1 load. */
656 s0 = gen_reg_rtx (TImode);
657 r0 = spu_expand_load (s0, 0, src, start / 8);
658 start &= 7;
659 if (r0)
660 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 661 }
9d98604b 662 else
663 {
664 /* Need 2 loads. */
665 s0 = gen_reg_rtx (TImode);
666 s1 = gen_reg_rtx (TImode);
667 r0 = spu_expand_load (s0, s1, src, start / 8);
668 start &= 7;
669
670 gcc_assert (start + width <= 128);
671 if (r0)
672 {
673 rtx r1 = gen_reg_rtx (SImode);
674 mask = gen_reg_rtx (TImode);
675 emit_move_insn (mask, GEN_INT (-1));
676 emit_insn (gen_rotqby_ti (s0, s0, r0));
677 emit_insn (gen_rotqby_ti (s1, s1, r0));
678 if (GET_CODE (r0) == CONST_INT)
679 r1 = GEN_INT (INTVAL (r0) & 15);
680 else
681 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
682 emit_insn (gen_shlqby_ti (mask, mask, r1));
683 emit_insn (gen_selb (s0, s1, s0, mask));
684 }
685 }
686
687 }
688 else if (GET_CODE (src) == SUBREG)
689 {
690 rtx r = SUBREG_REG (src);
691 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
692 s0 = gen_reg_rtx (TImode);
693 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
694 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
695 else
696 emit_move_insn (s0, src);
697 }
698 else
699 {
700 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
701 s0 = gen_reg_rtx (TImode);
702 emit_move_insn (s0, src);
644459d0 703 }
704
9d98604b 705 /* Now s0 is TImode and contains the bits to extract at start. */
706
707 if (start)
708 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
709
710 if (128 - width)
644459d0 711 {
9d98604b 712 tree c = build_int_cst (NULL_TREE, 128 - width);
713 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 714 }
715
9d98604b 716 emit_move_insn (dst, s0);
644459d0 717}
718
719void
720spu_expand_insv (rtx ops[])
721{
722 HOST_WIDE_INT width = INTVAL (ops[1]);
723 HOST_WIDE_INT start = INTVAL (ops[2]);
724 HOST_WIDE_INT maskbits;
725 enum machine_mode dst_mode, src_mode;
726 rtx dst = ops[0], src = ops[3];
727 int dst_size, src_size;
728 rtx mask;
729 rtx shift_reg;
730 int shift;
731
732
733 if (GET_CODE (ops[0]) == MEM)
734 dst = gen_reg_rtx (TImode);
735 else
736 dst = adjust_operand (dst, &start);
737 dst_mode = GET_MODE (dst);
738 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
739
740 if (CONSTANT_P (src))
741 {
742 enum machine_mode m =
743 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
744 src = force_reg (m, convert_to_mode (m, src, 0));
745 }
746 src = adjust_operand (src, 0);
747 src_mode = GET_MODE (src);
748 src_size = GET_MODE_BITSIZE (GET_MODE (src));
749
750 mask = gen_reg_rtx (dst_mode);
751 shift_reg = gen_reg_rtx (dst_mode);
752 shift = dst_size - start - width;
753
754 /* It's not safe to use subreg here because the compiler assumes
755 that the SUBREG_REG is right justified in the SUBREG. */
756 convert_move (shift_reg, src, 1);
757
758 if (shift > 0)
759 {
760 switch (dst_mode)
761 {
762 case SImode:
763 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
764 break;
765 case DImode:
766 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
767 break;
768 case TImode:
769 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
770 break;
771 default:
772 abort ();
773 }
774 }
775 else if (shift < 0)
776 abort ();
777
778 switch (dst_size)
779 {
780 case 32:
781 maskbits = (-1ll << (32 - width - start));
782 if (start)
783 maskbits += (1ll << (32 - start));
784 emit_move_insn (mask, GEN_INT (maskbits));
785 break;
786 case 64:
787 maskbits = (-1ll << (64 - width - start));
788 if (start)
789 maskbits += (1ll << (64 - start));
790 emit_move_insn (mask, GEN_INT (maskbits));
791 break;
792 case 128:
793 {
794 unsigned char arr[16];
795 int i = start / 8;
796 memset (arr, 0, sizeof (arr));
797 arr[i] = 0xff >> (start & 7);
798 for (i++; i <= (start + width - 1) / 8; i++)
799 arr[i] = 0xff;
800 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
801 emit_move_insn (mask, array_to_constant (TImode, arr));
802 }
803 break;
804 default:
805 abort ();
806 }
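  /* For example, the 32-bit case above with start == 8 and width == 8
     yields maskbits = (-1 << 16) + (1 << 24) == 0x00ff0000, a mask
     covering exactly the 8 bits that begin 8 bits below the MSB.  */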
807 if (GET_CODE (ops[0]) == MEM)
808 {
644459d0 809 rtx low = gen_reg_rtx (SImode);
644459d0 810 rtx rotl = gen_reg_rtx (SImode);
811 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 812 rtx addr;
813 rtx addr0;
814 rtx addr1;
644459d0 815 rtx mem;
816
9d98604b 817 addr = force_reg (Pmode, XEXP (ops[0], 0));
818 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 819 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
820 emit_insn (gen_negsi2 (rotl, low));
821 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
822 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 823 mem = change_address (ops[0], TImode, addr0);
644459d0 824 set_mem_alias_set (mem, 0);
825 emit_move_insn (dst, mem);
826 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 827 if (start + width > MEM_ALIGN (ops[0]))
828 {
829 rtx shl = gen_reg_rtx (SImode);
830 rtx mask1 = gen_reg_rtx (TImode);
831 rtx dst1 = gen_reg_rtx (TImode);
832 rtx mem1;
9d98604b 833 addr1 = plus_constant (addr, 16);
834 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 835 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
836 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 837 mem1 = change_address (ops[0], TImode, addr1);
644459d0 838 set_mem_alias_set (mem1, 0);
839 emit_move_insn (dst1, mem1);
840 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
841 emit_move_insn (mem1, dst1);
842 }
9d98604b 843 emit_move_insn (mem, dst);
644459d0 844 }
845 else
71cd778d 846 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 847}
848
849
850int
851spu_expand_block_move (rtx ops[])
852{
853 HOST_WIDE_INT bytes, align, offset;
854 rtx src, dst, sreg, dreg, target;
855 int i;
856 if (GET_CODE (ops[2]) != CONST_INT
857 || GET_CODE (ops[3]) != CONST_INT
48eb4342 858 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 859 return 0;
860
861 bytes = INTVAL (ops[2]);
862 align = INTVAL (ops[3]);
863
864 if (bytes <= 0)
865 return 1;
866
867 dst = ops[0];
868 src = ops[1];
869
870 if (align == 16)
871 {
872 for (offset = 0; offset + 16 <= bytes; offset += 16)
873 {
874 dst = adjust_address (ops[0], V16QImode, offset);
875 src = adjust_address (ops[1], V16QImode, offset);
876 emit_move_insn (dst, src);
877 }
878 if (offset < bytes)
879 {
880 rtx mask;
881 unsigned char arr[16] = { 0 };
882 for (i = 0; i < bytes - offset; i++)
883 arr[i] = 0xff;
884 dst = adjust_address (ops[0], V16QImode, offset);
885 src = adjust_address (ops[1], V16QImode, offset);
886 mask = gen_reg_rtx (V16QImode);
887 sreg = gen_reg_rtx (V16QImode);
888 dreg = gen_reg_rtx (V16QImode);
889 target = gen_reg_rtx (V16QImode);
890 emit_move_insn (mask, array_to_constant (V16QImode, arr));
891 emit_move_insn (dreg, dst);
892 emit_move_insn (sreg, src);
893 emit_insn (gen_selb (target, dreg, sreg, mask));
894 emit_move_insn (dst, target);
895 }
896 return 1;
897 }
898 return 0;
899}
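/* For example, a 20-byte copy with 16-byte alignment expands to one
   V16QImode move for bytes 0-15 followed by a selb whose mask has 0xff
   in its first 4 bytes, merging bytes 16-19 of the source into the
   destination quadword while preserving the rest.  */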
900
901enum spu_comp_code
902{ SPU_EQ, SPU_GT, SPU_GTU };
903
5474166e 904int spu_comp_icode[12][3] = {
905 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
906 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
907 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
908 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
909 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
910 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
911 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
912 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
913 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
914 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
915 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
916 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 917};
918
919/* Generate a compare for CODE. Return a brand-new rtx that represents
920 the result of the compare. GCC can figure this out too if we don't
 921 provide all variations of compares, but since GCC always wants to use
 922 WORD_MODE, we can generate better code in most cases by doing it
923 ourselves. */
924void
74f4459c 925spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 926{
927 int reverse_compare = 0;
928 int reverse_test = 0;
5d70b918 929 rtx compare_result, eq_result;
930 rtx comp_rtx, eq_rtx;
644459d0 931 enum machine_mode comp_mode;
932 enum machine_mode op_mode;
b9c74b4d 933 enum spu_comp_code scode, eq_code;
934 enum insn_code ior_code;
74f4459c 935 enum rtx_code code = GET_CODE (cmp);
936 rtx op0 = XEXP (cmp, 0);
937 rtx op1 = XEXP (cmp, 1);
644459d0 938 int index;
5d70b918 939 int eq_test = 0;
644459d0 940
74f4459c 941 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 942 and so on, to keep the constant in operand 1. */
74f4459c 943 if (GET_CODE (op1) == CONST_INT)
644459d0 944 {
74f4459c 945 HOST_WIDE_INT val = INTVAL (op1) - 1;
946 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 947 switch (code)
948 {
949 case GE:
74f4459c 950 op1 = GEN_INT (val);
644459d0 951 code = GT;
952 break;
953 case LT:
74f4459c 954 op1 = GEN_INT (val);
644459d0 955 code = LE;
956 break;
957 case GEU:
74f4459c 958 op1 = GEN_INT (val);
644459d0 959 code = GTU;
960 break;
961 case LTU:
74f4459c 962 op1 = GEN_INT (val);
644459d0 963 code = LEU;
964 break;
965 default:
966 break;
967 }
968 }
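  /* E.g. (x >= 10) becomes (x > 9) and (x < 10) becomes (x <= 9); the
     unsigned comparisons are adjusted the same way.  */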
969
5d70b918 970 comp_mode = SImode;
74f4459c 971 op_mode = GET_MODE (op0);
5d70b918 972
644459d0 973 switch (code)
974 {
975 case GE:
644459d0 976 scode = SPU_GT;
07027691 977 if (HONOR_NANS (op_mode))
5d70b918 978 {
979 reverse_compare = 0;
980 reverse_test = 0;
981 eq_test = 1;
982 eq_code = SPU_EQ;
983 }
984 else
985 {
986 reverse_compare = 1;
987 reverse_test = 1;
988 }
644459d0 989 break;
990 case LE:
644459d0 991 scode = SPU_GT;
07027691 992 if (HONOR_NANS (op_mode))
5d70b918 993 {
994 reverse_compare = 1;
995 reverse_test = 0;
996 eq_test = 1;
997 eq_code = SPU_EQ;
998 }
999 else
1000 {
1001 reverse_compare = 0;
1002 reverse_test = 1;
1003 }
644459d0 1004 break;
1005 case LT:
1006 reverse_compare = 1;
1007 reverse_test = 0;
1008 scode = SPU_GT;
1009 break;
1010 case GEU:
1011 reverse_compare = 1;
1012 reverse_test = 1;
1013 scode = SPU_GTU;
1014 break;
1015 case LEU:
1016 reverse_compare = 0;
1017 reverse_test = 1;
1018 scode = SPU_GTU;
1019 break;
1020 case LTU:
1021 reverse_compare = 1;
1022 reverse_test = 0;
1023 scode = SPU_GTU;
1024 break;
1025 case NE:
1026 reverse_compare = 0;
1027 reverse_test = 1;
1028 scode = SPU_EQ;
1029 break;
1030
1031 case EQ:
1032 scode = SPU_EQ;
1033 break;
1034 case GT:
1035 scode = SPU_GT;
1036 break;
1037 case GTU:
1038 scode = SPU_GTU;
1039 break;
1040 default:
1041 scode = SPU_EQ;
1042 break;
1043 }
1044
644459d0 1045 switch (op_mode)
1046 {
1047 case QImode:
1048 index = 0;
1049 comp_mode = QImode;
1050 break;
1051 case HImode:
1052 index = 1;
1053 comp_mode = HImode;
1054 break;
1055 case SImode:
1056 index = 2;
1057 break;
1058 case DImode:
1059 index = 3;
1060 break;
1061 case TImode:
1062 index = 4;
1063 break;
1064 case SFmode:
1065 index = 5;
1066 break;
1067 case DFmode:
1068 index = 6;
1069 break;
1070 case V16QImode:
5474166e 1071 index = 7;
1072 comp_mode = op_mode;
1073 break;
644459d0 1074 case V8HImode:
5474166e 1075 index = 8;
1076 comp_mode = op_mode;
1077 break;
644459d0 1078 case V4SImode:
5474166e 1079 index = 9;
1080 comp_mode = op_mode;
1081 break;
644459d0 1082 case V4SFmode:
5474166e 1083 index = 10;
1084 comp_mode = V4SImode;
1085 break;
644459d0 1086 case V2DFmode:
5474166e 1087 index = 11;
1088 comp_mode = V2DImode;
644459d0 1089 break;
5474166e 1090 case V2DImode:
644459d0 1091 default:
1092 abort ();
1093 }
1094
74f4459c 1095 if (GET_MODE (op1) == DFmode
07027691 1096 && (scode != SPU_GT && scode != SPU_EQ))
1097 abort ();
644459d0 1098
74f4459c 1099 if (is_set == 0 && op1 == const0_rtx
1100 && (GET_MODE (op0) == SImode
1101 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1102 {
1103 /* Don't need to set a register with the result when we are
1104 comparing against zero and branching. */
1105 reverse_test = !reverse_test;
74f4459c 1106 compare_result = op0;
644459d0 1107 }
1108 else
1109 {
1110 compare_result = gen_reg_rtx (comp_mode);
1111
1112 if (reverse_compare)
1113 {
74f4459c 1114 rtx t = op1;
1115 op1 = op0;
1116 op0 = t;
644459d0 1117 }
1118
1119 if (spu_comp_icode[index][scode] == 0)
1120 abort ();
1121
1122 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1123 (op0, op_mode))
1124 op0 = force_reg (op_mode, op0);
644459d0 1125 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1126 (op1, op_mode))
1127 op1 = force_reg (op_mode, op1);
644459d0 1128 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1129 op0, op1);
644459d0 1130 if (comp_rtx == 0)
1131 abort ();
1132 emit_insn (comp_rtx);
1133
5d70b918 1134 if (eq_test)
1135 {
1136 eq_result = gen_reg_rtx (comp_mode);
1137 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1138 op0, op1);
5d70b918 1139 if (eq_rtx == 0)
1140 abort ();
1141 emit_insn (eq_rtx);
1142 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1143 gcc_assert (ior_code != CODE_FOR_nothing);
1144 emit_insn (GEN_FCN (ior_code)
1145 (compare_result, compare_result, eq_result));
1146 }
644459d0 1147 }
1148
1149 if (is_set == 0)
1150 {
1151 rtx bcomp;
1152 rtx loc_ref;
1153
1154 /* We don't have branch on QI compare insns, so we convert the
1155 QI compare result to a HI result. */
1156 if (comp_mode == QImode)
1157 {
1158 rtx old_res = compare_result;
1159 compare_result = gen_reg_rtx (HImode);
1160 comp_mode = HImode;
1161 emit_insn (gen_extendqihi2 (compare_result, old_res));
1162 }
1163
1164 if (reverse_test)
1165 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1166 else
1167 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1168
74f4459c 1169 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1170 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1171 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1172 loc_ref, pc_rtx)));
1173 }
1174 else if (is_set == 2)
1175 {
74f4459c 1176 rtx target = operands[0];
644459d0 1177 int compare_size = GET_MODE_BITSIZE (comp_mode);
1178 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1179 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1180 rtx select_mask;
1181 rtx op_t = operands[2];
1182 rtx op_f = operands[3];
1183
1184 /* The result of the comparison can be SI, HI or QI mode. Create a
1185 mask based on that result. */
1186 if (target_size > compare_size)
1187 {
1188 select_mask = gen_reg_rtx (mode);
1189 emit_insn (gen_extend_compare (select_mask, compare_result));
1190 }
1191 else if (target_size < compare_size)
1192 select_mask =
1193 gen_rtx_SUBREG (mode, compare_result,
1194 (compare_size - target_size) / BITS_PER_UNIT);
1195 else if (comp_mode != mode)
1196 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1197 else
1198 select_mask = compare_result;
1199
1200 if (GET_MODE (target) != GET_MODE (op_t)
1201 || GET_MODE (target) != GET_MODE (op_f))
1202 abort ();
1203
1204 if (reverse_test)
1205 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1206 else
1207 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1208 }
1209 else
1210 {
74f4459c 1211 rtx target = operands[0];
644459d0 1212 if (reverse_test)
1213 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1214 gen_rtx_NOT (comp_mode, compare_result)));
1215 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1216 emit_insn (gen_extendhisi2 (target, compare_result));
1217 else if (GET_MODE (target) == SImode
1218 && GET_MODE (compare_result) == QImode)
1219 emit_insn (gen_extend_compare (target, compare_result));
1220 else
1221 emit_move_insn (target, compare_result);
1222 }
1223}
1224
1225HOST_WIDE_INT
1226const_double_to_hwint (rtx x)
1227{
1228 HOST_WIDE_INT val;
1229 REAL_VALUE_TYPE rv;
1230 if (GET_MODE (x) == SFmode)
1231 {
1232 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1233 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1234 }
1235 else if (GET_MODE (x) == DFmode)
1236 {
1237 long l[2];
1238 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1239 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1240 val = l[0];
1241 val = (val << 32) | (l[1] & 0xffffffff);
1242 }
1243 else
1244 abort ();
1245 return val;
1246}
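/* For instance, for the SFmode constant 1.0 this returns 0x3f800000, the
   IEEE single-precision image; for DFmode both 32-bit halves of the
   double image are packed into one HOST_WIDE_INT.  */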
1247
1248rtx
1249hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1250{
1251 long tv[2];
1252 REAL_VALUE_TYPE rv;
1253 gcc_assert (mode == SFmode || mode == DFmode);
1254
1255 if (mode == SFmode)
1256 tv[0] = (v << 32) >> 32;
1257 else if (mode == DFmode)
1258 {
1259 tv[1] = (v << 32) >> 32;
1260 tv[0] = v >> 32;
1261 }
1262 real_from_target (&rv, tv, mode);
1263 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1264}
1265
1266void
1267print_operand_address (FILE * file, register rtx addr)
1268{
1269 rtx reg;
1270 rtx offset;
1271
e04cf423 1272 if (GET_CODE (addr) == AND
1273 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1274 && INTVAL (XEXP (addr, 1)) == -16)
1275 addr = XEXP (addr, 0);
1276
644459d0 1277 switch (GET_CODE (addr))
1278 {
1279 case REG:
1280 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1281 break;
1282
1283 case PLUS:
1284 reg = XEXP (addr, 0);
1285 offset = XEXP (addr, 1);
1286 if (GET_CODE (offset) == REG)
1287 {
1288 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1289 reg_names[REGNO (offset)]);
1290 }
1291 else if (GET_CODE (offset) == CONST_INT)
1292 {
1293 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1294 INTVAL (offset), reg_names[REGNO (reg)]);
1295 }
1296 else
1297 abort ();
1298 break;
1299
1300 case CONST:
1301 case LABEL_REF:
1302 case SYMBOL_REF:
1303 case CONST_INT:
1304 output_addr_const (file, addr);
1305 break;
1306
1307 default:
1308 debug_rtx (addr);
1309 abort ();
1310 }
1311}
1312
1313void
1314print_operand (FILE * file, rtx x, int code)
1315{
1316 enum machine_mode mode = GET_MODE (x);
1317 HOST_WIDE_INT val;
1318 unsigned char arr[16];
1319 int xcode = GET_CODE (x);
dea01258 1320 int i, info;
644459d0 1321 if (GET_MODE (x) == VOIDmode)
1322 switch (code)
1323 {
644459d0 1324 case 'L': /* 128 bits, signed */
1325 case 'm': /* 128 bits, signed */
1326 case 'T': /* 128 bits, signed */
1327 case 't': /* 128 bits, signed */
1328 mode = TImode;
1329 break;
644459d0 1330 case 'K': /* 64 bits, signed */
1331 case 'k': /* 64 bits, signed */
1332 case 'D': /* 64 bits, signed */
1333 case 'd': /* 64 bits, signed */
1334 mode = DImode;
1335 break;
644459d0 1336 case 'J': /* 32 bits, signed */
1337 case 'j': /* 32 bits, signed */
1338 case 's': /* 32 bits, signed */
1339 case 'S': /* 32 bits, signed */
1340 mode = SImode;
1341 break;
1342 }
1343 switch (code)
1344 {
1345
1346 case 'j': /* 32 bits, signed */
1347 case 'k': /* 64 bits, signed */
1348 case 'm': /* 128 bits, signed */
1349 if (xcode == CONST_INT
1350 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1351 {
1352 gcc_assert (logical_immediate_p (x, mode));
1353 constant_to_array (mode, x, arr);
1354 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1355 val = trunc_int_for_mode (val, SImode);
1356 switch (which_logical_immediate (val))
1357 {
1358 case SPU_ORI:
1359 break;
1360 case SPU_ORHI:
1361 fprintf (file, "h");
1362 break;
1363 case SPU_ORBI:
1364 fprintf (file, "b");
1365 break;
1366 default:
1367 gcc_unreachable();
1368 }
1369 }
1370 else
1371 gcc_unreachable();
1372 return;
1373
1374 case 'J': /* 32 bits, signed */
1375 case 'K': /* 64 bits, signed */
1376 case 'L': /* 128 bits, signed */
1377 if (xcode == CONST_INT
1378 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1379 {
1380 gcc_assert (logical_immediate_p (x, mode)
1381 || iohl_immediate_p (x, mode));
1382 constant_to_array (mode, x, arr);
1383 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1384 val = trunc_int_for_mode (val, SImode);
1385 switch (which_logical_immediate (val))
1386 {
1387 case SPU_ORI:
1388 case SPU_IOHL:
1389 break;
1390 case SPU_ORHI:
1391 val = trunc_int_for_mode (val, HImode);
1392 break;
1393 case SPU_ORBI:
1394 val = trunc_int_for_mode (val, QImode);
1395 break;
1396 default:
1397 gcc_unreachable();
1398 }
1399 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1400 }
1401 else
1402 gcc_unreachable();
1403 return;
1404
1405 case 't': /* 128 bits, signed */
1406 case 'd': /* 64 bits, signed */
1407 case 's': /* 32 bits, signed */
dea01258 1408 if (CONSTANT_P (x))
644459d0 1409 {
dea01258 1410 enum immediate_class c = classify_immediate (x, mode);
1411 switch (c)
1412 {
1413 case IC_IL1:
1414 constant_to_array (mode, x, arr);
1415 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1416 val = trunc_int_for_mode (val, SImode);
1417 switch (which_immediate_load (val))
1418 {
1419 case SPU_IL:
1420 break;
1421 case SPU_ILA:
1422 fprintf (file, "a");
1423 break;
1424 case SPU_ILH:
1425 fprintf (file, "h");
1426 break;
1427 case SPU_ILHU:
1428 fprintf (file, "hu");
1429 break;
1430 default:
1431 gcc_unreachable ();
1432 }
1433 break;
1434 case IC_CPAT:
1435 constant_to_array (mode, x, arr);
1436 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1437 if (info == 1)
1438 fprintf (file, "b");
1439 else if (info == 2)
1440 fprintf (file, "h");
1441 else if (info == 4)
1442 fprintf (file, "w");
1443 else if (info == 8)
1444 fprintf (file, "d");
1445 break;
1446 case IC_IL1s:
1447 if (xcode == CONST_VECTOR)
1448 {
1449 x = CONST_VECTOR_ELT (x, 0);
1450 xcode = GET_CODE (x);
1451 }
1452 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1453 fprintf (file, "a");
1454 else if (xcode == HIGH)
1455 fprintf (file, "hu");
1456 break;
1457 case IC_FSMBI:
5df189be 1458 case IC_FSMBI2:
dea01258 1459 case IC_IL2:
1460 case IC_IL2s:
1461 case IC_POOL:
1462 abort ();
1463 }
644459d0 1464 }
644459d0 1465 else
1466 gcc_unreachable ();
1467 return;
1468
1469 case 'T': /* 128 bits, signed */
1470 case 'D': /* 64 bits, signed */
1471 case 'S': /* 32 bits, signed */
dea01258 1472 if (CONSTANT_P (x))
644459d0 1473 {
dea01258 1474 enum immediate_class c = classify_immediate (x, mode);
1475 switch (c)
644459d0 1476 {
dea01258 1477 case IC_IL1:
1478 constant_to_array (mode, x, arr);
1479 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1480 val = trunc_int_for_mode (val, SImode);
1481 switch (which_immediate_load (val))
1482 {
1483 case SPU_IL:
1484 case SPU_ILA:
1485 break;
1486 case SPU_ILH:
1487 case SPU_ILHU:
1488 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1489 break;
1490 default:
1491 gcc_unreachable ();
1492 }
1493 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1494 break;
1495 case IC_FSMBI:
1496 constant_to_array (mode, x, arr);
1497 val = 0;
1498 for (i = 0; i < 16; i++)
1499 {
1500 val <<= 1;
1501 val |= arr[i] & 1;
1502 }
1503 print_operand (file, GEN_INT (val), 0);
1504 break;
1505 case IC_CPAT:
1506 constant_to_array (mode, x, arr);
1507 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1509 break;
dea01258 1510 case IC_IL1s:
dea01258 1511 if (xcode == HIGH)
5df189be 1512 x = XEXP (x, 0);
1513 if (GET_CODE (x) == CONST_VECTOR)
1514 x = CONST_VECTOR_ELT (x, 0);
1515 output_addr_const (file, x);
1516 if (xcode == HIGH)
1517 fprintf (file, "@h");
644459d0 1518 break;
dea01258 1519 case IC_IL2:
1520 case IC_IL2s:
5df189be 1521 case IC_FSMBI2:
dea01258 1522 case IC_POOL:
1523 abort ();
644459d0 1524 }
c8befdb9 1525 }
644459d0 1526 else
1527 gcc_unreachable ();
1528 return;
1529
644459d0 1530 case 'C':
1531 if (xcode == CONST_INT)
1532 {
 1533 /* Only the 4 least significant bits are relevant for generating
1534 control word instructions. */
1535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1536 return;
1537 }
1538 break;
1539
1540 case 'M': /* print code for c*d */
1541 if (GET_CODE (x) == CONST_INT)
1542 switch (INTVAL (x))
1543 {
1544 case 1:
1545 fprintf (file, "b");
1546 break;
1547 case 2:
1548 fprintf (file, "h");
1549 break;
1550 case 4:
1551 fprintf (file, "w");
1552 break;
1553 case 8:
1554 fprintf (file, "d");
1555 break;
1556 default:
1557 gcc_unreachable();
1558 }
1559 else
1560 gcc_unreachable();
1561 return;
1562
1563 case 'N': /* Negate the operand */
1564 if (xcode == CONST_INT)
1565 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1566 else if (xcode == CONST_VECTOR)
1567 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1568 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1569 return;
1570
1571 case 'I': /* enable/disable interrupts */
1572 if (xcode == CONST_INT)
1573 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1574 return;
1575
1576 case 'b': /* branch modifiers */
1577 if (xcode == REG)
1578 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1579 else if (COMPARISON_P (x))
1580 fprintf (file, "%s", xcode == NE ? "n" : "");
1581 return;
1582
1583 case 'i': /* indirect call */
1584 if (xcode == MEM)
1585 {
1586 if (GET_CODE (XEXP (x, 0)) == REG)
1587 /* Used in indirect function calls. */
1588 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1589 else
1590 output_address (XEXP (x, 0));
1591 }
1592 return;
1593
1594 case 'p': /* load/store */
1595 if (xcode == MEM)
1596 {
1597 x = XEXP (x, 0);
1598 xcode = GET_CODE (x);
1599 }
e04cf423 1600 if (xcode == AND)
1601 {
1602 x = XEXP (x, 0);
1603 xcode = GET_CODE (x);
1604 }
644459d0 1605 if (xcode == REG)
1606 fprintf (file, "d");
1607 else if (xcode == CONST_INT)
1608 fprintf (file, "a");
1609 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1610 fprintf (file, "r");
1611 else if (xcode == PLUS || xcode == LO_SUM)
1612 {
1613 if (GET_CODE (XEXP (x, 1)) == REG)
1614 fprintf (file, "x");
1615 else
1616 fprintf (file, "d");
1617 }
1618 return;
1619
5df189be 1620 case 'e':
1621 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1622 val &= 0x7;
1623 output_addr_const (file, GEN_INT (val));
1624 return;
1625
1626 case 'f':
1627 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1628 val &= 0x1f;
1629 output_addr_const (file, GEN_INT (val));
1630 return;
1631
1632 case 'g':
1633 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1634 val &= 0x3f;
1635 output_addr_const (file, GEN_INT (val));
1636 return;
1637
1638 case 'h':
1639 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1640 val = (val >> 3) & 0x1f;
1641 output_addr_const (file, GEN_INT (val));
1642 return;
1643
1644 case 'E':
1645 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1646 val = -val;
1647 val &= 0x7;
1648 output_addr_const (file, GEN_INT (val));
1649 return;
1650
1651 case 'F':
1652 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1653 val = -val;
1654 val &= 0x1f;
1655 output_addr_const (file, GEN_INT (val));
1656 return;
1657
1658 case 'G':
1659 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1660 val = -val;
1661 val &= 0x3f;
1662 output_addr_const (file, GEN_INT (val));
1663 return;
1664
1665 case 'H':
1666 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1667 val = -(val & -8ll);
1668 val = (val >> 3) & 0x1f;
1669 output_addr_const (file, GEN_INT (val));
1670 return;
1671
56c7bfc2 1672 case 'v':
1673 case 'w':
1674 constant_to_array (mode, x, arr);
1675 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1676 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1677 return;
1678
644459d0 1679 case 0:
1680 if (xcode == REG)
1681 fprintf (file, "%s", reg_names[REGNO (x)]);
1682 else if (xcode == MEM)
1683 output_address (XEXP (x, 0));
1684 else if (xcode == CONST_VECTOR)
dea01258 1685 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1686 else
1687 output_addr_const (file, x);
1688 return;
1689
f6a0d06f 1690 /* unused letters
56c7bfc2 1691 o qr u yz
5df189be 1692 AB OPQR UVWXYZ */
644459d0 1693 default:
1694 output_operand_lossage ("invalid %%xn code");
1695 }
1696 gcc_unreachable ();
1697}
1698
1699extern char call_used_regs[];
644459d0 1700
1701/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1702 caller saved register. For leaf functions it is more efficient to
1703 use a volatile register because we won't need to save and restore the
1704 pic register. This routine is only valid after register allocation
1705 is completed, so we can pick an unused register. */
1706static rtx
1707get_pic_reg (void)
1708{
1709 rtx pic_reg = pic_offset_table_rtx;
1710 if (!reload_completed && !reload_in_progress)
1711 abort ();
87a95921 1712 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1713 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1714 return pic_reg;
1715}
1716
5df189be 1717/* Split constant addresses to handle cases that are too large.
1718 Add in the pic register when in PIC mode.
1719 Split immediates that require more than 1 instruction. */
dea01258 1720int
1721spu_split_immediate (rtx * ops)
c8befdb9 1722{
dea01258 1723 enum machine_mode mode = GET_MODE (ops[0]);
1724 enum immediate_class c = classify_immediate (ops[1], mode);
1725
1726 switch (c)
c8befdb9 1727 {
dea01258 1728 case IC_IL2:
1729 {
1730 unsigned char arrhi[16];
1731 unsigned char arrlo[16];
98bbec1e 1732 rtx to, temp, hi, lo;
dea01258 1733 int i;
98bbec1e 1734 enum machine_mode imode = mode;
1735 /* We need to do reals as ints because the constant used in the
1736 IOR might not be a legitimate real constant. */
1737 imode = int_mode_for_mode (mode);
dea01258 1738 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1739 if (imode != mode)
1740 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1741 else
1742 to = ops[0];
1743 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1744 for (i = 0; i < 16; i += 4)
1745 {
1746 arrlo[i + 2] = arrhi[i + 2];
1747 arrlo[i + 3] = arrhi[i + 3];
1748 arrlo[i + 0] = arrlo[i + 1] = 0;
1749 arrhi[i + 2] = arrhi[i + 3] = 0;
1750 }
98bbec1e 1751 hi = array_to_constant (imode, arrhi);
1752 lo = array_to_constant (imode, arrlo);
1753 emit_move_insn (temp, hi);
dea01258 1754 emit_insn (gen_rtx_SET
98bbec1e 1755 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1756 return 1;
1757 }
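      /* Roughly, an SImode constant such as 0x12345678 ends up as an ilhu
         of the high halfword (0x1234) followed by an iohl of the low
         halfword (0x5678), combined by the IOR above.  */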
5df189be 1758 case IC_FSMBI2:
1759 {
1760 unsigned char arr_fsmbi[16];
1761 unsigned char arr_andbi[16];
1762 rtx to, reg_fsmbi, reg_and;
1763 int i;
1764 enum machine_mode imode = mode;
1765 /* We need to do reals as ints because the constant used in the
1766 * AND might not be a legitimate real constant. */
1767 imode = int_mode_for_mode (mode);
1768 constant_to_array (mode, ops[1], arr_fsmbi);
1769 if (imode != mode)
1770 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1771 else
1772 to = ops[0];
1773 for (i = 0; i < 16; i++)
1774 if (arr_fsmbi[i] != 0)
1775 {
1776 arr_andbi[0] = arr_fsmbi[i];
1777 arr_fsmbi[i] = 0xff;
1778 }
1779 for (i = 1; i < 16; i++)
1780 arr_andbi[i] = arr_andbi[0];
1781 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1782 reg_and = array_to_constant (imode, arr_andbi);
1783 emit_move_insn (to, reg_fsmbi);
1784 emit_insn (gen_rtx_SET
1785 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1786 return 1;
1787 }
dea01258 1788 case IC_POOL:
1789 if (reload_in_progress || reload_completed)
1790 {
1791 rtx mem = force_const_mem (mode, ops[1]);
1792 if (TARGET_LARGE_MEM)
1793 {
1794 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1795 emit_move_insn (addr, XEXP (mem, 0));
1796 mem = replace_equiv_address (mem, addr);
1797 }
1798 emit_move_insn (ops[0], mem);
1799 return 1;
1800 }
1801 break;
1802 case IC_IL1s:
1803 case IC_IL2s:
1804 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1805 {
1806 if (c == IC_IL2s)
1807 {
5df189be 1808 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1809 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1810 }
1811 else if (flag_pic)
1812 emit_insn (gen_pic (ops[0], ops[1]));
1813 if (flag_pic)
1814 {
1815 rtx pic_reg = get_pic_reg ();
1816 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1817 crtl->uses_pic_offset_table = 1;
dea01258 1818 }
1819 return flag_pic || c == IC_IL2s;
1820 }
1821 break;
1822 case IC_IL1:
1823 case IC_FSMBI:
1824 case IC_CPAT:
1825 break;
c8befdb9 1826 }
dea01258 1827 return 0;
c8befdb9 1828}
1829
644459d0 1830/* SAVING is TRUE when we are generating the actual load and store
1831 instructions for REGNO. When determining the size of the stack
 1832 needed for saving registers we must allocate enough space for the
1833 worst case, because we don't always have the information early enough
1834 to not allocate it. But we can at least eliminate the actual loads
1835 and stores during the prologue/epilogue. */
1836static int
1837need_to_save_reg (int regno, int saving)
1838{
3072d30e 1839 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1840 return 1;
1841 if (flag_pic
1842 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1843 && (!saving || crtl->uses_pic_offset_table)
644459d0 1844 && (!saving
3072d30e 1845 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1846 return 1;
1847 return 0;
1848}
1849
1850/* This function is only correct starting with local register
1851 allocation */
1852int
1853spu_saved_regs_size (void)
1854{
1855 int reg_save_size = 0;
1856 int regno;
1857
1858 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1859 if (need_to_save_reg (regno, 0))
1860 reg_save_size += 0x10;
1861 return reg_save_size;
1862}
1863
1864static rtx
1865frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1866{
1867 rtx reg = gen_rtx_REG (V4SImode, regno);
1868 rtx mem =
1869 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1870 return emit_insn (gen_movv4si (mem, reg));
1871}
1872
1873static rtx
1874frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1875{
1876 rtx reg = gen_rtx_REG (V4SImode, regno);
1877 rtx mem =
1878 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1879 return emit_insn (gen_movv4si (reg, mem));
1880}
1881
1882/* This happens after reload, so we need to expand it. */
1883static rtx
1884frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1885{
1886 rtx insn;
1887 if (satisfies_constraint_K (GEN_INT (imm)))
1888 {
1889 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1890 }
1891 else
1892 {
3072d30e 1893 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1894 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1895 if (REGNO (src) == REGNO (scratch))
1896 abort ();
1897 }
644459d0 1898 return insn;
1899}
1900
1901/* Return nonzero if this function is known to have a null epilogue. */
1902
1903int
1904direct_return (void)
1905{
1906 if (reload_completed)
1907 {
1908 if (cfun->static_chain_decl == 0
1909 && (spu_saved_regs_size ()
1910 + get_frame_size ()
abe32cce 1911 + crtl->outgoing_args_size
1912 + crtl->args.pretend_args_size == 0)
644459d0 1913 && current_function_is_leaf)
1914 return 1;
1915 }
1916 return 0;
1917}
1918
1919/*
1920 The stack frame looks like this:
1921 +-------------+
1922 | incoming |
a8e019fa 1923 | args |
1924 AP -> +-------------+
644459d0 1925 | $lr save |
1926 +-------------+
1927 prev SP | back chain |
1928 +-------------+
1929 | var args |
abe32cce 1930 | reg save | crtl->args.pretend_args_size bytes
644459d0 1931 +-------------+
1932 | ... |
1933 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1934 FP -> +-------------+
644459d0 1935 | ... |
a8e019fa 1936 | vars | get_frame_size() bytes
1937 HFP -> +-------------+
644459d0 1938 | ... |
1939 | outgoing |
abe32cce 1940 | args | crtl->outgoing_args_size bytes
644459d0 1941 +-------------+
1942 | $lr of next |
1943 | frame |
1944 +-------------+
a8e019fa 1945 | back chain |
1946 SP -> +-------------+
644459d0 1947
1948*/
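
/* Illustrative example (not part of the original source; all numbers are
   hypothetical): for a non-leaf function with 32 bytes of locals, two
   call-saved registers live (2 * 0x10 bytes from spu_saved_regs_size)
   and no pretend or outgoing argument area, spu_expand_prologue below
   computes

     total_size = 32 + 32 + 0 + 0 + STACK_POINTER_OFFSET

   It stores $lr at sp+16 before adjusting the stack, and because
   total_size <= 2000 it writes the back chain at what becomes the new
   $sp before the stack pointer is moved. */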
1949void
1950spu_expand_prologue (void)
1951{
1952 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1953 HOST_WIDE_INT total_size;
1954 HOST_WIDE_INT saved_regs_size;
1955 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1956 rtx scratch_reg_0, scratch_reg_1;
1957 rtx insn, real;
1958
644459d0 1959 if (flag_pic && optimize == 0)
18d50ae6 1960 crtl->uses_pic_offset_table = 1;
644459d0 1961
1962 if (spu_naked_function_p (current_function_decl))
1963 return;
1964
1965 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1966 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1967
1968 saved_regs_size = spu_saved_regs_size ();
1969 total_size = size + saved_regs_size
abe32cce 1970 + crtl->outgoing_args_size
1971 + crtl->args.pretend_args_size;
644459d0 1972
1973 if (!current_function_is_leaf
18d50ae6 1974 || cfun->calls_alloca || total_size > 0)
644459d0 1975 total_size += STACK_POINTER_OFFSET;
1976
1977 /* Save this first because code after this might use the link
1978 register as a scratch register. */
1979 if (!current_function_is_leaf)
1980 {
1981 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1982 RTX_FRAME_RELATED_P (insn) = 1;
1983 }
1984
1985 if (total_size > 0)
1986 {
abe32cce 1987 offset = -crtl->args.pretend_args_size;
644459d0 1988 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1989 if (need_to_save_reg (regno, 1))
1990 {
1991 offset -= 16;
1992 insn = frame_emit_store (regno, sp_reg, offset);
1993 RTX_FRAME_RELATED_P (insn) = 1;
1994 }
1995 }
1996
18d50ae6 1997 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1998 {
1999 rtx pic_reg = get_pic_reg ();
2000 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2001 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2002 }
2003
2004 if (total_size > 0)
2005 {
2006 if (flag_stack_check)
2007 {
d819917f 2008 /* We compare against total_size-1 because
644459d0 2009 ($sp >= total_size) <=> ($sp > total_size-1) */
2010 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2011 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2012 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2013 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2014 {
2015 emit_move_insn (scratch_v4si, size_v4si);
2016 size_v4si = scratch_v4si;
2017 }
2018 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2019 emit_insn (gen_vec_extractv4si
2020 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2021 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2022 }
2023
2024 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2025 the value of the previous $sp because we save it as the back
2026 chain. */
2027 if (total_size <= 2000)
2028 {
2029 /* In this case we save the back chain first. */
2030 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2031 insn =
2032 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2033 }
644459d0 2034 else
2035 {
2036 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2037 insn =
2038 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2039 }
2040 RTX_FRAME_RELATED_P (insn) = 1;
2041 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2042 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2043
2044 if (total_size > 2000)
2045 {
2046 /* Save the back chain ptr */
2047 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2048 }
2049
2050 if (frame_pointer_needed)
2051 {
2052 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2053 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2054 + crtl->outgoing_args_size;
644459d0 2055 /* Set the new frame_pointer */
d8dfeb55 2056 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2057 RTX_FRAME_RELATED_P (insn) = 1;
2058 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2060 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2061 }
2062 }
2063
644459d0 2064}
2065
2066void
2067spu_expand_epilogue (bool sibcall_p)
2068{
2069 int size = get_frame_size (), offset, regno;
2070 HOST_WIDE_INT saved_regs_size, total_size;
2071 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2072 rtx jump, scratch_reg_0;
2073
644459d0 2074 if (spu_naked_function_p (current_function_decl))
2075 return;
2076
2077 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2078
2079 saved_regs_size = spu_saved_regs_size ();
2080 total_size = size + saved_regs_size
abe32cce 2081 + crtl->outgoing_args_size
2082 + crtl->args.pretend_args_size;
644459d0 2083
2084 if (!current_function_is_leaf
18d50ae6 2085 || cfun->calls_alloca || total_size > 0)
644459d0 2086 total_size += STACK_POINTER_OFFSET;
2087
2088 if (total_size > 0)
2089 {
18d50ae6 2090 if (cfun->calls_alloca)
644459d0 2091 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2092 else
2093 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2094
2095
2096 if (saved_regs_size > 0)
2097 {
abe32cce 2098 offset = -crtl->args.pretend_args_size;
644459d0 2099 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2100 if (need_to_save_reg (regno, 1))
2101 {
2102 offset -= 0x10;
2103 frame_emit_load (regno, sp_reg, offset);
2104 }
2105 }
2106 }
2107
2108 if (!current_function_is_leaf)
2109 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2110
2111 if (!sibcall_p)
2112 {
18b42941 2113 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2114 jump = emit_jump_insn (gen__return ());
2115 emit_barrier_after (jump);
2116 }
2117
644459d0 2118}
2119
2120rtx
2121spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2122{
2123 if (count != 0)
2124 return 0;
2125 /* This is inefficient because it ends up copying to a save-register
2126 which then gets saved even though $lr has already been saved. But
2127 it does generate better code for leaf functions and we don't need
2128 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2129 used for __builtin_return_address anyway, so maybe we don't care if
2130 it's inefficient. */
2131 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2132}
2133\f
2134
2135/* Given VAL, generate a constant appropriate for MODE.
2136 If MODE is a vector mode, every element will be VAL.
2137 For TImode, VAL will be zero extended to 128 bits. */
2138rtx
2139spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2140{
2141 rtx inner;
2142 rtvec v;
2143 int units, i;
2144
2145 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2146 || GET_MODE_CLASS (mode) == MODE_FLOAT
2147 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2148 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2149
2150 if (GET_MODE_CLASS (mode) == MODE_INT)
2151 return immed_double_const (val, 0, mode);
2152
2153 /* val is the bit representation of the float */
2154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2155 return hwint_to_const_double (mode, val);
2156
2157 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2158 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2159 else
2160 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2161
2162 units = GET_MODE_NUNITS (mode);
2163
2164 v = rtvec_alloc (units);
2165
2166 for (i = 0; i < units; ++i)
2167 RTVEC_ELT (v, i) = inner;
2168
2169 return gen_rtx_CONST_VECTOR (mode, v);
2170}
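
/* Illustrative examples (not part of the original source): following the
   code above, spu_const (V4SImode, 1) returns a CONST_VECTOR whose four
   SImode elements are all (const_int 1), and spu_const (SFmode,
   0x3f800000) treats VAL as the bit pattern of the float and so yields a
   CONST_DOUBLE equal to 1.0f. */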
644459d0 2171
5474166e 2172/* Create a MODE vector constant from 4 ints. */
2173rtx
2174spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2175{
2176 unsigned char arr[16];
2177 arr[0] = (a >> 24) & 0xff;
2178 arr[1] = (a >> 16) & 0xff;
2179 arr[2] = (a >> 8) & 0xff;
2180 arr[3] = (a >> 0) & 0xff;
2181 arr[4] = (b >> 24) & 0xff;
2182 arr[5] = (b >> 16) & 0xff;
2183 arr[6] = (b >> 8) & 0xff;
2184 arr[7] = (b >> 0) & 0xff;
2185 arr[8] = (c >> 24) & 0xff;
2186 arr[9] = (c >> 16) & 0xff;
2187 arr[10] = (c >> 8) & 0xff;
2188 arr[11] = (c >> 0) & 0xff;
2189 arr[12] = (d >> 24) & 0xff;
2190 arr[13] = (d >> 16) & 0xff;
2191 arr[14] = (d >> 8) & 0xff;
2192 arr[15] = (d >> 0) & 0xff;
2193 return array_to_constant(mode, arr);
2194}
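
/* Illustrative example (not part of the original source):
   spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                        0x08090a0b, 0x0c0d0e0f)
   fills the local byte array with 0x00 .. 0x0f in order (each argument
   contributes its bytes most-significant first) and returns the
   corresponding constant via array_to_constant. */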
5a976006 2195\f
2196/* branch hint stuff */
5474166e 2197
644459d0 2198/* An array of these is used to propagate hints to predecessor blocks. */
2199struct spu_bb_info
2200{
5a976006 2201 rtx prop_jump; /* propagated from another block */
2202 int bb_index; /* the original block. */
644459d0 2203};
5a976006 2204static struct spu_bb_info *spu_bb_info;
644459d0 2205
5a976006 2206#define STOP_HINT_P(INSN) \
2207 (GET_CODE(INSN) == CALL_INSN \
2208 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2209 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2210
2211/* 1 when RTX is a hinted branch or its target. We keep track of
2212 what has been hinted so the safe-hint code can test it easily. */
2213#define HINTED_P(RTX) \
2214 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2215
2216/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2217#define SCHED_ON_EVEN_P(RTX) \
2218 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2219
2220/* Emit a nop for INSN such that the two will dual issue. This assumes
2221 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2222 We check for TImode to handle a MULTI1 insn which has dual issued its
2223 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2224 ADDR_VEC insns. */
2225static void
2226emit_nop_for_insn (rtx insn)
644459d0 2227{
5a976006 2228 int p;
2229 rtx new_insn;
2230 p = get_pipe (insn);
2231 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2232 new_insn = emit_insn_after (gen_lnop (), insn);
2233 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2234 {
5a976006 2235 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2236 PUT_MODE (new_insn, TImode);
2237 PUT_MODE (insn, VOIDmode);
2238 }
2239 else
2240 new_insn = emit_insn_after (gen_lnop (), insn);
2241 recog_memoized (new_insn);
2242}
2243
2244/* Insert nops in basic blocks to meet dual issue alignment
2245 requirements. Also make sure hbrp and hint instructions are at least
2246 one cycle apart, possibly inserting a nop. */
2247static void
2248pad_bb(void)
2249{
2250 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2251 int length;
2252 int addr;
2253
2254 /* This sets up INSN_ADDRESSES. */
2255 shorten_branches (get_insns ());
2256
2257 /* Keep track of length added by nops. */
2258 length = 0;
2259
2260 prev_insn = 0;
2261 insn = get_insns ();
2262 if (!active_insn_p (insn))
2263 insn = next_active_insn (insn);
2264 for (; insn; insn = next_insn)
2265 {
2266 next_insn = next_active_insn (insn);
2267 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2268 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2269 {
5a976006 2270 if (hbr_insn)
2271 {
2272 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2273 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2274 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2275 || (a1 - a0 == 4))
2276 {
2277 prev_insn = emit_insn_before (gen_lnop (), insn);
2278 PUT_MODE (prev_insn, GET_MODE (insn));
2279 PUT_MODE (insn, TImode);
2280 length += 4;
2281 }
2282 }
2283 hbr_insn = insn;
2284 }
2285 if (INSN_CODE (insn) == CODE_FOR_blockage)
2286 {
2287 if (GET_MODE (insn) == TImode)
2288 PUT_MODE (next_insn, TImode);
2289 insn = next_insn;
2290 next_insn = next_active_insn (insn);
2291 }
2292 addr = INSN_ADDRESSES (INSN_UID (insn));
2293 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2294 {
2295 if (((addr + length) & 7) != 0)
2296 {
2297 emit_nop_for_insn (prev_insn);
2298 length += 4;
2299 }
644459d0 2300 }
5a976006 2301 else if (GET_MODE (insn) == TImode
2302 && ((next_insn && GET_MODE (next_insn) != TImode)
2303 || get_attr_type (insn) == TYPE_MULTI0)
2304 && ((addr + length) & 7) != 0)
2305 {
2306 /* prev_insn will always be set because the first insn is
2307 always 8-byte aligned. */
2308 emit_nop_for_insn (prev_insn);
2309 length += 4;
2310 }
2311 prev_insn = insn;
644459d0 2312 }
644459d0 2313}
2314
5a976006 2315\f
2316/* Routines for branch hints. */
2317
644459d0 2318static void
5a976006 2319spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2320 int distance, sbitmap blocks)
644459d0 2321{
5a976006 2322 rtx branch_label = 0;
2323 rtx hint;
2324 rtx insn;
2325 rtx table;
644459d0 2326
2327 if (before == 0 || branch == 0 || target == 0)
2328 return;
2329
5a976006 2330 /* While scheduling we require hints to be no further than 600 bytes
 2331 away, so we need to enforce that here too. */
644459d0 2332 if (distance > 600)
2333 return;
2334
5a976006 2335 /* If BEFORE is a basic block note, emit the hint after the note. */
2336 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2337 before = NEXT_INSN (before);
644459d0 2338
2339 branch_label = gen_label_rtx ();
2340 LABEL_NUSES (branch_label)++;
2341 LABEL_PRESERVE_P (branch_label) = 1;
2342 insn = emit_label_before (branch_label, branch);
2343 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2344 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2345
2346 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2347 recog_memoized (hint);
2348 HINTED_P (branch) = 1;
644459d0 2349
5a976006 2350 if (GET_CODE (target) == LABEL_REF)
2351 HINTED_P (XEXP (target, 0)) = 1;
2352 else if (tablejump_p (branch, 0, &table))
644459d0 2353 {
5a976006 2354 rtvec vec;
2355 int j;
2356 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2357 vec = XVEC (PATTERN (table), 0);
2358 else
2359 vec = XVEC (PATTERN (table), 1);
2360 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2361 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2362 }
5a976006 2363
2364 if (distance >= 588)
644459d0 2365 {
5a976006 2366 /* Make sure the hint isn't scheduled any earlier than this point,
 2367 which could make it too far for the branch offset to fit. */
2368 recog_memoized (emit_insn_before (gen_blockage (), hint));
2369 }
2370 else if (distance <= 8 * 4)
2371 {
2372 /* To guarantee at least 8 insns between the hint and branch we
2373 insert nops. */
2374 int d;
2375 for (d = distance; d < 8 * 4; d += 4)
2376 {
2377 insn =
2378 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2379 recog_memoized (insn);
2380 }
2381
2382 /* Make sure any nops inserted aren't scheduled before the hint. */
2383 recog_memoized (emit_insn_after (gen_blockage (), hint));
2384
2385 /* Make sure any nops inserted aren't scheduled after the call. */
2386 if (CALL_P (branch) && distance < 8 * 4)
2387 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2388 }
644459d0 2389}
2390
2391/* Returns 0 if we don't want a hint for this branch. Otherwise return
2392 the rtx for the branch target. */
2393static rtx
2394get_branch_target (rtx branch)
2395{
2396 if (GET_CODE (branch) == JUMP_INSN)
2397 {
2398 rtx set, src;
2399
2400 /* Return statements */
2401 if (GET_CODE (PATTERN (branch)) == RETURN)
2402 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2403
2404 /* jump table */
2405 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2406 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2407 return 0;
2408
fcc31b99 2409 /* ASM GOTOs. */
604157f6 2410 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2411 return NULL;
2412
644459d0 2413 set = single_set (branch);
2414 src = SET_SRC (set);
2415 if (GET_CODE (SET_DEST (set)) != PC)
2416 abort ();
2417
2418 if (GET_CODE (src) == IF_THEN_ELSE)
2419 {
2420 rtx lab = 0;
2421 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2422 if (note)
2423 {
2424 /* If the more probable case is not a fall through, then
2425 try a branch hint. */
2426 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2427 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2428 && GET_CODE (XEXP (src, 1)) != PC)
2429 lab = XEXP (src, 1);
2430 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2431 && GET_CODE (XEXP (src, 2)) != PC)
2432 lab = XEXP (src, 2);
2433 }
2434 if (lab)
2435 {
2436 if (GET_CODE (lab) == RETURN)
2437 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2438 return lab;
2439 }
2440 return 0;
2441 }
2442
2443 return src;
2444 }
2445 else if (GET_CODE (branch) == CALL_INSN)
2446 {
2447 rtx call;
2448 /* All of our call patterns are in a PARALLEL and the CALL is
2449 the first pattern in the PARALLEL. */
2450 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2451 abort ();
2452 call = XVECEXP (PATTERN (branch), 0, 0);
2453 if (GET_CODE (call) == SET)
2454 call = SET_SRC (call);
2455 if (GET_CODE (call) != CALL)
2456 abort ();
2457 return XEXP (XEXP (call, 0), 0);
2458 }
2459 return 0;
2460}
2461
5a976006 2462/* The special $hbr register is used to prevent the insn scheduler from
2463 moving hbr insns across instructions which invalidate them. It
2464 should only be used in a clobber, and this function searches for
2465 insns which clobber it. */
2466static bool
2467insn_clobbers_hbr (rtx insn)
2468{
2469 if (INSN_P (insn)
2470 && GET_CODE (PATTERN (insn)) == PARALLEL)
2471 {
2472 rtx parallel = PATTERN (insn);
2473 rtx clobber;
2474 int j;
2475 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2476 {
2477 clobber = XVECEXP (parallel, 0, j);
2478 if (GET_CODE (clobber) == CLOBBER
2479 && GET_CODE (XEXP (clobber, 0)) == REG
2480 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2481 return 1;
2482 }
2483 }
2484 return 0;
2485}
2486
2487/* Search up to 32 insns starting at FIRST:
2488 - at any kind of hinted branch, just return
2489 - at any unconditional branch in the first 15 insns, just return
2490 - at a call or indirect branch, after the first 15 insns, force it to
2491 an even address and return
2492 - at any unconditional branch, after the first 15 insns, force it to
2493 an even address.
 2494 At the end of the search, insert an hbrp within 4 insns of FIRST,
2495 and an hbrp within 16 instructions of FIRST.
2496 */
644459d0 2497static void
5a976006 2498insert_hbrp_for_ilb_runout (rtx first)
644459d0 2499{
5a976006 2500 rtx insn, before_4 = 0, before_16 = 0;
2501 int addr = 0, length, first_addr = -1;
2502 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2503 int insert_lnop_after = 0;
2504 for (insn = first; insn; insn = NEXT_INSN (insn))
2505 if (INSN_P (insn))
2506 {
2507 if (first_addr == -1)
2508 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2509 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2510 length = get_attr_length (insn);
2511
2512 if (before_4 == 0 && addr + length >= 4 * 4)
2513 before_4 = insn;
2514 /* We test for 14 instructions because the first hbrp will add
2515 up to 2 instructions. */
2516 if (before_16 == 0 && addr + length >= 14 * 4)
2517 before_16 = insn;
2518
2519 if (INSN_CODE (insn) == CODE_FOR_hbr)
2520 {
2521 /* Make sure an hbrp is at least 2 cycles away from a hint.
2522 Insert an lnop after the hbrp when necessary. */
2523 if (before_4 == 0 && addr > 0)
2524 {
2525 before_4 = insn;
2526 insert_lnop_after |= 1;
2527 }
2528 else if (before_4 && addr <= 4 * 4)
2529 insert_lnop_after |= 1;
2530 if (before_16 == 0 && addr > 10 * 4)
2531 {
2532 before_16 = insn;
2533 insert_lnop_after |= 2;
2534 }
2535 else if (before_16 && addr <= 14 * 4)
2536 insert_lnop_after |= 2;
2537 }
644459d0 2538
5a976006 2539 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2540 {
2541 if (addr < hbrp_addr0)
2542 hbrp_addr0 = addr;
2543 else if (addr < hbrp_addr1)
2544 hbrp_addr1 = addr;
2545 }
644459d0 2546
5a976006 2547 if (CALL_P (insn) || JUMP_P (insn))
2548 {
2549 if (HINTED_P (insn))
2550 return;
2551
2552 /* Any branch after the first 15 insns should be on an even
2553 address to avoid a special case branch. There might be
2554 some nops and/or hbrps inserted, so we test after 10
2555 insns. */
2556 if (addr > 10 * 4)
2557 SCHED_ON_EVEN_P (insn) = 1;
2558 }
644459d0 2559
5a976006 2560 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2561 return;
2562
2563
2564 if (addr + length >= 32 * 4)
644459d0 2565 {
5a976006 2566 gcc_assert (before_4 && before_16);
2567 if (hbrp_addr0 > 4 * 4)
644459d0 2568 {
5a976006 2569 insn =
2570 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2571 recog_memoized (insn);
2572 INSN_ADDRESSES_NEW (insn,
2573 INSN_ADDRESSES (INSN_UID (before_4)));
2574 PUT_MODE (insn, GET_MODE (before_4));
2575 PUT_MODE (before_4, TImode);
2576 if (insert_lnop_after & 1)
644459d0 2577 {
5a976006 2578 insn = emit_insn_before (gen_lnop (), before_4);
2579 recog_memoized (insn);
2580 INSN_ADDRESSES_NEW (insn,
2581 INSN_ADDRESSES (INSN_UID (before_4)));
2582 PUT_MODE (insn, TImode);
644459d0 2583 }
644459d0 2584 }
5a976006 2585 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2586 && hbrp_addr1 > 16 * 4)
644459d0 2587 {
5a976006 2588 insn =
2589 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2590 recog_memoized (insn);
2591 INSN_ADDRESSES_NEW (insn,
2592 INSN_ADDRESSES (INSN_UID (before_16)));
2593 PUT_MODE (insn, GET_MODE (before_16));
2594 PUT_MODE (before_16, TImode);
2595 if (insert_lnop_after & 2)
644459d0 2596 {
5a976006 2597 insn = emit_insn_before (gen_lnop (), before_16);
2598 recog_memoized (insn);
2599 INSN_ADDRESSES_NEW (insn,
2600 INSN_ADDRESSES (INSN_UID
2601 (before_16)));
2602 PUT_MODE (insn, TImode);
644459d0 2603 }
2604 }
5a976006 2605 return;
644459d0 2606 }
644459d0 2607 }
5a976006 2608 else if (BARRIER_P (insn))
2609 return;
644459d0 2610
644459d0 2611}
5a976006 2612
2613/* The SPU might hang when it executes 48 inline instructions after a
2614 hinted branch jumps to its hinted target. The beginning of a
2615 function and the return from a call might have been hinted, and must
2616 be handled as well. To prevent a hang we insert 2 hbrps. The first
2617 should be within 6 insns of the branch target. The second should be
2618 within 22 insns of the branch target. When determining if hbrps are
 2619 necessary, we look for only 32 inline instructions, because up to
 2620 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2621 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2622static void
5a976006 2623insert_hbrp (void)
644459d0 2624{
5a976006 2625 rtx insn;
2626 if (TARGET_SAFE_HINTS)
644459d0 2627 {
5a976006 2628 shorten_branches (get_insns ());
2629 /* Insert hbrp at beginning of function */
2630 insn = next_active_insn (get_insns ());
2631 if (insn)
2632 insert_hbrp_for_ilb_runout (insn);
2633 /* Insert hbrp after hinted targets. */
2634 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2635 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2636 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2637 }
644459d0 2638}
2639
5a976006 2640static int in_spu_reorg;
2641
2642/* Insert branch hints. There are no branch optimizations after this
2643 pass, so it's safe to set our branch hints now. */
644459d0 2644static void
5a976006 2645spu_machine_dependent_reorg (void)
644459d0 2646{
5a976006 2647 sbitmap blocks;
2648 basic_block bb;
2649 rtx branch, insn;
2650 rtx branch_target = 0;
2651 int branch_addr = 0, insn_addr, required_dist = 0;
2652 int i;
2653 unsigned int j;
644459d0 2654
5a976006 2655 if (!TARGET_BRANCH_HINTS || optimize == 0)
2656 {
2657 /* We still do it for unoptimized code because an external
2658 function might have hinted a call or return. */
2659 insert_hbrp ();
2660 pad_bb ();
2661 return;
2662 }
644459d0 2663
5a976006 2664 blocks = sbitmap_alloc (last_basic_block);
2665 sbitmap_zero (blocks);
644459d0 2666
5a976006 2667 in_spu_reorg = 1;
2668 compute_bb_for_insn ();
2669
2670 compact_blocks ();
2671
2672 spu_bb_info =
2673 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2674 sizeof (struct spu_bb_info));
2675
2676 /* We need exact insn addresses and lengths. */
2677 shorten_branches (get_insns ());
2678
2679 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2680 {
5a976006 2681 bb = BASIC_BLOCK (i);
2682 branch = 0;
2683 if (spu_bb_info[i].prop_jump)
644459d0 2684 {
5a976006 2685 branch = spu_bb_info[i].prop_jump;
2686 branch_target = get_branch_target (branch);
2687 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2688 required_dist = spu_hint_dist;
2689 }
2690 /* Search from end of a block to beginning. In this loop, find
 2691 jumps which need a branch hint and emit the hint only when:
2692 - it's an indirect branch and we're at the insn which sets
2693 the register
2694 - we're at an insn that will invalidate the hint. e.g., a
2695 call, another hint insn, inline asm that clobbers $hbr, and
2696 some inlined operations (divmodsi4). Don't consider jumps
2697 because they are only at the end of a block and are
2698 considered when we are deciding whether to propagate
2699 - we're getting too far away from the branch. The hbr insns
2700 only have a signed 10 bit offset
2701 We go back as far as possible so the branch will be considered
2702 for propagation when we get to the beginning of the block. */
2703 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2704 {
2705 if (INSN_P (insn))
2706 {
2707 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2708 if (branch
2709 && ((GET_CODE (branch_target) == REG
2710 && set_of (branch_target, insn) != NULL_RTX)
2711 || insn_clobbers_hbr (insn)
2712 || branch_addr - insn_addr > 600))
2713 {
2714 rtx next = NEXT_INSN (insn);
2715 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2716 if (insn != BB_END (bb)
2717 && branch_addr - next_addr >= required_dist)
2718 {
2719 if (dump_file)
2720 fprintf (dump_file,
2721 "hint for %i in block %i before %i\n",
2722 INSN_UID (branch), bb->index,
2723 INSN_UID (next));
2724 spu_emit_branch_hint (next, branch, branch_target,
2725 branch_addr - next_addr, blocks);
2726 }
2727 branch = 0;
2728 }
2729
2730 /* JUMP_P will only be true at the end of a block. When
2731 branch is already set it means we've previously decided
2732 to propagate a hint for that branch into this block. */
2733 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2734 {
2735 branch = 0;
2736 if ((branch_target = get_branch_target (insn)))
2737 {
2738 branch = insn;
2739 branch_addr = insn_addr;
2740 required_dist = spu_hint_dist;
2741 }
2742 }
2743 }
2744 if (insn == BB_HEAD (bb))
2745 break;
2746 }
2747
2748 if (branch)
2749 {
2750 /* If we haven't emitted a hint for this branch yet, it might
2751 be profitable to emit it in one of the predecessor blocks,
2752 especially for loops. */
2753 rtx bbend;
2754 basic_block prev = 0, prop = 0, prev2 = 0;
2755 int loop_exit = 0, simple_loop = 0;
2756 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2757
2758 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2759 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2760 prev = EDGE_PRED (bb, j)->src;
2761 else
2762 prev2 = EDGE_PRED (bb, j)->src;
2763
2764 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2765 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2766 loop_exit = 1;
2767 else if (EDGE_SUCC (bb, j)->dest == bb)
2768 simple_loop = 1;
2769
2770 /* If this branch is a loop exit then propagate to previous
2771 fallthru block. This catches the cases when it is a simple
2772 loop or when there is an initial branch into the loop. */
2773 if (prev && (loop_exit || simple_loop)
2774 && prev->loop_depth <= bb->loop_depth)
2775 prop = prev;
2776
 2777 /* If there is only one adjacent predecessor, don't propagate
2778 outside this loop. This loop_depth test isn't perfect, but
2779 I'm not sure the loop_father member is valid at this point. */
2780 else if (prev && single_pred_p (bb)
2781 && prev->loop_depth == bb->loop_depth)
2782 prop = prev;
2783
2784 /* If this is the JOIN block of a simple IF-THEN then
 2785 propagate the hint to the HEADER block. */
2786 else if (prev && prev2
2787 && EDGE_COUNT (bb->preds) == 2
2788 && EDGE_COUNT (prev->preds) == 1
2789 && EDGE_PRED (prev, 0)->src == prev2
2790 && prev2->loop_depth == bb->loop_depth
2791 && GET_CODE (branch_target) != REG)
2792 prop = prev;
2793
2794 /* Don't propagate when:
2795 - this is a simple loop and the hint would be too far
2796 - this is not a simple loop and there are 16 insns in
2797 this block already
2798 - the predecessor block ends in a branch that will be
2799 hinted
2800 - the predecessor block ends in an insn that invalidates
2801 the hint */
2802 if (prop
2803 && prop->index >= 0
2804 && (bbend = BB_END (prop))
2805 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2806 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2807 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2808 {
2809 if (dump_file)
2810 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2811 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2812 bb->index, prop->index, bb->loop_depth,
2813 INSN_UID (branch), loop_exit, simple_loop,
2814 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2815
2816 spu_bb_info[prop->index].prop_jump = branch;
2817 spu_bb_info[prop->index].bb_index = i;
2818 }
2819 else if (branch_addr - next_addr >= required_dist)
2820 {
2821 if (dump_file)
2822 fprintf (dump_file, "hint for %i in block %i before %i\n",
2823 INSN_UID (branch), bb->index,
2824 INSN_UID (NEXT_INSN (insn)));
2825 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2826 branch_addr - next_addr, blocks);
2827 }
2828 branch = 0;
644459d0 2829 }
644459d0 2830 }
5a976006 2831 free (spu_bb_info);
644459d0 2832
5a976006 2833 if (!sbitmap_empty_p (blocks))
2834 find_many_sub_basic_blocks (blocks);
2835
2836 /* We have to schedule to make sure alignment is ok. */
2837 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2838
2839 /* The hints need to be scheduled, so call it again. */
2840 schedule_insns ();
2841
2842 insert_hbrp ();
2843
2844 pad_bb ();
2845
8f1d58ad 2846 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2847 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2848 {
2849 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2850 between its branch label and the branch. We don't move the
2851 label because GCC expects it at the beginning of the block. */
2852 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2853 rtx label_ref = XVECEXP (unspec, 0, 0);
2854 rtx label = XEXP (label_ref, 0);
2855 rtx branch;
2856 int offset = 0;
2857 for (branch = NEXT_INSN (label);
2858 !JUMP_P (branch) && !CALL_P (branch);
2859 branch = NEXT_INSN (branch))
2860 if (NONJUMP_INSN_P (branch))
2861 offset += get_attr_length (branch);
2862 if (offset > 0)
2863 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2864 }
5a976006 2865
2866 if (spu_flag_var_tracking)
644459d0 2867 {
5a976006 2868 df_analyze ();
2869 timevar_push (TV_VAR_TRACKING);
2870 variable_tracking_main ();
2871 timevar_pop (TV_VAR_TRACKING);
2872 df_finish_pass (false);
644459d0 2873 }
5a976006 2874
2875 free_bb_for_insn ();
2876
2877 in_spu_reorg = 0;
644459d0 2878}
2879\f
2880
2881/* Insn scheduling routines, primarily for dual issue. */
2882static int
2883spu_sched_issue_rate (void)
2884{
2885 return 2;
2886}
2887
2888static int
5a976006 2889uses_ls_unit(rtx insn)
644459d0 2890{
5a976006 2891 rtx set = single_set (insn);
2892 if (set != 0
2893 && (GET_CODE (SET_DEST (set)) == MEM
2894 || GET_CODE (SET_SRC (set)) == MEM))
2895 return 1;
2896 return 0;
644459d0 2897}
2898
2899static int
2900get_pipe (rtx insn)
2901{
2902 enum attr_type t;
2903 /* Handle inline asm */
2904 if (INSN_CODE (insn) == -1)
2905 return -1;
2906 t = get_attr_type (insn);
2907 switch (t)
2908 {
2909 case TYPE_CONVERT:
2910 return -2;
2911 case TYPE_MULTI0:
2912 return -1;
2913
2914 case TYPE_FX2:
2915 case TYPE_FX3:
2916 case TYPE_SPR:
2917 case TYPE_NOP:
2918 case TYPE_FXB:
2919 case TYPE_FPD:
2920 case TYPE_FP6:
2921 case TYPE_FP7:
644459d0 2922 return 0;
2923
2924 case TYPE_LNOP:
2925 case TYPE_SHUF:
2926 case TYPE_LOAD:
2927 case TYPE_STORE:
2928 case TYPE_BR:
2929 case TYPE_MULTI1:
2930 case TYPE_HBR:
5a976006 2931 case TYPE_IPREFETCH:
644459d0 2932 return 1;
2933 default:
2934 abort ();
2935 }
2936}
2937
5a976006 2938
2939/* haifa-sched.c has a static variable that keeps track of the current
2940 cycle. It is passed to spu_sched_reorder, and we record it here for
2941 use by spu_sched_variable_issue. It won't be accurate if the
 2942 scheduler updates its clock_var between the two calls. */
2943static int clock_var;
2944
2945/* This is used to keep track of insn alignment. Set to 0 at the
2946 beginning of each block and increased by the "length" attr of each
2947 insn scheduled. */
2948static int spu_sched_length;
2949
2950/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2951 ready list appropriately in spu_sched_reorder(). */
2952static int pipe0_clock;
2953static int pipe1_clock;
2954
2955static int prev_clock_var;
2956
2957static int prev_priority;
2958
2959/* The SPU needs to load the next ilb sometime during the execution of
2960 the previous ilb. There is a potential conflict if every cycle has a
2961 load or store. To avoid the conflict we make sure the load/store
2962 unit is free for at least one cycle during the execution of insns in
2963 the previous ilb. */
2964static int spu_ls_first;
2965static int prev_ls_clock;
2966
2967static void
2968spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2969 int max_ready ATTRIBUTE_UNUSED)
2970{
2971 spu_sched_length = 0;
2972}
2973
2974static void
2975spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2976 int max_ready ATTRIBUTE_UNUSED)
2977{
2978 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2979 {
2980 /* When any block might be at least 8-byte aligned, assume they
2981 will all be at least 8-byte aligned to make sure dual issue
2982 works out correctly. */
2983 spu_sched_length = 0;
2984 }
2985 spu_ls_first = INT_MAX;
2986 clock_var = -1;
2987 prev_ls_clock = -1;
2988 pipe0_clock = -1;
2989 pipe1_clock = -1;
2990 prev_clock_var = -1;
2991 prev_priority = -1;
2992}
2993
644459d0 2994static int
5a976006 2995spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2996 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2997{
5a976006 2998 int len;
2999 int p;
644459d0 3000 if (GET_CODE (PATTERN (insn)) == USE
3001 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3002 || (len = get_attr_length (insn)) == 0)
3003 return more;
3004
3005 spu_sched_length += len;
3006
3007 /* Reset on inline asm */
3008 if (INSN_CODE (insn) == -1)
3009 {
3010 spu_ls_first = INT_MAX;
3011 pipe0_clock = -1;
3012 pipe1_clock = -1;
3013 return 0;
3014 }
3015 p = get_pipe (insn);
3016 if (p == 0)
3017 pipe0_clock = clock_var;
3018 else
3019 pipe1_clock = clock_var;
3020
3021 if (in_spu_reorg)
3022 {
3023 if (clock_var - prev_ls_clock > 1
3024 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3025 spu_ls_first = INT_MAX;
3026 if (uses_ls_unit (insn))
3027 {
3028 if (spu_ls_first == INT_MAX)
3029 spu_ls_first = spu_sched_length;
3030 prev_ls_clock = clock_var;
3031 }
3032
3033 /* The scheduler hasn't inserted the nop, but we will later on.
3034 Include those nops in spu_sched_length. */
3035 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3036 spu_sched_length += 4;
3037 prev_clock_var = clock_var;
3038
3039 /* more is -1 when called from spu_sched_reorder for new insns
3040 that don't have INSN_PRIORITY */
3041 if (more >= 0)
3042 prev_priority = INSN_PRIORITY (insn);
3043 }
3044
 3045 /* Always try issuing more insns. spu_sched_reorder will decide
3046 when the cycle should be advanced. */
3047 return 1;
3048}
3049
3050/* This function is called for both TARGET_SCHED_REORDER and
3051 TARGET_SCHED_REORDER2. */
3052static int
3053spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3054 rtx *ready, int *nreadyp, int clock)
3055{
3056 int i, nready = *nreadyp;
3057 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3058 rtx insn;
3059
3060 clock_var = clock;
3061
3062 if (nready <= 0 || pipe1_clock >= clock)
3063 return 0;
3064
3065 /* Find any rtl insns that don't generate assembly insns and schedule
3066 them first. */
3067 for (i = nready - 1; i >= 0; i--)
3068 {
3069 insn = ready[i];
3070 if (INSN_CODE (insn) == -1
3071 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3072 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3073 {
3074 ready[i] = ready[nready - 1];
3075 ready[nready - 1] = insn;
3076 return 1;
3077 }
3078 }
3079
3080 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3081 for (i = 0; i < nready; i++)
3082 if (INSN_CODE (ready[i]) != -1)
3083 {
3084 insn = ready[i];
3085 switch (get_attr_type (insn))
3086 {
3087 default:
3088 case TYPE_MULTI0:
3089 case TYPE_CONVERT:
3090 case TYPE_FX2:
3091 case TYPE_FX3:
3092 case TYPE_SPR:
3093 case TYPE_NOP:
3094 case TYPE_FXB:
3095 case TYPE_FPD:
3096 case TYPE_FP6:
3097 case TYPE_FP7:
3098 pipe_0 = i;
3099 break;
3100 case TYPE_LOAD:
3101 case TYPE_STORE:
3102 pipe_ls = i;
3103 case TYPE_LNOP:
3104 case TYPE_SHUF:
3105 case TYPE_BR:
3106 case TYPE_MULTI1:
3107 case TYPE_HBR:
3108 pipe_1 = i;
3109 break;
3110 case TYPE_IPREFETCH:
3111 pipe_hbrp = i;
3112 break;
3113 }
3114 }
3115
3116 /* In the first scheduling phase, schedule loads and stores together
3117 to increase the chance they will get merged during postreload CSE. */
3118 if (!reload_completed && pipe_ls >= 0)
3119 {
3120 insn = ready[pipe_ls];
3121 ready[pipe_ls] = ready[nready - 1];
3122 ready[nready - 1] = insn;
3123 return 1;
3124 }
3125
3126 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3127 if (pipe_hbrp >= 0)
3128 pipe_1 = pipe_hbrp;
3129
3130 /* When we have loads/stores in every cycle of the last 15 insns and
3131 we are about to schedule another load/store, emit an hbrp insn
3132 instead. */
3133 if (in_spu_reorg
3134 && spu_sched_length - spu_ls_first >= 4 * 15
3135 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3136 {
3137 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3138 recog_memoized (insn);
3139 if (pipe0_clock < clock)
3140 PUT_MODE (insn, TImode);
3141 spu_sched_variable_issue (file, verbose, insn, -1);
3142 return 0;
3143 }
3144
3145 /* In general, we want to emit nops to increase dual issue, but dual
3146 issue isn't faster when one of the insns could be scheduled later
 3147 without affecting the critical path. We look at INSN_PRIORITY to
3148 make a good guess, but it isn't perfect so -mdual-nops=n can be
 3149 used to adjust it. */
3150 if (in_spu_reorg && spu_dual_nops < 10)
3151 {
 3152 /* When we are at an even address and we are not issuing nops to
3153 improve scheduling then we need to advance the cycle. */
3154 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3155 && (spu_dual_nops == 0
3156 || (pipe_1 != -1
3157 && prev_priority >
3158 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3159 return 0;
3160
3161 /* When at an odd address, schedule the highest priority insn
3162 without considering pipeline. */
3163 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3164 && (spu_dual_nops == 0
3165 || (prev_priority >
3166 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3167 return 1;
3168 }
3169
3170
 3171 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3172 pipe0 insn in the ready list, schedule it. */
3173 if (pipe0_clock < clock && pipe_0 >= 0)
3174 schedule_i = pipe_0;
3175
3176 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3177 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3178 else
3179 schedule_i = pipe_1;
3180
3181 if (schedule_i > -1)
3182 {
3183 insn = ready[schedule_i];
3184 ready[schedule_i] = ready[nready - 1];
3185 ready[nready - 1] = insn;
3186 return 1;
3187 }
3188 return 0;
644459d0 3189}
3190
3191/* INSN is dependent on DEP_INSN. */
3192static int
5a976006 3193spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3194{
5a976006 3195 rtx set;
3196
3197 /* The blockage pattern is used to prevent instructions from being
3198 moved across it and has no cost. */
3199 if (INSN_CODE (insn) == CODE_FOR_blockage
3200 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3201 return 0;
3202
9d98604b 3203 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3204 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3205 return 0;
3206
3207 /* Make sure hbrps are spread out. */
3208 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3209 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3210 return 8;
3211
3212 /* Make sure hints and hbrps are 2 cycles apart. */
3213 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3214 || INSN_CODE (insn) == CODE_FOR_hbr)
3215 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3216 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3217 return 2;
3218
3219 /* An hbrp has no real dependency on other insns. */
3220 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3221 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3222 return 0;
3223
3224 /* Assuming that it is unlikely an argument register will be used in
3225 the first cycle of the called function, we reduce the cost for
3226 slightly better scheduling of dep_insn. When not hinted, the
3227 mispredicted branch would hide the cost as well. */
3228 if (CALL_P (insn))
3229 {
3230 rtx target = get_branch_target (insn);
3231 if (GET_CODE (target) != REG || !set_of (target, insn))
3232 return cost - 2;
3233 return cost;
3234 }
3235
3236 /* And when returning from a function, let's assume the return values
3237 are completed sooner too. */
3238 if (CALL_P (dep_insn))
644459d0 3239 return cost - 2;
5a976006 3240
 3241 /* Make sure an instruction that loads from the back chain is scheduled
3242 away from the return instruction so a hint is more likely to get
3243 issued. */
3244 if (INSN_CODE (insn) == CODE_FOR__return
3245 && (set = single_set (dep_insn))
3246 && GET_CODE (SET_DEST (set)) == REG
3247 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3248 return 20;
3249
644459d0 3250 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3251 scheduler makes every insn in a block anti-dependent on the final
3252 jump_insn. We adjust here so higher cost insns will get scheduled
3253 earlier. */
5a976006 3254 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3255 return insn_cost (dep_insn) - 3;
5a976006 3256
644459d0 3257 return cost;
3258}
3259\f
3260/* Create a CONST_DOUBLE from a string. */
3261struct rtx_def *
3262spu_float_const (const char *string, enum machine_mode mode)
3263{
3264 REAL_VALUE_TYPE value;
3265 value = REAL_VALUE_ATOF (string, mode);
3266 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3267}
3268
644459d0 3269int
3270spu_constant_address_p (rtx x)
3271{
3272 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3273 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3274 || GET_CODE (x) == HIGH);
3275}
3276
3277static enum spu_immediate
3278which_immediate_load (HOST_WIDE_INT val)
3279{
3280 gcc_assert (val == trunc_int_for_mode (val, SImode));
3281
3282 if (val >= -0x8000 && val <= 0x7fff)
3283 return SPU_IL;
3284 if (val >= 0 && val <= 0x3ffff)
3285 return SPU_ILA;
3286 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3287 return SPU_ILH;
3288 if ((val & 0xffff) == 0)
3289 return SPU_ILHU;
3290
3291 return SPU_NONE;
3292}
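
/* Illustrative examples (not part of the original source), following the
   checks above in order:
     which_immediate_load (-1)         == SPU_IL   (fits signed 16 bits)
     which_immediate_load (0x20000)    == SPU_ILA  (in 0 .. 0x3ffff)
     which_immediate_load (0x00400040) == SPU_ILH  (both halfwords equal)
     which_immediate_load (0x12340000) == SPU_ILHU (low halfword zero)  */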
3293
dea01258 3294/* Return true when OP can be loaded by one of the il instructions, or
3295 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3296int
3297immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3298{
3299 if (CONSTANT_P (op))
3300 {
3301 enum immediate_class c = classify_immediate (op, mode);
5df189be 3302 return c == IC_IL1 || c == IC_IL1s
3072d30e 3303 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3304 }
3305 return 0;
3306}
3307
 3308/* Return true if the first SIZE bytes of ARR form a constant that can be
3309 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3310 represent the size and offset of the instruction to use. */
3311static int
3312cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3313{
3314 int cpat, run, i, start;
3315 cpat = 1;
3316 run = 0;
3317 start = -1;
3318 for (i = 0; i < size && cpat; i++)
3319 if (arr[i] != i+16)
3320 {
3321 if (!run)
3322 {
3323 start = i;
3324 if (arr[i] == 3)
3325 run = 1;
3326 else if (arr[i] == 2 && arr[i+1] == 3)
3327 run = 2;
3328 else if (arr[i] == 0)
3329 {
3330 while (arr[i+run] == run && i+run < 16)
3331 run++;
3332 if (run != 4 && run != 8)
3333 cpat = 0;
3334 }
3335 else
3336 cpat = 0;
3337 if ((i & (run-1)) != 0)
3338 cpat = 0;
3339 i += run;
3340 }
3341 else
3342 cpat = 0;
3343 }
b01a6dc3 3344 if (cpat && (run || size < 16))
dea01258 3345 {
3346 if (run == 0)
3347 run = 1;
3348 if (prun)
3349 *prun = run;
3350 if (pstart)
3351 *pstart = start == -1 ? 16-run : start;
3352 return 1;
3353 }
3354 return 0;
3355}
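
/* Illustrative example (not part of the original source): the 16-byte
   pattern
     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03,
       0x18,0x19,0x1a,0x1b, 0x1c,0x1d,0x1e,0x1f }
   differs from the identity pattern (arr[i] == i + 16) only in bytes
   4..7, which hold the run 0,1,2,3, so cpat_info returns 1 with
   *PRUN == 4 and *PSTART == 4 -- the kind of control word cwd generates
   for inserting a word at byte offset 4. */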
3356
3357/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3358 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3359static enum immediate_class
3360classify_immediate (rtx op, enum machine_mode mode)
644459d0 3361{
3362 HOST_WIDE_INT val;
3363 unsigned char arr[16];
5df189be 3364 int i, j, repeated, fsmbi, repeat;
dea01258 3365
3366 gcc_assert (CONSTANT_P (op));
3367
644459d0 3368 if (GET_MODE (op) != VOIDmode)
3369 mode = GET_MODE (op);
3370
dea01258 3371 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3372 if (!flag_pic
3373 && mode == V4SImode
dea01258 3374 && GET_CODE (op) == CONST_VECTOR
3375 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3376 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3377 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3378 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3379 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3380 op = CONST_VECTOR_ELT (op, 0);
644459d0 3381
dea01258 3382 switch (GET_CODE (op))
3383 {
3384 case SYMBOL_REF:
3385 case LABEL_REF:
3386 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3387
dea01258 3388 case CONST:
0cfc65d4 3389 /* We can never know if the resulting address fits in 18 bits and can be
3390 loaded with ila. For now, assume the address will not overflow if
3391 the displacement is "small" (fits 'K' constraint). */
3392 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3393 {
3394 rtx sym = XEXP (XEXP (op, 0), 0);
3395 rtx cst = XEXP (XEXP (op, 0), 1);
3396
3397 if (GET_CODE (sym) == SYMBOL_REF
3398 && GET_CODE (cst) == CONST_INT
3399 && satisfies_constraint_K (cst))
3400 return IC_IL1s;
3401 }
3402 return IC_IL2s;
644459d0 3403
dea01258 3404 case HIGH:
3405 return IC_IL1s;
3406
3407 case CONST_VECTOR:
3408 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3409 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3410 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3411 return IC_POOL;
3412 /* Fall through. */
3413
3414 case CONST_INT:
3415 case CONST_DOUBLE:
3416 constant_to_array (mode, op, arr);
644459d0 3417
dea01258 3418 /* Check that each 4-byte slot is identical. */
3419 repeated = 1;
3420 for (i = 4; i < 16; i += 4)
3421 for (j = 0; j < 4; j++)
3422 if (arr[j] != arr[i + j])
3423 repeated = 0;
3424
3425 if (repeated)
3426 {
3427 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3428 val = trunc_int_for_mode (val, SImode);
3429
3430 if (which_immediate_load (val) != SPU_NONE)
3431 return IC_IL1;
3432 }
3433
3434 /* Any mode of 2 bytes or smaller can be loaded with an il
3435 instruction. */
3436 gcc_assert (GET_MODE_SIZE (mode) > 2);
3437
3438 fsmbi = 1;
5df189be 3439 repeat = 0;
dea01258 3440 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3441 if (arr[i] != 0 && repeat == 0)
3442 repeat = arr[i];
3443 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3444 fsmbi = 0;
3445 if (fsmbi)
5df189be 3446 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3447
3448 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3449 return IC_CPAT;
3450
3451 if (repeated)
3452 return IC_IL2;
3453
3454 return IC_POOL;
3455 default:
3456 break;
3457 }
3458 gcc_unreachable ();
644459d0 3459}
3460
3461static enum spu_immediate
3462which_logical_immediate (HOST_WIDE_INT val)
3463{
3464 gcc_assert (val == trunc_int_for_mode (val, SImode));
3465
3466 if (val >= -0x200 && val <= 0x1ff)
3467 return SPU_ORI;
3468 if (val >= 0 && val <= 0xffff)
3469 return SPU_IOHL;
3470 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3471 {
3472 val = trunc_int_for_mode (val, HImode);
3473 if (val >= -0x200 && val <= 0x1ff)
3474 return SPU_ORHI;
3475 if ((val & 0xff) == ((val >> 8) & 0xff))
3476 {
3477 val = trunc_int_for_mode (val, QImode);
3478 if (val >= -0x200 && val <= 0x1ff)
3479 return SPU_ORBI;
3480 }
3481 }
3482 return SPU_NONE;
3483}
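
/* Illustrative examples (not part of the original source):
     which_logical_immediate (0x1234)     == SPU_IOHL (in 0 .. 0xffff)
     which_logical_immediate (0x00050005) == SPU_ORHI (equal halfwords,
                                             value fits 10 signed bits)
     which_logical_immediate (0x03030303) == SPU_ORBI (equal bytes)
   Note that logical_immediate_p below deliberately rejects SPU_IOHL,
   while iohl_immediate_p accepts exactly the 0 .. 0xffff range. */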
3484
5df189be 3485/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3486 CONST_DOUBLEs. */
3487static int
3488const_vector_immediate_p (rtx x)
3489{
3490 int i;
3491 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3492 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3493 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3494 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3495 return 0;
3496 return 1;
3497}
3498
644459d0 3499int
3500logical_immediate_p (rtx op, enum machine_mode mode)
3501{
3502 HOST_WIDE_INT val;
3503 unsigned char arr[16];
3504 int i, j;
3505
3506 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3507 || GET_CODE (op) == CONST_VECTOR);
3508
5df189be 3509 if (GET_CODE (op) == CONST_VECTOR
3510 && !const_vector_immediate_p (op))
3511 return 0;
3512
644459d0 3513 if (GET_MODE (op) != VOIDmode)
3514 mode = GET_MODE (op);
3515
3516 constant_to_array (mode, op, arr);
3517
3518 /* Check that bytes are repeated. */
3519 for (i = 4; i < 16; i += 4)
3520 for (j = 0; j < 4; j++)
3521 if (arr[j] != arr[i + j])
3522 return 0;
3523
3524 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3525 val = trunc_int_for_mode (val, SImode);
3526
3527 i = which_logical_immediate (val);
3528 return i != SPU_NONE && i != SPU_IOHL;
3529}
3530
3531int
3532iohl_immediate_p (rtx op, enum machine_mode mode)
3533{
3534 HOST_WIDE_INT val;
3535 unsigned char arr[16];
3536 int i, j;
3537
3538 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3539 || GET_CODE (op) == CONST_VECTOR);
3540
5df189be 3541 if (GET_CODE (op) == CONST_VECTOR
3542 && !const_vector_immediate_p (op))
3543 return 0;
3544
644459d0 3545 if (GET_MODE (op) != VOIDmode)
3546 mode = GET_MODE (op);
3547
3548 constant_to_array (mode, op, arr);
3549
3550 /* Check that bytes are repeated. */
3551 for (i = 4; i < 16; i += 4)
3552 for (j = 0; j < 4; j++)
3553 if (arr[j] != arr[i + j])
3554 return 0;
3555
3556 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3557 val = trunc_int_for_mode (val, SImode);
3558
3559 return val >= 0 && val <= 0xffff;
3560}
3561
3562int
3563arith_immediate_p (rtx op, enum machine_mode mode,
3564 HOST_WIDE_INT low, HOST_WIDE_INT high)
3565{
3566 HOST_WIDE_INT val;
3567 unsigned char arr[16];
3568 int bytes, i, j;
3569
3570 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3571 || GET_CODE (op) == CONST_VECTOR);
3572
5df189be 3573 if (GET_CODE (op) == CONST_VECTOR
3574 && !const_vector_immediate_p (op))
3575 return 0;
3576
644459d0 3577 if (GET_MODE (op) != VOIDmode)
3578 mode = GET_MODE (op);
3579
3580 constant_to_array (mode, op, arr);
3581
3582 if (VECTOR_MODE_P (mode))
3583 mode = GET_MODE_INNER (mode);
3584
3585 bytes = GET_MODE_SIZE (mode);
3586 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3587
3588 /* Check that bytes are repeated. */
3589 for (i = bytes; i < 16; i += bytes)
3590 for (j = 0; j < bytes; j++)
3591 if (arr[j] != arr[i + j])
3592 return 0;
3593
3594 val = arr[0];
3595 for (j = 1; j < bytes; j++)
3596 val = (val << 8) | arr[j];
3597
3598 val = trunc_int_for_mode (val, mode);
3599
3600 return val >= low && val <= high;
3601}
3602
56c7bfc2 3603/* Return TRUE when OP is an immediate and an exact power of 2, i.e.
 3604 OP == 2^SCALE with SCALE >= LOW && SCALE <= HIGH. When OP is a vector,
 3605 all elements must be the same. */
3606bool
3607exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3608{
3609 enum machine_mode int_mode;
3610 HOST_WIDE_INT val;
3611 unsigned char arr[16];
3612 int bytes, i, j;
3613
3614 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3615 || GET_CODE (op) == CONST_VECTOR);
3616
3617 if (GET_CODE (op) == CONST_VECTOR
3618 && !const_vector_immediate_p (op))
3619 return 0;
3620
3621 if (GET_MODE (op) != VOIDmode)
3622 mode = GET_MODE (op);
3623
3624 constant_to_array (mode, op, arr);
3625
3626 if (VECTOR_MODE_P (mode))
3627 mode = GET_MODE_INNER (mode);
3628
3629 bytes = GET_MODE_SIZE (mode);
3630 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3631
3632 /* Check that bytes are repeated. */
3633 for (i = bytes; i < 16; i += bytes)
3634 for (j = 0; j < bytes; j++)
3635 if (arr[j] != arr[i + j])
3636 return 0;
3637
3638 val = arr[0];
3639 for (j = 1; j < bytes; j++)
3640 val = (val << 8) | arr[j];
3641
3642 val = trunc_int_for_mode (val, int_mode);
3643
 3644 /* Currently, we only handle SFmode. */
3645 gcc_assert (mode == SFmode);
3646 if (mode == SFmode)
3647 {
3648 int exp = (val >> 23) - 127;
3649 return val > 0 && (val & 0x007fffff) == 0
3650 && exp >= low && exp <= high;
3651 }
3652 return FALSE;
3653}
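
/* Illustrative example (not part of the original source): the SFmode
   constant 32.0f has the bit pattern 0x42000000, so in the code above
     exp = (0x42000000 >> 23) - 127 = 132 - 127 = 5
   and the mantissa bits (val & 0x007fffff) are zero; exp2_immediate_p
   therefore returns true exactly when LOW <= 5 <= HIGH, matching
   32.0 == 2^5. */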
3654
6cf5579e 3655/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3656
3657static int
3658ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3659{
3660 rtx x = *px;
3661 tree decl;
3662
3663 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3664 {
3665 rtx plus = XEXP (x, 0);
3666 rtx op0 = XEXP (plus, 0);
3667 rtx op1 = XEXP (plus, 1);
3668 if (GET_CODE (op1) == CONST_INT)
3669 x = op0;
3670 }
3671
3672 return (GET_CODE (x) == SYMBOL_REF
3673 && (decl = SYMBOL_REF_DECL (x)) != 0
3674 && TREE_CODE (decl) == VAR_DECL
3675 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3676}
3677
644459d0 3678/* We accept:
5b865faf 3679 - any 32-bit constant (SImode, SFmode)
644459d0 3680 - any constant that can be generated with fsmbi (any mode)
 5b865faf 3681 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3682 (DImode, DFmode)
5b865faf 3683 - a 128-bit constant where the four 32-bit words match. */
644459d0 3684int
3685spu_legitimate_constant_p (rtx x)
3686{
5df189be 3687 if (GET_CODE (x) == HIGH)
3688 x = XEXP (x, 0);
6cf5579e 3689
3690 /* Reject any __ea qualified reference. These can't appear in
 3691 instructions and must be forced to the constant pool instead. */
3692 if (for_each_rtx (&x, ea_symbol_ref, 0))
3693 return 0;
3694
644459d0 3695 /* V4SI with all identical symbols is valid. */
5df189be 3696 if (!flag_pic
3697 && GET_MODE (x) == V4SImode
644459d0 3698 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3699 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3700 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3701 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3702 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3703 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3704
5df189be 3705 if (GET_CODE (x) == CONST_VECTOR
3706 && !const_vector_immediate_p (x))
3707 return 0;
644459d0 3708 return 1;
3709}
3710
3711/* Valid addresses are:
3712 - symbol_ref, label_ref, const
3713 - reg
9d98604b 3714 - reg + const_int, where const_int is 16 byte aligned
644459d0 3715 - reg + reg, alignment doesn't matter
3716 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3717 ignore the 4 least significant bits of the const. We only care about
3718 16 byte modes because the expand phase will change all smaller MEM
3719 references to TImode. */
3720static bool
3721spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3722 rtx x, bool reg_ok_strict)
644459d0 3723{
9d98604b 3724 int aligned = GET_MODE_SIZE (mode) >= 16;
3725 if (aligned
3726 && GET_CODE (x) == AND
644459d0 3727 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3728 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3729 x = XEXP (x, 0);
3730 switch (GET_CODE (x))
3731 {
644459d0 3732 case LABEL_REF:
6cf5579e 3733 return !TARGET_LARGE_MEM;
3734
9d98604b 3735 case SYMBOL_REF:
644459d0 3736 case CONST:
6cf5579e 3737 /* Keep __ea references until reload so that spu_expand_mov can see them
3738 in MEMs. */
3739 if (ea_symbol_ref (&x, 0))
3740 return !reload_in_progress && !reload_completed;
9d98604b 3741 return !TARGET_LARGE_MEM;
644459d0 3742
3743 case CONST_INT:
3744 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3745
3746 case SUBREG:
3747 x = XEXP (x, 0);
9d98604b 3748 if (REG_P (x))
3749 return 0;
644459d0 3750
3751 case REG:
3752 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3753
3754 case PLUS:
3755 case LO_SUM:
3756 {
3757 rtx op0 = XEXP (x, 0);
3758 rtx op1 = XEXP (x, 1);
3759 if (GET_CODE (op0) == SUBREG)
3760 op0 = XEXP (op0, 0);
3761 if (GET_CODE (op1) == SUBREG)
3762 op1 = XEXP (op1, 0);
644459d0 3763 if (GET_CODE (op0) == REG
3764 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3765 && GET_CODE (op1) == CONST_INT
3766 && INTVAL (op1) >= -0x2000
3767 && INTVAL (op1) <= 0x1fff
9d98604b 3768 && (!aligned || (INTVAL (op1) & 15) == 0))
3769 return TRUE;
644459d0 3770 if (GET_CODE (op0) == REG
3771 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3772 && GET_CODE (op1) == REG
3773 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3774 return TRUE;
644459d0 3775 }
3776 break;
3777
3778 default:
3779 break;
3780 }
9d98604b 3781 return FALSE;
644459d0 3782}
3783
6cf5579e 3784/* Like spu_legitimate_address_p, except with named addresses. */
3785static bool
3786spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3787 bool reg_ok_strict, addr_space_t as)
3788{
3789 if (as == ADDR_SPACE_EA)
3790 return (REG_P (x) && (GET_MODE (x) == EAmode));
3791
3792 else if (as != ADDR_SPACE_GENERIC)
3793 gcc_unreachable ();
3794
3795 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3796}
3797
644459d0 3798/* When the address is reg + const_int, force the const_int into a
fa7637bd 3799 register. */
644459d0 3800rtx
3801spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3802 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3803{
3804 rtx op0, op1;
3805 /* Make sure both operands are registers. */
3806 if (GET_CODE (x) == PLUS)
3807 {
3808 op0 = XEXP (x, 0);
3809 op1 = XEXP (x, 1);
3810 if (ALIGNED_SYMBOL_REF_P (op0))
3811 {
3812 op0 = force_reg (Pmode, op0);
3813 mark_reg_pointer (op0, 128);
3814 }
3815 else if (GET_CODE (op0) != REG)
3816 op0 = force_reg (Pmode, op0);
3817 if (ALIGNED_SYMBOL_REF_P (op1))
3818 {
3819 op1 = force_reg (Pmode, op1);
3820 mark_reg_pointer (op1, 128);
3821 }
3822 else if (GET_CODE (op1) != REG)
3823 op1 = force_reg (Pmode, op1);
3824 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3825 }
41e3a0c7 3826 return x;
644459d0 3827}
3828
6cf5579e 3829/* Like spu_legitimate_address, except with named address support. */
3830static rtx
3831spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3832 addr_space_t as)
3833{
3834 if (as != ADDR_SPACE_GENERIC)
3835 return x;
3836
3837 return spu_legitimize_address (x, oldx, mode);
3838}
3839
644459d0 3840/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3841 struct attribute_spec.handler. */
3842static tree
3843spu_handle_fndecl_attribute (tree * node,
3844 tree name,
3845 tree args ATTRIBUTE_UNUSED,
3846 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3847{
3848 if (TREE_CODE (*node) != FUNCTION_DECL)
3849 {
67a779df 3850 warning (0, "%qE attribute only applies to functions",
3851 name);
644459d0 3852 *no_add_attrs = true;
3853 }
3854
3855 return NULL_TREE;
3856}
3857
3858/* Handle the "vector" attribute. */
3859static tree
3860spu_handle_vector_attribute (tree * node, tree name,
3861 tree args ATTRIBUTE_UNUSED,
3862 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3863{
3864 tree type = *node, result = NULL_TREE;
3865 enum machine_mode mode;
3866 int unsigned_p;
3867
3868 while (POINTER_TYPE_P (type)
3869 || TREE_CODE (type) == FUNCTION_TYPE
3870 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3871 type = TREE_TYPE (type);
3872
3873 mode = TYPE_MODE (type);
3874
3875 unsigned_p = TYPE_UNSIGNED (type);
3876 switch (mode)
3877 {
3878 case DImode:
3879 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3880 break;
3881 case SImode:
3882 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3883 break;
3884 case HImode:
3885 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3886 break;
3887 case QImode:
3888 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3889 break;
3890 case SFmode:
3891 result = V4SF_type_node;
3892 break;
3893 case DFmode:
3894 result = V2DF_type_node;
3895 break;
3896 default:
3897 break;
3898 }
3899
3900 /* Propagate qualifiers attached to the element type
3901 onto the vector type. */
3902 if (result && result != type && TYPE_QUALS (type))
3903 result = build_qualified_type (result, TYPE_QUALS (type));
3904
3905 *no_add_attrs = true; /* No need to hang on to the attribute. */
3906
3907 if (!result)
67a779df 3908 warning (0, "%qE attribute ignored", name);
644459d0 3909 else
d991e6e8 3910 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3911
3912 return NULL_TREE;
3913}
3914
f2b32076 3915/* Return nonzero if FUNC is a naked function. */
644459d0 3916static int
3917spu_naked_function_p (tree func)
3918{
3919 tree a;
3920
3921 if (TREE_CODE (func) != FUNCTION_DECL)
3922 abort ();
3923
3924 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3925 return a != NULL_TREE;
3926}
3927
3928int
3929spu_initial_elimination_offset (int from, int to)
3930{
3931 int saved_regs_size = spu_saved_regs_size ();
3932 int sp_offset = 0;
abe32cce 3933 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3934 || get_frame_size () || saved_regs_size)
3935 sp_offset = STACK_POINTER_OFFSET;
3936 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3937 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3938 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3939 return get_frame_size ();
644459d0 3940 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3941 return sp_offset + crtl->outgoing_args_size
644459d0 3942 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3943 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3944 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3945 else
3946 gcc_unreachable ();
644459d0 3947}
3948
3949rtx
fb80456a 3950spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3951{
3952 enum machine_mode mode = TYPE_MODE (type);
3953 int byte_size = ((mode == BLKmode)
3954 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3955
3956 /* Make sure small structs are left justified in a register. */
3957 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3958 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3959 {
3960 enum machine_mode smode;
3961 rtvec v;
3962 int i;
3963 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3964 int n = byte_size / UNITS_PER_WORD;
3965 v = rtvec_alloc (nregs);
3966 for (i = 0; i < n; i++)
3967 {
3968 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3969 gen_rtx_REG (TImode,
3970 FIRST_RETURN_REGNUM
3971 + i),
3972 GEN_INT (UNITS_PER_WORD * i));
3973 byte_size -= UNITS_PER_WORD;
3974 }
3975
3976 if (n < nregs)
3977 {
3978 if (byte_size < 4)
3979 byte_size = 4;
3980 smode =
3981 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3982 RTVEC_ELT (v, n) =
3983 gen_rtx_EXPR_LIST (VOIDmode,
3984 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3985 GEN_INT (UNITS_PER_WORD * n));
3986 }
3987 return gen_rtx_PARALLEL (mode, v);
3988 }
3989 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3990}
3991
3992rtx
3993spu_function_arg (CUMULATIVE_ARGS cum,
3994 enum machine_mode mode,
3995 tree type, int named ATTRIBUTE_UNUSED)
3996{
3997 int byte_size;
3998
3999 if (cum >= MAX_REGISTER_ARGS)
4000 return 0;
4001
4002 byte_size = ((mode == BLKmode)
4003 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4004
4005 /* The ABI does not allow parameters to be passed partially in
4006 reg and partially in stack. */
4007 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4008 return 0;
4009
4010 /* Make sure small structs are left justified in a register. */
4011 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4012 && byte_size < UNITS_PER_WORD && byte_size > 0)
4013 {
4014 enum machine_mode smode;
4015 rtx gr_reg;
4016 if (byte_size < 4)
4017 byte_size = 4;
4018 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4019 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4020 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4021 const0_rtx);
4022 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4023 }
4024 else
4025 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4026}
4027
4028/* Variable sized types are passed by reference. */
4029static bool
4030spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4031 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4032 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4033{
4034 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4035}
4036\f
4037
4038/* Var args. */
4039
4040/* Create and return the va_list datatype.
4041
4042 On SPU, va_list is an array type equivalent to
4043
4044 typedef struct __va_list_tag
4045 {
4046 void *__args __attribute__((__aligned(16)));
4047 void *__skip __attribute__((__aligned(16)));
4048
4049 } va_list[1];
4050
fa7637bd 4051 where __args points to the arg that will be returned by the next
644459d0 4052 va_arg(), and __skip points to the previous stack frame such that
4053 when __args == __skip we should advance __args by 32 bytes. */
4054static tree
4055spu_build_builtin_va_list (void)
4056{
4057 tree f_args, f_skip, record, type_decl;
4058 bool owp;
4059
4060 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4061
4062 type_decl =
54e46243 4063 build_decl (BUILTINS_LOCATION,
4064 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4065
54e46243 4066 f_args = build_decl (BUILTINS_LOCATION,
4067 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4068 f_skip = build_decl (BUILTINS_LOCATION,
4069 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4070
4071 DECL_FIELD_CONTEXT (f_args) = record;
4072 DECL_ALIGN (f_args) = 128;
4073 DECL_USER_ALIGN (f_args) = 1;
4074
4075 DECL_FIELD_CONTEXT (f_skip) = record;
4076 DECL_ALIGN (f_skip) = 128;
4077 DECL_USER_ALIGN (f_skip) = 1;
4078
4079 TREE_CHAIN (record) = type_decl;
4080 TYPE_NAME (record) = type_decl;
4081 TYPE_FIELDS (record) = f_args;
4082 TREE_CHAIN (f_args) = f_skip;
4083
4084 /* We know this is being padded and we want it that way. It is an
4085 internal type, so hide the warnings from the user. */
4086 owp = warn_padded;
4087 warn_padded = false;
4088
4089 layout_type (record);
4090
4091 warn_padded = owp;
4092
4093 /* The correct type is an array type of one element. */
4094 return build_array_type (record, build_index_type (size_zero_node));
4095}
4096
4097/* Implement va_start by filling the va_list structure VALIST.
4098 NEXTARG points to the first anonymous stack argument.
4099
4100 The following global variables are used to initialize
4101 the va_list structure:
4102
abe32cce 4103 crtl->args.info;
644459d0 4104 the CUMULATIVE_ARGS for this function
4105
abe32cce 4106 crtl->args.arg_offset_rtx:
644459d0 4107 holds the offset of the first anonymous stack argument
4108 (relative to the virtual arg pointer). */
4109
8a58ed0a 4110static void
644459d0 4111spu_va_start (tree valist, rtx nextarg)
4112{
4113 tree f_args, f_skip;
4114 tree args, skip, t;
4115
4116 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4117 f_skip = TREE_CHAIN (f_args);
4118
4119 valist = build_va_arg_indirect_ref (valist);
4120 args =
4121 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4122 skip =
4123 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4124
4125 /* Find the __args area. */
4126 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4127 if (crtl->args.pretend_args_size > 0)
0de36bdb 4128 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4129 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4130 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4131 TREE_SIDE_EFFECTS (t) = 1;
4132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4133
4134 /* Find the __skip area. */
4135 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4136 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4137 size_int (crtl->args.pretend_args_size
0de36bdb 4138 - STACK_POINTER_OFFSET));
75a70cf9 4139 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4140 TREE_SIDE_EFFECTS (t) = 1;
4141 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4142}
4143
4144/* Gimplify va_arg by updating the va_list structure
4145 VALIST as required to retrieve an argument of type
4146 TYPE, and returning that argument.
4147
4148 ret = va_arg(VALIST, TYPE);
4149
4150 generates code equivalent to:
4151
4152 paddedsize = (sizeof(TYPE) + 15) & -16;
4153 if (VALIST.__args + paddedsize > VALIST.__skip
4154 && VALIST.__args <= VALIST.__skip)
4155 addr = VALIST.__skip + 32;
4156 else
4157 addr = VALIST.__args;
4158 VALIST.__args = addr + paddedsize;
4159 ret = *(TYPE *)addr;
4160 */
4161static tree
75a70cf9 4162spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4163 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4164{
4165 tree f_args, f_skip;
4166 tree args, skip;
4167 HOST_WIDE_INT size, rsize;
4168 tree paddedsize, addr, tmp;
4169 bool pass_by_reference_p;
4170
4171 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4172 f_skip = TREE_CHAIN (f_args);
4173
4174 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4175 args =
4176 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4177 skip =
4178 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4179
4180 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4181
4182 /* if an object is dynamically sized, a pointer to it is passed
4183 instead of the object itself. */
4184 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4185 false);
4186 if (pass_by_reference_p)
4187 type = build_pointer_type (type);
4188 size = int_size_in_bytes (type);
4189 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4190
4191 /* build conditional expression to calculate addr. The expression
4192 will be gimplified later. */
0de36bdb 4193 paddedsize = size_int (rsize);
75a70cf9 4194 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4195 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4196 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4197 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4198 unshare_expr (skip)));
644459d0 4199
4200 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4201 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4202 size_int (32)), unshare_expr (args));
644459d0 4203
75a70cf9 4204 gimplify_assign (addr, tmp, pre_p);
644459d0 4205
4206 /* update VALIST.__args */
0de36bdb 4207 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4208 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4209
8115f0af 4210 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4211 addr);
644459d0 4212
4213 if (pass_by_reference_p)
4214 addr = build_va_arg_indirect_ref (addr);
4215
4216 return build_va_arg_indirect_ref (addr);
4217}
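/* A sketch, not part of spu.c: the pseudocode in the comment above written
   as ordinary C over a hypothetical two-pointer va_list, showing the
   16-byte padding and the 32-byte jump when __args crosses __skip.  */
#include <stddef.h>

struct spu_va_tag { char *args; char *skip; };

static void *
spu_va_arg_addr (struct spu_va_tag *va, size_t type_size)
{
  size_t padded = (type_size + 15) & ~(size_t) 15;   /* paddedsize */
  char *addr;
  if (va->args + padded > va->skip && va->args <= va->skip)
    addr = va->skip + 32;       /* jump the 32 bytes described above */
  else
    addr = va->args;
  va->args = addr + padded;
  return addr;                  /* caller reads *(TYPE *) addr */
}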
4218
4219/* Save parameter registers starting with the register that corresponds
4220 to the first unnamed parameter. If the first unnamed parameter is
4221 in the stack then save no registers. Set pretend_args_size to the
4222 amount of space needed to save the registers. */
4223void
4224spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4225 tree type, int *pretend_size, int no_rtl)
4226{
4227 if (!no_rtl)
4228 {
4229 rtx tmp;
4230 int regno;
4231 int offset;
4232 int ncum = *cum;
4233
4234 /* cum currently points to the last named argument; we want to
4235 start at the next argument. */
4236 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4237
4238 offset = -STACK_POINTER_OFFSET;
4239 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4240 {
4241 tmp = gen_frame_mem (V4SImode,
4242 plus_constant (virtual_incoming_args_rtx,
4243 offset));
4244 emit_move_insn (tmp,
4245 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4246 offset += 16;
4247 }
4248 *pretend_size = offset + STACK_POINTER_OFFSET;
4249 }
4250}
4251\f
4252void
4253spu_conditional_register_usage (void)
4254{
4255 if (flag_pic)
4256 {
4257 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4258 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4259 }
644459d0 4260}
4261
9d98604b 4262/* This is called any time we inspect the alignment of a register for
4263 addresses. */
644459d0 4264static int
9d98604b 4265reg_aligned_for_addr (rtx x)
644459d0 4266{
9d98604b 4267 int regno =
4268 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4269 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4270}
4271
69ced2d6 4272/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4273 into its SYMBOL_REF_FLAGS. */
4274static void
4275spu_encode_section_info (tree decl, rtx rtl, int first)
4276{
4277 default_encode_section_info (decl, rtl, first);
4278
4279 /* If a variable has a forced alignment to < 16 bytes, mark it with
4280 SYMBOL_FLAG_ALIGN1. */
4281 if (TREE_CODE (decl) == VAR_DECL
4282 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4283 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4284}
4285
644459d0 4286/* Return TRUE if we are certain the mem refers to a complete object
4287 which is both 16-byte aligned and padded to a 16-byte boundary. This
4288 would make it safe to store with a single instruction.
4289 We guarantee the alignment and padding for static objects by aligning
4290 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4291 FIXME: We currently cannot guarantee this for objects on the stack
4292 because assign_parm_setup_stack calls assign_stack_local with the
4293 alignment of the parameter mode and in that case the alignment never
4294 gets adjusted by LOCAL_ALIGNMENT. */
4295static int
4296store_with_one_insn_p (rtx mem)
4297{
9d98604b 4298 enum machine_mode mode = GET_MODE (mem);
644459d0 4299 rtx addr = XEXP (mem, 0);
9d98604b 4300 if (mode == BLKmode)
644459d0 4301 return 0;
9d98604b 4302 if (GET_MODE_SIZE (mode) >= 16)
4303 return 1;
644459d0 4304 /* Only static objects. */
4305 if (GET_CODE (addr) == SYMBOL_REF)
4306 {
4307 /* We use the associated declaration to make sure the access is
fa7637bd 4308 referring to the whole object.
644459d0 4309 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4310 if it is necessary. Will there be cases where one exists, and
4311 the other does not? Will there be cases where both exist, but
4312 have different types? */
4313 tree decl = MEM_EXPR (mem);
4314 if (decl
4315 && TREE_CODE (decl) == VAR_DECL
4316 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4317 return 1;
4318 decl = SYMBOL_REF_DECL (addr);
4319 if (decl
4320 && TREE_CODE (decl) == VAR_DECL
4321 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4322 return 1;
4323 }
4324 return 0;
4325}
4326
9d98604b 4327/* Return 1 when the address is not valid for a simple load and store as
4328 required by the '_mov*' patterns. We could make this less strict
4329 for loads, but we prefer MEMs to look the same so they are more
4330 likely to be merged. */
4331static int
4332address_needs_split (rtx mem)
4333{
4334 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4335 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4336 || !(store_with_one_insn_p (mem)
4337 || mem_is_padded_component_ref (mem))))
4338 return 1;
4339
4340 return 0;
4341}
4342
6cf5579e 4343static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4344static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4345static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4346
4347/* MEM is known to be an __ea qualified memory access. Emit a call to
4348 fetch the ppu memory to local store, and return its address in local
4349 store. */
4350
4351static void
4352ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4353{
4354 if (is_store)
4355 {
4356 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4357 if (!cache_fetch_dirty)
4358 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4359 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4360 2, ea_addr, EAmode, ndirty, SImode);
4361 }
4362 else
4363 {
4364 if (!cache_fetch)
4365 cache_fetch = init_one_libfunc ("__cache_fetch");
4366 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4367 1, ea_addr, EAmode);
4368 }
4369}
4370
4371/* Like ea_load_store, but do the cache tag comparison and, for stores,
4372 dirty bit marking, inline.
4373
4374 The cache control data structure is an array of
4375
4376 struct __cache_tag_array
4377 {
4378 unsigned int tag_lo[4];
4379 unsigned int tag_hi[4];
4380 void *data_pointer[4];
4381 int reserved[4];
4382 vector unsigned short dirty_bits[4];
4383 } */
4384
4385static void
4386ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4387{
4388 rtx ea_addr_si;
4389 HOST_WIDE_INT v;
4390 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4391 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4392 rtx index_mask = gen_reg_rtx (SImode);
4393 rtx tag_arr = gen_reg_rtx (Pmode);
4394 rtx splat_mask = gen_reg_rtx (TImode);
4395 rtx splat = gen_reg_rtx (V4SImode);
4396 rtx splat_hi = NULL_RTX;
4397 rtx tag_index = gen_reg_rtx (Pmode);
4398 rtx block_off = gen_reg_rtx (SImode);
4399 rtx tag_addr = gen_reg_rtx (Pmode);
4400 rtx tag = gen_reg_rtx (V4SImode);
4401 rtx cache_tag = gen_reg_rtx (V4SImode);
4402 rtx cache_tag_hi = NULL_RTX;
4403 rtx cache_ptrs = gen_reg_rtx (TImode);
4404 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4405 rtx tag_equal = gen_reg_rtx (V4SImode);
4406 rtx tag_equal_hi = NULL_RTX;
4407 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4408 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4409 rtx eq_index = gen_reg_rtx (SImode);
4410 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4411
4412 if (spu_ea_model != 32)
4413 {
4414 splat_hi = gen_reg_rtx (V4SImode);
4415 cache_tag_hi = gen_reg_rtx (V4SImode);
4416 tag_equal_hi = gen_reg_rtx (V4SImode);
4417 }
4418
4419 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4420 emit_move_insn (tag_arr, tag_arr_sym);
4421 v = 0x0001020300010203LL;
4422 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4423 ea_addr_si = ea_addr;
4424 if (spu_ea_model != 32)
4425 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4426
4427 /* tag_index = ea_addr & (tag_array_size - 128) */
4428 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4429
4430 /* splat ea_addr to all 4 slots. */
4431 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4432 /* Similarly for high 32 bits of ea_addr. */
4433 if (spu_ea_model != 32)
4434 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4435
4436 /* block_off = ea_addr & 127 */
4437 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4438
4439 /* tag_addr = tag_arr + tag_index */
4440 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4441
4442 /* Read cache tags. */
4443 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4444 if (spu_ea_model != 32)
4445 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4446 plus_constant (tag_addr, 16)));
4447
4448 /* tag = ea_addr & -128 */
4449 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4450
4451 /* Read all four cache data pointers. */
4452 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4453 plus_constant (tag_addr, 32)));
4454
4455 /* Compare tags. */
4456 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4457 if (spu_ea_model != 32)
4458 {
4459 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4460 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4461 }
4462
4463 /* At most one of the tags compare equal, so tag_equal has one
4464 32-bit slot set to all 1's, with the other slots all zero.
4465 gbb picks off low bit from each byte in the 128-bit registers,
4466 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4467 we have a hit. */
4468 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4469 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4470
4471 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4472 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4473
4474 /* This allows us to rotate the corresponding cache data pointer to
4475 slot 0 (rotating by eq_index mod 16 bytes). */
4476 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4477 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4478
4479 /* Add block offset to form final data address. */
4480 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4481
4482 /* Check that we did hit. */
4483 hit_label = gen_label_rtx ();
4484 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4485 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4486 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4487 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4488 hit_ref, pc_rtx)));
4489 /* Say that this branch is very likely to happen. */
4490 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4491 REG_NOTES (insn)
4492 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
4493
4494 ea_load_store (mem, is_store, ea_addr, data_addr);
4495 cont_label = gen_label_rtx ();
4496 emit_jump_insn (gen_jump (cont_label));
4497 emit_barrier ();
4498
4499 emit_label (hit_label);
4500
4501 if (is_store)
4502 {
4503 HOST_WIDE_INT v_hi;
4504 rtx dirty_bits = gen_reg_rtx (TImode);
4505 rtx dirty_off = gen_reg_rtx (SImode);
4506 rtx dirty_128 = gen_reg_rtx (TImode);
4507 rtx neg_block_off = gen_reg_rtx (SImode);
4508
4509 /* Set up mask with one dirty bit per byte of the mem we are
4510 writing, starting from top bit. */
4511 v_hi = v = -1;
4512 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4513 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4514 {
4515 v_hi = v;
4516 v = 0;
4517 }
4518 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4519
4520 /* Form index into cache dirty_bits. eq_index is one of
4521 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4522 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4523 offset to each of the four dirty_bits elements. */
4524 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4525
4526 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4527
4528 /* Rotate bit mask to proper bit. */
4529 emit_insn (gen_negsi2 (neg_block_off, block_off));
4530 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4531 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4532
4533 /* Or in the new dirty bits. */
4534 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4535
4536 /* Store. */
4537 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4538 }
4539
4540 emit_label (cont_label);
4541}
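/* A sketch, not part of spu.c: the scalar arithmetic behind the inline
   lookup above, assuming a software cache with 128-byte lines and four
   tags per set.  TAG_ARRAY_SIZE stands in for __cache_tag_array_size.  */
#include <stdint.h>

static void
ea_cache_decompose (uint32_t ea_addr, uint32_t tag_array_size,
                    uint32_t *tag_index, uint32_t *block_off, uint32_t *tag)
{
  *tag_index = ea_addr & (tag_array_size - 128);  /* offset of the tag set */
  *block_off = ea_addr & 127;                     /* byte within the line */
  *tag       = ea_addr & ~(uint32_t) 127;         /* line-aligned address */
}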
4542
4543static rtx
4544expand_ea_mem (rtx mem, bool is_store)
4545{
4546 rtx ea_addr;
4547 rtx data_addr = gen_reg_rtx (Pmode);
4548 rtx new_mem;
4549
4550 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4551 if (optimize_size || optimize == 0)
4552 ea_load_store (mem, is_store, ea_addr, data_addr);
4553 else
4554 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4555
4556 if (ea_alias_set == -1)
4557 ea_alias_set = new_alias_set ();
4558
4559 /* We generate a new MEM RTX to refer to the copy of the data
4560 in the cache. We do not copy memory attributes (except the
4561 alignment) from the original MEM, as they may no longer apply
4562 to the cache copy. */
4563 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4564 set_mem_alias_set (new_mem, ea_alias_set);
4565 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4566
4567 return new_mem;
4568}
4569
644459d0 4570int
4571spu_expand_mov (rtx * ops, enum machine_mode mode)
4572{
4573 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4574 abort ();
4575
4576 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4577 {
4578 rtx from = SUBREG_REG (ops[1]);
8d72495d 4579 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4580
4581 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4582 && GET_MODE_CLASS (imode) == MODE_INT
4583 && subreg_lowpart_p (ops[1]));
4584
4585 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4586 imode = SImode;
4587 if (imode != GET_MODE (from))
4588 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4589
4590 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4591 {
99bdde56 4592 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4593 emit_insn (GEN_FCN (icode) (ops[0], from));
4594 }
4595 else
4596 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4597 return 1;
4598 }
4599
4600 /* At least one of the operands needs to be a register. */
4601 if ((reload_in_progress | reload_completed) == 0
4602 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4603 {
4604 rtx temp = force_reg (mode, ops[1]);
4605 emit_move_insn (ops[0], temp);
4606 return 1;
4607 }
4608 if (reload_in_progress || reload_completed)
4609 {
dea01258 4610 if (CONSTANT_P (ops[1]))
4611 return spu_split_immediate (ops);
644459d0 4612 return 0;
4613 }
9d98604b 4614
4615 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4616 extend them. */
4617 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4618 {
9d98604b 4619 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4620 if (val != INTVAL (ops[1]))
644459d0 4621 {
9d98604b 4622 emit_move_insn (ops[0], GEN_INT (val));
4623 return 1;
644459d0 4624 }
4625 }
9d98604b 4626 if (MEM_P (ops[0]))
6cf5579e 4627 {
4628 if (MEM_ADDR_SPACE (ops[0]))
4629 ops[0] = expand_ea_mem (ops[0], true);
4630 return spu_split_store (ops);
4631 }
9d98604b 4632 if (MEM_P (ops[1]))
6cf5579e 4633 {
4634 if (MEM_ADDR_SPACE (ops[1]))
4635 ops[1] = expand_ea_mem (ops[1], false);
4636 return spu_split_load (ops);
4637 }
9d98604b 4638
644459d0 4639 return 0;
4640}
4641
9d98604b 4642static void
4643spu_convert_move (rtx dst, rtx src)
644459d0 4644{
9d98604b 4645 enum machine_mode mode = GET_MODE (dst);
4646 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4647 rtx reg;
4648 gcc_assert (GET_MODE (src) == TImode);
4649 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4650 emit_insn (gen_rtx_SET (VOIDmode, reg,
4651 gen_rtx_TRUNCATE (int_mode,
4652 gen_rtx_LSHIFTRT (TImode, src,
4653 GEN_INT (int_mode == DImode ? 64 : 96)))));
4654 if (int_mode != mode)
4655 {
4656 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4657 emit_move_insn (dst, reg);
4658 }
4659}
644459d0 4660
9d98604b 4661/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4662 the address from SRC and SRC+16. Return a REG or CONST_INT that
4663 specifies how many bytes to rotate the loaded registers, plus any
4664 extra from EXTRA_ROTQBY. The address and rotate amounts are
4665 normalized to improve merging of loads and rotate computations. */
4666static rtx
4667spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4668{
4669 rtx addr = XEXP (src, 0);
4670 rtx p0, p1, rot, addr0, addr1;
4671 int rot_amt;
644459d0 4672
4673 rot = 0;
4674 rot_amt = 0;
9d98604b 4675
4676 if (MEM_ALIGN (src) >= 128)
4677 /* Address is already aligned; simply perform a TImode load. */ ;
4678 else if (GET_CODE (addr) == PLUS)
644459d0 4679 {
4680 /* 8 cases:
4681 aligned reg + aligned reg => lqx
4682 aligned reg + unaligned reg => lqx, rotqby
4683 aligned reg + aligned const => lqd
4684 aligned reg + unaligned const => lqd, rotqbyi
4685 unaligned reg + aligned reg => lqx, rotqby
4686 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4687 unaligned reg + aligned const => lqd, rotqby
4688 unaligned reg + unaligned const => not allowed by legitimate address
4689 */
4690 p0 = XEXP (addr, 0);
4691 p1 = XEXP (addr, 1);
9d98604b 4692 if (!reg_aligned_for_addr (p0))
644459d0 4693 {
9d98604b 4694 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4695 {
9d98604b 4696 rot = gen_reg_rtx (SImode);
4697 emit_insn (gen_addsi3 (rot, p0, p1));
4698 }
4699 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4700 {
4701 if (INTVAL (p1) > 0
4702 && REG_POINTER (p0)
4703 && INTVAL (p1) * BITS_PER_UNIT
4704 < REGNO_POINTER_ALIGN (REGNO (p0)))
4705 {
4706 rot = gen_reg_rtx (SImode);
4707 emit_insn (gen_addsi3 (rot, p0, p1));
4708 addr = p0;
4709 }
4710 else
4711 {
4712 rtx x = gen_reg_rtx (SImode);
4713 emit_move_insn (x, p1);
4714 if (!spu_arith_operand (p1, SImode))
4715 p1 = x;
4716 rot = gen_reg_rtx (SImode);
4717 emit_insn (gen_addsi3 (rot, p0, p1));
4718 addr = gen_rtx_PLUS (Pmode, p0, x);
4719 }
644459d0 4720 }
4721 else
4722 rot = p0;
4723 }
4724 else
4725 {
4726 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4727 {
4728 rot_amt = INTVAL (p1) & 15;
9d98604b 4729 if (INTVAL (p1) & -16)
4730 {
4731 p1 = GEN_INT (INTVAL (p1) & -16);
4732 addr = gen_rtx_PLUS (SImode, p0, p1);
4733 }
4734 else
4735 addr = p0;
644459d0 4736 }
9d98604b 4737 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4738 rot = p1;
4739 }
4740 }
9d98604b 4741 else if (REG_P (addr))
644459d0 4742 {
9d98604b 4743 if (!reg_aligned_for_addr (addr))
644459d0 4744 rot = addr;
4745 }
4746 else if (GET_CODE (addr) == CONST)
4747 {
4748 if (GET_CODE (XEXP (addr, 0)) == PLUS
4749 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4750 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4751 {
4752 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4753 if (rot_amt & -16)
4754 addr = gen_rtx_CONST (Pmode,
4755 gen_rtx_PLUS (Pmode,
4756 XEXP (XEXP (addr, 0), 0),
4757 GEN_INT (rot_amt & -16)));
4758 else
4759 addr = XEXP (XEXP (addr, 0), 0);
4760 }
4761 else
9d98604b 4762 {
4763 rot = gen_reg_rtx (Pmode);
4764 emit_move_insn (rot, addr);
4765 }
644459d0 4766 }
4767 else if (GET_CODE (addr) == CONST_INT)
4768 {
4769 rot_amt = INTVAL (addr);
4770 addr = GEN_INT (rot_amt & -16);
4771 }
4772 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4773 {
4774 rot = gen_reg_rtx (Pmode);
4775 emit_move_insn (rot, addr);
4776 }
644459d0 4777
9d98604b 4778 rot_amt += extra_rotby;
644459d0 4779
4780 rot_amt &= 15;
4781
4782 if (rot && rot_amt)
4783 {
9d98604b 4784 rtx x = gen_reg_rtx (SImode);
4785 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4786 rot = x;
644459d0 4787 rot_amt = 0;
4788 }
9d98604b 4789 if (!rot && rot_amt)
4790 rot = GEN_INT (rot_amt);
4791
4792 addr0 = copy_rtx (addr);
4793 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4794 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4795
4796 if (dst1)
4797 {
4798 addr1 = plus_constant (copy_rtx (addr), 16);
4799 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4800 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4801 }
644459d0 4802
9d98604b 4803 return rot;
4804}
4805
4806int
4807spu_split_load (rtx * ops)
4808{
4809 enum machine_mode mode = GET_MODE (ops[0]);
4810 rtx addr, load, rot;
4811 int rot_amt;
644459d0 4812
9d98604b 4813 if (GET_MODE_SIZE (mode) >= 16)
4814 return 0;
644459d0 4815
9d98604b 4816 addr = XEXP (ops[1], 0);
4817 gcc_assert (GET_CODE (addr) != AND);
4818
4819 if (!address_needs_split (ops[1]))
4820 {
4821 ops[1] = change_address (ops[1], TImode, addr);
4822 load = gen_reg_rtx (TImode);
4823 emit_insn (gen__movti (load, ops[1]));
4824 spu_convert_move (ops[0], load);
4825 return 1;
4826 }
4827
4828 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4829
4830 load = gen_reg_rtx (TImode);
4831 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4832
4833 if (rot)
4834 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4835
9d98604b 4836 spu_convert_move (ops[0], load);
4837 return 1;
644459d0 4838}
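/* A sketch, not part of spu.c: the rotate amount computed above.  A scalar
   at byte offset OFF within its quadword reaches its preferred slot after a
   left rotate of OFF bytes, plus (size - 4) for scalars narrower than a
   word, taken modulo 16.  */
static int
load_rotate_amount (int byte_offset, int mode_size)
{
  int extra = mode_size < 4 ? mode_size - 4 : 0;
  return (byte_offset + extra) & 15;
}
/* E.g. an SImode value at offset 6 needs a rotate of 6, while a QImode byte
   at offset 6 needs (6 - 3) & 15 == 3, landing it in preferred slot 3.  */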
4839
9d98604b 4840int
644459d0 4841spu_split_store (rtx * ops)
4842{
4843 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4844 rtx reg;
644459d0 4845 rtx addr, p0, p1, p1_lo, smem;
4846 int aform;
4847 int scalar;
4848
9d98604b 4849 if (GET_MODE_SIZE (mode) >= 16)
4850 return 0;
4851
644459d0 4852 addr = XEXP (ops[0], 0);
9d98604b 4853 gcc_assert (GET_CODE (addr) != AND);
4854
4855 if (!address_needs_split (ops[0]))
4856 {
4857 reg = gen_reg_rtx (TImode);
4858 emit_insn (gen_spu_convert (reg, ops[1]));
4859 ops[0] = change_address (ops[0], TImode, addr);
4860 emit_move_insn (ops[0], reg);
4861 return 1;
4862 }
644459d0 4863
4864 if (GET_CODE (addr) == PLUS)
4865 {
4866 /* 8 cases:
4867 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4868 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4869 aligned reg + aligned const => lqd, c?d, shuf, stqx
4870 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4871 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4872 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4873 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4874 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4875 */
4876 aform = 0;
4877 p0 = XEXP (addr, 0);
4878 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4879 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4880 {
4881 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4882 if (reg_aligned_for_addr (p0))
4883 {
4884 p1 = GEN_INT (INTVAL (p1) & -16);
4885 if (p1 == const0_rtx)
4886 addr = p0;
4887 else
4888 addr = gen_rtx_PLUS (SImode, p0, p1);
4889 }
4890 else
4891 {
4892 rtx x = gen_reg_rtx (SImode);
4893 emit_move_insn (x, p1);
4894 addr = gen_rtx_PLUS (SImode, p0, x);
4895 }
644459d0 4896 }
4897 }
9d98604b 4898 else if (REG_P (addr))
644459d0 4899 {
4900 aform = 0;
4901 p0 = addr;
4902 p1 = p1_lo = const0_rtx;
4903 }
4904 else
4905 {
4906 aform = 1;
4907 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4908 p1 = 0; /* aform doesn't use p1 */
4909 p1_lo = addr;
4910 if (ALIGNED_SYMBOL_REF_P (addr))
4911 p1_lo = const0_rtx;
9d98604b 4912 else if (GET_CODE (addr) == CONST
4913 && GET_CODE (XEXP (addr, 0)) == PLUS
4914 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4915 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4916 {
9d98604b 4917 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4918 if ((v & -16) != 0)
4919 addr = gen_rtx_CONST (Pmode,
4920 gen_rtx_PLUS (Pmode,
4921 XEXP (XEXP (addr, 0), 0),
4922 GEN_INT (v & -16)));
4923 else
4924 addr = XEXP (XEXP (addr, 0), 0);
4925 p1_lo = GEN_INT (v & 15);
644459d0 4926 }
4927 else if (GET_CODE (addr) == CONST_INT)
4928 {
4929 p1_lo = GEN_INT (INTVAL (addr) & 15);
4930 addr = GEN_INT (INTVAL (addr) & -16);
4931 }
9d98604b 4932 else
4933 {
4934 p1_lo = gen_reg_rtx (SImode);
4935 emit_move_insn (p1_lo, addr);
4936 }
644459d0 4937 }
4938
9d98604b 4939 reg = gen_reg_rtx (TImode);
e04cf423 4940
644459d0 4941 scalar = store_with_one_insn_p (ops[0]);
4942 if (!scalar)
4943 {
4944 /* We could copy the flags from the ops[0] MEM to mem here, but
4945 we don't because we want this load to be optimized away if
4946 possible, and copying the flags will prevent that in certain
4947 cases, e.g. consider the volatile flag. */
4948
9d98604b 4949 rtx pat = gen_reg_rtx (TImode);
e04cf423 4950 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4951 set_mem_alias_set (lmem, 0);
4952 emit_insn (gen_movti (reg, lmem));
644459d0 4953
9d98604b 4954 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4955 p0 = stack_pointer_rtx;
4956 if (!p1_lo)
4957 p1_lo = const0_rtx;
4958
4959 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4960 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4961 }
644459d0 4962 else
4963 {
4964 if (GET_CODE (ops[1]) == REG)
4965 emit_insn (gen_spu_convert (reg, ops[1]));
4966 else if (GET_CODE (ops[1]) == SUBREG)
4967 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4968 else
4969 abort ();
4970 }
4971
4972 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4973 emit_insn (gen_ashlti3
4974 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4975
9d98604b 4976 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4977 /* We can't use the previous alias set because the memory has changed
4978 size and can potentially overlap objects of other types. */
4979 set_mem_alias_set (smem, 0);
4980
e04cf423 4981 emit_insn (gen_movti (smem, reg));
9d98604b 4982 return 1;
644459d0 4983}
4984
4985/* Return TRUE if X is MEM which is a struct member reference
4986 and the member can safely be loaded and stored with a single
4987 instruction because it is padded. */
4988static int
4989mem_is_padded_component_ref (rtx x)
4990{
4991 tree t = MEM_EXPR (x);
4992 tree r;
4993 if (!t || TREE_CODE (t) != COMPONENT_REF)
4994 return 0;
4995 t = TREE_OPERAND (t, 1);
4996 if (!t || TREE_CODE (t) != FIELD_DECL
4997 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4998 return 0;
4999 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5000 r = DECL_FIELD_CONTEXT (t);
5001 if (!r || TREE_CODE (r) != RECORD_TYPE)
5002 return 0;
5003 /* Make sure they are the same mode */
5004 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5005 return 0;
5006 /* If there are no following fields then the field alignment assures
fa7637bd 5007 the structure is padded to the alignment which means this field is
5008 padded too. */
644459d0 5009 if (TREE_CHAIN (t) == 0)
5010 return 1;
5011 /* If the following field is also aligned then this field will be
5012 padded. */
5013 t = TREE_CHAIN (t);
5014 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5015 return 1;
5016 return 0;
5017}
5018
c7b91b14 5019/* Parse the -mfixed-range= option string. */
5020static void
5021fix_range (const char *const_str)
5022{
5023 int i, first, last;
5024 char *str, *dash, *comma;
5025
5026 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5027 REG2 are either register names or register numbers. The effect
5028 of this option is to mark the registers in the range from REG1 to
5029 REG2 as ``fixed'' so they won't be used by the compiler. */
5030
5031 i = strlen (const_str);
5032 str = (char *) alloca (i + 1);
5033 memcpy (str, const_str, i + 1);
5034
5035 while (1)
5036 {
5037 dash = strchr (str, '-');
5038 if (!dash)
5039 {
5040 warning (0, "value of -mfixed-range must have form REG1-REG2");
5041 return;
5042 }
5043 *dash = '\0';
5044 comma = strchr (dash + 1, ',');
5045 if (comma)
5046 *comma = '\0';
5047
5048 first = decode_reg_name (str);
5049 if (first < 0)
5050 {
5051 warning (0, "unknown register name: %s", str);
5052 return;
5053 }
5054
5055 last = decode_reg_name (dash + 1);
5056 if (last < 0)
5057 {
5058 warning (0, "unknown register name: %s", dash + 1);
5059 return;
5060 }
5061
5062 *dash = '-';
5063
5064 if (first > last)
5065 {
5066 warning (0, "%s-%s is an empty range", str, dash + 1);
5067 return;
5068 }
5069
5070 for (i = first; i <= last; ++i)
5071 fixed_regs[i] = call_used_regs[i] = 1;
5072
5073 if (!comma)
5074 break;
5075
5076 *comma = ',';
5077 str = comma + 1;
5078 }
5079}
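/* A hypothetical example, not from spu.c: -mfixed-range=80-86,100-110 would
   mark registers 80-86 and 100-110 as fixed and call-used, keeping the
   compiler from allocating them.  */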
5080
644459d0 5081/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5082 can be generated using the fsmbi instruction. */
5083int
5084fsmbi_const_p (rtx x)
5085{
dea01258 5086 if (CONSTANT_P (x))
5087 {
5df189be 5088 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5089 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5090 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5091 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5092 }
5093 return 0;
5094}
5095
5096/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5097 can be generated using the cbd, chd, cwd or cdd instruction. */
5098int
5099cpat_const_p (rtx x, enum machine_mode mode)
5100{
5101 if (CONSTANT_P (x))
5102 {
5103 enum immediate_class c = classify_immediate (x, mode);
5104 return c == IC_CPAT;
5105 }
5106 return 0;
5107}
644459d0 5108
dea01258 5109rtx
5110gen_cpat_const (rtx * ops)
5111{
5112 unsigned char dst[16];
5113 int i, offset, shift, isize;
5114 if (GET_CODE (ops[3]) != CONST_INT
5115 || GET_CODE (ops[2]) != CONST_INT
5116 || (GET_CODE (ops[1]) != CONST_INT
5117 && GET_CODE (ops[1]) != REG))
5118 return 0;
5119 if (GET_CODE (ops[1]) == REG
5120 && (!REG_POINTER (ops[1])
5121 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5122 return 0;
644459d0 5123
5124 for (i = 0; i < 16; i++)
dea01258 5125 dst[i] = i + 16;
5126 isize = INTVAL (ops[3]);
5127 if (isize == 1)
5128 shift = 3;
5129 else if (isize == 2)
5130 shift = 2;
5131 else
5132 shift = 0;
5133 offset = (INTVAL (ops[2]) +
5134 (GET_CODE (ops[1]) ==
5135 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5136 for (i = 0; i < isize; i++)
5137 dst[offset + i] = i + shift;
5138 return array_to_constant (TImode, dst);
644459d0 5139}
5140
5141/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5142 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5143 than 16 bytes, the value is repeated across the rest of the array. */
5144void
5145constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5146{
5147 HOST_WIDE_INT val;
5148 int i, j, first;
5149
5150 memset (arr, 0, 16);
5151 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5152 if (GET_CODE (x) == CONST_INT
5153 || (GET_CODE (x) == CONST_DOUBLE
5154 && (mode == SFmode || mode == DFmode)))
5155 {
5156 gcc_assert (mode != VOIDmode && mode != BLKmode);
5157
5158 if (GET_CODE (x) == CONST_DOUBLE)
5159 val = const_double_to_hwint (x);
5160 else
5161 val = INTVAL (x);
5162 first = GET_MODE_SIZE (mode) - 1;
5163 for (i = first; i >= 0; i--)
5164 {
5165 arr[i] = val & 0xff;
5166 val >>= 8;
5167 }
5168 /* Splat the constant across the whole array. */
5169 for (j = 0, i = first + 1; i < 16; i++)
5170 {
5171 arr[i] = arr[j];
5172 j = (j == first) ? 0 : j + 1;
5173 }
5174 }
5175 else if (GET_CODE (x) == CONST_DOUBLE)
5176 {
5177 val = CONST_DOUBLE_LOW (x);
5178 for (i = 15; i >= 8; i--)
5179 {
5180 arr[i] = val & 0xff;
5181 val >>= 8;
5182 }
5183 val = CONST_DOUBLE_HIGH (x);
5184 for (i = 7; i >= 0; i--)
5185 {
5186 arr[i] = val & 0xff;
5187 val >>= 8;
5188 }
5189 }
5190 else if (GET_CODE (x) == CONST_VECTOR)
5191 {
5192 int units;
5193 rtx elt;
5194 mode = GET_MODE_INNER (mode);
5195 units = CONST_VECTOR_NUNITS (x);
5196 for (i = 0; i < units; i++)
5197 {
5198 elt = CONST_VECTOR_ELT (x, i);
5199 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5200 {
5201 if (GET_CODE (elt) == CONST_DOUBLE)
5202 val = const_double_to_hwint (elt);
5203 else
5204 val = INTVAL (elt);
5205 first = GET_MODE_SIZE (mode) - 1;
5206 if (first + i * GET_MODE_SIZE (mode) > 16)
5207 abort ();
5208 for (j = first; j >= 0; j--)
5209 {
5210 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5211 val >>= 8;
5212 }
5213 }
5214 }
5215 }
5216 else
5217 gcc_unreachable();
5218}
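/* A sketch, not part of spu.c: the repetition performed above for constants
   narrower than 16 bytes, e.g. a QImode 0xAB fills all 16 slots with 0xAB
   and an HImode 0x1234 repeats the pair 0x12, 0x34.  */
#include <string.h>

static void
splat_bytes (unsigned char arr[16], const unsigned char *val, int size)
{
  int i;
  for (i = 0; i < 16; i += size)        /* size is 1, 2, 4 or 8; divides 16 */
    memcpy (arr + i, val, size);
}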
5219
5220/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5221 smaller than 16 bytes, use the bytes that would represent that value
5222 in a register, e.g., for QImode return the value of arr[3]. */
5223rtx
e96f2783 5224array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5225{
5226 enum machine_mode inner_mode;
5227 rtvec v;
5228 int units, size, i, j, k;
5229 HOST_WIDE_INT val;
5230
5231 if (GET_MODE_CLASS (mode) == MODE_INT
5232 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5233 {
5234 j = GET_MODE_SIZE (mode);
5235 i = j < 4 ? 4 - j : 0;
5236 for (val = 0; i < j; i++)
5237 val = (val << 8) | arr[i];
5238 val = trunc_int_for_mode (val, mode);
5239 return GEN_INT (val);
5240 }
5241
5242 if (mode == TImode)
5243 {
5244 HOST_WIDE_INT high;
5245 for (i = high = 0; i < 8; i++)
5246 high = (high << 8) | arr[i];
5247 for (i = 8, val = 0; i < 16; i++)
5248 val = (val << 8) | arr[i];
5249 return immed_double_const (val, high, TImode);
5250 }
5251 if (mode == SFmode)
5252 {
5253 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5254 val = trunc_int_for_mode (val, SImode);
171b6d22 5255 return hwint_to_const_double (SFmode, val);
644459d0 5256 }
5257 if (mode == DFmode)
5258 {
1f915911 5259 for (i = 0, val = 0; i < 8; i++)
5260 val = (val << 8) | arr[i];
171b6d22 5261 return hwint_to_const_double (DFmode, val);
644459d0 5262 }
5263
5264 if (!VECTOR_MODE_P (mode))
5265 abort ();
5266
5267 units = GET_MODE_NUNITS (mode);
5268 size = GET_MODE_UNIT_SIZE (mode);
5269 inner_mode = GET_MODE_INNER (mode);
5270 v = rtvec_alloc (units);
5271
5272 for (k = i = 0; i < units; ++i)
5273 {
5274 val = 0;
5275 for (j = 0; j < size; j++, k++)
5276 val = (val << 8) | arr[k];
5277
5278 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5279 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5280 else
5281 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5282 }
5283 if (k > 16)
5284 abort ();
5285
5286 return gen_rtx_CONST_VECTOR (mode, v);
5287}
5288
5289static void
5290reloc_diagnostic (rtx x)
5291{
712d2297 5292 tree decl = 0;
644459d0 5293 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5294 return;
5295
5296 if (GET_CODE (x) == SYMBOL_REF)
5297 decl = SYMBOL_REF_DECL (x);
5298 else if (GET_CODE (x) == CONST
5299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5300 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5301
5302 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5303 if (decl && !DECL_P (decl))
5304 decl = 0;
5305
644459d0 5306 /* The decl could be a string constant. */
5307 if (decl && DECL_P (decl))
712d2297 5308 {
5309 location_t loc;
5310 /* We use last_assemble_variable_decl to get line information. It's
5311 not always going to be right and might not even be close, but will
5312 be right for the more common cases. */
5313 if (!last_assemble_variable_decl || in_section == ctors_section)
5314 loc = DECL_SOURCE_LOCATION (decl);
5315 else
5316 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5317
712d2297 5318 if (TARGET_WARN_RELOC)
5319 warning_at (loc, 0,
5320 "creating run-time relocation for %qD", decl);
5321 else
5322 error_at (loc,
5323 "creating run-time relocation for %qD", decl);
5324 }
5325 else
5326 {
5327 if (TARGET_WARN_RELOC)
5328 warning_at (input_location, 0, "creating run-time relocation");
5329 else
5330 error_at (input_location, "creating run-time relocation");
5331 }
644459d0 5332}
5333
5334/* Hook into assemble_integer so we can generate an error for run-time
5335 relocations. The SPU ABI disallows them. */
5336static bool
5337spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5338{
5339 /* By default run-time relocations aren't supported, but we allow them
5340 in case users support it in their own run-time loader. And we provide
5341 a warning for those users that don't. */
5342 if ((GET_CODE (x) == SYMBOL_REF)
5343 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5344 reloc_diagnostic (x);
5345
5346 return default_assemble_integer (x, size, aligned_p);
5347}
5348
5349static void
5350spu_asm_globalize_label (FILE * file, const char *name)
5351{
5352 fputs ("\t.global\t", file);
5353 assemble_name (file, name);
5354 fputs ("\n", file);
5355}
5356
5357static bool
f529eb25 5358spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5359 bool speed ATTRIBUTE_UNUSED)
644459d0 5360{
5361 enum machine_mode mode = GET_MODE (x);
5362 int cost = COSTS_N_INSNS (2);
5363
5364 /* Folding to a CONST_VECTOR will use extra space but there might
5365 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5366 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5367 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5368 because this cost will only be compared against a single insn.
5369 if (code == CONST_VECTOR)
5370 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5371 */
5372
5373 /* Use defaults for float operations. Not accurate but good enough. */
5374 if (mode == DFmode)
5375 {
5376 *total = COSTS_N_INSNS (13);
5377 return true;
5378 }
5379 if (mode == SFmode)
5380 {
5381 *total = COSTS_N_INSNS (6);
5382 return true;
5383 }
5384 switch (code)
5385 {
5386 case CONST_INT:
5387 if (satisfies_constraint_K (x))
5388 *total = 0;
5389 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5390 *total = COSTS_N_INSNS (1);
5391 else
5392 *total = COSTS_N_INSNS (3);
5393 return true;
5394
5395 case CONST:
5396 *total = COSTS_N_INSNS (3);
5397 return true;
5398
5399 case LABEL_REF:
5400 case SYMBOL_REF:
5401 *total = COSTS_N_INSNS (0);
5402 return true;
5403
5404 case CONST_DOUBLE:
5405 *total = COSTS_N_INSNS (5);
5406 return true;
5407
5408 case FLOAT_EXTEND:
5409 case FLOAT_TRUNCATE:
5410 case FLOAT:
5411 case UNSIGNED_FLOAT:
5412 case FIX:
5413 case UNSIGNED_FIX:
5414 *total = COSTS_N_INSNS (7);
5415 return true;
5416
5417 case PLUS:
5418 if (mode == TImode)
5419 {
5420 *total = COSTS_N_INSNS (9);
5421 return true;
5422 }
5423 break;
5424
5425 case MULT:
5426 cost =
5427 GET_CODE (XEXP (x, 0)) ==
5428 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5429 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5430 {
5431 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5432 {
5433 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5434 cost = COSTS_N_INSNS (14);
5435 if ((val & 0xffff) == 0)
5436 cost = COSTS_N_INSNS (9);
5437 else if (val > 0 && val < 0x10000)
5438 cost = COSTS_N_INSNS (11);
5439 }
5440 }
5441 *total = cost;
5442 return true;
5443 case DIV:
5444 case UDIV:
5445 case MOD:
5446 case UMOD:
5447 *total = COSTS_N_INSNS (20);
5448 return true;
5449 case ROTATE:
5450 case ROTATERT:
5451 case ASHIFT:
5452 case ASHIFTRT:
5453 case LSHIFTRT:
5454 *total = COSTS_N_INSNS (4);
5455 return true;
5456 case UNSPEC:
5457 if (XINT (x, 1) == UNSPEC_CONVERT)
5458 *total = COSTS_N_INSNS (0);
5459 else
5460 *total = COSTS_N_INSNS (4);
5461 return true;
5462 }
5463 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5464 if (GET_MODE_CLASS (mode) == MODE_INT
5465 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5466 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5467 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5468 *total = cost;
5469 return true;
5470}
5471
1bd43494 5472static enum machine_mode
5473spu_unwind_word_mode (void)
644459d0 5474{
1bd43494 5475 return SImode;
644459d0 5476}
5477
5478/* Decide whether we can make a sibling call to a function. DECL is the
5479 declaration of the function being targeted by the call and EXP is the
5480 CALL_EXPR representing the call. */
5481static bool
5482spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5483{
5484 return decl && !TARGET_LARGE_MEM;
5485}
5486
5487/* We need to correctly update the back chain pointer and the Available
5488 Stack Size (which is in the second slot of the sp register). */
5489void
5490spu_allocate_stack (rtx op0, rtx op1)
5491{
5492 HOST_WIDE_INT v;
5493 rtx chain = gen_reg_rtx (V4SImode);
5494 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5495 rtx sp = gen_reg_rtx (V4SImode);
5496 rtx splatted = gen_reg_rtx (V4SImode);
5497 rtx pat = gen_reg_rtx (TImode);
5498
5499 /* copy the back chain so we can save it back again. */
5500 emit_move_insn (chain, stack_bot);
5501
5502 op1 = force_reg (SImode, op1);
5503
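  /* The shufb pattern built below (selector bytes 0x00-0x03 repeated in
     every word) broadcasts the first word of op1, so the vector
     subtraction decrements both the stack pointer (slot 0) and the
     Available Stack Size (slot 1) in a single insn.  */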
5504 v = 0x1020300010203ll;
5505 emit_move_insn (pat, immed_double_const (v, v, TImode));
5506 emit_insn (gen_shufb (splatted, op1, op1, pat));
5507
5508 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5509 emit_insn (gen_subv4si3 (sp, sp, splatted));
5510
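  /* With stack checking enabled, halt when the new Available Stack Size is
     negative: cgt yields zero when avail <= -1, and heq traps when its
     operand equals zero.  */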
5511 if (flag_stack_check)
5512 {
5513 rtx avail = gen_reg_rtx(SImode);
5514 rtx result = gen_reg_rtx(SImode);
5515 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5516 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5517 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5518 }
5519
5520 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5521
5522 emit_move_insn (stack_bot, chain);
5523
5524 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5525}
5526
5527void
5528spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5529{
5530 static unsigned char arr[16] =
5531 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5532 rtx temp = gen_reg_rtx (SImode);
5533 rtx temp2 = gen_reg_rtx (SImode);
5534 rtx temp3 = gen_reg_rtx (V4SImode);
5535 rtx temp4 = gen_reg_rtx (V4SImode);
5536 rtx pat = gen_reg_rtx (TImode);
5537 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5538
5539 /* Restore the backchain from the first word, sp from the second. */
5540 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5541 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5542
5543 emit_move_insn (pat, array_to_constant (TImode, arr));
5544
5545 /* Compute Available Stack Size for sp */
5546 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5547 emit_insn (gen_shufb (temp3, temp, temp, pat));
5548
5549 /* Compute Available Stack Size for back chain */
5550 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5551 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5552 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5553
5554 emit_insn (gen_addv4si3 (sp, sp, temp3));
5555 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5556}
5557
5558static void
5559spu_init_libfuncs (void)
5560{
5561 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5562 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5563 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5564 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5565 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5566 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5567 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5568 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5569 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5570 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5571 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5572
5573 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5574 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5575
5576 set_optab_libfunc (smul_optab, TImode, "__multi3");
5577 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5578 set_optab_libfunc (smod_optab, TImode, "__modti3");
5579 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5580 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5581 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5582}
5583
5584/* Make a subreg, stripping any existing subreg. We could possibly just
5585 call simplify_subreg, but in this case we know what we want. */
5586rtx
5587spu_gen_subreg (enum machine_mode mode, rtx x)
5588{
5589 if (GET_CODE (x) == SUBREG)
5590 x = SUBREG_REG (x);
5591 if (GET_MODE (x) == mode)
5592 return x;
5593 return gen_rtx_SUBREG (mode, x, 0);
5594}
5595
5596static bool
fb80456a 5597spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5598{
5599 return (TYPE_MODE (type) == BLKmode
5600 && ((type) == 0
5601 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5602 || int_size_in_bytes (type) >
5603 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5604}
5605\f
5606/* Create the built-in types and functions */
5607
c2233b46 5608enum spu_function_code
5609{
5610#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5611#include "spu-builtins.def"
5612#undef DEF_BUILTIN
5613 NUM_SPU_BUILTINS
5614};
5615
5616extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5617
644459d0 5618struct spu_builtin_description spu_builtins[] = {
5619#define DEF_BUILTIN(fcode, icode, name, type, params) \
5620 {fcode, icode, name, type, params, NULL_TREE},
5621#include "spu-builtins.def"
5622#undef DEF_BUILTIN
5623};
5624
e6925042 5625/* Returns the SPU builtin decl for CODE. */
5626
5627static tree
5628spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5629{
5630 if (code >= NUM_SPU_BUILTINS)
5631 return error_mark_node;
5632
5633 return spu_builtins[code].fndecl;
5634}
5635
5636
644459d0 5637static void
5638spu_init_builtins (void)
5639{
5640 struct spu_builtin_description *d;
5641 unsigned int i;
5642
5643 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5644 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5645 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5646 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5647 V4SF_type_node = build_vector_type (float_type_node, 4);
5648 V2DF_type_node = build_vector_type (double_type_node, 2);
5649
5650 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5651 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5652 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5653 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5654
c4ecce0c 5655 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5656
5657 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5658 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5659 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5660 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5661 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5662 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5663 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5664 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5665 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5666 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5667 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5668 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5669
5670 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5671 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5672 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5673 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5674 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5675 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5676 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5677 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5678
5679 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5680 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5681
5682 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5683
5684 spu_builtin_types[SPU_BTI_PTR] =
5685 build_pointer_type (build_qualified_type
5686 (void_type_node,
5687 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5688
5689 /* For each builtin we build a new prototype. The tree code will make
5690 sure nodes are shared. */
5691 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5692 {
5693 tree p;
 5694 char name[64]; /* add_builtin_function will make a copy. */
5695 int parm;
5696
5697 if (d->name == 0)
5698 continue;
5699
5dfbd18f 5700 /* Find last parm. */
644459d0 5701 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5702 ;
644459d0 5703
5704 p = void_list_node;
5705 while (parm > 1)
5706 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5707
5708 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5709
5710 sprintf (name, "__builtin_%s", d->name);
5711 d->fndecl =
5712 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5713 NULL, NULL_TREE);
a76866d3 5714 if (d->fcode == SPU_MASK_FOR_LOAD)
5715 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5716
5717 /* These builtins don't throw. */
5718 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5719 }
5720}
5721
cf31d486 5722void
5723spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5724{
5725 static unsigned char arr[16] =
5726 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5727
5728 rtx temp = gen_reg_rtx (Pmode);
5729 rtx temp2 = gen_reg_rtx (V4SImode);
5730 rtx temp3 = gen_reg_rtx (V4SImode);
5731 rtx pat = gen_reg_rtx (TImode);
5732 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5733
5734 emit_move_insn (pat, array_to_constant (TImode, arr));
5735
5736 /* Restore the sp. */
5737 emit_move_insn (temp, op1);
5738 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5739
5740 /* Compute available stack size for sp. */
5741 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5742 emit_insn (gen_shufb (temp3, temp, temp, pat));
5743
5744 emit_insn (gen_addv4si3 (sp, sp, temp3));
5745 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5746}
5747
644459d0 5748int
5749spu_safe_dma (HOST_WIDE_INT channel)
5750{
006e4b96 5751 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5752}
5753
5754void
5755spu_builtin_splats (rtx ops[])
5756{
5757 enum machine_mode mode = GET_MODE (ops[0]);
5758 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5759 {
5760 unsigned char arr[16];
5761 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5762 emit_move_insn (ops[0], array_to_constant (mode, arr));
5763 }
644459d0 5764 else
5765 {
5766 rtx reg = gen_reg_rtx (TImode);
5767 rtx shuf;
5768 if (GET_CODE (ops[1]) != REG
5769 && GET_CODE (ops[1]) != SUBREG)
5770 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
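      /* The TImode constants below are shufb selector patterns that
         replicate the element held in ops[1] across every slot of the
         result.  */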
5771 switch (mode)
5772 {
5773 case V2DImode:
5774 case V2DFmode:
5775 shuf =
5776 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5777 TImode);
5778 break;
5779 case V4SImode:
5780 case V4SFmode:
5781 shuf =
5782 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5783 TImode);
5784 break;
5785 case V8HImode:
5786 shuf =
5787 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5788 TImode);
5789 break;
5790 case V16QImode:
5791 shuf =
5792 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5793 TImode);
5794 break;
5795 default:
5796 abort ();
5797 }
5798 emit_move_insn (reg, shuf);
5799 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5800 }
5801}
5802
5803void
5804spu_builtin_extract (rtx ops[])
5805{
5806 enum machine_mode mode;
5807 rtx rot, from, tmp;
5808
5809 mode = GET_MODE (ops[1]);
5810
5811 if (GET_CODE (ops[2]) == CONST_INT)
5812 {
5813 switch (mode)
5814 {
5815 case V16QImode:
5816 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5817 break;
5818 case V8HImode:
5819 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5820 break;
5821 case V4SFmode:
5822 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5823 break;
5824 case V4SImode:
5825 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5826 break;
5827 case V2DImode:
5828 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5829 break;
5830 case V2DFmode:
5831 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5832 break;
5833 default:
5834 abort ();
5835 }
5836 return;
5837 }
5838
5839 from = spu_gen_subreg (TImode, ops[1]);
5840 rot = gen_reg_rtx (TImode);
5841 tmp = gen_reg_rtx (SImode);
5842
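  /* For a variable index, compute the left byte rotation that brings the
     selected element into the scalar preferred slot (byte 3 for bytes,
     bytes 2-3 for halfwords, the leading word or doubleword otherwise).  */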
5843 switch (mode)
5844 {
5845 case V16QImode:
5846 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5847 break;
5848 case V8HImode:
5849 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5850 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5851 break;
5852 case V4SFmode:
5853 case V4SImode:
5854 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5855 break;
5856 case V2DImode:
5857 case V2DFmode:
5858 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5859 break;
5860 default:
5861 abort ();
5862 }
5863 emit_insn (gen_rotqby_ti (rot, from, tmp));
5864
5865 emit_insn (gen_spu_convert (ops[0], rot));
5866}
5867
5868void
5869spu_builtin_insert (rtx ops[])
5870{
5871 enum machine_mode mode = GET_MODE (ops[0]);
5872 enum machine_mode imode = GET_MODE_INNER (mode);
5873 rtx mask = gen_reg_rtx (TImode);
5874 rtx offset;
5875
5876 if (GET_CODE (ops[3]) == CONST_INT)
5877 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5878 else
5879 {
5880 offset = gen_reg_rtx (SImode);
5881 emit_insn (gen_mulsi3
5882 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5883 }
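  /* gen_cpat builds the shuffle control for inserting an element of size
     imode at byte OFFSET; the 16-byte aligned stack pointer contributes
     only its alignment.  The shufb below then merges the new element
     (ops[1]) into the existing vector (ops[2]).  */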
5884 emit_insn (gen_cpat
5885 (mask, stack_pointer_rtx, offset,
5886 GEN_INT (GET_MODE_SIZE (imode))));
5887 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5888}
5889
5890void
5891spu_builtin_promote (rtx ops[])
5892{
5893 enum machine_mode mode, imode;
5894 rtx rot, from, offset;
5895 HOST_WIDE_INT pos;
5896
5897 mode = GET_MODE (ops[0]);
5898 imode = GET_MODE_INNER (mode);
5899
5900 from = gen_reg_rtx (TImode);
5901 rot = spu_gen_subreg (TImode, ops[0]);
5902
5903 emit_insn (gen_spu_convert (from, ops[1]));
5904
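  /* Compute the byte rotation that moves the scalar from its preferred
     slot into element ops[2] of the result; the constant case masks the
     amount to the 0-15 range.  */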
5905 if (GET_CODE (ops[2]) == CONST_INT)
5906 {
5907 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5908 if (GET_MODE_SIZE (imode) < 4)
5909 pos += 4 - GET_MODE_SIZE (imode);
5910 offset = GEN_INT (pos & 15);
5911 }
5912 else
5913 {
5914 offset = gen_reg_rtx (SImode);
5915 switch (mode)
5916 {
5917 case V16QImode:
5918 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5919 break;
5920 case V8HImode:
5921 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5922 emit_insn (gen_addsi3 (offset, offset, offset));
5923 break;
5924 case V4SFmode:
5925 case V4SImode:
5926 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5927 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5928 break;
5929 case V2DImode:
5930 case V2DFmode:
5931 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5932 break;
5933 default:
5934 abort ();
5935 }
5936 }
5937 emit_insn (gen_rotqby_ti (rot, from, offset));
5938}
5939
e96f2783 5940static void
5941spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5942{
e96f2783 5943 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5944 rtx shuf = gen_reg_rtx (V4SImode);
5945 rtx insn = gen_reg_rtx (V4SImode);
5946 rtx shufc;
5947 rtx insnc;
5948 rtx mem;
5949
5950 fnaddr = force_reg (SImode, fnaddr);
5951 cxt = force_reg (SImode, cxt);
5952
5953 if (TARGET_LARGE_MEM)
5954 {
5955 rtx rotl = gen_reg_rtx (V4SImode);
5956 rtx mask = gen_reg_rtx (V4SImode);
5957 rtx bi = gen_reg_rtx (SImode);
e96f2783 5958 static unsigned char const shufa[16] = {
644459d0 5959 2, 3, 0, 1, 18, 19, 16, 17,
5960 0, 1, 2, 3, 16, 17, 18, 19
5961 };
e96f2783 5962 static unsigned char const insna[16] = {
644459d0 5963 0x41, 0, 0, 79,
5964 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5965 0x60, 0x80, 0, 79,
5966 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5967 };
5968
5969 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5970 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5971
5972 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5973 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5974 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5975 emit_insn (gen_selb (insn, insnc, rotl, mask));
5976
e96f2783 5977 mem = adjust_address (m_tramp, V4SImode, 0);
5978 emit_move_insn (mem, insn);
644459d0 5979
5980 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5981 mem = adjust_address (m_tramp, Pmode, 16);
5982 emit_move_insn (mem, bi);
644459d0 5983 }
5984 else
5985 {
5986 rtx scxt = gen_reg_rtx (SImode);
5987 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5988 static unsigned char const insna[16] = {
644459d0 5989 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5990 0x30, 0, 0, 0,
5991 0, 0, 0, 0,
5992 0, 0, 0, 0
5993 };
5994
5995 shufc = gen_reg_rtx (TImode);
5996 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5997
5998 /* By or'ing all of cxt with the ila opcode we are assuming cxt
 5999 fits in 18 bits and the last 4 are zeros. This will be true if
6000 the stack pointer is initialized to 0x3fff0 at program start,
6001 otherwise the ila instruction will be garbage. */
6002
6003 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6004 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6005 emit_insn (gen_cpat
6006 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6007 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6008 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6009
e96f2783 6010 mem = adjust_address (m_tramp, V4SImode, 0);
6011 emit_move_insn (mem, insn);
644459d0 6012 }
6013 emit_insn (gen_sync ());
6014}
6015
6016void
6017spu_expand_sign_extend (rtx ops[])
6018{
6019 unsigned char arr[16];
6020 rtx pat = gen_reg_rtx (TImode);
6021 rtx sign, c;
6022 int i, last;
6023 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
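  /* arr[] becomes a shufb selector that fills the result with copies of
     the sign byte and keeps the original value's low-order bytes in the
     rightmost positions.  */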
6024 if (GET_MODE (ops[1]) == QImode)
6025 {
6026 sign = gen_reg_rtx (HImode);
6027 emit_insn (gen_extendqihi2 (sign, ops[1]));
6028 for (i = 0; i < 16; i++)
6029 arr[i] = 0x12;
6030 arr[last] = 0x13;
6031 }
6032 else
6033 {
6034 for (i = 0; i < 16; i++)
6035 arr[i] = 0x10;
6036 switch (GET_MODE (ops[1]))
6037 {
6038 case HImode:
6039 sign = gen_reg_rtx (SImode);
6040 emit_insn (gen_extendhisi2 (sign, ops[1]));
6041 arr[last] = 0x03;
6042 arr[last - 1] = 0x02;
6043 break;
6044 case SImode:
6045 sign = gen_reg_rtx (SImode);
6046 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6047 for (i = 0; i < 4; i++)
6048 arr[last - i] = 3 - i;
6049 break;
6050 case DImode:
6051 sign = gen_reg_rtx (SImode);
6052 c = gen_reg_rtx (SImode);
6053 emit_insn (gen_spu_convert (c, ops[1]));
6054 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6055 for (i = 0; i < 8; i++)
6056 arr[last - i] = 7 - i;
6057 break;
6058 default:
6059 abort ();
6060 }
6061 }
6062 emit_move_insn (pat, array_to_constant (TImode, arr));
6063 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6064}
6065
 6066/* Expand vector initialization. If there are any constant parts,
 6067 load the constant parts first, then load any non-constant parts. */
6068void
6069spu_expand_vector_init (rtx target, rtx vals)
6070{
6071 enum machine_mode mode = GET_MODE (target);
6072 int n_elts = GET_MODE_NUNITS (mode);
6073 int n_var = 0;
6074 bool all_same = true;
790c536c 6075 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6076 int i;
6077
6078 first = XVECEXP (vals, 0, 0);
6079 for (i = 0; i < n_elts; ++i)
6080 {
6081 x = XVECEXP (vals, 0, i);
e442af0b 6082 if (!(CONST_INT_P (x)
6083 || GET_CODE (x) == CONST_DOUBLE
6084 || GET_CODE (x) == CONST_FIXED))
644459d0 6085 ++n_var;
6086 else
6087 {
6088 if (first_constant == NULL_RTX)
6089 first_constant = x;
6090 }
6091 if (i > 0 && !rtx_equal_p (x, first))
6092 all_same = false;
6093 }
6094
 6095 /* If all elements are the same, use splats to repeat elements. */
6096 if (all_same)
6097 {
6098 if (!CONSTANT_P (first)
6099 && !register_operand (first, GET_MODE (x)))
6100 first = force_reg (GET_MODE (first), first);
6101 emit_insn (gen_spu_splats (target, first));
6102 return;
6103 }
6104
6105 /* load constant parts */
6106 if (n_var != n_elts)
6107 {
6108 if (n_var == 0)
6109 {
6110 emit_move_insn (target,
6111 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6112 }
6113 else
6114 {
6115 rtx constant_parts_rtx = copy_rtx (vals);
6116
6117 gcc_assert (first_constant != NULL_RTX);
 6118 /* Fill empty slots with the first constant; this increases
6119 our chance of using splats in the recursive call below. */
6120 for (i = 0; i < n_elts; ++i)
e442af0b 6121 {
6122 x = XVECEXP (constant_parts_rtx, 0, i);
6123 if (!(CONST_INT_P (x)
6124 || GET_CODE (x) == CONST_DOUBLE
6125 || GET_CODE (x) == CONST_FIXED))
6126 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6127 }
644459d0 6128
6129 spu_expand_vector_init (target, constant_parts_rtx);
6130 }
6131 }
6132
6133 /* load variable parts */
6134 if (n_var != 0)
6135 {
6136 rtx insert_operands[4];
6137
6138 insert_operands[0] = target;
6139 insert_operands[2] = target;
6140 for (i = 0; i < n_elts; ++i)
6141 {
6142 x = XVECEXP (vals, 0, i);
e442af0b 6143 if (!(CONST_INT_P (x)
6144 || GET_CODE (x) == CONST_DOUBLE
6145 || GET_CODE (x) == CONST_FIXED))
644459d0 6146 {
6147 if (!register_operand (x, GET_MODE (x)))
6148 x = force_reg (GET_MODE (x), x);
6149 insert_operands[1] = x;
6150 insert_operands[3] = GEN_INT (i);
6151 spu_builtin_insert (insert_operands);
6152 }
6153 }
6154 }
6155}
6352eedf 6156
5474166e 6157/* Return the insn index of the vector compare instruction for the given
 6158 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6159
6160static int
6161get_vec_cmp_insn (enum rtx_code code,
6162 enum machine_mode dest_mode,
6163 enum machine_mode op_mode)
6164
6165{
6166 switch (code)
6167 {
6168 case EQ:
6169 if (dest_mode == V16QImode && op_mode == V16QImode)
6170 return CODE_FOR_ceq_v16qi;
6171 if (dest_mode == V8HImode && op_mode == V8HImode)
6172 return CODE_FOR_ceq_v8hi;
6173 if (dest_mode == V4SImode && op_mode == V4SImode)
6174 return CODE_FOR_ceq_v4si;
6175 if (dest_mode == V4SImode && op_mode == V4SFmode)
6176 return CODE_FOR_ceq_v4sf;
6177 if (dest_mode == V2DImode && op_mode == V2DFmode)
6178 return CODE_FOR_ceq_v2df;
6179 break;
6180 case GT:
6181 if (dest_mode == V16QImode && op_mode == V16QImode)
6182 return CODE_FOR_cgt_v16qi;
6183 if (dest_mode == V8HImode && op_mode == V8HImode)
6184 return CODE_FOR_cgt_v8hi;
6185 if (dest_mode == V4SImode && op_mode == V4SImode)
6186 return CODE_FOR_cgt_v4si;
6187 if (dest_mode == V4SImode && op_mode == V4SFmode)
6188 return CODE_FOR_cgt_v4sf;
6189 if (dest_mode == V2DImode && op_mode == V2DFmode)
6190 return CODE_FOR_cgt_v2df;
6191 break;
6192 case GTU:
6193 if (dest_mode == V16QImode && op_mode == V16QImode)
6194 return CODE_FOR_clgt_v16qi;
6195 if (dest_mode == V8HImode && op_mode == V8HImode)
6196 return CODE_FOR_clgt_v8hi;
6197 if (dest_mode == V4SImode && op_mode == V4SImode)
6198 return CODE_FOR_clgt_v4si;
6199 break;
6200 default:
6201 break;
6202 }
6203 return -1;
6204}
6205
6206/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 6207 DMODE is the expected destination mode. This is a recursive function. */
6208
6209static rtx
6210spu_emit_vector_compare (enum rtx_code rcode,
6211 rtx op0, rtx op1,
6212 enum machine_mode dmode)
6213{
6214 int vec_cmp_insn;
6215 rtx mask;
6216 enum machine_mode dest_mode;
6217 enum machine_mode op_mode = GET_MODE (op1);
6218
6219 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6220
 6221 /* Floating point vector compare instructions use destination V4SImode.
 6222 Double floating point vector compare instructions use destination V2DImode.
 6223 Move the destination to the appropriate mode later. */
6224 if (dmode == V4SFmode)
6225 dest_mode = V4SImode;
6226 else if (dmode == V2DFmode)
6227 dest_mode = V2DImode;
6228 else
6229 dest_mode = dmode;
6230
6231 mask = gen_reg_rtx (dest_mode);
6232 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6233
6234 if (vec_cmp_insn == -1)
6235 {
6236 bool swap_operands = false;
6237 bool try_again = false;
6238 switch (rcode)
6239 {
6240 case LT:
6241 rcode = GT;
6242 swap_operands = true;
6243 try_again = true;
6244 break;
6245 case LTU:
6246 rcode = GTU;
6247 swap_operands = true;
6248 try_again = true;
6249 break;
6250 case NE:
6251 /* Treat A != B as ~(A==B). */
6252 {
6253 enum insn_code nor_code;
6254 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 6255 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 6256 gcc_assert (nor_code != CODE_FOR_nothing);
6257 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6258 if (dmode != dest_mode)
6259 {
6260 rtx temp = gen_reg_rtx (dest_mode);
6261 convert_move (temp, mask, 0);
6262 return temp;
6263 }
6264 return mask;
6265 }
6266 break;
6267 case GE:
6268 case GEU:
6269 case LE:
6270 case LEU:
6271 /* Try GT/GTU/LT/LTU OR EQ */
6272 {
6273 rtx c_rtx, eq_rtx;
6274 enum insn_code ior_code;
6275 enum rtx_code new_code;
6276
6277 switch (rcode)
6278 {
6279 case GE: new_code = GT; break;
6280 case GEU: new_code = GTU; break;
6281 case LE: new_code = LT; break;
6282 case LEU: new_code = LTU; break;
6283 default:
6284 gcc_unreachable ();
6285 }
6286
6287 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6288 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6289
99bdde56 6290 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 6291 gcc_assert (ior_code != CODE_FOR_nothing);
6292 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6293 if (dmode != dest_mode)
6294 {
6295 rtx temp = gen_reg_rtx (dest_mode);
6296 convert_move (temp, mask, 0);
6297 return temp;
6298 }
6299 return mask;
6300 }
6301 break;
6302 default:
6303 gcc_unreachable ();
6304 }
6305
6306 /* You only get two chances. */
6307 if (try_again)
6308 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6309
6310 gcc_assert (vec_cmp_insn != -1);
6311
6312 if (swap_operands)
6313 {
6314 rtx tmp;
6315 tmp = op0;
6316 op0 = op1;
6317 op1 = tmp;
6318 }
6319 }
6320
6321 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6322 if (dmode != dest_mode)
6323 {
6324 rtx temp = gen_reg_rtx (dest_mode);
6325 convert_move (temp, mask, 0);
6326 return temp;
6327 }
6328 return mask;
6329}
6330
6331
6332/* Emit vector conditional expression.
6333 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6334 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6335
6336int
6337spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6338 rtx cond, rtx cc_op0, rtx cc_op1)
6339{
6340 enum machine_mode dest_mode = GET_MODE (dest);
6341 enum rtx_code rcode = GET_CODE (cond);
6342 rtx mask;
6343
6344 /* Get the vector mask for the given relational operations. */
6345 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6346
6347 emit_insn(gen_selb (dest, op2, op1, mask));
6348
6349 return 1;
6350}
6351
6352eedf 6352static rtx
6353spu_force_reg (enum machine_mode mode, rtx op)
6354{
6355 rtx x, r;
6356 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6357 {
6358 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6359 || GET_MODE (op) == BLKmode)
6360 return force_reg (mode, convert_to_mode (mode, op, 0));
6361 abort ();
6362 }
6363
6364 r = force_reg (GET_MODE (op), op);
6365 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6366 {
6367 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6368 if (x)
6369 return x;
6370 }
6371
6372 x = gen_reg_rtx (mode);
6373 emit_insn (gen_spu_convert (x, r));
6374 return x;
6375}
6376
6377static void
6378spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6379{
6380 HOST_WIDE_INT v = 0;
6381 int lsbits;
6382 /* Check the range of immediate operands. */
6383 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6384 {
6385 int range = p - SPU_BTI_7;
5df189be 6386
6387 if (!CONSTANT_P (op))
6352eedf 6388 error ("%s expects an integer literal in the range [%d, %d].",
6389 d->name,
6390 spu_builtin_range[range].low, spu_builtin_range[range].high);
6391
6392 if (GET_CODE (op) == CONST
6393 && (GET_CODE (XEXP (op, 0)) == PLUS
6394 || GET_CODE (XEXP (op, 0)) == MINUS))
6395 {
6396 v = INTVAL (XEXP (XEXP (op, 0), 1));
6397 op = XEXP (XEXP (op, 0), 0);
6398 }
6399 else if (GET_CODE (op) == CONST_INT)
6400 v = INTVAL (op);
5df189be 6401 else if (GET_CODE (op) == CONST_VECTOR
6402 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6403 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6404
6405 /* The default for v is 0 which is valid in every range. */
6406 if (v < spu_builtin_range[range].low
6407 || v > spu_builtin_range[range].high)
6408 error ("%s expects an integer literal in the range [%d, %d]. ("
6409 HOST_WIDE_INT_PRINT_DEC ")",
6410 d->name,
6411 spu_builtin_range[range].low, spu_builtin_range[range].high,
6412 v);
6352eedf 6413
6414 switch (p)
6415 {
6416 case SPU_BTI_S10_4:
6417 lsbits = 4;
6418 break;
6419 case SPU_BTI_U16_2:
 6420 /* This is only used in lqa and stqa. Even though the insns
6421 encode 16 bits of the address (all but the 2 least
6422 significant), only 14 bits are used because it is masked to
 6423 be 16-byte aligned. */
6424 lsbits = 4;
6425 break;
6426 case SPU_BTI_S16_2:
6427 /* This is used for lqr and stqr. */
6428 lsbits = 2;
6429 break;
6430 default:
6431 lsbits = 0;
6432 }
6433
6434 if (GET_CODE (op) == LABEL_REF
6435 || (GET_CODE (op) == SYMBOL_REF
6436 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6437 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6438 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6439 d->name);
6440 }
6441}
6442
6443
70ca06f8 6444static int
5df189be 6445expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6446 rtx target, rtx ops[])
6447{
bc620c5c 6448 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6449 int i = 0, a;
6352eedf 6450
6451 /* Expand the arguments into rtl. */
6452
6453 if (d->parm[0] != SPU_BTI_VOID)
6454 ops[i++] = target;
6455
70ca06f8 6456 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6457 {
5df189be 6458 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6459 if (arg == 0)
6460 abort ();
b9c74b4d 6461 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6462 }
70ca06f8 6463
6464 /* The insn pattern may have additional operands (SCRATCH).
6465 Return the number of actual non-SCRATCH operands. */
6466 gcc_assert (i <= insn_data[icode].n_operands);
6467 return i;
6352eedf 6468}
6469
6470static rtx
6471spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6472 tree exp, rtx target)
6352eedf 6473{
6474 rtx pat;
6475 rtx ops[8];
bc620c5c 6476 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6477 enum machine_mode mode, tmode;
6478 int i, p;
70ca06f8 6479 int n_operands;
6352eedf 6480 tree return_type;
6481
6482 /* Set up ops[] with values from arglist. */
70ca06f8 6483 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6484
6485 /* Handle the target operand which must be operand 0. */
6486 i = 0;
6487 if (d->parm[0] != SPU_BTI_VOID)
6488 {
6489
 6490 /* We prefer the mode specified for the match_operand; otherwise
6491 use the mode from the builtin function prototype. */
6492 tmode = insn_data[d->icode].operand[0].mode;
6493 if (tmode == VOIDmode)
6494 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6495
 6496 /* Try to use target because not using it can lead to extra copies,
 6497 and when we are using all of the registers extra copies lead
 6498 to extra spills. */
6499 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6500 ops[0] = target;
6501 else
6502 target = ops[0] = gen_reg_rtx (tmode);
6503
6504 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6505 abort ();
6506
6507 i++;
6508 }
6509
a76866d3 6510 if (d->fcode == SPU_MASK_FOR_LOAD)
6511 {
6512 enum machine_mode mode = insn_data[icode].operand[1].mode;
6513 tree arg;
6514 rtx addr, op, pat;
6515
6516 /* get addr */
5df189be 6517 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6518 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6519 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6520 addr = memory_address (mode, op);
6521
6522 /* negate addr */
6523 op = gen_reg_rtx (GET_MODE (addr));
6524 emit_insn (gen_rtx_SET (VOIDmode, op,
6525 gen_rtx_NEG (GET_MODE (addr), addr)));
6526 op = gen_rtx_MEM (mode, op);
6527
6528 pat = GEN_FCN (icode) (target, op);
6529 if (!pat)
6530 return 0;
6531 emit_insn (pat);
6532 return target;
6533 }
6534
6352eedf 6535 /* Ignore align_hint, but still expand its args in case they have
6536 side effects. */
6537 if (icode == CODE_FOR_spu_align_hint)
6538 return 0;
6539
6540 /* Handle the rest of the operands. */
70ca06f8 6541 for (p = 1; i < n_operands; i++, p++)
6352eedf 6542 {
6543 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6544 mode = insn_data[d->icode].operand[i].mode;
6545 else
6546 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6547
6548 /* mode can be VOIDmode here for labels */
6549
6550 /* For specific intrinsics with an immediate operand, e.g.,
6551 si_ai(), we sometimes need to convert the scalar argument to a
6552 vector argument by splatting the scalar. */
6553 if (VECTOR_MODE_P (mode)
6554 && (GET_CODE (ops[i]) == CONST_INT
6555 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6556 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6557 {
6558 if (GET_CODE (ops[i]) == CONST_INT)
6559 ops[i] = spu_const (mode, INTVAL (ops[i]));
6560 else
6561 {
6562 rtx reg = gen_reg_rtx (mode);
6563 enum machine_mode imode = GET_MODE_INNER (mode);
6564 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6565 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6566 if (imode != GET_MODE (ops[i]))
6567 ops[i] = convert_to_mode (imode, ops[i],
6568 TYPE_UNSIGNED (spu_builtin_types
6569 [d->parm[i]]));
6570 emit_insn (gen_spu_splats (reg, ops[i]));
6571 ops[i] = reg;
6572 }
6573 }
6574
5df189be 6575 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6576
6352eedf 6577 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6578 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6579 }
6580
70ca06f8 6581 switch (n_operands)
6352eedf 6582 {
6583 case 0:
6584 pat = GEN_FCN (icode) (0);
6585 break;
6586 case 1:
6587 pat = GEN_FCN (icode) (ops[0]);
6588 break;
6589 case 2:
6590 pat = GEN_FCN (icode) (ops[0], ops[1]);
6591 break;
6592 case 3:
6593 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6594 break;
6595 case 4:
6596 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6597 break;
6598 case 5:
6599 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6600 break;
6601 case 6:
6602 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6603 break;
6604 default:
6605 abort ();
6606 }
6607
6608 if (!pat)
6609 abort ();
6610
6611 if (d->type == B_CALL || d->type == B_BISLED)
6612 emit_call_insn (pat);
6613 else if (d->type == B_JUMP)
6614 {
6615 emit_jump_insn (pat);
6616 emit_barrier ();
6617 }
6618 else
6619 emit_insn (pat);
6620
6621 return_type = spu_builtin_types[d->parm[0]];
6622 if (d->parm[0] != SPU_BTI_VOID
6623 && GET_MODE (target) != TYPE_MODE (return_type))
6624 {
 6625 /* target is the return value. It should always have the mode of
6626 the builtin function prototype. */
6627 target = spu_force_reg (TYPE_MODE (return_type), target);
6628 }
6629
6630 return target;
6631}
6632
6633rtx
6634spu_expand_builtin (tree exp,
6635 rtx target,
6636 rtx subtarget ATTRIBUTE_UNUSED,
6637 enum machine_mode mode ATTRIBUTE_UNUSED,
6638 int ignore ATTRIBUTE_UNUSED)
6639{
5df189be 6640 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6641 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6642 struct spu_builtin_description *d;
6643
6644 if (fcode < NUM_SPU_BUILTINS)
6645 {
6646 d = &spu_builtins[fcode];
6647
5df189be 6648 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6649 }
6650 abort ();
6651}
6652
e99f512d 6653/* Implement targetm.vectorize.builtin_mul_widen_even. */
6654static tree
6655spu_builtin_mul_widen_even (tree type)
6656{
e99f512d 6657 switch (TYPE_MODE (type))
6658 {
6659 case V8HImode:
6660 if (TYPE_UNSIGNED (type))
6661 return spu_builtins[SPU_MULE_0].fndecl;
6662 else
6663 return spu_builtins[SPU_MULE_1].fndecl;
6664 break;
6665 default:
6666 return NULL_TREE;
6667 }
6668}
6669
6670/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6671static tree
6672spu_builtin_mul_widen_odd (tree type)
6673{
6674 switch (TYPE_MODE (type))
6675 {
6676 case V8HImode:
6677 if (TYPE_UNSIGNED (type))
6678 return spu_builtins[SPU_MULO_1].fndecl;
6679 else
6680 return spu_builtins[SPU_MULO_0].fndecl;
6681 break;
6682 default:
6683 return NULL_TREE;
6684 }
6685}
6686
a76866d3 6687/* Implement targetm.vectorize.builtin_mask_for_load. */
6688static tree
6689spu_builtin_mask_for_load (void)
6690{
6691 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6692 gcc_assert (d);
6693 return d->fndecl;
6694}
5df189be 6695
a28df51d 6696/* Implement targetm.vectorize.builtin_vectorization_cost. */
6697static int
6698spu_builtin_vectorization_cost (bool runtime_test)
6699{
 6700 /* If the branch of the runtime test is taken, i.e. the vectorized
 6701 version is skipped, this incurs a misprediction cost (because the
6702 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6703 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6704 when the vectorized version is executed. */
6705 if (runtime_test)
6706 return -19;
6707 else
6708 return 0;
6709}
6710
0e87db76 6711/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6712 after applying N iterations. This routine does not determine
 6713 how many iterations are required to reach the desired alignment. */
6714
6715static bool
a9f1838b 6716spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6717{
6718 if (is_packed)
6719 return false;
6720
6721 /* All other types are naturally aligned. */
6722 return true;
6723}
6724
a0515226 6725/* Implement targetm.vectorize.builtin_vec_perm. */
6726tree
6727spu_builtin_vec_perm (tree type, tree *mask_element_type)
6728{
6729 struct spu_builtin_description *d;
6730
6731 *mask_element_type = unsigned_char_type_node;
6732
6733 switch (TYPE_MODE (type))
6734 {
6735 case V16QImode:
6736 if (TYPE_UNSIGNED (type))
6737 d = &spu_builtins[SPU_SHUFFLE_0];
6738 else
6739 d = &spu_builtins[SPU_SHUFFLE_1];
6740 break;
6741
6742 case V8HImode:
6743 if (TYPE_UNSIGNED (type))
6744 d = &spu_builtins[SPU_SHUFFLE_2];
6745 else
6746 d = &spu_builtins[SPU_SHUFFLE_3];
6747 break;
6748
6749 case V4SImode:
6750 if (TYPE_UNSIGNED (type))
6751 d = &spu_builtins[SPU_SHUFFLE_4];
6752 else
6753 d = &spu_builtins[SPU_SHUFFLE_5];
6754 break;
6755
6756 case V2DImode:
6757 if (TYPE_UNSIGNED (type))
6758 d = &spu_builtins[SPU_SHUFFLE_6];
6759 else
6760 d = &spu_builtins[SPU_SHUFFLE_7];
6761 break;
6762
6763 case V4SFmode:
6764 d = &spu_builtins[SPU_SHUFFLE_8];
6765 break;
6766
6767 case V2DFmode:
6768 d = &spu_builtins[SPU_SHUFFLE_9];
6769 break;
6770
6771 default:
6772 return NULL_TREE;
6773 }
6774
6775 gcc_assert (d);
6776 return d->fndecl;
6777}
6778
6cf5579e 6779/* Return the appropriate mode for a named address pointer. */
6780static enum machine_mode
6781spu_addr_space_pointer_mode (addr_space_t addrspace)
6782{
6783 switch (addrspace)
6784 {
6785 case ADDR_SPACE_GENERIC:
6786 return ptr_mode;
6787 case ADDR_SPACE_EA:
6788 return EAmode;
6789 default:
6790 gcc_unreachable ();
6791 }
6792}
6793
6794/* Return the appropriate mode for a named address address. */
6795static enum machine_mode
6796spu_addr_space_address_mode (addr_space_t addrspace)
6797{
6798 switch (addrspace)
6799 {
6800 case ADDR_SPACE_GENERIC:
6801 return Pmode;
6802 case ADDR_SPACE_EA:
6803 return EAmode;
6804 default:
6805 gcc_unreachable ();
6806 }
6807}
6808
6809/* Determine if one named address space is a subset of another. */
6810
6811static bool
6812spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6813{
6814 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6815 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6816
6817 if (subset == superset)
6818 return true;
6819
6820 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6821 being subsets but instead as disjoint address spaces. */
6822 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6823 return false;
6824
6825 else
6826 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6827}
6828
6829/* Convert from one address space to another. */
6830static rtx
6831spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6832{
6833 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6834 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6835
6836 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6837 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6838
6839 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6840 {
6841 rtx result, ls;
6842
6843 ls = gen_const_mem (DImode,
6844 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6845 set_mem_align (ls, 128);
6846
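      /* Convert an __ea pointer to a local-store pointer by subtracting the
         local store's base effective address; the conditional move keeps a
         NULL __ea pointer mapped to NULL.  */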
6847 result = gen_reg_rtx (Pmode);
6848 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6849 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6850 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6851 ls, const0_rtx, Pmode, 1);
6852
6853 emit_insn (gen_subsi3 (result, op, ls));
6854
6855 return result;
6856 }
6857
6858 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6859 {
6860 rtx result, ls;
6861
6862 ls = gen_const_mem (DImode,
6863 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6864 set_mem_align (ls, 128);
6865
6866 result = gen_reg_rtx (EAmode);
6867 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6868 op = force_reg (Pmode, op);
6869 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6870 ls, const0_rtx, EAmode, 1);
6871 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6872
6873 if (EAmode == SImode)
6874 emit_insn (gen_addsi3 (result, op, ls));
6875 else
6876 emit_insn (gen_adddi3 (result, op, ls));
6877
6878 return result;
6879 }
6880
6881 else
6882 gcc_unreachable ();
6883}
6884
6885
d52fd16a 6886/* Count the total number of instructions in each pipe and return the
6887 maximum, which is used as the Minimum Iteration Interval (MII)
6888 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6889 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6890static int
6891spu_sms_res_mii (struct ddg *g)
6892{
6893 int i;
6894 unsigned t[4] = {0, 0, 0, 0};
6895
6896 for (i = 0; i < g->num_nodes; i++)
6897 {
6898 rtx insn = g->nodes[i].insn;
6899 int p = get_pipe (insn) + 2;
6900
6901 assert (p >= 0);
6902 assert (p < 4);
6903
6904 t[p]++;
6905 if (dump_file && INSN_P (insn))
6906 fprintf (dump_file, "i%d %s %d %d\n",
6907 INSN_UID (insn),
6908 insn_data[INSN_CODE(insn)].name,
6909 p, t[p]);
6910 }
6911 if (dump_file)
6912 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6913
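  /* The MII is bounded below by each pipe's own instruction count and by
     half of all dual-issueable instructions (pipe0, pipe1 and either-pipe
     insns combined), since at most two instructions issue per cycle.  */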
6914 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6915}
6916
6917
5df189be 6918void
6919spu_init_expanders (void)
9d98604b 6920{
5df189be 6921 if (cfun)
9d98604b 6922 {
6923 rtx r0, r1;
 6924 /* The hard frame pointer register is only 128-bit aligned when
6925 frame_pointer_needed is true. We don't know that until we're
6926 expanding the prologue. */
6927 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6928
6929 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6930 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6931 to be treated as aligned, so generate them here. */
6932 r0 = gen_reg_rtx (SImode);
6933 r1 = gen_reg_rtx (SImode);
6934 mark_reg_pointer (r0, 128);
6935 mark_reg_pointer (r1, 128);
6936 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6937 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6938 }
ea32e033 6939}
6940
6941static enum machine_mode
6942spu_libgcc_cmp_return_mode (void)
6943{
6944
6945/* On SPU, word mode is TImode, so it is better to use SImode
6946 for compare returns. */
6947 return SImode;
6948}
6949
6950static enum machine_mode
6951spu_libgcc_shift_count_mode (void)
6952{
6953/* On SPU, word mode is TImode, so it is better to use SImode
6954 for shift counts. */
6955 return SImode;
6956}
5a976006 6957
6958/* An early place to adjust some flags after GCC has finished processing
 6959 them. */
6960static void
6961asm_file_start (void)
6962{
6963 /* Variable tracking should be run after all optimizations which
6964 change order of insns. It also needs a valid CFG. */
6965 spu_flag_var_tracking = flag_var_tracking;
6966 flag_var_tracking = 0;
6967
6968 default_file_start ();
6969}
6970
a08dfd55 6971/* Implement targetm.section_type_flags. */
6972static unsigned int
6973spu_section_type_flags (tree decl, const char *name, int reloc)
6974{
6975 /* .toe needs to have type @nobits. */
6976 if (strcmp (name, ".toe") == 0)
6977 return SECTION_BSS;
6cf5579e 6978 /* Don't load _ea into the current address space. */
6979 if (strcmp (name, "._ea") == 0)
6980 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6981 return default_section_type_flags (decl, name, reloc);
6982}
c2233b46 6983
6cf5579e 6984/* Implement targetm.select_section. */
6985static section *
6986spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6987{
6988 /* Variables and constants defined in the __ea address space
6989 go into a special section named "._ea". */
6990 if (TREE_TYPE (decl) != error_mark_node
6991 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6992 {
6993 /* We might get called with string constants, but get_named_section
6994 doesn't like them as they are not DECLs. Also, we need to set
6995 flags in that case. */
6996 if (!DECL_P (decl))
6997 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6998
6999 return get_named_section (decl, "._ea", reloc);
7000 }
7001
7002 return default_elf_select_section (decl, reloc, align);
7003}
7004
7005/* Implement targetm.unique_section. */
7006static void
7007spu_unique_section (tree decl, int reloc)
7008{
7009 /* We don't support unique section names in the __ea address
7010 space for now. */
7011 if (TREE_TYPE (decl) != error_mark_node
7012 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7013 return;
7014
7015 default_unique_section (decl, reloc);
7016}
7017
56c7bfc2 7018/* Generate a constant or register which contains 2^SCALE. We assume
7019 the result is valid for MODE. Currently, MODE must be V4SFmode and
7020 SCALE must be SImode. */
7021rtx
7022spu_gen_exp2 (enum machine_mode mode, rtx scale)
7023{
7024 gcc_assert (mode == V4SFmode);
7025 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7026 if (GET_CODE (scale) != CONST_INT)
7027 {
7028 /* unsigned int exp = (127 + scale) << 23;
7029 __vector float m = (__vector float) spu_splats (exp); */
7030 rtx reg = force_reg (SImode, scale);
7031 rtx exp = gen_reg_rtx (SImode);
7032 rtx mul = gen_reg_rtx (mode);
7033 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7034 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7035 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7036 return mul;
7037 }
7038 else
7039 {
7040 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7041 unsigned char arr[16];
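      /* Build the IEEE single-precision bit pattern for 2**scale directly:
         zero sign bit, zero mantissa, and the biased exponent (127 + scale)
         straddling bytes 0 and 1 of each 4-byte element.  */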
7042 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7043 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7044 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7045 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7046 return array_to_constant (mode, arr);
7047 }
7048}
7049
9d98604b 7050/* After reload, just change the convert into a move instruction
7051 or a dead instruction. */
7052void
7053spu_split_convert (rtx ops[])
7054{
7055 if (REGNO (ops[0]) == REGNO (ops[1]))
7056 emit_note (NOTE_INSN_DELETED);
7057 else
7058 {
7059 /* Use TImode always as this might help hard reg copyprop. */
7060 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7061 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7062 emit_insn (gen_move_insn (op0, op1));
7063 }
7064}
7065
b3878a6c 7066void
7067spu_function_profiler (FILE * file, int labelno)
7068{
7069 fprintf (file, "# profile\n");
7070 fprintf (file, "brsl $75, _mcount\n");
7071}
7072
c2233b46 7073#include "gt-spu.h"