7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
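/* Illustrative note (not in the original source): each entry above is the
   inclusive range accepted for the corresponding immediate-operand kind.
   For example, SPU_BTI_S10 spans -0x200..0x1ff (-512..511), the values
   that fit a 10-bit signed instruction field, and SPU_BTI_U7 spans
   0..0x7f for a 7-bit unsigned field.  The builtin expansion code (not
   shown in this excerpt) is assumed to index this table when
   range-checking constant arguments.  */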
146
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
152static void spu_init_builtins (void);
e6925042 153static tree spu_builtin_decl (unsigned, bool);
b62e30b8 154static bool spu_scalar_mode_supported_p (enum machine_mode mode);
155static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 156static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 157static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158 bool, addr_space_t);
644459d0 159static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160static rtx get_pic_reg (void);
161static int need_to_save_reg (int regno, int saving);
162static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165 rtx scratch);
166static void emit_nop_for_insn (rtx insn);
167static bool insn_clobbers_hbr (rtx insn);
168static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 169 int distance, sbitmap blocks);
5474166e 170static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171 enum machine_mode dmode);
644459d0 172static rtx get_branch_target (rtx branch);
644459d0 173static void spu_machine_dependent_reorg (void);
174static int spu_sched_issue_rate (void);
175static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176 int can_issue_more);
177static int get_pipe (rtx insn);
644459d0 178static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 179static void spu_sched_init_global (FILE *, int, int);
180static void spu_sched_init (FILE *, int, int);
181static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 182static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183 int flags,
b62e30b8 184 bool *no_add_attrs);
644459d0 185static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186 int flags,
b62e30b8 187 bool *no_add_attrs);
644459d0 188static int spu_naked_function_p (tree func);
b62e30b8 189static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
190 const_tree type, bool named);
644459d0 191static tree spu_build_builtin_va_list (void);
8a58ed0a 192static void spu_va_start (tree, rtx);
75a70cf9 193static tree spu_gimplify_va_arg_expr (tree valist, tree type,
194 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 195static int store_with_one_insn_p (rtx mem);
644459d0 196static int mem_is_padded_component_ref (rtx x);
9d98604b 197static int reg_aligned_for_addr (rtx x);
644459d0 198static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
199static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 200static bool spu_rtx_costs (rtx x, int code, int outer_code,
201 int *total, bool speed);
202static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 203static void spu_init_libfuncs (void);
fb80456a 204static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 205static void fix_range (const char *);
69ced2d6 206static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 207static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 208static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
209 addr_space_t);
e99f512d 210static tree spu_builtin_mul_widen_even (tree);
211static tree spu_builtin_mul_widen_odd (tree);
a76866d3 212static tree spu_builtin_mask_for_load (void);
0822b158 213static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 214static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 215static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 216static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
217static enum machine_mode spu_addr_space_address_mode (addr_space_t);
218static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
219static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 220static int spu_sms_res_mii (struct ddg *g);
5a976006 221static void asm_file_start (void);
a08dfd55 222static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 223static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
224static void spu_unique_section (tree, int);
9d98604b 225static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 226static void spu_trampoline_init (rtx, tree, rtx);
644459d0 227
5474166e 228/* Which instruction set architecture to use. */
229int spu_arch;
230/* Which CPU we are tuning for. */
231int spu_tune;
232
5a976006 233/* The hardware requires 8 insns between a hint and the branch it
234 affects. This variable describes how many rtl instructions the
235 compiler needs to see before inserting a hint, and then the compiler
236 will insert enough nops to make it at least 8 insns. The default is
237 for the compiler to allow up to 2 nops to be emitted. The nops are
238 inserted in pairs, so we round down. */
239int spu_hint_dist = (8*4) - (2*4);
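/* Worked example (illustrative): SPU instructions are 4 bytes, so 8 insns
   is 32 bytes.  With the default of 2 permitted nops this initializer is
   32 - 8 = 24 bytes, i.e. a hint must appear at least 6 insns before the
   branch or nops are inserted.  spu_override_options recomputes this from
   spu_max_nops.  */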
240
241/* Determines whether we run variable tracking in machine dependent
242 reorganization. */
243static int spu_flag_var_tracking;
244
644459d0 245enum spu_immediate {
246 SPU_NONE,
247 SPU_IL,
248 SPU_ILA,
249 SPU_ILH,
250 SPU_ILHU,
251 SPU_ORI,
252 SPU_ORHI,
253 SPU_ORBI,
99369027 254 SPU_IOHL
644459d0 255};
dea01258 256enum immediate_class
257{
258 IC_POOL, /* constant pool */
259 IC_IL1, /* one il* instruction */
260 IC_IL2, /* both ilhu and iohl instructions */
261 IC_IL1s, /* one il* instruction */
262 IC_IL2s, /* both ilhu and iohl instructions */
263 IC_FSMBI, /* the fsmbi instruction */
264 IC_CPAT, /* one of the c*d instructions */
5df189be 265 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 266};
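/* Rough examples of the classification (illustrative, not exhaustive): a
   small constant such as 23 that a single il/ilh/ila/ilhu can load is
   IC_IL1; a full 32-bit value like 0x12345678 that needs ilhu followed by
   iohl is IC_IL2; a vector whose bytes are all 0x00 or 0xff can come from
   one fsmbi and is IC_FSMBI; constants matching the insertion-control
   patterns that the c*d (cbd, chd, cwd, cdd) instructions generate are
   IC_CPAT; anything else is spilled to the constant pool as IC_POOL.
   classify_immediate below makes the actual decision.  */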
644459d0 267
268static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
269static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 270static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
271static enum immediate_class classify_immediate (rtx op,
272 enum machine_mode mode);
644459d0 273
1bd43494 274static enum machine_mode spu_unwind_word_mode (void);
275
ea32e033 276static enum machine_mode
277spu_libgcc_cmp_return_mode (void);
278
279static enum machine_mode
280spu_libgcc_shift_count_mode (void);
6cf5579e 281
282/* Pointer mode for __ea references. */
283#define EAmode (spu_ea_model != 32 ? DImode : SImode)
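/* Illustrative note: spu_ea_model is assumed here to be set from the
   -mea32/-mea64 command-line options, so EAmode is SImode for 32-bit
   effective addresses and DImode for 64-bit ones.  */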
284
ef51d1e3 285\f
286/* Table of machine attributes. */
287static const struct attribute_spec spu_attribute_table[] =
288{
289 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
290 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
291 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
292 { NULL, 0, 0, false, false, false, NULL }
293};
644459d0 294\f
295/* TARGET overrides. */
296
6cf5579e 297#undef TARGET_ADDR_SPACE_POINTER_MODE
298#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
299
300#undef TARGET_ADDR_SPACE_ADDRESS_MODE
301#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
302
303#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
304#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
305 spu_addr_space_legitimate_address_p
306
307#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
308#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
309
310#undef TARGET_ADDR_SPACE_SUBSET_P
311#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
312
313#undef TARGET_ADDR_SPACE_CONVERT
314#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
315
644459d0 316#undef TARGET_INIT_BUILTINS
317#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 318#undef TARGET_BUILTIN_DECL
319#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 320
644459d0 321#undef TARGET_EXPAND_BUILTIN
322#define TARGET_EXPAND_BUILTIN spu_expand_builtin
323
1bd43494 324#undef TARGET_UNWIND_WORD_MODE
325#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 326
41e3a0c7 327#undef TARGET_LEGITIMIZE_ADDRESS
328#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
329
6cf5579e 330/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
331 and .quad for the debugger. When it is known that the assembler is fixed,
332 these can be removed. */
333#undef TARGET_ASM_UNALIGNED_SI_OP
334#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
335
336#undef TARGET_ASM_ALIGNED_DI_OP
337#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
338
644459d0 339/* The .8byte directive doesn't seem to work well for a 32-bit
340 architecture. */
341#undef TARGET_ASM_UNALIGNED_DI_OP
342#define TARGET_ASM_UNALIGNED_DI_OP NULL
343
344#undef TARGET_RTX_COSTS
345#define TARGET_RTX_COSTS spu_rtx_costs
346
347#undef TARGET_ADDRESS_COST
f529eb25 348#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 349
350#undef TARGET_SCHED_ISSUE_RATE
351#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
352
5a976006 353#undef TARGET_SCHED_INIT_GLOBAL
354#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
355
356#undef TARGET_SCHED_INIT
357#define TARGET_SCHED_INIT spu_sched_init
358
644459d0 359#undef TARGET_SCHED_VARIABLE_ISSUE
360#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
361
5a976006 362#undef TARGET_SCHED_REORDER
363#define TARGET_SCHED_REORDER spu_sched_reorder
364
365#undef TARGET_SCHED_REORDER2
366#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 367
368#undef TARGET_SCHED_ADJUST_COST
369#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
370
644459d0 371#undef TARGET_ATTRIBUTE_TABLE
372#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
373
374#undef TARGET_ASM_INTEGER
375#define TARGET_ASM_INTEGER spu_assemble_integer
376
377#undef TARGET_SCALAR_MODE_SUPPORTED_P
378#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
379
380#undef TARGET_VECTOR_MODE_SUPPORTED_P
381#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
382
383#undef TARGET_FUNCTION_OK_FOR_SIBCALL
384#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
385
386#undef TARGET_ASM_GLOBALIZE_LABEL
387#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
388
389#undef TARGET_PASS_BY_REFERENCE
390#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
391
392#undef TARGET_MUST_PASS_IN_STACK
393#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
394
395#undef TARGET_BUILD_BUILTIN_VA_LIST
396#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
397
8a58ed0a 398#undef TARGET_EXPAND_BUILTIN_VA_START
399#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
400
644459d0 401#undef TARGET_SETUP_INCOMING_VARARGS
402#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
403
404#undef TARGET_MACHINE_DEPENDENT_REORG
405#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
406
407#undef TARGET_GIMPLIFY_VA_ARG_EXPR
408#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
409
410#undef TARGET_DEFAULT_TARGET_FLAGS
411#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
412
413#undef TARGET_INIT_LIBFUNCS
414#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
415
416#undef TARGET_RETURN_IN_MEMORY
417#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
418
69ced2d6 419#undef TARGET_ENCODE_SECTION_INFO
420#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
421
e99f512d 422#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
423#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
424
425#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
426#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
427
a76866d3 428#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
429#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
430
a28df51d 431#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
432#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
433
202d6e5f 434#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
435#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 436
a0515226 437#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
438#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
439
ea32e033 440#undef TARGET_LIBGCC_CMP_RETURN_MODE
441#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
442
443#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
444#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
445
d52fd16a 446#undef TARGET_SCHED_SMS_RES_MII
447#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
448
5a976006 449#undef TARGET_ASM_FILE_START
450#define TARGET_ASM_FILE_START asm_file_start
451
a08dfd55 452#undef TARGET_SECTION_TYPE_FLAGS
453#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
454
6cf5579e 455#undef TARGET_ASM_SELECT_SECTION
456#define TARGET_ASM_SELECT_SECTION spu_select_section
457
458#undef TARGET_ASM_UNIQUE_SECTION
459#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
460
fd50b071 461#undef TARGET_LEGITIMATE_ADDRESS_P
462#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
463
e96f2783 464#undef TARGET_TRAMPOLINE_INIT
465#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
466
644459d0 467struct gcc_target targetm = TARGET_INITIALIZER;
468
5df189be 469void
470spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
471{
5df189be 472 /* Override some of the default param values. With so many registers
473 larger values are better for these params. */
474 MAX_PENDING_LIST_LENGTH = 128;
475
476 /* With so many registers this is better on by default. */
477 flag_rename_registers = 1;
478}
479
644459d0 480/* Sometimes certain combinations of command options do not make sense
481 on a particular target machine. You can define a macro
482 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
483 executed once just after all the command options have been parsed. */
484void
485spu_override_options (void)
486{
14d408d9 487 /* Small loops will be unpeeled at -O3. For SPU it is more important
488 to keep code small by default. */
489 if (!flag_unroll_loops && !flag_peel_loops
490 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
491 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
492
644459d0 493 flag_omit_frame_pointer = 1;
494
5a976006 495 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 496 if (align_functions < 8)
497 align_functions = 8;
c7b91b14 498
5a976006 499 spu_hint_dist = 8*4 - spu_max_nops*4;
500 if (spu_hint_dist < 0)
501 spu_hint_dist = 0;
502
c7b91b14 503 if (spu_fixed_range_string)
504 fix_range (spu_fixed_range_string);
5474166e 505
506 /* Determine processor architectural level. */
507 if (spu_arch_string)
508 {
509 if (strcmp (&spu_arch_string[0], "cell") == 0)
510 spu_arch = PROCESSOR_CELL;
511 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
512 spu_arch = PROCESSOR_CELLEDP;
513 else
514 error ("Unknown architecture '%s'", &spu_arch_string[0]);
515 }
516
517 /* Determine processor to tune for. */
518 if (spu_tune_string)
519 {
520 if (strcmp (&spu_tune_string[0], "cell") == 0)
521 spu_tune = PROCESSOR_CELL;
522 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
523 spu_tune = PROCESSOR_CELLEDP;
524 else
525 error ("Unknown architecture '%s'", &spu_tune_string[0]);
526 }
98bbec1e 527
13684256 528 /* Change defaults according to the processor architecture. */
529 if (spu_arch == PROCESSOR_CELLEDP)
530 {
531 /* If no command line option has been otherwise specified, change
532 the default to -mno-safe-hints on celledp -- only the original
533 Cell/B.E. processors require this workaround. */
534 if (!(target_flags_explicit & MASK_SAFE_HINTS))
535 target_flags &= ~MASK_SAFE_HINTS;
536 }
537
98bbec1e 538 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 539}
540\f
541/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
542 struct attribute_spec.handler. */
543
644459d0 544/* True if MODE is valid for the target. By "valid", we mean able to
545 be manipulated in non-trivial ways. In particular, this means all
546 the arithmetic is supported. */
547static bool
548spu_scalar_mode_supported_p (enum machine_mode mode)
549{
550 switch (mode)
551 {
552 case QImode:
553 case HImode:
554 case SImode:
555 case SFmode:
556 case DImode:
557 case TImode:
558 case DFmode:
559 return true;
560
561 default:
562 return false;
563 }
564}
565
566/* Similarly for vector modes. "Supported" here is less strict. At
567 least some operations are supported; need to check optabs or builtins
568 for further details. */
569static bool
570spu_vector_mode_supported_p (enum machine_mode mode)
571{
572 switch (mode)
573 {
574 case V16QImode:
575 case V8HImode:
576 case V4SImode:
577 case V2DImode:
578 case V4SFmode:
579 case V2DFmode:
580 return true;
581
582 default:
583 return false;
584 }
585}
586
587/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
588 least significant bytes of the outer mode. This function returns
589 TRUE for the SUBREG's where this is correct. */
590int
591valid_subreg (rtx op)
592{
593 enum machine_mode om = GET_MODE (op);
594 enum machine_mode im = GET_MODE (SUBREG_REG (op));
595 return om != VOIDmode && im != VOIDmode
596 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 597 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
598 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 599}
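/* Illustrative examples: (subreg:SI (reg:QI)) and (subreg:QI (reg:SI)) pass
   because both modes fit in a 32-bit slot, and (subreg:V4SI (reg:TI))
   passes because both are full 16-byte quantities; (subreg:DI (reg:SI)) is
   rejected, since that paradoxical SUBREG would not keep the inner value
   in the least significant bytes on this target.  */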
600
601/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 602 and adjust the start offset. */
644459d0 603static rtx
604adjust_operand (rtx op, HOST_WIDE_INT * start)
605{
606 enum machine_mode mode;
607 int op_size;
38aca5eb 608 /* Strip any paradoxical SUBREG. */
609 if (GET_CODE (op) == SUBREG
610 && (GET_MODE_BITSIZE (GET_MODE (op))
611 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 612 {
613 if (start)
614 *start -=
615 GET_MODE_BITSIZE (GET_MODE (op)) -
616 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
617 op = SUBREG_REG (op);
618 }
619 /* If it is smaller than SI, ensure a SUBREG. */
620 op_size = GET_MODE_BITSIZE (GET_MODE (op));
621 if (op_size < 32)
622 {
623 if (start)
624 *start += 32 - op_size;
625 op_size = 32;
626 }
627 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
628 mode = mode_for_size (op_size, MODE_INT, 0);
629 if (mode != GET_MODE (op))
630 op = gen_rtx_SUBREG (mode, op, 0);
631 return op;
632}
633
634void
635spu_expand_extv (rtx ops[], int unsignedp)
636{
9d98604b 637 rtx dst = ops[0], src = ops[1];
644459d0 638 HOST_WIDE_INT width = INTVAL (ops[2]);
639 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 640 HOST_WIDE_INT align_mask;
641 rtx s0, s1, mask, r0;
644459d0 642
9d98604b 643 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 644
9d98604b 645 if (MEM_P (src))
644459d0 646 {
9d98604b 647 /* First, determine if we need 1 TImode load or 2. We need only 1
648 if the bits being extracted do not cross the alignment boundary
649 as determined by the MEM and its address. */
650
651 align_mask = -MEM_ALIGN (src);
652 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 653 {
9d98604b 654 /* Alignment is sufficient for 1 load. */
655 s0 = gen_reg_rtx (TImode);
656 r0 = spu_expand_load (s0, 0, src, start / 8);
657 start &= 7;
658 if (r0)
659 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 660 }
9d98604b 661 else
662 {
663 /* Need 2 loads. */
664 s0 = gen_reg_rtx (TImode);
665 s1 = gen_reg_rtx (TImode);
666 r0 = spu_expand_load (s0, s1, src, start / 8);
667 start &= 7;
668
669 gcc_assert (start + width <= 128);
670 if (r0)
671 {
672 rtx r1 = gen_reg_rtx (SImode);
673 mask = gen_reg_rtx (TImode);
674 emit_move_insn (mask, GEN_INT (-1));
675 emit_insn (gen_rotqby_ti (s0, s0, r0));
676 emit_insn (gen_rotqby_ti (s1, s1, r0));
677 if (GET_CODE (r0) == CONST_INT)
678 r1 = GEN_INT (INTVAL (r0) & 15);
679 else
680 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
681 emit_insn (gen_shlqby_ti (mask, mask, r1));
682 emit_insn (gen_selb (s0, s1, s0, mask));
683 }
684 }
685
686 }
687 else if (GET_CODE (src) == SUBREG)
688 {
689 rtx r = SUBREG_REG (src);
690 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
691 s0 = gen_reg_rtx (TImode);
692 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
693 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
694 else
695 emit_move_insn (s0, src);
696 }
697 else
698 {
699 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
700 s0 = gen_reg_rtx (TImode);
701 emit_move_insn (s0, src);
644459d0 702 }
703
9d98604b 704 /* Now s0 is TImode and contains the bits to extract at start. */
705
706 if (start)
707 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
708
709 if (128 - width)
644459d0 710 {
9d98604b 711 tree c = build_int_cst (NULL_TREE, 128 - width);
712 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 713 }
714
9d98604b 715 emit_move_insn (dst, s0);
644459d0 716}
717
718void
719spu_expand_insv (rtx ops[])
720{
721 HOST_WIDE_INT width = INTVAL (ops[1]);
722 HOST_WIDE_INT start = INTVAL (ops[2]);
723 HOST_WIDE_INT maskbits;
724 enum machine_mode dst_mode, src_mode;
725 rtx dst = ops[0], src = ops[3];
726 int dst_size, src_size;
727 rtx mask;
728 rtx shift_reg;
729 int shift;
730
731
732 if (GET_CODE (ops[0]) == MEM)
733 dst = gen_reg_rtx (TImode);
734 else
735 dst = adjust_operand (dst, &start);
736 dst_mode = GET_MODE (dst);
737 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
738
739 if (CONSTANT_P (src))
740 {
741 enum machine_mode m =
742 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
743 src = force_reg (m, convert_to_mode (m, src, 0));
744 }
745 src = adjust_operand (src, 0);
746 src_mode = GET_MODE (src);
747 src_size = GET_MODE_BITSIZE (GET_MODE (src));
748
749 mask = gen_reg_rtx (dst_mode);
750 shift_reg = gen_reg_rtx (dst_mode);
751 shift = dst_size - start - width;
752
753 /* It's not safe to use subreg here because the compiler assumes
754 that the SUBREG_REG is right justified in the SUBREG. */
755 convert_move (shift_reg, src, 1);
756
757 if (shift > 0)
758 {
759 switch (dst_mode)
760 {
761 case SImode:
762 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
763 break;
764 case DImode:
765 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
766 break;
767 case TImode:
768 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
769 break;
770 default:
771 abort ();
772 }
773 }
774 else if (shift < 0)
775 abort ();
776
777 switch (dst_size)
778 {
779 case 32:
780 maskbits = (-1ll << (32 - width - start));
781 if (start)
782 maskbits += (1ll << (32 - start));
783 emit_move_insn (mask, GEN_INT (maskbits));
784 break;
785 case 64:
786 maskbits = (-1ll << (64 - width - start));
787 if (start)
788 maskbits += (1ll << (64 - start));
789 emit_move_insn (mask, GEN_INT (maskbits));
790 break;
791 case 128:
792 {
793 unsigned char arr[16];
794 int i = start / 8;
795 memset (arr, 0, sizeof (arr));
796 arr[i] = 0xff >> (start & 7);
797 for (i++; i <= (start + width - 1) / 8; i++)
798 arr[i] = 0xff;
799 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
800 emit_move_insn (mask, array_to_constant (TImode, arr));
801 }
802 break;
803 default:
804 abort ();
805 }
806 if (GET_CODE (ops[0]) == MEM)
807 {
644459d0 808 rtx low = gen_reg_rtx (SImode);
644459d0 809 rtx rotl = gen_reg_rtx (SImode);
810 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 811 rtx addr;
812 rtx addr0;
813 rtx addr1;
644459d0 814 rtx mem;
815
9d98604b 816 addr = force_reg (Pmode, XEXP (ops[0], 0));
817 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 818 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
819 emit_insn (gen_negsi2 (rotl, low));
820 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
821 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 822 mem = change_address (ops[0], TImode, addr0);
644459d0 823 set_mem_alias_set (mem, 0);
824 emit_move_insn (dst, mem);
825 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 826 if (start + width > MEM_ALIGN (ops[0]))
827 {
828 rtx shl = gen_reg_rtx (SImode);
829 rtx mask1 = gen_reg_rtx (TImode);
830 rtx dst1 = gen_reg_rtx (TImode);
831 rtx mem1;
9d98604b 832 addr1 = plus_constant (addr, 16);
833 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 834 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
835 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 836 mem1 = change_address (ops[0], TImode, addr1);
644459d0 837 set_mem_alias_set (mem1, 0);
838 emit_move_insn (dst1, mem1);
839 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
840 emit_move_insn (mem1, dst1);
841 }
9d98604b 842 emit_move_insn (mem, dst);
644459d0 843 }
844 else
71cd778d 845 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 846}
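/* Worked example for the 32-bit case above (illustrative): inserting
   width 8 at start 8 gives shift = 32 - 8 - 8 = 16, so the source is
   shifted into bits [23:16], and
     maskbits = (-1ll << 16) + (1ll << 24) = 0x00ff0000,
   exactly the bits the final selb takes from shift_reg.  */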
847
848
849int
850spu_expand_block_move (rtx ops[])
851{
852 HOST_WIDE_INT bytes, align, offset;
853 rtx src, dst, sreg, dreg, target;
854 int i;
855 if (GET_CODE (ops[2]) != CONST_INT
856 || GET_CODE (ops[3]) != CONST_INT
48eb4342 857 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 858 return 0;
859
860 bytes = INTVAL (ops[2]);
861 align = INTVAL (ops[3]);
862
863 if (bytes <= 0)
864 return 1;
865
866 dst = ops[0];
867 src = ops[1];
868
869 if (align == 16)
870 {
871 for (offset = 0; offset + 16 <= bytes; offset += 16)
872 {
873 dst = adjust_address (ops[0], V16QImode, offset);
874 src = adjust_address (ops[1], V16QImode, offset);
875 emit_move_insn (dst, src);
876 }
877 if (offset < bytes)
878 {
879 rtx mask;
880 unsigned char arr[16] = { 0 };
881 for (i = 0; i < bytes - offset; i++)
882 arr[i] = 0xff;
883 dst = adjust_address (ops[0], V16QImode, offset);
884 src = adjust_address (ops[1], V16QImode, offset);
885 mask = gen_reg_rtx (V16QImode);
886 sreg = gen_reg_rtx (V16QImode);
887 dreg = gen_reg_rtx (V16QImode);
888 target = gen_reg_rtx (V16QImode);
889 emit_move_insn (mask, array_to_constant (V16QImode, arr));
890 emit_move_insn (dreg, dst);
891 emit_move_insn (sreg, src);
892 emit_insn (gen_selb (target, dreg, sreg, mask));
893 emit_move_insn (dst, target);
894 }
895 return 1;
896 }
897 return 0;
898}
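/* Illustrative example: a 20-byte copy with 16-byte alignment becomes one
   full V16QImode load/store for bytes 0..15; for the 4-byte tail a mask of
   { 0xff, 0xff, 0xff, 0xff, 0, ... } is built and selb merges the four
   source bytes into the destination quadword before it is stored back.
   Unaligned or oversized requests return 0 and are left to the generic
   block-move expansion.  */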
899
900enum spu_comp_code
901{ SPU_EQ, SPU_GT, SPU_GTU };
902
5474166e 903int spu_comp_icode[12][3] = {
904 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
905 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
906 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
907 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
908 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
909 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
910 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
911 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
912 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
913 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
914 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
915 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 916};
917
918/* Generate a compare for CODE. Return a brand-new rtx that represents
919 the result of the compare. GCC can figure this out too if we don't
920 provide all variations of compares, but because GCC always wants to use
921 WORD_MODE, we can generate better code in most cases if we do it
922 ourselves. */
923void
74f4459c 924spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 925{
926 int reverse_compare = 0;
927 int reverse_test = 0;
5d70b918 928 rtx compare_result, eq_result;
929 rtx comp_rtx, eq_rtx;
644459d0 930 enum machine_mode comp_mode;
931 enum machine_mode op_mode;
b9c74b4d 932 enum spu_comp_code scode, eq_code;
933 enum insn_code ior_code;
74f4459c 934 enum rtx_code code = GET_CODE (cmp);
935 rtx op0 = XEXP (cmp, 0);
936 rtx op1 = XEXP (cmp, 1);
644459d0 937 int index;
5d70b918 938 int eq_test = 0;
644459d0 939
74f4459c 940 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 941 and so on, to keep the constant in operand 1. */
74f4459c 942 if (GET_CODE (op1) == CONST_INT)
644459d0 943 {
74f4459c 944 HOST_WIDE_INT val = INTVAL (op1) - 1;
945 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 946 switch (code)
947 {
948 case GE:
74f4459c 949 op1 = GEN_INT (val);
644459d0 950 code = GT;
951 break;
952 case LT:
74f4459c 953 op1 = GEN_INT (val);
644459d0 954 code = LE;
955 break;
956 case GEU:
74f4459c 957 op1 = GEN_INT (val);
644459d0 958 code = GTU;
959 break;
960 case LTU:
74f4459c 961 op1 = GEN_INT (val);
644459d0 962 code = LEU;
963 break;
964 default:
965 break;
966 }
967 }
968
5d70b918 969 comp_mode = SImode;
74f4459c 970 op_mode = GET_MODE (op0);
5d70b918 971
644459d0 972 switch (code)
973 {
974 case GE:
644459d0 975 scode = SPU_GT;
07027691 976 if (HONOR_NANS (op_mode))
5d70b918 977 {
978 reverse_compare = 0;
979 reverse_test = 0;
980 eq_test = 1;
981 eq_code = SPU_EQ;
982 }
983 else
984 {
985 reverse_compare = 1;
986 reverse_test = 1;
987 }
644459d0 988 break;
989 case LE:
644459d0 990 scode = SPU_GT;
07027691 991 if (HONOR_NANS (op_mode))
5d70b918 992 {
993 reverse_compare = 1;
994 reverse_test = 0;
995 eq_test = 1;
996 eq_code = SPU_EQ;
997 }
998 else
999 {
1000 reverse_compare = 0;
1001 reverse_test = 1;
1002 }
644459d0 1003 break;
1004 case LT:
1005 reverse_compare = 1;
1006 reverse_test = 0;
1007 scode = SPU_GT;
1008 break;
1009 case GEU:
1010 reverse_compare = 1;
1011 reverse_test = 1;
1012 scode = SPU_GTU;
1013 break;
1014 case LEU:
1015 reverse_compare = 0;
1016 reverse_test = 1;
1017 scode = SPU_GTU;
1018 break;
1019 case LTU:
1020 reverse_compare = 1;
1021 reverse_test = 0;
1022 scode = SPU_GTU;
1023 break;
1024 case NE:
1025 reverse_compare = 0;
1026 reverse_test = 1;
1027 scode = SPU_EQ;
1028 break;
1029
1030 case EQ:
1031 scode = SPU_EQ;
1032 break;
1033 case GT:
1034 scode = SPU_GT;
1035 break;
1036 case GTU:
1037 scode = SPU_GTU;
1038 break;
1039 default:
1040 scode = SPU_EQ;
1041 break;
1042 }
1043
644459d0 1044 switch (op_mode)
1045 {
1046 case QImode:
1047 index = 0;
1048 comp_mode = QImode;
1049 break;
1050 case HImode:
1051 index = 1;
1052 comp_mode = HImode;
1053 break;
1054 case SImode:
1055 index = 2;
1056 break;
1057 case DImode:
1058 index = 3;
1059 break;
1060 case TImode:
1061 index = 4;
1062 break;
1063 case SFmode:
1064 index = 5;
1065 break;
1066 case DFmode:
1067 index = 6;
1068 break;
1069 case V16QImode:
5474166e 1070 index = 7;
1071 comp_mode = op_mode;
1072 break;
644459d0 1073 case V8HImode:
5474166e 1074 index = 8;
1075 comp_mode = op_mode;
1076 break;
644459d0 1077 case V4SImode:
5474166e 1078 index = 9;
1079 comp_mode = op_mode;
1080 break;
644459d0 1081 case V4SFmode:
5474166e 1082 index = 10;
1083 comp_mode = V4SImode;
1084 break;
644459d0 1085 case V2DFmode:
5474166e 1086 index = 11;
1087 comp_mode = V2DImode;
644459d0 1088 break;
5474166e 1089 case V2DImode:
644459d0 1090 default:
1091 abort ();
1092 }
1093
74f4459c 1094 if (GET_MODE (op1) == DFmode
07027691 1095 && (scode != SPU_GT && scode != SPU_EQ))
1096 abort ();
644459d0 1097
74f4459c 1098 if (is_set == 0 && op1 == const0_rtx
1099 && (GET_MODE (op0) == SImode
1100 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1101 {
1102 /* Don't need to set a register with the result when we are
1103 comparing against zero and branching. */
1104 reverse_test = !reverse_test;
74f4459c 1105 compare_result = op0;
644459d0 1106 }
1107 else
1108 {
1109 compare_result = gen_reg_rtx (comp_mode);
1110
1111 if (reverse_compare)
1112 {
74f4459c 1113 rtx t = op1;
1114 op1 = op0;
1115 op0 = t;
644459d0 1116 }
1117
1118 if (spu_comp_icode[index][scode] == 0)
1119 abort ();
1120
1121 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1122 (op0, op_mode))
1123 op0 = force_reg (op_mode, op0);
644459d0 1124 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1125 (op1, op_mode))
1126 op1 = force_reg (op_mode, op1);
644459d0 1127 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1128 op0, op1);
644459d0 1129 if (comp_rtx == 0)
1130 abort ();
1131 emit_insn (comp_rtx);
1132
5d70b918 1133 if (eq_test)
1134 {
1135 eq_result = gen_reg_rtx (comp_mode);
1136 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1137 op0, op1);
5d70b918 1138 if (eq_rtx == 0)
1139 abort ();
1140 emit_insn (eq_rtx);
d6bf3b14 1141 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1142 gcc_assert (ior_code != CODE_FOR_nothing);
1143 emit_insn (GEN_FCN (ior_code)
1144 (compare_result, compare_result, eq_result));
1145 }
644459d0 1146 }
1147
1148 if (is_set == 0)
1149 {
1150 rtx bcomp;
1151 rtx loc_ref;
1152
1153 /* We don't have branch on QI compare insns, so we convert the
1154 QI compare result to a HI result. */
1155 if (comp_mode == QImode)
1156 {
1157 rtx old_res = compare_result;
1158 compare_result = gen_reg_rtx (HImode);
1159 comp_mode = HImode;
1160 emit_insn (gen_extendqihi2 (compare_result, old_res));
1161 }
1162
1163 if (reverse_test)
1164 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1165 else
1166 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1167
74f4459c 1168 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1169 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1170 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1171 loc_ref, pc_rtx)));
1172 }
1173 else if (is_set == 2)
1174 {
74f4459c 1175 rtx target = operands[0];
644459d0 1176 int compare_size = GET_MODE_BITSIZE (comp_mode);
1177 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1178 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1179 rtx select_mask;
1180 rtx op_t = operands[2];
1181 rtx op_f = operands[3];
1182
1183 /* The result of the comparison can be SI, HI or QI mode. Create a
1184 mask based on that result. */
1185 if (target_size > compare_size)
1186 {
1187 select_mask = gen_reg_rtx (mode);
1188 emit_insn (gen_extend_compare (select_mask, compare_result));
1189 }
1190 else if (target_size < compare_size)
1191 select_mask =
1192 gen_rtx_SUBREG (mode, compare_result,
1193 (compare_size - target_size) / BITS_PER_UNIT);
1194 else if (comp_mode != mode)
1195 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1196 else
1197 select_mask = compare_result;
1198
1199 if (GET_MODE (target) != GET_MODE (op_t)
1200 || GET_MODE (target) != GET_MODE (op_f))
1201 abort ();
1202
1203 if (reverse_test)
1204 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1205 else
1206 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1207 }
1208 else
1209 {
74f4459c 1210 rtx target = operands[0];
644459d0 1211 if (reverse_test)
1212 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1213 gen_rtx_NOT (comp_mode, compare_result)));
1214 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1215 emit_insn (gen_extendhisi2 (target, compare_result));
1216 else if (GET_MODE (target) == SImode
1217 && GET_MODE (compare_result) == QImode)
1218 emit_insn (gen_extend_compare (target, compare_result));
1219 else
1220 emit_move_insn (target, compare_result);
1221 }
1222}
1223
1224HOST_WIDE_INT
1225const_double_to_hwint (rtx x)
1226{
1227 HOST_WIDE_INT val;
1228 REAL_VALUE_TYPE rv;
1229 if (GET_MODE (x) == SFmode)
1230 {
1231 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1232 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1233 }
1234 else if (GET_MODE (x) == DFmode)
1235 {
1236 long l[2];
1237 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1238 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1239 val = l[0];
1240 val = (val << 32) | (l[1] & 0xffffffff);
1241 }
1242 else
1243 abort ();
1244 return val;
1245}
1246
1247rtx
1248hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1249{
1250 long tv[2];
1251 REAL_VALUE_TYPE rv;
1252 gcc_assert (mode == SFmode || mode == DFmode);
1253
1254 if (mode == SFmode)
1255 tv[0] = (v << 32) >> 32;
1256 else if (mode == DFmode)
1257 {
1258 tv[1] = (v << 32) >> 32;
1259 tv[0] = v >> 32;
1260 }
1261 real_from_target (&rv, tv, mode);
1262 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1263}
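/* Illustrative round trip: for SFmode 1.0 const_double_to_hwint returns the
   IEEE single-precision image 0x3f800000, and
   hwint_to_const_double (SFmode, 0x3f800000) rebuilds an equivalent
   CONST_DOUBLE; for DFmode 1.0 the value is 0x3ff0000000000000, with the
   high word taken from l[0] as packed above.  */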
1264
1265void
1266print_operand_address (FILE * file, register rtx addr)
1267{
1268 rtx reg;
1269 rtx offset;
1270
e04cf423 1271 if (GET_CODE (addr) == AND
1272 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1273 && INTVAL (XEXP (addr, 1)) == -16)
1274 addr = XEXP (addr, 0);
1275
644459d0 1276 switch (GET_CODE (addr))
1277 {
1278 case REG:
1279 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1280 break;
1281
1282 case PLUS:
1283 reg = XEXP (addr, 0);
1284 offset = XEXP (addr, 1);
1285 if (GET_CODE (offset) == REG)
1286 {
1287 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1288 reg_names[REGNO (offset)]);
1289 }
1290 else if (GET_CODE (offset) == CONST_INT)
1291 {
1292 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1293 INTVAL (offset), reg_names[REGNO (reg)]);
1294 }
1295 else
1296 abort ();
1297 break;
1298
1299 case CONST:
1300 case LABEL_REF:
1301 case SYMBOL_REF:
1302 case CONST_INT:
1303 output_addr_const (file, addr);
1304 break;
1305
1306 default:
1307 debug_rtx (addr);
1308 abort ();
1309 }
1310}
1311
1312void
1313print_operand (FILE * file, rtx x, int code)
1314{
1315 enum machine_mode mode = GET_MODE (x);
1316 HOST_WIDE_INT val;
1317 unsigned char arr[16];
1318 int xcode = GET_CODE (x);
dea01258 1319 int i, info;
644459d0 1320 if (GET_MODE (x) == VOIDmode)
1321 switch (code)
1322 {
644459d0 1323 case 'L': /* 128 bits, signed */
1324 case 'm': /* 128 bits, signed */
1325 case 'T': /* 128 bits, signed */
1326 case 't': /* 128 bits, signed */
1327 mode = TImode;
1328 break;
644459d0 1329 case 'K': /* 64 bits, signed */
1330 case 'k': /* 64 bits, signed */
1331 case 'D': /* 64 bits, signed */
1332 case 'd': /* 64 bits, signed */
1333 mode = DImode;
1334 break;
644459d0 1335 case 'J': /* 32 bits, signed */
1336 case 'j': /* 32 bits, signed */
1337 case 's': /* 32 bits, signed */
1338 case 'S': /* 32 bits, signed */
1339 mode = SImode;
1340 break;
1341 }
1342 switch (code)
1343 {
1344
1345 case 'j': /* 32 bits, signed */
1346 case 'k': /* 64 bits, signed */
1347 case 'm': /* 128 bits, signed */
1348 if (xcode == CONST_INT
1349 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1350 {
1351 gcc_assert (logical_immediate_p (x, mode));
1352 constant_to_array (mode, x, arr);
1353 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1354 val = trunc_int_for_mode (val, SImode);
1355 switch (which_logical_immediate (val))
1356 {
1357 case SPU_ORI:
1358 break;
1359 case SPU_ORHI:
1360 fprintf (file, "h");
1361 break;
1362 case SPU_ORBI:
1363 fprintf (file, "b");
1364 break;
1365 default:
1366 gcc_unreachable();
1367 }
1368 }
1369 else
1370 gcc_unreachable();
1371 return;
1372
1373 case 'J': /* 32 bits, signed */
1374 case 'K': /* 64 bits, signed */
1375 case 'L': /* 128 bits, signed */
1376 if (xcode == CONST_INT
1377 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1378 {
1379 gcc_assert (logical_immediate_p (x, mode)
1380 || iohl_immediate_p (x, mode));
1381 constant_to_array (mode, x, arr);
1382 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1383 val = trunc_int_for_mode (val, SImode);
1384 switch (which_logical_immediate (val))
1385 {
1386 case SPU_ORI:
1387 case SPU_IOHL:
1388 break;
1389 case SPU_ORHI:
1390 val = trunc_int_for_mode (val, HImode);
1391 break;
1392 case SPU_ORBI:
1393 val = trunc_int_for_mode (val, QImode);
1394 break;
1395 default:
1396 gcc_unreachable();
1397 }
1398 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1399 }
1400 else
1401 gcc_unreachable();
1402 return;
1403
1404 case 't': /* 128 bits, signed */
1405 case 'd': /* 64 bits, signed */
1406 case 's': /* 32 bits, signed */
dea01258 1407 if (CONSTANT_P (x))
644459d0 1408 {
dea01258 1409 enum immediate_class c = classify_immediate (x, mode);
1410 switch (c)
1411 {
1412 case IC_IL1:
1413 constant_to_array (mode, x, arr);
1414 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1415 val = trunc_int_for_mode (val, SImode);
1416 switch (which_immediate_load (val))
1417 {
1418 case SPU_IL:
1419 break;
1420 case SPU_ILA:
1421 fprintf (file, "a");
1422 break;
1423 case SPU_ILH:
1424 fprintf (file, "h");
1425 break;
1426 case SPU_ILHU:
1427 fprintf (file, "hu");
1428 break;
1429 default:
1430 gcc_unreachable ();
1431 }
1432 break;
1433 case IC_CPAT:
1434 constant_to_array (mode, x, arr);
1435 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1436 if (info == 1)
1437 fprintf (file, "b");
1438 else if (info == 2)
1439 fprintf (file, "h");
1440 else if (info == 4)
1441 fprintf (file, "w");
1442 else if (info == 8)
1443 fprintf (file, "d");
1444 break;
1445 case IC_IL1s:
1446 if (xcode == CONST_VECTOR)
1447 {
1448 x = CONST_VECTOR_ELT (x, 0);
1449 xcode = GET_CODE (x);
1450 }
1451 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1452 fprintf (file, "a");
1453 else if (xcode == HIGH)
1454 fprintf (file, "hu");
1455 break;
1456 case IC_FSMBI:
5df189be 1457 case IC_FSMBI2:
dea01258 1458 case IC_IL2:
1459 case IC_IL2s:
1460 case IC_POOL:
1461 abort ();
1462 }
644459d0 1463 }
644459d0 1464 else
1465 gcc_unreachable ();
1466 return;
1467
1468 case 'T': /* 128 bits, signed */
1469 case 'D': /* 64 bits, signed */
1470 case 'S': /* 32 bits, signed */
dea01258 1471 if (CONSTANT_P (x))
644459d0 1472 {
dea01258 1473 enum immediate_class c = classify_immediate (x, mode);
1474 switch (c)
644459d0 1475 {
dea01258 1476 case IC_IL1:
1477 constant_to_array (mode, x, arr);
1478 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1479 val = trunc_int_for_mode (val, SImode);
1480 switch (which_immediate_load (val))
1481 {
1482 case SPU_IL:
1483 case SPU_ILA:
1484 break;
1485 case SPU_ILH:
1486 case SPU_ILHU:
1487 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1488 break;
1489 default:
1490 gcc_unreachable ();
1491 }
1492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1493 break;
1494 case IC_FSMBI:
1495 constant_to_array (mode, x, arr);
1496 val = 0;
1497 for (i = 0; i < 16; i++)
1498 {
1499 val <<= 1;
1500 val |= arr[i] & 1;
1501 }
1502 print_operand (file, GEN_INT (val), 0);
1503 break;
1504 case IC_CPAT:
1505 constant_to_array (mode, x, arr);
1506 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1507 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1508 break;
dea01258 1509 case IC_IL1s:
dea01258 1510 if (xcode == HIGH)
5df189be 1511 x = XEXP (x, 0);
1512 if (GET_CODE (x) == CONST_VECTOR)
1513 x = CONST_VECTOR_ELT (x, 0);
1514 output_addr_const (file, x);
1515 if (xcode == HIGH)
1516 fprintf (file, "@h");
644459d0 1517 break;
dea01258 1518 case IC_IL2:
1519 case IC_IL2s:
5df189be 1520 case IC_FSMBI2:
dea01258 1521 case IC_POOL:
1522 abort ();
644459d0 1523 }
c8befdb9 1524 }
644459d0 1525 else
1526 gcc_unreachable ();
1527 return;
1528
644459d0 1529 case 'C':
1530 if (xcode == CONST_INT)
1531 {
1532 /* Only the 4 least significant bits are relevant for generating
1533 control word instructions. */
1534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1535 return;
1536 }
1537 break;
1538
1539 case 'M': /* print code for c*d */
1540 if (GET_CODE (x) == CONST_INT)
1541 switch (INTVAL (x))
1542 {
1543 case 1:
1544 fprintf (file, "b");
1545 break;
1546 case 2:
1547 fprintf (file, "h");
1548 break;
1549 case 4:
1550 fprintf (file, "w");
1551 break;
1552 case 8:
1553 fprintf (file, "d");
1554 break;
1555 default:
1556 gcc_unreachable();
1557 }
1558 else
1559 gcc_unreachable();
1560 return;
1561
1562 case 'N': /* Negate the operand */
1563 if (xcode == CONST_INT)
1564 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1565 else if (xcode == CONST_VECTOR)
1566 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1567 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1568 return;
1569
1570 case 'I': /* enable/disable interrupts */
1571 if (xcode == CONST_INT)
1572 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1573 return;
1574
1575 case 'b': /* branch modifiers */
1576 if (xcode == REG)
1577 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1578 else if (COMPARISON_P (x))
1579 fprintf (file, "%s", xcode == NE ? "n" : "");
1580 return;
1581
1582 case 'i': /* indirect call */
1583 if (xcode == MEM)
1584 {
1585 if (GET_CODE (XEXP (x, 0)) == REG)
1586 /* Used in indirect function calls. */
1587 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1588 else
1589 output_address (XEXP (x, 0));
1590 }
1591 return;
1592
1593 case 'p': /* load/store */
1594 if (xcode == MEM)
1595 {
1596 x = XEXP (x, 0);
1597 xcode = GET_CODE (x);
1598 }
e04cf423 1599 if (xcode == AND)
1600 {
1601 x = XEXP (x, 0);
1602 xcode = GET_CODE (x);
1603 }
644459d0 1604 if (xcode == REG)
1605 fprintf (file, "d");
1606 else if (xcode == CONST_INT)
1607 fprintf (file, "a");
1608 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1609 fprintf (file, "r");
1610 else if (xcode == PLUS || xcode == LO_SUM)
1611 {
1612 if (GET_CODE (XEXP (x, 1)) == REG)
1613 fprintf (file, "x");
1614 else
1615 fprintf (file, "d");
1616 }
1617 return;
1618
5df189be 1619 case 'e':
1620 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1621 val &= 0x7;
1622 output_addr_const (file, GEN_INT (val));
1623 return;
1624
1625 case 'f':
1626 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1627 val &= 0x1f;
1628 output_addr_const (file, GEN_INT (val));
1629 return;
1630
1631 case 'g':
1632 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1633 val &= 0x3f;
1634 output_addr_const (file, GEN_INT (val));
1635 return;
1636
1637 case 'h':
1638 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1639 val = (val >> 3) & 0x1f;
1640 output_addr_const (file, GEN_INT (val));
1641 return;
1642
1643 case 'E':
1644 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1645 val = -val;
1646 val &= 0x7;
1647 output_addr_const (file, GEN_INT (val));
1648 return;
1649
1650 case 'F':
1651 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1652 val = -val;
1653 val &= 0x1f;
1654 output_addr_const (file, GEN_INT (val));
1655 return;
1656
1657 case 'G':
1658 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1659 val = -val;
1660 val &= 0x3f;
1661 output_addr_const (file, GEN_INT (val));
1662 return;
1663
1664 case 'H':
1665 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1666 val = -(val & -8ll);
1667 val = (val >> 3) & 0x1f;
1668 output_addr_const (file, GEN_INT (val));
1669 return;
1670
56c7bfc2 1671 case 'v':
1672 case 'w':
1673 constant_to_array (mode, x, arr);
1674 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1675 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1676 return;
1677
644459d0 1678 case 0:
1679 if (xcode == REG)
1680 fprintf (file, "%s", reg_names[REGNO (x)]);
1681 else if (xcode == MEM)
1682 output_address (XEXP (x, 0));
1683 else if (xcode == CONST_VECTOR)
dea01258 1684 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1685 else
1686 output_addr_const (file, x);
1687 return;
1688
f6a0d06f 1689 /* unused letters
56c7bfc2 1690 o qr u yz
5df189be 1691 AB OPQR UVWXYZ */
644459d0 1692 default:
1693 output_operand_lossage ("invalid %%xn code");
1694 }
1695 gcc_unreachable ();
1696}
1697
644459d0 1698/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1699 caller saved register. For leaf functions it is more efficient to
1700 use a volatile register because we won't need to save and restore the
1701 pic register. This routine is only valid after register allocation
1702 is completed, so we can pick an unused register. */
1703static rtx
1704get_pic_reg (void)
1705{
1706 rtx pic_reg = pic_offset_table_rtx;
1707 if (!reload_completed && !reload_in_progress)
1708 abort ();
87a95921 1709 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1710 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1711 return pic_reg;
1712}
1713
5df189be 1714/* Split constant addresses to handle cases that are too large.
1715 Add in the pic register when in PIC mode.
1716 Split immediates that require more than 1 instruction. */
dea01258 1717int
1718spu_split_immediate (rtx * ops)
c8befdb9 1719{
dea01258 1720 enum machine_mode mode = GET_MODE (ops[0]);
1721 enum immediate_class c = classify_immediate (ops[1], mode);
1722
1723 switch (c)
c8befdb9 1724 {
dea01258 1725 case IC_IL2:
1726 {
1727 unsigned char arrhi[16];
1728 unsigned char arrlo[16];
98bbec1e 1729 rtx to, temp, hi, lo;
dea01258 1730 int i;
98bbec1e 1731 enum machine_mode imode = mode;
1732 /* We need to do reals as ints because the constant used in the
1733 IOR might not be a legitimate real constant. */
1734 imode = int_mode_for_mode (mode);
dea01258 1735 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1736 if (imode != mode)
1737 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1738 else
1739 to = ops[0];
1740 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1741 for (i = 0; i < 16; i += 4)
1742 {
1743 arrlo[i + 2] = arrhi[i + 2];
1744 arrlo[i + 3] = arrhi[i + 3];
1745 arrlo[i + 0] = arrlo[i + 1] = 0;
1746 arrhi[i + 2] = arrhi[i + 3] = 0;
1747 }
98bbec1e 1748 hi = array_to_constant (imode, arrhi);
1749 lo = array_to_constant (imode, arrlo);
1750 emit_move_insn (temp, hi);
dea01258 1751 emit_insn (gen_rtx_SET
98bbec1e 1752 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1753 return 1;
1754 }
5df189be 1755 case IC_FSMBI2:
1756 {
1757 unsigned char arr_fsmbi[16];
1758 unsigned char arr_andbi[16];
1759 rtx to, reg_fsmbi, reg_and;
1760 int i;
1761 enum machine_mode imode = mode;
1762 /* We need to do reals as ints because the constant used in the
1763 * AND might not be a legitimate real constant. */
1764 imode = int_mode_for_mode (mode);
1765 constant_to_array (mode, ops[1], arr_fsmbi);
1766 if (imode != mode)
1767 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1768 else
1769 to = ops[0];
1770 for (i = 0; i < 16; i++)
1771 if (arr_fsmbi[i] != 0)
1772 {
1773 arr_andbi[0] = arr_fsmbi[i];
1774 arr_fsmbi[i] = 0xff;
1775 }
1776 for (i = 1; i < 16; i++)
1777 arr_andbi[i] = arr_andbi[0];
1778 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1779 reg_and = array_to_constant (imode, arr_andbi);
1780 emit_move_insn (to, reg_fsmbi);
1781 emit_insn (gen_rtx_SET
1782 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1783 return 1;
1784 }
dea01258 1785 case IC_POOL:
1786 if (reload_in_progress || reload_completed)
1787 {
1788 rtx mem = force_const_mem (mode, ops[1]);
1789 if (TARGET_LARGE_MEM)
1790 {
1791 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1792 emit_move_insn (addr, XEXP (mem, 0));
1793 mem = replace_equiv_address (mem, addr);
1794 }
1795 emit_move_insn (ops[0], mem);
1796 return 1;
1797 }
1798 break;
1799 case IC_IL1s:
1800 case IC_IL2s:
1801 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1802 {
1803 if (c == IC_IL2s)
1804 {
5df189be 1805 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1806 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1807 }
1808 else if (flag_pic)
1809 emit_insn (gen_pic (ops[0], ops[1]));
1810 if (flag_pic)
1811 {
1812 rtx pic_reg = get_pic_reg ();
1813 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1814 crtl->uses_pic_offset_table = 1;
dea01258 1815 }
1816 return flag_pic || c == IC_IL2s;
1817 }
1818 break;
1819 case IC_IL1:
1820 case IC_FSMBI:
1821 case IC_CPAT:
1822 break;
c8befdb9 1823 }
dea01258 1824 return 0;
c8befdb9 1825}
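/* Worked example for IC_IL2 above (illustrative): splitting the SImode
   constant 0x12345678 produces arrhi = { 0x12, 0x34, 0, 0, ... } and
   arrlo = { 0, 0, 0x56, 0x78, ... }, so the expansion is a move of
   0x12340000 (an ilhu) followed by an IOR with 0x5678 (an iohl).  */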
1826
644459d0 1827/* SAVING is TRUE when we are generating the actual load and store
1828 instructions for REGNO. When determining the size of the stack
1829 needed for saving registers we must allocate enough space for the
1830 worst case, because we don't always have the information early enough
1831 to not allocate it. But we can at least eliminate the actual loads
1832 and stores during the prologue/epilogue. */
1833static int
1834need_to_save_reg (int regno, int saving)
1835{
3072d30e 1836 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1837 return 1;
1838 if (flag_pic
1839 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1840 && (!saving || crtl->uses_pic_offset_table)
644459d0 1841 && (!saving
3072d30e 1842 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1843 return 1;
1844 return 0;
1845}
1846
1847/* This function is only correct starting with local register
1848 allocation */
1849int
1850spu_saved_regs_size (void)
1851{
1852 int reg_save_size = 0;
1853 int regno;
1854
1855 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1856 if (need_to_save_reg (regno, 0))
1857 reg_save_size += 0x10;
1858 return reg_save_size;
1859}
1860
1861static rtx
1862frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1863{
1864 rtx reg = gen_rtx_REG (V4SImode, regno);
1865 rtx mem =
1866 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1867 return emit_insn (gen_movv4si (mem, reg));
1868}
1869
1870static rtx
1871frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1872{
1873 rtx reg = gen_rtx_REG (V4SImode, regno);
1874 rtx mem =
1875 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1876 return emit_insn (gen_movv4si (reg, mem));
1877}
1878
1879/* This happens after reload, so we need to expand it. */
1880static rtx
1881frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1882{
1883 rtx insn;
1884 if (satisfies_constraint_K (GEN_INT (imm)))
1885 {
1886 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1887 }
1888 else
1889 {
3072d30e 1890 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1891 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1892 if (REGNO (src) == REGNO (scratch))
1893 abort ();
1894 }
644459d0 1895 return insn;
1896}
1897
1898/* Return nonzero if this function is known to have a null epilogue. */
1899
1900int
1901direct_return (void)
1902{
1903 if (reload_completed)
1904 {
1905 if (cfun->static_chain_decl == 0
1906 && (spu_saved_regs_size ()
1907 + get_frame_size ()
abe32cce 1908 + crtl->outgoing_args_size
1909 + crtl->args.pretend_args_size == 0)
644459d0 1910 && current_function_is_leaf)
1911 return 1;
1912 }
1913 return 0;
1914}
1915
1916/*
1917 The stack frame looks like this:
1918 +-------------+
1919 | incoming |
a8e019fa 1920 | args |
1921 AP -> +-------------+
644459d0 1922 | $lr save |
1923 +-------------+
1924 prev SP | back chain |
1925 +-------------+
1926 | var args |
abe32cce 1927 | reg save | crtl->args.pretend_args_size bytes
644459d0 1928 +-------------+
1929 | ... |
1930 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1931 FP -> +-------------+
644459d0 1932 | ... |
a8e019fa 1933 | vars | get_frame_size() bytes
1934 HFP -> +-------------+
644459d0 1935 | ... |
1936 | outgoing |
abe32cce 1937 | args | crtl->outgoing_args_size bytes
644459d0 1938 +-------------+
1939 | $lr of next |
1940 | frame |
1941 +-------------+
a8e019fa 1942 | back chain |
1943 SP -> +-------------+
644459d0 1944
1945*/
1946void
1947spu_expand_prologue (void)
1948{
1949 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1950 HOST_WIDE_INT total_size;
1951 HOST_WIDE_INT saved_regs_size;
1952 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1953 rtx scratch_reg_0, scratch_reg_1;
1954 rtx insn, real;
1955
644459d0 1956 if (flag_pic && optimize == 0)
18d50ae6 1957 crtl->uses_pic_offset_table = 1;
644459d0 1958
1959 if (spu_naked_function_p (current_function_decl))
1960 return;
1961
1962 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1963 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1964
1965 saved_regs_size = spu_saved_regs_size ();
1966 total_size = size + saved_regs_size
abe32cce 1967 + crtl->outgoing_args_size
1968 + crtl->args.pretend_args_size;
644459d0 1969
1970 if (!current_function_is_leaf
18d50ae6 1971 || cfun->calls_alloca || total_size > 0)
644459d0 1972 total_size += STACK_POINTER_OFFSET;
1973
1974 /* Save this first because code after this might use the link
1975 register as a scratch register. */
1976 if (!current_function_is_leaf)
1977 {
1978 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1979 RTX_FRAME_RELATED_P (insn) = 1;
1980 }
1981
1982 if (total_size > 0)
1983 {
abe32cce 1984 offset = -crtl->args.pretend_args_size;
644459d0 1985 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1986 if (need_to_save_reg (regno, 1))
1987 {
1988 offset -= 16;
1989 insn = frame_emit_store (regno, sp_reg, offset);
1990 RTX_FRAME_RELATED_P (insn) = 1;
1991 }
1992 }
1993
18d50ae6 1994 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1995 {
1996 rtx pic_reg = get_pic_reg ();
1997 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1998 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1999 }
2000
2001 if (total_size > 0)
2002 {
2003 if (flag_stack_check)
2004 {
d819917f 2005 /* We compare against total_size-1 because
644459d0 2006 ($sp >= total_size) <=> ($sp > total_size-1) */
2007 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2008 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2009 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2010 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2011 {
2012 emit_move_insn (scratch_v4si, size_v4si);
2013 size_v4si = scratch_v4si;
2014 }
2015 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2016 emit_insn (gen_vec_extractv4si
2017 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2018 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2019 }
2020
2021 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2022 the value of the previous $sp because we save it as the back
2023 chain. */
2024 if (total_size <= 2000)
2025 {
2026 /* In this case we save the back chain first. */
2027 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2028 insn =
2029 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2030 }
644459d0 2031 else
2032 {
2033 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2034 insn =
2035 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2036 }
2037 RTX_FRAME_RELATED_P (insn) = 1;
2038 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2039 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2040
2041 if (total_size > 2000)
2042 {
2043 /* Save the back chain ptr */
2044 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2045 }
2046
2047 if (frame_pointer_needed)
2048 {
2049 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2050 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2051 + crtl->outgoing_args_size;
644459d0 2052 /* Set the new frame_pointer */
d8dfeb55 2053 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2054 RTX_FRAME_RELATED_P (insn) = 1;
2055 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2056 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2057 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2058 }
2059 }
2060
644459d0 2061}
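
To make the frame-size arithmetic above concrete, here is a hypothetical worked example; the byte counts and the 32-byte value assumed for STACK_POINTER_OFFSET are illustrative only, and the program below is a sketch, not part of spu.c.

/* Hypothetical, self-contained sketch of the total_size computation in
   spu_expand_prologue.  All values are made up for illustration.  */
#include <stdio.h>

int
main (void)
{
  long frame_size = 80;       /* get_frame_size (): local variables        */
  long saved_regs = 2 * 16;   /* spu_saved_regs_size (): 16 bytes per reg  */
  long outgoing_args = 32;    /* crtl->outgoing_args_size                  */
  long pretend_args = 0;      /* crtl->args.pretend_args_size              */
  int is_leaf = 0;            /* current_function_is_leaf                  */
  int calls_alloca = 0;       /* cfun->calls_alloca                        */
  long sp_offset = 32;        /* assumed value for STACK_POINTER_OFFSET    */

  long total_size = frame_size + saved_regs + outgoing_args + pretend_args;
  if (!is_leaf || calls_alloca || total_size > 0)
    total_size += sp_offset;

  printf ("total_size = %ld bytes\n", total_size);   /* prints 176 */
  return 0;
}
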
2062
2063void
2064spu_expand_epilogue (bool sibcall_p)
2065{
2066 int size = get_frame_size (), offset, regno;
2067 HOST_WIDE_INT saved_regs_size, total_size;
2068 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2069 rtx jump, scratch_reg_0;
2070
644459d0 2071 if (spu_naked_function_p (current_function_decl))
2072 return;
2073
2074 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2075
2076 saved_regs_size = spu_saved_regs_size ();
2077 total_size = size + saved_regs_size
abe32cce 2078 + crtl->outgoing_args_size
2079 + crtl->args.pretend_args_size;
644459d0 2080
2081 if (!current_function_is_leaf
18d50ae6 2082 || cfun->calls_alloca || total_size > 0)
644459d0 2083 total_size += STACK_POINTER_OFFSET;
2084
2085 if (total_size > 0)
2086 {
18d50ae6 2087 if (cfun->calls_alloca)
644459d0 2088 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2089 else
2090 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2091
2092
2093 if (saved_regs_size > 0)
2094 {
abe32cce 2095 offset = -crtl->args.pretend_args_size;
644459d0 2096 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2097 if (need_to_save_reg (regno, 1))
2098 {
2099 offset -= 0x10;
2100 frame_emit_load (regno, sp_reg, offset);
2101 }
2102 }
2103 }
2104
2105 if (!current_function_is_leaf)
2106 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2107
2108 if (!sibcall_p)
2109 {
18b42941 2110 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2111 jump = emit_jump_insn (gen__return ());
2112 emit_barrier_after (jump);
2113 }
2114
644459d0 2115}
2116
2117rtx
2118spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2119{
2120 if (count != 0)
2121 return 0;
2122 /* This is inefficient because it ends up copying to a save-register
2123 which then gets saved even though $lr has already been saved. But
2124 it does generate better code for leaf functions and we don't need
2125 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2126 used for __builtin_return_address anyway, so maybe we don't care if
2127 it's inefficient. */
2128 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2129}
2130\f
2131
2132/* Given VAL, generate a constant appropriate for MODE.
2133 If MODE is a vector mode, every element will be VAL.
2134 For TImode, VAL will be zero extended to 128 bits. */
2135rtx
2136spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2137{
2138 rtx inner;
2139 rtvec v;
2140 int units, i;
2141
2142 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2143 || GET_MODE_CLASS (mode) == MODE_FLOAT
2144 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2145 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2146
2147 if (GET_MODE_CLASS (mode) == MODE_INT)
2148 return immed_double_const (val, 0, mode);
2149
2150 /* val is the bit representation of the float */
2151 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2152 return hwint_to_const_double (mode, val);
2153
2154 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2155 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2156 else
2157 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2158
2159 units = GET_MODE_NUNITS (mode);
2160
2161 v = rtvec_alloc (units);
2162
2163 for (i = 0; i < units; ++i)
2164 RTVEC_ELT (v, i) = inner;
2165
2166 return gen_rtx_CONST_VECTOR (mode, v);
2167}
644459d0 2168
5474166e 2169/* Create a MODE vector constant from 4 ints. */
2170rtx
2171spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2172{
2173 unsigned char arr[16];
2174 arr[0] = (a >> 24) & 0xff;
2175 arr[1] = (a >> 16) & 0xff;
2176 arr[2] = (a >> 8) & 0xff;
2177 arr[3] = (a >> 0) & 0xff;
2178 arr[4] = (b >> 24) & 0xff;
2179 arr[5] = (b >> 16) & 0xff;
2180 arr[6] = (b >> 8) & 0xff;
2181 arr[7] = (b >> 0) & 0xff;
2182 arr[8] = (c >> 24) & 0xff;
2183 arr[9] = (c >> 16) & 0xff;
2184 arr[10] = (c >> 8) & 0xff;
2185 arr[11] = (c >> 0) & 0xff;
2186 arr[12] = (d >> 24) & 0xff;
2187 arr[13] = (d >> 16) & 0xff;
2188 arr[14] = (d >> 8) & 0xff;
2189 arr[15] = (d >> 0) & 0xff;
2190 return array_to_constant(mode, arr);
2191}
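
The byte layout built by spu_const_from_ints is plain big-endian packing of the four words. A standalone sketch with arbitrary word values (pack_word is a made-up helper, not part of spu.c):

/* Standalone sketch: pack four 32-bit words into the 16 big-endian bytes
   that spu_const_from_ints feeds to array_to_constant.  */
#include <stdio.h>

static void
pack_word (unsigned char *arr, unsigned int w)
{
  arr[0] = (w >> 24) & 0xff;
  arr[1] = (w >> 16) & 0xff;
  arr[2] = (w >> 8) & 0xff;
  arr[3] = w & 0xff;
}

int
main (void)
{
  unsigned int words[4] = { 0x11223344, 0x55667788, 0x99aabbcc, 0xddeeff00 };
  unsigned char arr[16];
  int i;

  for (i = 0; i < 4; i++)
    pack_word (arr + 4 * i, words[i]);

  for (i = 0; i < 16; i++)
    printf ("%02x%s", arr[i], i == 15 ? "\n" : " ");
  return 0;
}
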
5a976006 2192\f
2193/* branch hint stuff */
5474166e 2194
644459d0 2195/* An array of these is used to propagate hints to predecessor blocks. */
2196struct spu_bb_info
2197{
5a976006 2198 rtx prop_jump; /* propagated from another block */
2199 int bb_index; /* the original block. */
644459d0 2200};
5a976006 2201static struct spu_bb_info *spu_bb_info;
644459d0 2202
5a976006 2203#define STOP_HINT_P(INSN) \
2204 (GET_CODE(INSN) == CALL_INSN \
2205 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2206 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2207
2208/* 1 when RTX is a hinted branch or its target. We keep track of
2209 what has been hinted so the safe-hint code can test it easily. */
2210#define HINTED_P(RTX) \
2211 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2212
2213/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2214#define SCHED_ON_EVEN_P(RTX) \
2215 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2216
2217/* Emit a nop for INSN such that the two will dual issue. This assumes
2218 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2219 We check for TImode to handle a MULTI1 insn which has dual issued its
2220 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2221 ADDR_VEC insns. */
2222static void
2223emit_nop_for_insn (rtx insn)
644459d0 2224{
5a976006 2225 int p;
2226 rtx new_insn;
2227 p = get_pipe (insn);
2228 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2229 new_insn = emit_insn_after (gen_lnop (), insn);
2230 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2231 {
5a976006 2232 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2233 PUT_MODE (new_insn, TImode);
2234 PUT_MODE (insn, VOIDmode);
2235 }
2236 else
2237 new_insn = emit_insn_after (gen_lnop (), insn);
2238 recog_memoized (new_insn);
2239}
2240
2241/* Insert nops in basic blocks to meet dual issue alignment
2242 requirements. Also make sure hbrp and hint instructions are at least
2243 one cycle apart, possibly inserting a nop. */
2244static void
2245pad_bb(void)
2246{
2247 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2248 int length;
2249 int addr;
2250
2251 /* This sets up INSN_ADDRESSES. */
2252 shorten_branches (get_insns ());
2253
2254 /* Keep track of length added by nops. */
2255 length = 0;
2256
2257 prev_insn = 0;
2258 insn = get_insns ();
2259 if (!active_insn_p (insn))
2260 insn = next_active_insn (insn);
2261 for (; insn; insn = next_insn)
2262 {
2263 next_insn = next_active_insn (insn);
2264 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2265 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2266 {
5a976006 2267 if (hbr_insn)
2268 {
2269 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2270 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2271 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2272 || (a1 - a0 == 4))
2273 {
2274 prev_insn = emit_insn_before (gen_lnop (), insn);
2275 PUT_MODE (prev_insn, GET_MODE (insn));
2276 PUT_MODE (insn, TImode);
2277 length += 4;
2278 }
2279 }
2280 hbr_insn = insn;
2281 }
2282 if (INSN_CODE (insn) == CODE_FOR_blockage)
2283 {
2284 if (GET_MODE (insn) == TImode)
2285 PUT_MODE (next_insn, TImode);
2286 insn = next_insn;
2287 next_insn = next_active_insn (insn);
2288 }
2289 addr = INSN_ADDRESSES (INSN_UID (insn));
2290 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2291 {
2292 if (((addr + length) & 7) != 0)
2293 {
2294 emit_nop_for_insn (prev_insn);
2295 length += 4;
2296 }
644459d0 2297 }
5a976006 2298 else if (GET_MODE (insn) == TImode
2299 && ((next_insn && GET_MODE (next_insn) != TImode)
2300 || get_attr_type (insn) == TYPE_MULTI0)
2301 && ((addr + length) & 7) != 0)
2302 {
2303 /* prev_insn will always be set because the first insn is
2304 always 8-byte aligned. */
2305 emit_nop_for_insn (prev_insn);
2306 length += 4;
2307 }
2308 prev_insn = insn;
644459d0 2309 }
644459d0 2310}
2311
5a976006 2312\f
2313/* Routines for branch hints. */
2314
644459d0 2315static void
5a976006 2316spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2317 int distance, sbitmap blocks)
644459d0 2318{
5a976006 2319 rtx branch_label = 0;
2320 rtx hint;
2321 rtx insn;
2322 rtx table;
644459d0 2323
2324 if (before == 0 || branch == 0 || target == 0)
2325 return;
2326
5a976006 2327 /* While scheduling we require hints to be no further than 600 bytes, so
2328 we need to enforce that here too. */
644459d0 2329 if (distance > 600)
2330 return;
2331
5a976006 2332 /* If BEFORE is a basic block note, emit the hint after the note. */
2333 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2334 before = NEXT_INSN (before);
644459d0 2335
2336 branch_label = gen_label_rtx ();
2337 LABEL_NUSES (branch_label)++;
2338 LABEL_PRESERVE_P (branch_label) = 1;
2339 insn = emit_label_before (branch_label, branch);
2340 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2341 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2342
2343 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2344 recog_memoized (hint);
2345 HINTED_P (branch) = 1;
644459d0 2346
5a976006 2347 if (GET_CODE (target) == LABEL_REF)
2348 HINTED_P (XEXP (target, 0)) = 1;
2349 else if (tablejump_p (branch, 0, &table))
644459d0 2350 {
5a976006 2351 rtvec vec;
2352 int j;
2353 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2354 vec = XVEC (PATTERN (table), 0);
2355 else
2356 vec = XVEC (PATTERN (table), 1);
2357 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2358 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2359 }
5a976006 2360
2361 if (distance >= 588)
644459d0 2362 {
5a976006 2363 /* Make sure the hint isn't scheduled any earlier than this point,
2364 which could make it too far for the branch offset to fit. */
2365 recog_memoized (emit_insn_before (gen_blockage (), hint));
2366 }
2367 else if (distance <= 8 * 4)
2368 {
2369 /* To guarantee at least 8 insns between the hint and branch we
2370 insert nops. */
2371 int d;
2372 for (d = distance; d < 8 * 4; d += 4)
2373 {
2374 insn =
2375 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2376 recog_memoized (insn);
2377 }
2378
2379 /* Make sure any nops inserted aren't scheduled before the hint. */
2380 recog_memoized (emit_insn_after (gen_blockage (), hint));
2381
2382 /* Make sure any nops inserted aren't scheduled after the call. */
2383 if (CALL_P (branch) && distance < 8 * 4)
2384 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2385 }
644459d0 2386}
2387
2388/* Returns 0 if we don't want a hint for this branch. Otherwise return
2389 the rtx for the branch target. */
2390static rtx
2391get_branch_target (rtx branch)
2392{
2393 if (GET_CODE (branch) == JUMP_INSN)
2394 {
2395 rtx set, src;
2396
2397 /* Return statements */
2398 if (GET_CODE (PATTERN (branch)) == RETURN)
2399 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2400
2401 /* jump table */
2402 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2403 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2404 return 0;
2405
fcc31b99 2406 /* ASM GOTOs. */
604157f6 2407 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2408 return NULL;
2409
644459d0 2410 set = single_set (branch);
2411 src = SET_SRC (set);
2412 if (GET_CODE (SET_DEST (set)) != PC)
2413 abort ();
2414
2415 if (GET_CODE (src) == IF_THEN_ELSE)
2416 {
2417 rtx lab = 0;
2418 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2419 if (note)
2420 {
2421 /* If the more probable case is not a fall through, then
2422 try a branch hint. */
2423 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2424 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2425 && GET_CODE (XEXP (src, 1)) != PC)
2426 lab = XEXP (src, 1);
2427 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2428 && GET_CODE (XEXP (src, 2)) != PC)
2429 lab = XEXP (src, 2);
2430 }
2431 if (lab)
2432 {
2433 if (GET_CODE (lab) == RETURN)
2434 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2435 return lab;
2436 }
2437 return 0;
2438 }
2439
2440 return src;
2441 }
2442 else if (GET_CODE (branch) == CALL_INSN)
2443 {
2444 rtx call;
2445 /* All of our call patterns are in a PARALLEL and the CALL is
2446 the first pattern in the PARALLEL. */
2447 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2448 abort ();
2449 call = XVECEXP (PATTERN (branch), 0, 0);
2450 if (GET_CODE (call) == SET)
2451 call = SET_SRC (call);
2452 if (GET_CODE (call) != CALL)
2453 abort ();
2454 return XEXP (XEXP (call, 0), 0);
2455 }
2456 return 0;
2457}
2458
5a976006 2459/* The special $hbr register is used to prevent the insn scheduler from
2460 moving hbr insns across instructions which invalidate them. It
2461 should only be used in a clobber, and this function searches for
2462 insns which clobber it. */
2463static bool
2464insn_clobbers_hbr (rtx insn)
2465{
2466 if (INSN_P (insn)
2467 && GET_CODE (PATTERN (insn)) == PARALLEL)
2468 {
2469 rtx parallel = PATTERN (insn);
2470 rtx clobber;
2471 int j;
2472 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2473 {
2474 clobber = XVECEXP (parallel, 0, j);
2475 if (GET_CODE (clobber) == CLOBBER
2476 && GET_CODE (XEXP (clobber, 0)) == REG
2477 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2478 return 1;
2479 }
2480 }
2481 return 0;
2482}
2483
2484/* Search up to 32 insns starting at FIRST:
2485 - at any kind of hinted branch, just return
2486 - at any unconditional branch in the first 15 insns, just return
2487 - at a call or indirect branch, after the first 15 insns, force it to
2488 an even address and return
2489 - at any unconditional branch, after the first 15 insns, force it to
2490 an even address.
2491 At the end of the search, insert an hbrp within 4 insns of FIRST,
2492 and an hbrp within 16 instructions of FIRST.
2493 */
644459d0 2494static void
5a976006 2495insert_hbrp_for_ilb_runout (rtx first)
644459d0 2496{
5a976006 2497 rtx insn, before_4 = 0, before_16 = 0;
2498 int addr = 0, length, first_addr = -1;
2499 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2500 int insert_lnop_after = 0;
2501 for (insn = first; insn; insn = NEXT_INSN (insn))
2502 if (INSN_P (insn))
2503 {
2504 if (first_addr == -1)
2505 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2506 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2507 length = get_attr_length (insn);
2508
2509 if (before_4 == 0 && addr + length >= 4 * 4)
2510 before_4 = insn;
2511 /* We test for 14 instructions because the first hbrp will add
2512 up to 2 instructions. */
2513 if (before_16 == 0 && addr + length >= 14 * 4)
2514 before_16 = insn;
2515
2516 if (INSN_CODE (insn) == CODE_FOR_hbr)
2517 {
2518 /* Make sure an hbrp is at least 2 cycles away from a hint.
2519 Insert an lnop after the hbrp when necessary. */
2520 if (before_4 == 0 && addr > 0)
2521 {
2522 before_4 = insn;
2523 insert_lnop_after |= 1;
2524 }
2525 else if (before_4 && addr <= 4 * 4)
2526 insert_lnop_after |= 1;
2527 if (before_16 == 0 && addr > 10 * 4)
2528 {
2529 before_16 = insn;
2530 insert_lnop_after |= 2;
2531 }
2532 else if (before_16 && addr <= 14 * 4)
2533 insert_lnop_after |= 2;
2534 }
644459d0 2535
5a976006 2536 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2537 {
2538 if (addr < hbrp_addr0)
2539 hbrp_addr0 = addr;
2540 else if (addr < hbrp_addr1)
2541 hbrp_addr1 = addr;
2542 }
644459d0 2543
5a976006 2544 if (CALL_P (insn) || JUMP_P (insn))
2545 {
2546 if (HINTED_P (insn))
2547 return;
2548
2549 /* Any branch after the first 15 insns should be on an even
2550 address to avoid a special case branch. There might be
2551 some nops and/or hbrps inserted, so we test after 10
2552 insns. */
2553 if (addr > 10 * 4)
2554 SCHED_ON_EVEN_P (insn) = 1;
2555 }
644459d0 2556
5a976006 2557 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2558 return;
2559
2560
2561 if (addr + length >= 32 * 4)
644459d0 2562 {
5a976006 2563 gcc_assert (before_4 && before_16);
2564 if (hbrp_addr0 > 4 * 4)
644459d0 2565 {
5a976006 2566 insn =
2567 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2568 recog_memoized (insn);
2569 INSN_ADDRESSES_NEW (insn,
2570 INSN_ADDRESSES (INSN_UID (before_4)));
2571 PUT_MODE (insn, GET_MODE (before_4));
2572 PUT_MODE (before_4, TImode);
2573 if (insert_lnop_after & 1)
644459d0 2574 {
5a976006 2575 insn = emit_insn_before (gen_lnop (), before_4);
2576 recog_memoized (insn);
2577 INSN_ADDRESSES_NEW (insn,
2578 INSN_ADDRESSES (INSN_UID (before_4)));
2579 PUT_MODE (insn, TImode);
644459d0 2580 }
644459d0 2581 }
5a976006 2582 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2583 && hbrp_addr1 > 16 * 4)
644459d0 2584 {
5a976006 2585 insn =
2586 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2587 recog_memoized (insn);
2588 INSN_ADDRESSES_NEW (insn,
2589 INSN_ADDRESSES (INSN_UID (before_16)));
2590 PUT_MODE (insn, GET_MODE (before_16));
2591 PUT_MODE (before_16, TImode);
2592 if (insert_lnop_after & 2)
644459d0 2593 {
5a976006 2594 insn = emit_insn_before (gen_lnop (), before_16);
2595 recog_memoized (insn);
2596 INSN_ADDRESSES_NEW (insn,
2597 INSN_ADDRESSES (INSN_UID
2598 (before_16)));
2599 PUT_MODE (insn, TImode);
644459d0 2600 }
2601 }
5a976006 2602 return;
644459d0 2603 }
644459d0 2604 }
5a976006 2605 else if (BARRIER_P (insn))
2606 return;
644459d0 2607
644459d0 2608}
5a976006 2609
2610/* The SPU might hang when it executes 48 inline instructions after a
2611 hinted branch jumps to its hinted target. The beginning of a
2612 function and the return from a call might have been hinted, and must
2613 be handled as well. To prevent a hang we insert 2 hbrps. The first
2614 should be within 6 insns of the branch target. The second should be
2615 within 22 insns of the branch target. When determining if hbrps are
2616 necessary, we look for only 32 inline instructions, because up to
2617 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2618 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2619static void
5a976006 2620insert_hbrp (void)
644459d0 2621{
5a976006 2622 rtx insn;
2623 if (TARGET_SAFE_HINTS)
644459d0 2624 {
5a976006 2625 shorten_branches (get_insns ());
2626 /* Insert hbrp at beginning of function */
2627 insn = next_active_insn (get_insns ());
2628 if (insn)
2629 insert_hbrp_for_ilb_runout (insn);
2630 /* Insert hbrp after hinted targets. */
2631 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2632 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2633 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2634 }
644459d0 2635}
2636
5a976006 2637static int in_spu_reorg;
2638
2639/* Insert branch hints. There are no branch optimizations after this
2640 pass, so it's safe to set our branch hints now. */
644459d0 2641static void
5a976006 2642spu_machine_dependent_reorg (void)
644459d0 2643{
5a976006 2644 sbitmap blocks;
2645 basic_block bb;
2646 rtx branch, insn;
2647 rtx branch_target = 0;
2648 int branch_addr = 0, insn_addr, required_dist = 0;
2649 int i;
2650 unsigned int j;
644459d0 2651
5a976006 2652 if (!TARGET_BRANCH_HINTS || optimize == 0)
2653 {
2654 /* We still do it for unoptimized code because an external
2655 function might have hinted a call or return. */
2656 insert_hbrp ();
2657 pad_bb ();
2658 return;
2659 }
644459d0 2660
5a976006 2661 blocks = sbitmap_alloc (last_basic_block);
2662 sbitmap_zero (blocks);
644459d0 2663
5a976006 2664 in_spu_reorg = 1;
2665 compute_bb_for_insn ();
2666
2667 compact_blocks ();
2668
2669 spu_bb_info =
2670 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2671 sizeof (struct spu_bb_info));
2672
2673 /* We need exact insn addresses and lengths. */
2674 shorten_branches (get_insns ());
2675
2676 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2677 {
5a976006 2678 bb = BASIC_BLOCK (i);
2679 branch = 0;
2680 if (spu_bb_info[i].prop_jump)
644459d0 2681 {
5a976006 2682 branch = spu_bb_info[i].prop_jump;
2683 branch_target = get_branch_target (branch);
2684 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2685 required_dist = spu_hint_dist;
2686 }
2687 /* Search from end of a block to beginning. In this loop, find
2688 jumps which need a branch and emit them only when:
2689 - it's an indirect branch and we're at the insn which sets
2690 the register
2691 - we're at an insn that will invalidate the hint. e.g., a
2692 call, another hint insn, inline asm that clobbers $hbr, and
2693 some inlined operations (divmodsi4). Don't consider jumps
2694 because they are only at the end of a block and are
2695 considered when we are deciding whether to propagate
2696 - we're getting too far away from the branch. The hbr insns
2697 only have a signed 10 bit offset
2698 We go back as far as possible so the branch will be considered
2699 for propagation when we get to the beginning of the block. */
2700 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2701 {
2702 if (INSN_P (insn))
2703 {
2704 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2705 if (branch
2706 && ((GET_CODE (branch_target) == REG
2707 && set_of (branch_target, insn) != NULL_RTX)
2708 || insn_clobbers_hbr (insn)
2709 || branch_addr - insn_addr > 600))
2710 {
2711 rtx next = NEXT_INSN (insn);
2712 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2713 if (insn != BB_END (bb)
2714 && branch_addr - next_addr >= required_dist)
2715 {
2716 if (dump_file)
2717 fprintf (dump_file,
2718 "hint for %i in block %i before %i\n",
2719 INSN_UID (branch), bb->index,
2720 INSN_UID (next));
2721 spu_emit_branch_hint (next, branch, branch_target,
2722 branch_addr - next_addr, blocks);
2723 }
2724 branch = 0;
2725 }
2726
2727 /* JUMP_P will only be true at the end of a block. When
2728 branch is already set it means we've previously decided
2729 to propagate a hint for that branch into this block. */
2730 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2731 {
2732 branch = 0;
2733 if ((branch_target = get_branch_target (insn)))
2734 {
2735 branch = insn;
2736 branch_addr = insn_addr;
2737 required_dist = spu_hint_dist;
2738 }
2739 }
2740 }
2741 if (insn == BB_HEAD (bb))
2742 break;
2743 }
2744
2745 if (branch)
2746 {
2747 /* If we haven't emitted a hint for this branch yet, it might
2748 be profitable to emit it in one of the predecessor blocks,
2749 especially for loops. */
2750 rtx bbend;
2751 basic_block prev = 0, prop = 0, prev2 = 0;
2752 int loop_exit = 0, simple_loop = 0;
2753 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2754
2755 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2756 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2757 prev = EDGE_PRED (bb, j)->src;
2758 else
2759 prev2 = EDGE_PRED (bb, j)->src;
2760
2761 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2762 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2763 loop_exit = 1;
2764 else if (EDGE_SUCC (bb, j)->dest == bb)
2765 simple_loop = 1;
2766
2767 /* If this branch is a loop exit then propagate to previous
2768 fallthru block. This catches the cases when it is a simple
2769 loop or when there is an initial branch into the loop. */
2770 if (prev && (loop_exit || simple_loop)
2771 && prev->loop_depth <= bb->loop_depth)
2772 prop = prev;
2773
2774 /* If there is only one adjacent predecessor. Don't propagate
2775 outside this loop. This loop_depth test isn't perfect, but
2776 I'm not sure the loop_father member is valid at this point. */
2777 else if (prev && single_pred_p (bb)
2778 && prev->loop_depth == bb->loop_depth)
2779 prop = prev;
2780
2781 /* If this is the JOIN block of a simple IF-THEN then
2782 propagate the hint to the HEADER block. */
2783 else if (prev && prev2
2784 && EDGE_COUNT (bb->preds) == 2
2785 && EDGE_COUNT (prev->preds) == 1
2786 && EDGE_PRED (prev, 0)->src == prev2
2787 && prev2->loop_depth == bb->loop_depth
2788 && GET_CODE (branch_target) != REG)
2789 prop = prev;
2790
2791 /* Don't propagate when:
2792 - this is a simple loop and the hint would be too far
2793 - this is not a simple loop and there are 16 insns in
2794 this block already
2795 - the predecessor block ends in a branch that will be
2796 hinted
2797 - the predecessor block ends in an insn that invalidates
2798 the hint */
2799 if (prop
2800 && prop->index >= 0
2801 && (bbend = BB_END (prop))
2802 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2803 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2804 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2805 {
2806 if (dump_file)
2807 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2808 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2809 bb->index, prop->index, bb->loop_depth,
2810 INSN_UID (branch), loop_exit, simple_loop,
2811 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2812
2813 spu_bb_info[prop->index].prop_jump = branch;
2814 spu_bb_info[prop->index].bb_index = i;
2815 }
2816 else if (branch_addr - next_addr >= required_dist)
2817 {
2818 if (dump_file)
2819 fprintf (dump_file, "hint for %i in block %i before %i\n",
2820 INSN_UID (branch), bb->index,
2821 INSN_UID (NEXT_INSN (insn)));
2822 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2823 branch_addr - next_addr, blocks);
2824 }
2825 branch = 0;
644459d0 2826 }
644459d0 2827 }
5a976006 2828 free (spu_bb_info);
644459d0 2829
5a976006 2830 if (!sbitmap_empty_p (blocks))
2831 find_many_sub_basic_blocks (blocks);
2832
2833 /* We have to schedule to make sure alignment is ok. */
2834 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2835
2836 /* The hints need to be scheduled, so call it again. */
2837 schedule_insns ();
2838
2839 insert_hbrp ();
2840
2841 pad_bb ();
2842
8f1d58ad 2843 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2844 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2845 {
2846 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2847 between its branch label and the branch. We don't move the
2848 label because GCC expects it at the beginning of the block. */
2849 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2850 rtx label_ref = XVECEXP (unspec, 0, 0);
2851 rtx label = XEXP (label_ref, 0);
2852 rtx branch;
2853 int offset = 0;
2854 for (branch = NEXT_INSN (label);
2855 !JUMP_P (branch) && !CALL_P (branch);
2856 branch = NEXT_INSN (branch))
2857 if (NONJUMP_INSN_P (branch))
2858 offset += get_attr_length (branch);
2859 if (offset > 0)
2860 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2861 }
5a976006 2862
2863 if (spu_flag_var_tracking)
644459d0 2864 {
5a976006 2865 df_analyze ();
2866 timevar_push (TV_VAR_TRACKING);
2867 variable_tracking_main ();
2868 timevar_pop (TV_VAR_TRACKING);
2869 df_finish_pass (false);
644459d0 2870 }
5a976006 2871
2872 free_bb_for_insn ();
2873
2874 in_spu_reorg = 0;
644459d0 2875}
2876\f
2877
2878/* Insn scheduling routines, primarily for dual issue. */
2879static int
2880spu_sched_issue_rate (void)
2881{
2882 return 2;
2883}
2884
2885static int
5a976006 2886uses_ls_unit(rtx insn)
644459d0 2887{
5a976006 2888 rtx set = single_set (insn);
2889 if (set != 0
2890 && (GET_CODE (SET_DEST (set)) == MEM
2891 || GET_CODE (SET_SRC (set)) == MEM))
2892 return 1;
2893 return 0;
644459d0 2894}
2895
2896static int
2897get_pipe (rtx insn)
2898{
2899 enum attr_type t;
2900 /* Handle inline asm */
2901 if (INSN_CODE (insn) == -1)
2902 return -1;
2903 t = get_attr_type (insn);
2904 switch (t)
2905 {
2906 case TYPE_CONVERT:
2907 return -2;
2908 case TYPE_MULTI0:
2909 return -1;
2910
2911 case TYPE_FX2:
2912 case TYPE_FX3:
2913 case TYPE_SPR:
2914 case TYPE_NOP:
2915 case TYPE_FXB:
2916 case TYPE_FPD:
2917 case TYPE_FP6:
2918 case TYPE_FP7:
644459d0 2919 return 0;
2920
2921 case TYPE_LNOP:
2922 case TYPE_SHUF:
2923 case TYPE_LOAD:
2924 case TYPE_STORE:
2925 case TYPE_BR:
2926 case TYPE_MULTI1:
2927 case TYPE_HBR:
5a976006 2928 case TYPE_IPREFETCH:
644459d0 2929 return 1;
2930 default:
2931 abort ();
2932 }
2933}
2934
5a976006 2935
2936/* haifa-sched.c has a static variable that keeps track of the current
2937 cycle. It is passed to spu_sched_reorder, and we record it here for
2938 use by spu_sched_variable_issue. It won't be accurate if the
2939 scheduler updates its clock_var between the two calls. */
2940static int clock_var;
2941
2942/* This is used to keep track of insn alignment. Set to 0 at the
2943 beginning of each block and increased by the "length" attr of each
2944 insn scheduled. */
2945static int spu_sched_length;
2946
2947/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2948 ready list appropriately in spu_sched_reorder(). */
2949static int pipe0_clock;
2950static int pipe1_clock;
2951
2952static int prev_clock_var;
2953
2954static int prev_priority;
2955
2956/* The SPU needs to load the next ilb sometime during the execution of
2957 the previous ilb. There is a potential conflict if every cycle has a
2958 load or store. To avoid the conflict we make sure the load/store
2959 unit is free for at least one cycle during the execution of insns in
2960 the previous ilb. */
2961static int spu_ls_first;
2962static int prev_ls_clock;
2963
2964static void
2965spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2966 int max_ready ATTRIBUTE_UNUSED)
2967{
2968 spu_sched_length = 0;
2969}
2970
2971static void
2972spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2973 int max_ready ATTRIBUTE_UNUSED)
2974{
2975 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2976 {
2977 /* When any block might be at least 8-byte aligned, assume all
2978 blocks are at least 8-byte aligned to make sure dual issue
2979 works out correctly. */
2980 spu_sched_length = 0;
2981 }
2982 spu_ls_first = INT_MAX;
2983 clock_var = -1;
2984 prev_ls_clock = -1;
2985 pipe0_clock = -1;
2986 pipe1_clock = -1;
2987 prev_clock_var = -1;
2988 prev_priority = -1;
2989}
2990
644459d0 2991static int
5a976006 2992spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2993 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2994{
5a976006 2995 int len;
2996 int p;
644459d0 2997 if (GET_CODE (PATTERN (insn)) == USE
2998 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2999 || (len = get_attr_length (insn)) == 0)
3000 return more;
3001
3002 spu_sched_length += len;
3003
3004 /* Reset on inline asm */
3005 if (INSN_CODE (insn) == -1)
3006 {
3007 spu_ls_first = INT_MAX;
3008 pipe0_clock = -1;
3009 pipe1_clock = -1;
3010 return 0;
3011 }
3012 p = get_pipe (insn);
3013 if (p == 0)
3014 pipe0_clock = clock_var;
3015 else
3016 pipe1_clock = clock_var;
3017
3018 if (in_spu_reorg)
3019 {
3020 if (clock_var - prev_ls_clock > 1
3021 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3022 spu_ls_first = INT_MAX;
3023 if (uses_ls_unit (insn))
3024 {
3025 if (spu_ls_first == INT_MAX)
3026 spu_ls_first = spu_sched_length;
3027 prev_ls_clock = clock_var;
3028 }
3029
3030 /* The scheduler hasn't inserted the nop, but we will later on.
3031 Include those nops in spu_sched_length. */
3032 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3033 spu_sched_length += 4;
3034 prev_clock_var = clock_var;
3035
3036 /* more is -1 when called from spu_sched_reorder for new insns
3037 that don't have INSN_PRIORITY */
3038 if (more >= 0)
3039 prev_priority = INSN_PRIORITY (insn);
3040 }
3041
3042 /* Always try issuing more insns. spu_sched_reorder will decide
3043 when the cycle should be advanced. */
3044 return 1;
3045}
3046
3047/* This function is called for both TARGET_SCHED_REORDER and
3048 TARGET_SCHED_REORDER2. */
3049static int
3050spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3051 rtx *ready, int *nreadyp, int clock)
3052{
3053 int i, nready = *nreadyp;
3054 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3055 rtx insn;
3056
3057 clock_var = clock;
3058
3059 if (nready <= 0 || pipe1_clock >= clock)
3060 return 0;
3061
3062 /* Find any rtl insns that don't generate assembly insns and schedule
3063 them first. */
3064 for (i = nready - 1; i >= 0; i--)
3065 {
3066 insn = ready[i];
3067 if (INSN_CODE (insn) == -1
3068 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3069 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3070 {
3071 ready[i] = ready[nready - 1];
3072 ready[nready - 1] = insn;
3073 return 1;
3074 }
3075 }
3076
3077 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3078 for (i = 0; i < nready; i++)
3079 if (INSN_CODE (ready[i]) != -1)
3080 {
3081 insn = ready[i];
3082 switch (get_attr_type (insn))
3083 {
3084 default:
3085 case TYPE_MULTI0:
3086 case TYPE_CONVERT:
3087 case TYPE_FX2:
3088 case TYPE_FX3:
3089 case TYPE_SPR:
3090 case TYPE_NOP:
3091 case TYPE_FXB:
3092 case TYPE_FPD:
3093 case TYPE_FP6:
3094 case TYPE_FP7:
3095 pipe_0 = i;
3096 break;
3097 case TYPE_LOAD:
3098 case TYPE_STORE:
3099 pipe_ls = i;
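	  /* Loads and stores also issue on pipe 1; fall through.  */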
3100 case TYPE_LNOP:
3101 case TYPE_SHUF:
3102 case TYPE_BR:
3103 case TYPE_MULTI1:
3104 case TYPE_HBR:
3105 pipe_1 = i;
3106 break;
3107 case TYPE_IPREFETCH:
3108 pipe_hbrp = i;
3109 break;
3110 }
3111 }
3112
3113 /* In the first scheduling phase, schedule loads and stores together
3114 to increase the chance they will get merged during postreload CSE. */
3115 if (!reload_completed && pipe_ls >= 0)
3116 {
3117 insn = ready[pipe_ls];
3118 ready[pipe_ls] = ready[nready - 1];
3119 ready[nready - 1] = insn;
3120 return 1;
3121 }
3122
3123 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3124 if (pipe_hbrp >= 0)
3125 pipe_1 = pipe_hbrp;
3126
3127 /* When we have loads/stores in every cycle of the last 15 insns and
3128 we are about to schedule another load/store, emit an hbrp insn
3129 instead. */
3130 if (in_spu_reorg
3131 && spu_sched_length - spu_ls_first >= 4 * 15
3132 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3133 {
3134 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3135 recog_memoized (insn);
3136 if (pipe0_clock < clock)
3137 PUT_MODE (insn, TImode);
3138 spu_sched_variable_issue (file, verbose, insn, -1);
3139 return 0;
3140 }
3141
3142 /* In general, we want to emit nops to increase dual issue, but dual
3143 issue isn't faster when one of the insns could be scheduled later
3144 without affecting the critical path. We look at INSN_PRIORITY to
3145 make a good guess, but it isn't perfect, so -mdual-nops=n can be
3146 used to tune this behavior. */
3147 if (in_spu_reorg && spu_dual_nops < 10)
3148 {
3149 /* When we are at an even address and we are not issuing nops to
3150 improve scheduling, then we need to advance the cycle. */
3151 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3152 && (spu_dual_nops == 0
3153 || (pipe_1 != -1
3154 && prev_priority >
3155 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3156 return 0;
3157
3158 /* When at an odd address, schedule the highest priority insn
3159 without considering pipeline. */
3160 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3161 && (spu_dual_nops == 0
3162 || (prev_priority >
3163 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3164 return 1;
3165 }
3166
3167
3168 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3169 pipe0 insn in the ready list, schedule it. */
3170 if (pipe0_clock < clock && pipe_0 >= 0)
3171 schedule_i = pipe_0;
3172
3173 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3174 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3175 else
3176 schedule_i = pipe_1;
3177
3178 if (schedule_i > -1)
3179 {
3180 insn = ready[schedule_i];
3181 ready[schedule_i] = ready[nready - 1];
3182 ready[nready - 1] = insn;
3183 return 1;
3184 }
3185 return 0;
644459d0 3186}
3187
3188/* INSN is dependent on DEP_INSN. */
3189static int
5a976006 3190spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3191{
5a976006 3192 rtx set;
3193
3194 /* The blockage pattern is used to prevent instructions from being
3195 moved across it and has no cost. */
3196 if (INSN_CODE (insn) == CODE_FOR_blockage
3197 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3198 return 0;
3199
9d98604b 3200 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3201 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3202 return 0;
3203
3204 /* Make sure hbrps are spread out. */
3205 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3206 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3207 return 8;
3208
3209 /* Make sure hints and hbrps are 2 cycles apart. */
3210 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3211 || INSN_CODE (insn) == CODE_FOR_hbr)
3212 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3213 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3214 return 2;
3215
3216 /* An hbrp has no real dependency on other insns. */
3217 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3218 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3219 return 0;
3220
3221 /* Assuming that it is unlikely an argument register will be used in
3222 the first cycle of the called function, we reduce the cost for
3223 slightly better scheduling of dep_insn. When not hinted, the
3224 mispredicted branch would hide the cost as well. */
3225 if (CALL_P (insn))
3226 {
3227 rtx target = get_branch_target (insn);
3228 if (GET_CODE (target) != REG || !set_of (target, insn))
3229 return cost - 2;
3230 return cost;
3231 }
3232
3233 /* And when returning from a function, let's assume the return values
3234 are completed sooner too. */
3235 if (CALL_P (dep_insn))
644459d0 3236 return cost - 2;
5a976006 3237
3238 /* Make sure an instruction that loads from the back chain is scheduled
3239 away from the return instruction so a hint is more likely to get
3240 issued. */
3241 if (INSN_CODE (insn) == CODE_FOR__return
3242 && (set = single_set (dep_insn))
3243 && GET_CODE (SET_DEST (set)) == REG
3244 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3245 return 20;
3246
644459d0 3247 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3248 scheduler makes every insn in a block anti-dependent on the final
3249 jump_insn. We adjust here so higher cost insns will get scheduled
3250 earlier. */
5a976006 3251 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3252 return insn_cost (dep_insn) - 3;
5a976006 3253
644459d0 3254 return cost;
3255}
3256\f
3257/* Create a CONST_DOUBLE from a string. */
3258struct rtx_def *
3259spu_float_const (const char *string, enum machine_mode mode)
3260{
3261 REAL_VALUE_TYPE value;
3262 value = REAL_VALUE_ATOF (string, mode);
3263 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3264}
3265
644459d0 3266int
3267spu_constant_address_p (rtx x)
3268{
3269 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3270 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3271 || GET_CODE (x) == HIGH);
3272}
3273
3274static enum spu_immediate
3275which_immediate_load (HOST_WIDE_INT val)
3276{
3277 gcc_assert (val == trunc_int_for_mode (val, SImode));
3278
3279 if (val >= -0x8000 && val <= 0x7fff)
3280 return SPU_IL;
3281 if (val >= 0 && val <= 0x3ffff)
3282 return SPU_ILA;
3283 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3284 return SPU_ILH;
3285 if ((val & 0xffff) == 0)
3286 return SPU_ILHU;
3287
3288 return SPU_NONE;
3289}
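
The same selection rules can be tried outside the compiler. This sketch restates which_immediate_load over plain 32-bit integers; classify is a made-up helper and the sample constants are arbitrary:

/* Hypothetical helper, not part of spu.c: the il/ila/ilh/ilhu selection
   rules restated over plain 32-bit values.  */
#include <stdio.h>
#include <stdint.h>

static const char *
classify (int32_t val)
{
  if (val >= -0x8000 && val <= 0x7fff)
    return "il";    /* 16-bit signed immediate        */
  if (val >= 0 && val <= 0x3ffff)
    return "ila";   /* 18-bit unsigned immediate      */
  if ((uint32_t) (val & 0xffff) == ((uint32_t) val >> 16))
    return "ilh";   /* both halfwords identical       */
  if ((val & 0xffff) == 0)
    return "ilhu";  /* only the upper halfword is set */
  return "none";    /* needs ilhu+iohl or the pool    */
}

int
main (void)
{
  int32_t tests[] = { 42, -1, 0x20000, 0x12340000, 0x12341234, 0x12345678 };
  unsigned int i;

  for (i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%#x -> %s\n", (unsigned int) tests[i], classify (tests[i]));
  return 0;
}
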
3290
dea01258 3291/* Return true when OP can be loaded by one of the il instructions, or
3292 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3293int
3294immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3295{
3296 if (CONSTANT_P (op))
3297 {
3298 enum immediate_class c = classify_immediate (op, mode);
5df189be 3299 return c == IC_IL1 || c == IC_IL1s
3072d30e 3300 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3301 }
3302 return 0;
3303}
3304
3305/* Return true if the first SIZE bytes of ARR form a constant that can be
3306 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3307 represent the size and offset of the instruction to use. */
3308static int
3309cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3310{
3311 int cpat, run, i, start;
3312 cpat = 1;
3313 run = 0;
3314 start = -1;
3315 for (i = 0; i < size && cpat; i++)
3316 if (arr[i] != i+16)
3317 {
3318 if (!run)
3319 {
3320 start = i;
3321 if (arr[i] == 3)
3322 run = 1;
3323 else if (arr[i] == 2 && arr[i+1] == 3)
3324 run = 2;
3325 else if (arr[i] == 0)
3326 {
3327 while (i+run < 16 && arr[i+run] == run)
3328 run++;
3329 if (run != 4 && run != 8)
3330 cpat = 0;
3331 }
3332 else
3333 cpat = 0;
3334 if ((i & (run-1)) != 0)
3335 cpat = 0;
3336 i += run;
3337 }
3338 else
3339 cpat = 0;
3340 }
b01a6dc3 3341 if (cpat && (run || size < 16))
dea01258 3342 {
3343 if (run == 0)
3344 run = 1;
3345 if (prun)
3346 *prun = run;
3347 if (pstart)
3348 *pstart = start == -1 ? 16-run : start;
3349 return 1;
3350 }
3351 return 0;
3352}
3353
3354/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3355 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3356static enum immediate_class
3357classify_immediate (rtx op, enum machine_mode mode)
644459d0 3358{
3359 HOST_WIDE_INT val;
3360 unsigned char arr[16];
5df189be 3361 int i, j, repeated, fsmbi, repeat;
dea01258 3362
3363 gcc_assert (CONSTANT_P (op));
3364
644459d0 3365 if (GET_MODE (op) != VOIDmode)
3366 mode = GET_MODE (op);
3367
dea01258 3368 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3369 if (!flag_pic
3370 && mode == V4SImode
dea01258 3371 && GET_CODE (op) == CONST_VECTOR
3372 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3373 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3374 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3375 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3376 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3377 op = CONST_VECTOR_ELT (op, 0);
644459d0 3378
dea01258 3379 switch (GET_CODE (op))
3380 {
3381 case SYMBOL_REF:
3382 case LABEL_REF:
3383 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3384
dea01258 3385 case CONST:
0cfc65d4 3386 /* We can never know if the resulting address fits in 18 bits and can be
3387 loaded with ila. For now, assume the address will not overflow if
3388 the displacement is "small" (fits 'K' constraint). */
3389 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3390 {
3391 rtx sym = XEXP (XEXP (op, 0), 0);
3392 rtx cst = XEXP (XEXP (op, 0), 1);
3393
3394 if (GET_CODE (sym) == SYMBOL_REF
3395 && GET_CODE (cst) == CONST_INT
3396 && satisfies_constraint_K (cst))
3397 return IC_IL1s;
3398 }
3399 return IC_IL2s;
644459d0 3400
dea01258 3401 case HIGH:
3402 return IC_IL1s;
3403
3404 case CONST_VECTOR:
3405 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3406 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3407 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3408 return IC_POOL;
3409 /* Fall through. */
3410
3411 case CONST_INT:
3412 case CONST_DOUBLE:
3413 constant_to_array (mode, op, arr);
644459d0 3414
dea01258 3415 /* Check that each 4-byte slot is identical. */
3416 repeated = 1;
3417 for (i = 4; i < 16; i += 4)
3418 for (j = 0; j < 4; j++)
3419 if (arr[j] != arr[i + j])
3420 repeated = 0;
3421
3422 if (repeated)
3423 {
3424 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3425 val = trunc_int_for_mode (val, SImode);
3426
3427 if (which_immediate_load (val) != SPU_NONE)
3428 return IC_IL1;
3429 }
3430
3431 /* Any mode of 2 bytes or smaller can be loaded with an il
3432 instruction. */
3433 gcc_assert (GET_MODE_SIZE (mode) > 2);
3434
3435 fsmbi = 1;
5df189be 3436 repeat = 0;
dea01258 3437 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3438 if (arr[i] != 0 && repeat == 0)
3439 repeat = arr[i];
3440 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3441 fsmbi = 0;
3442 if (fsmbi)
5df189be 3443 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3444
3445 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3446 return IC_CPAT;
3447
3448 if (repeated)
3449 return IC_IL2;
3450
3451 return IC_POOL;
3452 default:
3453 break;
3454 }
3455 gcc_unreachable ();
644459d0 3456}
3457
3458static enum spu_immediate
3459which_logical_immediate (HOST_WIDE_INT val)
3460{
3461 gcc_assert (val == trunc_int_for_mode (val, SImode));
3462
3463 if (val >= -0x200 && val <= 0x1ff)
3464 return SPU_ORI;
3465 if (val >= 0 && val <= 0xffff)
3466 return SPU_IOHL;
3467 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3468 {
3469 val = trunc_int_for_mode (val, HImode);
3470 if (val >= -0x200 && val <= 0x1ff)
3471 return SPU_ORHI;
3472 if ((val & 0xff) == ((val >> 8) & 0xff))
3473 {
3474 val = trunc_int_for_mode (val, QImode);
3475 if (val >= -0x200 && val <= 0x1ff)
3476 return SPU_ORBI;
3477 }
3478 }
3479 return SPU_NONE;
3480}
3481
5df189be 3482/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3483 CONST_DOUBLEs. */
3484static int
3485const_vector_immediate_p (rtx x)
3486{
3487 int i;
3488 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3489 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3490 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3491 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3492 return 0;
3493 return 1;
3494}
3495
644459d0 3496int
3497logical_immediate_p (rtx op, enum machine_mode mode)
3498{
3499 HOST_WIDE_INT val;
3500 unsigned char arr[16];
3501 int i, j;
3502
3503 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3504 || GET_CODE (op) == CONST_VECTOR);
3505
5df189be 3506 if (GET_CODE (op) == CONST_VECTOR
3507 && !const_vector_immediate_p (op))
3508 return 0;
3509
644459d0 3510 if (GET_MODE (op) != VOIDmode)
3511 mode = GET_MODE (op);
3512
3513 constant_to_array (mode, op, arr);
3514
3515 /* Check that bytes are repeated. */
3516 for (i = 4; i < 16; i += 4)
3517 for (j = 0; j < 4; j++)
3518 if (arr[j] != arr[i + j])
3519 return 0;
3520
3521 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3522 val = trunc_int_for_mode (val, SImode);
3523
3524 i = which_logical_immediate (val);
3525 return i != SPU_NONE && i != SPU_IOHL;
3526}
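
logical_immediate_p, iohl_immediate_p and arith_immediate_p all begin with the same test: the 16-byte constant image must be a repetition of its first group of bytes. A minimal standalone restatement (repeated_p and its inputs are hypothetical, not part of spu.c):

/* Hypothetical helper: the repeated-group test shared by the
   *_immediate_p predicates.  */
#include <stdio.h>

static int
repeated_p (const unsigned char arr[16], int group)
{
  int i, j;
  for (i = group; i < 16; i += group)
    for (j = 0; j < group; j++)
      if (arr[j] != arr[i + j])
        return 0;
  return 1;
}

int
main (void)
{
  unsigned char splat[16] = { 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
                              0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef };
  unsigned char mixed[16] = { 1, 2, 3, 4, 5, 6, 7, 8,
                              9, 10, 11, 12, 13, 14, 15, 16 };

  printf ("splat: %d  mixed: %d\n",
          repeated_p (splat, 4), repeated_p (mixed, 4));   /* 1 and 0 */
  return 0;
}
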
3527
3528int
3529iohl_immediate_p (rtx op, enum machine_mode mode)
3530{
3531 HOST_WIDE_INT val;
3532 unsigned char arr[16];
3533 int i, j;
3534
3535 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3536 || GET_CODE (op) == CONST_VECTOR);
3537
5df189be 3538 if (GET_CODE (op) == CONST_VECTOR
3539 && !const_vector_immediate_p (op))
3540 return 0;
3541
644459d0 3542 if (GET_MODE (op) != VOIDmode)
3543 mode = GET_MODE (op);
3544
3545 constant_to_array (mode, op, arr);
3546
3547 /* Check that bytes are repeated. */
3548 for (i = 4; i < 16; i += 4)
3549 for (j = 0; j < 4; j++)
3550 if (arr[j] != arr[i + j])
3551 return 0;
3552
3553 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3554 val = trunc_int_for_mode (val, SImode);
3555
3556 return val >= 0 && val <= 0xffff;
3557}
3558
3559int
3560arith_immediate_p (rtx op, enum machine_mode mode,
3561 HOST_WIDE_INT low, HOST_WIDE_INT high)
3562{
3563 HOST_WIDE_INT val;
3564 unsigned char arr[16];
3565 int bytes, i, j;
3566
3567 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3568 || GET_CODE (op) == CONST_VECTOR);
3569
5df189be 3570 if (GET_CODE (op) == CONST_VECTOR
3571 && !const_vector_immediate_p (op))
3572 return 0;
3573
644459d0 3574 if (GET_MODE (op) != VOIDmode)
3575 mode = GET_MODE (op);
3576
3577 constant_to_array (mode, op, arr);
3578
3579 if (VECTOR_MODE_P (mode))
3580 mode = GET_MODE_INNER (mode);
3581
3582 bytes = GET_MODE_SIZE (mode);
3583 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3584
3585 /* Check that bytes are repeated. */
3586 for (i = bytes; i < 16; i += bytes)
3587 for (j = 0; j < bytes; j++)
3588 if (arr[j] != arr[i + j])
3589 return 0;
3590
3591 val = arr[0];
3592 for (j = 1; j < bytes; j++)
3593 val = (val << 8) | arr[j];
3594
3595 val = trunc_int_for_mode (val, mode);
3596
3597 return val >= low && val <= high;
3598}
3599
56c7bfc2 3600/* TRUE when op is an immediate and an exact power of 2, and given that
3601 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3602 all entries must be the same. */
3603bool
3604exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3605{
3606 enum machine_mode int_mode;
3607 HOST_WIDE_INT val;
3608 unsigned char arr[16];
3609 int bytes, i, j;
3610
3611 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3612 || GET_CODE (op) == CONST_VECTOR);
3613
3614 if (GET_CODE (op) == CONST_VECTOR
3615 && !const_vector_immediate_p (op))
3616 return 0;
3617
3618 if (GET_MODE (op) != VOIDmode)
3619 mode = GET_MODE (op);
3620
3621 constant_to_array (mode, op, arr);
3622
3623 if (VECTOR_MODE_P (mode))
3624 mode = GET_MODE_INNER (mode);
3625
3626 bytes = GET_MODE_SIZE (mode);
3627 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3628
3629 /* Check that bytes are repeated. */
3630 for (i = bytes; i < 16; i += bytes)
3631 for (j = 0; j < bytes; j++)
3632 if (arr[j] != arr[i + j])
3633 return 0;
3634
3635 val = arr[0];
3636 for (j = 1; j < bytes; j++)
3637 val = (val << 8) | arr[j];
3638
3639 val = trunc_int_for_mode (val, int_mode);
3640
3641 /* Currently, we only handle SFmode */
3642 gcc_assert (mode == SFmode);
3643 if (mode == SFmode)
3644 {
3645 int exp = (val >> 23) - 127;
3646 return val > 0 && (val & 0x007fffff) == 0
3647 && exp >= low && exp <= high;
3648 }
3649 return FALSE;
3650}
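
For the SFmode branch above, a usable constant is a positive power of two: the mantissa field is zero and the exponent is recovered as (bits >> 23) - 127. A self-contained restatement with arbitrary sample values (exp2_sf_p is a made-up helper, not part of spu.c):

/* Hypothetical helper: single-precision power-of-two test matching the
   SFmode check above.  */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

static int
exp2_sf_p (float f, int low, int high)
{
  uint32_t bits;
  int exp;

  memcpy (&bits, &f, sizeof bits);   /* bit image of the float */
  if (bits == 0                      /* +0.0                   */
      || (bits >> 31) != 0           /* negative               */
      || (bits & 0x007fffff) != 0)   /* mantissa bits set      */
    return 0;
  exp = (int) (bits >> 23) - 127;    /* unbias the exponent    */
  return exp >= low && exp <= high;
}

int
main (void)
{
  printf ("8.0 -> %d, 0.5 -> %d, 3.0 -> %d\n",
          exp2_sf_p (8.0f, -126, 127),    /* 2^3      -> 1 */
          exp2_sf_p (0.5f, -126, 127),    /* 2^-1     -> 1 */
          exp2_sf_p (3.0f, -126, 127));   /* not 2^n  -> 0 */
  return 0;
}
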
3651
6cf5579e 3652/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3653
3654static int
3655ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3656{
3657 rtx x = *px;
3658 tree decl;
3659
3660 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3661 {
3662 rtx plus = XEXP (x, 0);
3663 rtx op0 = XEXP (plus, 0);
3664 rtx op1 = XEXP (plus, 1);
3665 if (GET_CODE (op1) == CONST_INT)
3666 x = op0;
3667 }
3668
3669 return (GET_CODE (x) == SYMBOL_REF
3670 && (decl = SYMBOL_REF_DECL (x)) != 0
3671 && TREE_CODE (decl) == VAR_DECL
3672 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3673}
3674
644459d0 3675/* We accept:
5b865faf 3676 - any 32-bit constant (SImode, SFmode)
644459d0 3677 - any constant that can be generated with fsmbi (any mode)
5b865faf 3678 - a 64-bit constant where the high and low 32 bits are identical
644459d0 3679 (DImode, DFmode)
5b865faf 3680 - a 128-bit constant where the four 32-bit words match. */
644459d0 3681int
3682spu_legitimate_constant_p (rtx x)
3683{
5df189be 3684 if (GET_CODE (x) == HIGH)
3685 x = XEXP (x, 0);
6cf5579e 3686
3687 /* Reject any __ea qualified reference. These can't appear in
3688 instructions but must be forced to the constant pool. */
3689 if (for_each_rtx (&x, ea_symbol_ref, 0))
3690 return 0;
3691
644459d0 3692 /* V4SI with all identical symbols is valid. */
5df189be 3693 if (!flag_pic
3694 && GET_MODE (x) == V4SImode
644459d0 3695 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3696 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3697 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3698 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3699 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3700 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3701
5df189be 3702 if (GET_CODE (x) == CONST_VECTOR
3703 && !const_vector_immediate_p (x))
3704 return 0;
644459d0 3705 return 1;
3706}
3707
 3708/* Valid addresses are:
3709 - symbol_ref, label_ref, const
3710 - reg
9d98604b 3711 - reg + const_int, where const_int is 16 byte aligned
644459d0 3712 - reg + reg, alignment doesn't matter
3713 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3714 ignore the 4 least significant bits of the const. We only care about
3715 16 byte modes because the expand phase will change all smaller MEM
3716 references to TImode. */
3717static bool
3718spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3719 rtx x, bool reg_ok_strict)
644459d0 3720{
9d98604b 3721 int aligned = GET_MODE_SIZE (mode) >= 16;
3722 if (aligned
3723 && GET_CODE (x) == AND
644459d0 3724 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3725 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3726 x = XEXP (x, 0);
3727 switch (GET_CODE (x))
3728 {
644459d0 3729 case LABEL_REF:
6cf5579e 3730 return !TARGET_LARGE_MEM;
3731
9d98604b 3732 case SYMBOL_REF:
644459d0 3733 case CONST:
6cf5579e 3734 /* Keep __ea references until reload so that spu_expand_mov can see them
3735 in MEMs. */
3736 if (ea_symbol_ref (&x, 0))
3737 return !reload_in_progress && !reload_completed;
9d98604b 3738 return !TARGET_LARGE_MEM;
644459d0 3739
3740 case CONST_INT:
3741 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3742
3743 case SUBREG:
3744 x = XEXP (x, 0);
9d98604b 3745 if (REG_P (x))
3746 return 0;
644459d0 3747
3748 case REG:
3749 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3750
3751 case PLUS:
3752 case LO_SUM:
3753 {
3754 rtx op0 = XEXP (x, 0);
3755 rtx op1 = XEXP (x, 1);
3756 if (GET_CODE (op0) == SUBREG)
3757 op0 = XEXP (op0, 0);
3758 if (GET_CODE (op1) == SUBREG)
3759 op1 = XEXP (op1, 0);
644459d0 3760 if (GET_CODE (op0) == REG
3761 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3762 && GET_CODE (op1) == CONST_INT
3763 && INTVAL (op1) >= -0x2000
3764 && INTVAL (op1) <= 0x1fff
9d98604b 3765 && (!aligned || (INTVAL (op1) & 15) == 0))
3766 return TRUE;
644459d0 3767 if (GET_CODE (op0) == REG
3768 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3769 && GET_CODE (op1) == REG
3770 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3771 return TRUE;
644459d0 3772 }
3773 break;
3774
3775 default:
3776 break;
3777 }
9d98604b 3778 return FALSE;
644459d0 3779}
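
/* Illustrative examples: for a 16-byte access, (reg + 32) is a
   legitimate address because the offset is a multiple of 16, while
   (reg + 8) is not and has to be split; in both cases the offset must
   also lie in the range -0x2000 .. 0x1fff checked above. */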
3780
6cf5579e 3781/* Like spu_legitimate_address_p, except with named addresses. */
3782static bool
3783spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3784 bool reg_ok_strict, addr_space_t as)
3785{
3786 if (as == ADDR_SPACE_EA)
3787 return (REG_P (x) && (GET_MODE (x) == EAmode));
3788
3789 else if (as != ADDR_SPACE_GENERIC)
3790 gcc_unreachable ();
3791
3792 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3793}
3794
644459d0 3795/* When the address is reg + const_int, force the const_int into a
fa7637bd 3796 register. */
644459d0 3797rtx
3798spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3799 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3800{
3801 rtx op0, op1;
3802 /* Make sure both operands are registers. */
3803 if (GET_CODE (x) == PLUS)
3804 {
3805 op0 = XEXP (x, 0);
3806 op1 = XEXP (x, 1);
3807 if (ALIGNED_SYMBOL_REF_P (op0))
3808 {
3809 op0 = force_reg (Pmode, op0);
3810 mark_reg_pointer (op0, 128);
3811 }
3812 else if (GET_CODE (op0) != REG)
3813 op0 = force_reg (Pmode, op0);
3814 if (ALIGNED_SYMBOL_REF_P (op1))
3815 {
3816 op1 = force_reg (Pmode, op1);
3817 mark_reg_pointer (op1, 128);
3818 }
3819 else if (GET_CODE (op1) != REG)
3820 op1 = force_reg (Pmode, op1);
3821 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3822 }
41e3a0c7 3823 return x;
644459d0 3824}
3825
6cf5579e 3826/* Like spu_legitimate_address, except with named address support. */
3827static rtx
3828spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3829 addr_space_t as)
3830{
3831 if (as != ADDR_SPACE_GENERIC)
3832 return x;
3833
3834 return spu_legitimize_address (x, oldx, mode);
3835}
3836
644459d0 3837/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3838 struct attribute_spec.handler. */
3839static tree
3840spu_handle_fndecl_attribute (tree * node,
3841 tree name,
3842 tree args ATTRIBUTE_UNUSED,
3843 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3844{
3845 if (TREE_CODE (*node) != FUNCTION_DECL)
3846 {
67a779df 3847 warning (0, "%qE attribute only applies to functions",
3848 name);
644459d0 3849 *no_add_attrs = true;
3850 }
3851
3852 return NULL_TREE;
3853}
3854
3855/* Handle the "vector" attribute. */
3856static tree
3857spu_handle_vector_attribute (tree * node, tree name,
3858 tree args ATTRIBUTE_UNUSED,
3859 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3860{
3861 tree type = *node, result = NULL_TREE;
3862 enum machine_mode mode;
3863 int unsigned_p;
3864
3865 while (POINTER_TYPE_P (type)
3866 || TREE_CODE (type) == FUNCTION_TYPE
3867 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3868 type = TREE_TYPE (type);
3869
3870 mode = TYPE_MODE (type);
3871
3872 unsigned_p = TYPE_UNSIGNED (type);
3873 switch (mode)
3874 {
3875 case DImode:
3876 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3877 break;
3878 case SImode:
3879 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3880 break;
3881 case HImode:
3882 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3883 break;
3884 case QImode:
3885 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3886 break;
3887 case SFmode:
3888 result = V4SF_type_node;
3889 break;
3890 case DFmode:
3891 result = V2DF_type_node;
3892 break;
3893 default:
3894 break;
3895 }
3896
3897 /* Propagate qualifiers attached to the element type
3898 onto the vector type. */
3899 if (result && result != type && TYPE_QUALS (type))
3900 result = build_qualified_type (result, TYPE_QUALS (type));
3901
3902 *no_add_attrs = true; /* No need to hang on to the attribute. */
3903
3904 if (!result)
67a779df 3905 warning (0, "%qE attribute ignored", name);
644459d0 3906 else
d991e6e8 3907 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3908
3909 return NULL_TREE;
3910}
3911
f2b32076 3912/* Return nonzero if FUNC is a naked function. */
644459d0 3913static int
3914spu_naked_function_p (tree func)
3915{
3916 tree a;
3917
3918 if (TREE_CODE (func) != FUNCTION_DECL)
3919 abort ();
3920
3921 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3922 return a != NULL_TREE;
3923}
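
/* For example, a function declared as
     void f (void) __attribute__ ((naked));
   is treated as naked, and the usual prologue and epilogue code is
   omitted for it. */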
3924
3925int
3926spu_initial_elimination_offset (int from, int to)
3927{
3928 int saved_regs_size = spu_saved_regs_size ();
3929 int sp_offset = 0;
abe32cce 3930 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3931 || get_frame_size () || saved_regs_size)
3932 sp_offset = STACK_POINTER_OFFSET;
3933 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3934 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3935 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3936 return get_frame_size ();
644459d0 3937 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3938 return sp_offset + crtl->outgoing_args_size
644459d0 3939 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3940 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3941 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3942 else
3943 gcc_unreachable ();
644459d0 3944}
3945
3946rtx
fb80456a 3947spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3948{
3949 enum machine_mode mode = TYPE_MODE (type);
3950 int byte_size = ((mode == BLKmode)
3951 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3952
3953 /* Make sure small structs are left justified in a register. */
3954 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3955 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3956 {
3957 enum machine_mode smode;
3958 rtvec v;
3959 int i;
3960 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3961 int n = byte_size / UNITS_PER_WORD;
3962 v = rtvec_alloc (nregs);
3963 for (i = 0; i < n; i++)
3964 {
3965 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3966 gen_rtx_REG (TImode,
3967 FIRST_RETURN_REGNUM
3968 + i),
3969 GEN_INT (UNITS_PER_WORD * i));
3970 byte_size -= UNITS_PER_WORD;
3971 }
3972
3973 if (n < nregs)
3974 {
3975 if (byte_size < 4)
3976 byte_size = 4;
3977 smode =
3978 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3979 RTVEC_ELT (v, n) =
3980 gen_rtx_EXPR_LIST (VOIDmode,
3981 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3982 GEN_INT (UNITS_PER_WORD * n));
3983 }
3984 return gen_rtx_PARALLEL (mode, v);
3985 }
3986 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3987}
3988
3989rtx
3990spu_function_arg (CUMULATIVE_ARGS cum,
3991 enum machine_mode mode,
3992 tree type, int named ATTRIBUTE_UNUSED)
3993{
3994 int byte_size;
3995
3996 if (cum >= MAX_REGISTER_ARGS)
3997 return 0;
3998
3999 byte_size = ((mode == BLKmode)
4000 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4001
4002 /* The ABI does not allow parameters to be passed partially in
 4003 a register and partially on the stack. */
4004 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4005 return 0;
4006
4007 /* Make sure small structs are left justified in a register. */
4008 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4009 && byte_size < UNITS_PER_WORD && byte_size > 0)
4010 {
4011 enum machine_mode smode;
4012 rtx gr_reg;
4013 if (byte_size < 4)
4014 byte_size = 4;
4015 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4016 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4017 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4018 const0_rtx);
4019 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4020 }
4021 else
4022 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4023}
4024
4025/* Variable sized types are passed by reference. */
4026static bool
4027spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4028 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4029 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4030{
4031 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4032}
4033\f
4034
4035/* Var args. */
4036
4037/* Create and return the va_list datatype.
4038
4039 On SPU, va_list is an array type equivalent to
4040
4041 typedef struct __va_list_tag
4042 {
4043 void *__args __attribute__((__aligned(16)));
4044 void *__skip __attribute__((__aligned(16)));
4045
4046 } va_list[1];
4047
fa7637bd 4048 where __args points to the arg that will be returned by the next
644459d0 4049 va_arg(), and __skip points to the previous stack frame such that
4050 when __args == __skip we should advance __args by 32 bytes. */
4051static tree
4052spu_build_builtin_va_list (void)
4053{
4054 tree f_args, f_skip, record, type_decl;
4055 bool owp;
4056
4057 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4058
4059 type_decl =
54e46243 4060 build_decl (BUILTINS_LOCATION,
4061 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4062
54e46243 4063 f_args = build_decl (BUILTINS_LOCATION,
4064 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4065 f_skip = build_decl (BUILTINS_LOCATION,
4066 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4067
4068 DECL_FIELD_CONTEXT (f_args) = record;
4069 DECL_ALIGN (f_args) = 128;
4070 DECL_USER_ALIGN (f_args) = 1;
4071
4072 DECL_FIELD_CONTEXT (f_skip) = record;
4073 DECL_ALIGN (f_skip) = 128;
4074 DECL_USER_ALIGN (f_skip) = 1;
4075
4076 TREE_CHAIN (record) = type_decl;
4077 TYPE_NAME (record) = type_decl;
4078 TYPE_FIELDS (record) = f_args;
4079 TREE_CHAIN (f_args) = f_skip;
4080
 4081 /* We know this is being padded and we want it to be. It is an internal
 4082 type, so hide the warnings from the user. */
4083 owp = warn_padded;
4084 warn_padded = false;
4085
4086 layout_type (record);
4087
4088 warn_padded = owp;
4089
4090 /* The correct type is an array type of one element. */
4091 return build_array_type (record, build_index_type (size_zero_node));
4092}
4093
4094/* Implement va_start by filling the va_list structure VALIST.
4095 NEXTARG points to the first anonymous stack argument.
4096
4097 The following global variables are used to initialize
4098 the va_list structure:
4099
abe32cce 4100 crtl->args.info;
644459d0 4101 the CUMULATIVE_ARGS for this function
4102
abe32cce 4103 crtl->args.arg_offset_rtx:
644459d0 4104 holds the offset of the first anonymous stack argument
4105 (relative to the virtual arg pointer). */
4106
8a58ed0a 4107static void
644459d0 4108spu_va_start (tree valist, rtx nextarg)
4109{
4110 tree f_args, f_skip;
4111 tree args, skip, t;
4112
4113 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4114 f_skip = TREE_CHAIN (f_args);
4115
4116 valist = build_va_arg_indirect_ref (valist);
4117 args =
4118 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4119 skip =
4120 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4121
4122 /* Find the __args area. */
4123 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4124 if (crtl->args.pretend_args_size > 0)
0de36bdb 4125 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4126 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4127 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4128 TREE_SIDE_EFFECTS (t) = 1;
4129 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4130
4131 /* Find the __skip area. */
4132 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4133 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4134 size_int (crtl->args.pretend_args_size
0de36bdb 4135 - STACK_POINTER_OFFSET));
75a70cf9 4136 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4137 TREE_SIDE_EFFECTS (t) = 1;
4138 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4139}
4140
4141/* Gimplify va_arg by updating the va_list structure
4142 VALIST as required to retrieve an argument of type
4143 TYPE, and returning that argument.
4144
4145 ret = va_arg(VALIST, TYPE);
4146
4147 generates code equivalent to:
4148
4149 paddedsize = (sizeof(TYPE) + 15) & -16;
4150 if (VALIST.__args + paddedsize > VALIST.__skip
4151 && VALIST.__args <= VALIST.__skip)
4152 addr = VALIST.__skip + 32;
4153 else
4154 addr = VALIST.__args;
4155 VALIST.__args = addr + paddedsize;
4156 ret = *(TYPE *)addr;
4157 */
4158static tree
75a70cf9 4159spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4160 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4161{
4162 tree f_args, f_skip;
4163 tree args, skip;
4164 HOST_WIDE_INT size, rsize;
4165 tree paddedsize, addr, tmp;
4166 bool pass_by_reference_p;
4167
4168 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4169 f_skip = TREE_CHAIN (f_args);
4170
182cf5a9 4171 valist = build_simple_mem_ref (valist);
644459d0 4172 args =
4173 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4174 skip =
4175 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4176
4177 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4178
4179 /* if an object is dynamically sized, a pointer to it is passed
4180 instead of the object itself. */
4181 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4182 false);
4183 if (pass_by_reference_p)
4184 type = build_pointer_type (type);
4185 size = int_size_in_bytes (type);
4186 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4187
4188 /* build conditional expression to calculate addr. The expression
4189 will be gimplified later. */
0de36bdb 4190 paddedsize = size_int (rsize);
75a70cf9 4191 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4192 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4193 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4194 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4195 unshare_expr (skip)));
644459d0 4196
4197 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4198 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4199 size_int (32)), unshare_expr (args));
644459d0 4200
75a70cf9 4201 gimplify_assign (addr, tmp, pre_p);
644459d0 4202
4203 /* update VALIST.__args */
0de36bdb 4204 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4205 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4206
8115f0af 4207 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4208 addr);
644459d0 4209
4210 if (pass_by_reference_p)
4211 addr = build_va_arg_indirect_ref (addr);
4212
4213 return build_va_arg_indirect_ref (addr);
4214}
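
/* Illustrative walk-through of the logic above (hypothetical offsets):
   with __args == sp+48, __skip == sp+80 and a 16-byte argument,
   48 + 16 does not step past __skip, so the argument is read from
   sp+48 and __args advances to sp+64.  Once __args + paddedsize would
   step past __skip while __args is still at or below it, the argument
   is instead read from __skip + 32. */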
4215
4216/* Save parameter registers starting with the register that corresponds
 4217 to the first unnamed parameter. If the first unnamed parameter is
 4218 on the stack then save no registers. Set pretend_args_size to the
4219 amount of space needed to save the registers. */
4220void
4221spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4222 tree type, int *pretend_size, int no_rtl)
4223{
4224 if (!no_rtl)
4225 {
4226 rtx tmp;
4227 int regno;
4228 int offset;
4229 int ncum = *cum;
4230
4231 /* cum currently points to the last named argument, we want to
4232 start at the next argument. */
4233 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4234
4235 offset = -STACK_POINTER_OFFSET;
4236 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4237 {
4238 tmp = gen_frame_mem (V4SImode,
4239 plus_constant (virtual_incoming_args_rtx,
4240 offset));
4241 emit_move_insn (tmp,
4242 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4243 offset += 16;
4244 }
4245 *pretend_size = offset + STACK_POINTER_OFFSET;
4246 }
4247}
4248\f
4249void
4250spu_conditional_register_usage (void)
4251{
4252 if (flag_pic)
4253 {
4254 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4255 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4256 }
644459d0 4257}
4258
9d98604b 4259/* This is called any time we inspect the alignment of a register used
 4260 as an address. */
644459d0 4261static int
9d98604b 4262reg_aligned_for_addr (rtx x)
644459d0 4263{
9d98604b 4264 int regno =
4265 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4266 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4267}
4268
69ced2d6 4269/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4270 into its SYMBOL_REF_FLAGS. */
4271static void
4272spu_encode_section_info (tree decl, rtx rtl, int first)
4273{
4274 default_encode_section_info (decl, rtl, first);
4275
4276 /* If a variable has a forced alignment to < 16 bytes, mark it with
4277 SYMBOL_FLAG_ALIGN1. */
4278 if (TREE_CODE (decl) == VAR_DECL
4279 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4280 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4281}
4282
644459d0 4283/* Return TRUE if we are certain the mem refers to a complete object
4284 which is both 16-byte aligned and padded to a 16-byte boundary. This
4285 would make it safe to store with a single instruction.
4286 We guarantee the alignment and padding for static objects by aligning
4287 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4288 FIXME: We currently cannot guarantee this for objects on the stack
4289 because assign_parm_setup_stack calls assign_stack_local with the
4290 alignment of the parameter mode and in that case the alignment never
4291 gets adjusted by LOCAL_ALIGNMENT. */
4292static int
4293store_with_one_insn_p (rtx mem)
4294{
9d98604b 4295 enum machine_mode mode = GET_MODE (mem);
644459d0 4296 rtx addr = XEXP (mem, 0);
9d98604b 4297 if (mode == BLKmode)
644459d0 4298 return 0;
9d98604b 4299 if (GET_MODE_SIZE (mode) >= 16)
4300 return 1;
644459d0 4301 /* Only static objects. */
4302 if (GET_CODE (addr) == SYMBOL_REF)
4303 {
4304 /* We use the associated declaration to make sure the access is
fa7637bd 4305 referring to the whole object.
644459d0 4306 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4307 if it is necessary. Will there be cases where one exists, and
4308 the other does not? Will there be cases where both exist, but
4309 have different types? */
4310 tree decl = MEM_EXPR (mem);
4311 if (decl
4312 && TREE_CODE (decl) == VAR_DECL
4313 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4314 return 1;
4315 decl = SYMBOL_REF_DECL (addr);
4316 if (decl
4317 && TREE_CODE (decl) == VAR_DECL
4318 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4319 return 1;
4320 }
4321 return 0;
4322}
4323
9d98604b 4324/* Return 1 when the address is not valid for a simple load and store as
4325 required by the '_mov*' patterns. We could make this less strict
 4326 for loads, but we prefer MEMs to look the same so they are more
4327 likely to be merged. */
4328static int
4329address_needs_split (rtx mem)
4330{
4331 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4332 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4333 || !(store_with_one_insn_p (mem)
4334 || mem_is_padded_component_ref (mem))))
4335 return 1;
4336
4337 return 0;
4338}
4339
6cf5579e 4340static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4341static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4342static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4343
4344/* MEM is known to be an __ea qualified memory access. Emit a call to
 4345 fetch the PPU memory to local store, and set DATA_ADDR to its address
 4346 in local store. */
4347
4348static void
4349ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4350{
4351 if (is_store)
4352 {
4353 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4354 if (!cache_fetch_dirty)
4355 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4356 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4357 2, ea_addr, EAmode, ndirty, SImode);
4358 }
4359 else
4360 {
4361 if (!cache_fetch)
4362 cache_fetch = init_one_libfunc ("__cache_fetch");
4363 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4364 1, ea_addr, EAmode);
4365 }
4366}
4367
4368/* Like ea_load_store, but do the cache tag comparison and, for stores,
4369 dirty bit marking, inline.
4370
4371 The cache control data structure is an array of
4372
4373 struct __cache_tag_array
4374 {
4375 unsigned int tag_lo[4];
4376 unsigned int tag_hi[4];
4377 void *data_pointer[4];
4378 int reserved[4];
4379 vector unsigned short dirty_bits[4];
4380 } */
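
/* Illustrative example (hypothetical values): with
   __cache_tag_array_size == 4096, index_mask is 4096 - 128 == 0xf80,
   so an ea address of 0x12345 yields tag_index 0x300, block_off 0x45
   and a tag of 0x12300 to compare against the four tags stored at
   tag_addr. */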
4381
4382static void
4383ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4384{
4385 rtx ea_addr_si;
4386 HOST_WIDE_INT v;
4387 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4388 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4389 rtx index_mask = gen_reg_rtx (SImode);
4390 rtx tag_arr = gen_reg_rtx (Pmode);
4391 rtx splat_mask = gen_reg_rtx (TImode);
4392 rtx splat = gen_reg_rtx (V4SImode);
4393 rtx splat_hi = NULL_RTX;
4394 rtx tag_index = gen_reg_rtx (Pmode);
4395 rtx block_off = gen_reg_rtx (SImode);
4396 rtx tag_addr = gen_reg_rtx (Pmode);
4397 rtx tag = gen_reg_rtx (V4SImode);
4398 rtx cache_tag = gen_reg_rtx (V4SImode);
4399 rtx cache_tag_hi = NULL_RTX;
4400 rtx cache_ptrs = gen_reg_rtx (TImode);
4401 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4402 rtx tag_equal = gen_reg_rtx (V4SImode);
4403 rtx tag_equal_hi = NULL_RTX;
4404 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4405 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4406 rtx eq_index = gen_reg_rtx (SImode);
4407 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4408
4409 if (spu_ea_model != 32)
4410 {
4411 splat_hi = gen_reg_rtx (V4SImode);
4412 cache_tag_hi = gen_reg_rtx (V4SImode);
4413 tag_equal_hi = gen_reg_rtx (V4SImode);
4414 }
4415
4416 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4417 emit_move_insn (tag_arr, tag_arr_sym);
4418 v = 0x0001020300010203LL;
4419 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4420 ea_addr_si = ea_addr;
4421 if (spu_ea_model != 32)
4422 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4423
4424 /* tag_index = ea_addr & (tag_array_size - 128) */
4425 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4426
4427 /* splat ea_addr to all 4 slots. */
4428 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4429 /* Similarly for high 32 bits of ea_addr. */
4430 if (spu_ea_model != 32)
4431 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4432
4433 /* block_off = ea_addr & 127 */
4434 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4435
4436 /* tag_addr = tag_arr + tag_index */
4437 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4438
4439 /* Read cache tags. */
4440 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4441 if (spu_ea_model != 32)
4442 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4443 plus_constant (tag_addr, 16)));
4444
4445 /* tag = ea_addr & -128 */
4446 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4447
4448 /* Read all four cache data pointers. */
4449 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4450 plus_constant (tag_addr, 32)));
4451
4452 /* Compare tags. */
4453 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4454 if (spu_ea_model != 32)
4455 {
4456 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4457 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4458 }
4459
 4460 /* At most one of the tags compares equal, so tag_equal has one
4461 32-bit slot set to all 1's, with the other slots all zero.
4462 gbb picks off low bit from each byte in the 128-bit registers,
4463 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4464 we have a hit. */
4465 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4466 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4467
4468 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4469 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4470
 4471 /* This lets us rotate the corresponding cache data pointer into slot 0
 4472 (rotating by eq_index mod 16 bytes). */
4473 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4474 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4475
4476 /* Add block offset to form final data address. */
4477 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4478
4479 /* Check that we did hit. */
4480 hit_label = gen_label_rtx ();
4481 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4482 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4483 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4484 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4485 hit_ref, pc_rtx)));
4486 /* Say that this branch is very likely to happen. */
4487 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4488 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4489
4490 ea_load_store (mem, is_store, ea_addr, data_addr);
4491 cont_label = gen_label_rtx ();
4492 emit_jump_insn (gen_jump (cont_label));
4493 emit_barrier ();
4494
4495 emit_label (hit_label);
4496
4497 if (is_store)
4498 {
4499 HOST_WIDE_INT v_hi;
4500 rtx dirty_bits = gen_reg_rtx (TImode);
4501 rtx dirty_off = gen_reg_rtx (SImode);
4502 rtx dirty_128 = gen_reg_rtx (TImode);
4503 rtx neg_block_off = gen_reg_rtx (SImode);
4504
4505 /* Set up mask with one dirty bit per byte of the mem we are
4506 writing, starting from top bit. */
4507 v_hi = v = -1;
4508 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4509 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4510 {
4511 v_hi = v;
4512 v = 0;
4513 }
4514 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4515
4516 /* Form index into cache dirty_bits. eq_index is one of
4517 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4518 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4519 offset to each of the four dirty_bits elements. */
4520 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4521
4522 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4523
4524 /* Rotate bit mask to proper bit. */
4525 emit_insn (gen_negsi2 (neg_block_off, block_off));
4526 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4527 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4528
4529 /* Or in the new dirty bits. */
4530 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4531
4532 /* Store. */
4533 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4534 }
4535
4536 emit_label (cont_label);
4537}
4538
4539static rtx
4540expand_ea_mem (rtx mem, bool is_store)
4541{
4542 rtx ea_addr;
4543 rtx data_addr = gen_reg_rtx (Pmode);
4544 rtx new_mem;
4545
4546 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4547 if (optimize_size || optimize == 0)
4548 ea_load_store (mem, is_store, ea_addr, data_addr);
4549 else
4550 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4551
4552 if (ea_alias_set == -1)
4553 ea_alias_set = new_alias_set ();
4554
4555 /* We generate a new MEM RTX to refer to the copy of the data
4556 in the cache. We do not copy memory attributes (except the
4557 alignment) from the original MEM, as they may no longer apply
4558 to the cache copy. */
4559 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4560 set_mem_alias_set (new_mem, ea_alias_set);
4561 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4562
4563 return new_mem;
4564}
4565
644459d0 4566int
4567spu_expand_mov (rtx * ops, enum machine_mode mode)
4568{
4569 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4570 abort ();
4571
4572 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4573 {
4574 rtx from = SUBREG_REG (ops[1]);
8d72495d 4575 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4576
4577 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4578 && GET_MODE_CLASS (imode) == MODE_INT
4579 && subreg_lowpart_p (ops[1]));
4580
4581 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4582 imode = SImode;
4583 if (imode != GET_MODE (from))
4584 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4585
4586 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4587 {
d6bf3b14 4588 enum insn_code icode = convert_optab_handler (trunc_optab,
4589 mode, imode);
644459d0 4590 emit_insn (GEN_FCN (icode) (ops[0], from));
4591 }
4592 else
4593 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4594 return 1;
4595 }
4596
4597 /* At least one of the operands needs to be a register. */
4598 if ((reload_in_progress | reload_completed) == 0
4599 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4600 {
4601 rtx temp = force_reg (mode, ops[1]);
4602 emit_move_insn (ops[0], temp);
4603 return 1;
4604 }
4605 if (reload_in_progress || reload_completed)
4606 {
dea01258 4607 if (CONSTANT_P (ops[1]))
4608 return spu_split_immediate (ops);
644459d0 4609 return 0;
4610 }
9d98604b 4611
4612 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4613 extend them. */
4614 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4615 {
9d98604b 4616 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4617 if (val != INTVAL (ops[1]))
644459d0 4618 {
9d98604b 4619 emit_move_insn (ops[0], GEN_INT (val));
4620 return 1;
644459d0 4621 }
4622 }
9d98604b 4623 if (MEM_P (ops[0]))
6cf5579e 4624 {
4625 if (MEM_ADDR_SPACE (ops[0]))
4626 ops[0] = expand_ea_mem (ops[0], true);
4627 return spu_split_store (ops);
4628 }
9d98604b 4629 if (MEM_P (ops[1]))
6cf5579e 4630 {
4631 if (MEM_ADDR_SPACE (ops[1]))
4632 ops[1] = expand_ea_mem (ops[1], false);
4633 return spu_split_load (ops);
4634 }
9d98604b 4635
644459d0 4636 return 0;
4637}
4638
9d98604b 4639static void
4640spu_convert_move (rtx dst, rtx src)
644459d0 4641{
9d98604b 4642 enum machine_mode mode = GET_MODE (dst);
4643 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4644 rtx reg;
4645 gcc_assert (GET_MODE (src) == TImode);
4646 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4647 emit_insn (gen_rtx_SET (VOIDmode, reg,
4648 gen_rtx_TRUNCATE (int_mode,
4649 gen_rtx_LSHIFTRT (TImode, src,
4650 GEN_INT (int_mode == DImode ? 64 : 96)))));
4651 if (int_mode != mode)
4652 {
4653 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4654 emit_move_insn (dst, reg);
4655 }
4656}
644459d0 4657
9d98604b 4658/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4659 the address from SRC and SRC+16. Return a REG or CONST_INT that
4660 specifies how many bytes to rotate the loaded registers, plus any
4661 extra from EXTRA_ROTQBY. The address and rotate amounts are
4662 normalized to improve merging of loads and rotate computations. */
4663static rtx
4664spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4665{
4666 rtx addr = XEXP (src, 0);
4667 rtx p0, p1, rot, addr0, addr1;
4668 int rot_amt;
644459d0 4669
4670 rot = 0;
4671 rot_amt = 0;
9d98604b 4672
4673 if (MEM_ALIGN (src) >= 128)
4674 /* Address is already aligned; simply perform a TImode load. */ ;
4675 else if (GET_CODE (addr) == PLUS)
644459d0 4676 {
4677 /* 8 cases:
4678 aligned reg + aligned reg => lqx
4679 aligned reg + unaligned reg => lqx, rotqby
4680 aligned reg + aligned const => lqd
4681 aligned reg + unaligned const => lqd, rotqbyi
4682 unaligned reg + aligned reg => lqx, rotqby
4683 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4684 unaligned reg + aligned const => lqd, rotqby
4685 unaligned reg + unaligned const -> not allowed by legitimate address
4686 */
4687 p0 = XEXP (addr, 0);
4688 p1 = XEXP (addr, 1);
9d98604b 4689 if (!reg_aligned_for_addr (p0))
644459d0 4690 {
9d98604b 4691 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4692 {
9d98604b 4693 rot = gen_reg_rtx (SImode);
4694 emit_insn (gen_addsi3 (rot, p0, p1));
4695 }
4696 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4697 {
4698 if (INTVAL (p1) > 0
4699 && REG_POINTER (p0)
4700 && INTVAL (p1) * BITS_PER_UNIT
4701 < REGNO_POINTER_ALIGN (REGNO (p0)))
4702 {
4703 rot = gen_reg_rtx (SImode);
4704 emit_insn (gen_addsi3 (rot, p0, p1));
4705 addr = p0;
4706 }
4707 else
4708 {
4709 rtx x = gen_reg_rtx (SImode);
4710 emit_move_insn (x, p1);
4711 if (!spu_arith_operand (p1, SImode))
4712 p1 = x;
4713 rot = gen_reg_rtx (SImode);
4714 emit_insn (gen_addsi3 (rot, p0, p1));
4715 addr = gen_rtx_PLUS (Pmode, p0, x);
4716 }
644459d0 4717 }
4718 else
4719 rot = p0;
4720 }
4721 else
4722 {
4723 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4724 {
4725 rot_amt = INTVAL (p1) & 15;
9d98604b 4726 if (INTVAL (p1) & -16)
4727 {
4728 p1 = GEN_INT (INTVAL (p1) & -16);
4729 addr = gen_rtx_PLUS (SImode, p0, p1);
4730 }
4731 else
4732 addr = p0;
644459d0 4733 }
9d98604b 4734 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4735 rot = p1;
4736 }
4737 }
9d98604b 4738 else if (REG_P (addr))
644459d0 4739 {
9d98604b 4740 if (!reg_aligned_for_addr (addr))
644459d0 4741 rot = addr;
4742 }
4743 else if (GET_CODE (addr) == CONST)
4744 {
4745 if (GET_CODE (XEXP (addr, 0)) == PLUS
4746 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4747 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4748 {
4749 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4750 if (rot_amt & -16)
4751 addr = gen_rtx_CONST (Pmode,
4752 gen_rtx_PLUS (Pmode,
4753 XEXP (XEXP (addr, 0), 0),
4754 GEN_INT (rot_amt & -16)));
4755 else
4756 addr = XEXP (XEXP (addr, 0), 0);
4757 }
4758 else
9d98604b 4759 {
4760 rot = gen_reg_rtx (Pmode);
4761 emit_move_insn (rot, addr);
4762 }
644459d0 4763 }
4764 else if (GET_CODE (addr) == CONST_INT)
4765 {
4766 rot_amt = INTVAL (addr);
4767 addr = GEN_INT (rot_amt & -16);
4768 }
4769 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4770 {
4771 rot = gen_reg_rtx (Pmode);
4772 emit_move_insn (rot, addr);
4773 }
644459d0 4774
9d98604b 4775 rot_amt += extra_rotby;
644459d0 4776
4777 rot_amt &= 15;
4778
4779 if (rot && rot_amt)
4780 {
9d98604b 4781 rtx x = gen_reg_rtx (SImode);
4782 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4783 rot = x;
644459d0 4784 rot_amt = 0;
4785 }
9d98604b 4786 if (!rot && rot_amt)
4787 rot = GEN_INT (rot_amt);
4788
4789 addr0 = copy_rtx (addr);
4790 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4791 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4792
4793 if (dst1)
4794 {
4795 addr1 = plus_constant (copy_rtx (addr), 16);
4796 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4797 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4798 }
644459d0 4799
9d98604b 4800 return rot;
4801}
4802
4803int
4804spu_split_load (rtx * ops)
4805{
4806 enum machine_mode mode = GET_MODE (ops[0]);
4807 rtx addr, load, rot;
4808 int rot_amt;
644459d0 4809
9d98604b 4810 if (GET_MODE_SIZE (mode) >= 16)
4811 return 0;
644459d0 4812
9d98604b 4813 addr = XEXP (ops[1], 0);
4814 gcc_assert (GET_CODE (addr) != AND);
4815
4816 if (!address_needs_split (ops[1]))
4817 {
4818 ops[1] = change_address (ops[1], TImode, addr);
4819 load = gen_reg_rtx (TImode);
4820 emit_insn (gen__movti (load, ops[1]));
4821 spu_convert_move (ops[0], load);
4822 return 1;
4823 }
4824
4825 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4826
4827 load = gen_reg_rtx (TImode);
4828 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4829
4830 if (rot)
4831 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4832
9d98604b 4833 spu_convert_move (ops[0], load);
4834 return 1;
644459d0 4835}
4836
9d98604b 4837int
644459d0 4838spu_split_store (rtx * ops)
4839{
4840 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4841 rtx reg;
644459d0 4842 rtx addr, p0, p1, p1_lo, smem;
4843 int aform;
4844 int scalar;
4845
9d98604b 4846 if (GET_MODE_SIZE (mode) >= 16)
4847 return 0;
4848
644459d0 4849 addr = XEXP (ops[0], 0);
9d98604b 4850 gcc_assert (GET_CODE (addr) != AND);
4851
4852 if (!address_needs_split (ops[0]))
4853 {
4854 reg = gen_reg_rtx (TImode);
4855 emit_insn (gen_spu_convert (reg, ops[1]));
4856 ops[0] = change_address (ops[0], TImode, addr);
4857 emit_move_insn (ops[0], reg);
4858 return 1;
4859 }
644459d0 4860
4861 if (GET_CODE (addr) == PLUS)
4862 {
4863 /* 8 cases:
4864 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4865 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4866 aligned reg + aligned const => lqd, c?d, shuf, stqx
4867 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4868 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4869 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4870 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4871 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4872 */
4873 aform = 0;
4874 p0 = XEXP (addr, 0);
4875 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4876 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4877 {
4878 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4879 if (reg_aligned_for_addr (p0))
4880 {
4881 p1 = GEN_INT (INTVAL (p1) & -16);
4882 if (p1 == const0_rtx)
4883 addr = p0;
4884 else
4885 addr = gen_rtx_PLUS (SImode, p0, p1);
4886 }
4887 else
4888 {
4889 rtx x = gen_reg_rtx (SImode);
4890 emit_move_insn (x, p1);
4891 addr = gen_rtx_PLUS (SImode, p0, x);
4892 }
644459d0 4893 }
4894 }
9d98604b 4895 else if (REG_P (addr))
644459d0 4896 {
4897 aform = 0;
4898 p0 = addr;
4899 p1 = p1_lo = const0_rtx;
4900 }
4901 else
4902 {
4903 aform = 1;
4904 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4905 p1 = 0; /* aform doesn't use p1 */
4906 p1_lo = addr;
4907 if (ALIGNED_SYMBOL_REF_P (addr))
4908 p1_lo = const0_rtx;
9d98604b 4909 else if (GET_CODE (addr) == CONST
4910 && GET_CODE (XEXP (addr, 0)) == PLUS
4911 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4912 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4913 {
9d98604b 4914 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4915 if ((v & -16) != 0)
4916 addr = gen_rtx_CONST (Pmode,
4917 gen_rtx_PLUS (Pmode,
4918 XEXP (XEXP (addr, 0), 0),
4919 GEN_INT (v & -16)));
4920 else
4921 addr = XEXP (XEXP (addr, 0), 0);
4922 p1_lo = GEN_INT (v & 15);
644459d0 4923 }
4924 else if (GET_CODE (addr) == CONST_INT)
4925 {
4926 p1_lo = GEN_INT (INTVAL (addr) & 15);
4927 addr = GEN_INT (INTVAL (addr) & -16);
4928 }
9d98604b 4929 else
4930 {
4931 p1_lo = gen_reg_rtx (SImode);
4932 emit_move_insn (p1_lo, addr);
4933 }
644459d0 4934 }
4935
9d98604b 4936 reg = gen_reg_rtx (TImode);
e04cf423 4937
644459d0 4938 scalar = store_with_one_insn_p (ops[0]);
4939 if (!scalar)
4940 {
 4941 /* We could copy the flags from the ops[0] MEM to mem here, but
 4942 we don't because we want this load to be optimized away if
 4943 possible, and copying the flags would prevent that in certain
 4944 cases, e.g. consider the volatile flag. */
4945
9d98604b 4946 rtx pat = gen_reg_rtx (TImode);
e04cf423 4947 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4948 set_mem_alias_set (lmem, 0);
4949 emit_insn (gen_movti (reg, lmem));
644459d0 4950
9d98604b 4951 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4952 p0 = stack_pointer_rtx;
4953 if (!p1_lo)
4954 p1_lo = const0_rtx;
4955
4956 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4957 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4958 }
644459d0 4959 else
4960 {
4961 if (GET_CODE (ops[1]) == REG)
4962 emit_insn (gen_spu_convert (reg, ops[1]));
4963 else if (GET_CODE (ops[1]) == SUBREG)
4964 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4965 else
4966 abort ();
4967 }
4968
4969 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4970 emit_insn (gen_ashlti3
4971 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4972
9d98604b 4973 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4974 /* We can't use the previous alias set because the memory has changed
4975 size and can potentially overlap objects of other types. */
4976 set_mem_alias_set (smem, 0);
4977
e04cf423 4978 emit_insn (gen_movti (smem, reg));
9d98604b 4979 return 1;
644459d0 4980}
4981
4982/* Return TRUE if X is MEM which is a struct member reference
4983 and the member can safely be loaded and stored with a single
4984 instruction because it is padded. */
4985static int
4986mem_is_padded_component_ref (rtx x)
4987{
4988 tree t = MEM_EXPR (x);
4989 tree r;
4990 if (!t || TREE_CODE (t) != COMPONENT_REF)
4991 return 0;
4992 t = TREE_OPERAND (t, 1);
4993 if (!t || TREE_CODE (t) != FIELD_DECL
4994 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4995 return 0;
4996 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4997 r = DECL_FIELD_CONTEXT (t);
4998 if (!r || TREE_CODE (r) != RECORD_TYPE)
4999 return 0;
5000 /* Make sure they are the same mode */
5001 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5002 return 0;
 5003 /* If there are no following fields, then the field alignment ensures
fa7637bd 5004 the structure is padded to that alignment, which means this field is
 5005 padded too. */
644459d0 5006 if (TREE_CHAIN (t) == 0)
5007 return 1;
5008 /* If the following field is also aligned then this field will be
5009 padded. */
5010 t = TREE_CHAIN (t);
5011 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5012 return 1;
5013 return 0;
5014}
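
/* Illustrative example (hypothetical declaration):
     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };
   Field a is 16-byte aligned and the following field is too, so the
   padding after a belongs to a, and a store to it can safely write a
   whole quadword with a single instruction. */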
5015
c7b91b14 5016/* Parse the -mfixed-range= option string. */
5017static void
5018fix_range (const char *const_str)
5019{
5020 int i, first, last;
5021 char *str, *dash, *comma;
5022
 5023 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5024 REG2 are either register names or register numbers. The effect
5025 of this option is to mark the registers in the range from REG1 to
5026 REG2 as ``fixed'' so they won't be used by the compiler. */
5027
5028 i = strlen (const_str);
5029 str = (char *) alloca (i + 1);
5030 memcpy (str, const_str, i + 1);
5031
5032 while (1)
5033 {
5034 dash = strchr (str, '-');
5035 if (!dash)
5036 {
5037 warning (0, "value of -mfixed-range must have form REG1-REG2");
5038 return;
5039 }
5040 *dash = '\0';
5041 comma = strchr (dash + 1, ',');
5042 if (comma)
5043 *comma = '\0';
5044
5045 first = decode_reg_name (str);
5046 if (first < 0)
5047 {
5048 warning (0, "unknown register name: %s", str);
5049 return;
5050 }
5051
5052 last = decode_reg_name (dash + 1);
5053 if (last < 0)
5054 {
5055 warning (0, "unknown register name: %s", dash + 1);
5056 return;
5057 }
5058
5059 *dash = '-';
5060
5061 if (first > last)
5062 {
5063 warning (0, "%s-%s is an empty range", str, dash + 1);
5064 return;
5065 }
5066
5067 for (i = first; i <= last; ++i)
5068 fixed_regs[i] = call_used_regs[i] = 1;
5069
5070 if (!comma)
5071 break;
5072
5073 *comma = ',';
5074 str = comma + 1;
5075 }
5076}
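
/* For example, -mfixed-range=80-127 (the register numbers are only
   illustrative) marks registers 80 through 127 as fixed so the
   compiler will not allocate them. */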
5077
644459d0 5078/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5079 can be generated using the fsmbi instruction. */
5080int
5081fsmbi_const_p (rtx x)
5082{
dea01258 5083 if (CONSTANT_P (x))
5084 {
5df189be 5085 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5086 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5087 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5088 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5089 }
5090 return 0;
5091}
5092
5093/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5094 can be generated using the cbd, chd, cwd or cdd instruction. */
5095int
5096cpat_const_p (rtx x, enum machine_mode mode)
5097{
5098 if (CONSTANT_P (x))
5099 {
5100 enum immediate_class c = classify_immediate (x, mode);
5101 return c == IC_CPAT;
5102 }
5103 return 0;
5104}
644459d0 5105
dea01258 5106rtx
5107gen_cpat_const (rtx * ops)
5108{
5109 unsigned char dst[16];
5110 int i, offset, shift, isize;
5111 if (GET_CODE (ops[3]) != CONST_INT
5112 || GET_CODE (ops[2]) != CONST_INT
5113 || (GET_CODE (ops[1]) != CONST_INT
5114 && GET_CODE (ops[1]) != REG))
5115 return 0;
5116 if (GET_CODE (ops[1]) == REG
5117 && (!REG_POINTER (ops[1])
5118 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5119 return 0;
644459d0 5120
5121 for (i = 0; i < 16; i++)
dea01258 5122 dst[i] = i + 16;
5123 isize = INTVAL (ops[3]);
5124 if (isize == 1)
5125 shift = 3;
5126 else if (isize == 2)
5127 shift = 2;
5128 else
5129 shift = 0;
5130 offset = (INTVAL (ops[2]) +
5131 (GET_CODE (ops[1]) ==
5132 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5133 for (i = 0; i < isize; i++)
5134 dst[offset + i] = i + shift;
5135 return array_to_constant (TImode, dst);
644459d0 5136}
5137
5138/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5139 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5140 than 16 bytes, the value is repeated across the rest of the array. */
5141void
5142constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5143{
5144 HOST_WIDE_INT val;
5145 int i, j, first;
5146
5147 memset (arr, 0, 16);
5148 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5149 if (GET_CODE (x) == CONST_INT
5150 || (GET_CODE (x) == CONST_DOUBLE
5151 && (mode == SFmode || mode == DFmode)))
5152 {
5153 gcc_assert (mode != VOIDmode && mode != BLKmode);
5154
5155 if (GET_CODE (x) == CONST_DOUBLE)
5156 val = const_double_to_hwint (x);
5157 else
5158 val = INTVAL (x);
5159 first = GET_MODE_SIZE (mode) - 1;
5160 for (i = first; i >= 0; i--)
5161 {
5162 arr[i] = val & 0xff;
5163 val >>= 8;
5164 }
5165 /* Splat the constant across the whole array. */
5166 for (j = 0, i = first + 1; i < 16; i++)
5167 {
5168 arr[i] = arr[j];
5169 j = (j == first) ? 0 : j + 1;
5170 }
5171 }
5172 else if (GET_CODE (x) == CONST_DOUBLE)
5173 {
5174 val = CONST_DOUBLE_LOW (x);
5175 for (i = 15; i >= 8; i--)
5176 {
5177 arr[i] = val & 0xff;
5178 val >>= 8;
5179 }
5180 val = CONST_DOUBLE_HIGH (x);
5181 for (i = 7; i >= 0; i--)
5182 {
5183 arr[i] = val & 0xff;
5184 val >>= 8;
5185 }
5186 }
5187 else if (GET_CODE (x) == CONST_VECTOR)
5188 {
5189 int units;
5190 rtx elt;
5191 mode = GET_MODE_INNER (mode);
5192 units = CONST_VECTOR_NUNITS (x);
5193 for (i = 0; i < units; i++)
5194 {
5195 elt = CONST_VECTOR_ELT (x, i);
5196 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5197 {
5198 if (GET_CODE (elt) == CONST_DOUBLE)
5199 val = const_double_to_hwint (elt);
5200 else
5201 val = INTVAL (elt);
5202 first = GET_MODE_SIZE (mode) - 1;
5203 if (first + i * GET_MODE_SIZE (mode) > 16)
5204 abort ();
5205 for (j = first; j >= 0; j--)
5206 {
5207 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5208 val >>= 8;
5209 }
5210 }
5211 }
5212 }
5213 else
5214 gcc_unreachable();
5215}
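
/* Illustrative example: for the SImode constant 0x12345678 this
   produces arr[] = { 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, ... },
   i.e. the 4-byte value splatted across the 16-byte array. */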
5216
5217/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5218 smaller than 16 bytes, use the bytes that would represent that value
5219 in a register, e.g., for QImode return the value of arr[3]. */
5220rtx
e96f2783 5221array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5222{
5223 enum machine_mode inner_mode;
5224 rtvec v;
5225 int units, size, i, j, k;
5226 HOST_WIDE_INT val;
5227
5228 if (GET_MODE_CLASS (mode) == MODE_INT
5229 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5230 {
5231 j = GET_MODE_SIZE (mode);
5232 i = j < 4 ? 4 - j : 0;
5233 for (val = 0; i < j; i++)
5234 val = (val << 8) | arr[i];
5235 val = trunc_int_for_mode (val, mode);
5236 return GEN_INT (val);
5237 }
5238
5239 if (mode == TImode)
5240 {
5241 HOST_WIDE_INT high;
5242 for (i = high = 0; i < 8; i++)
5243 high = (high << 8) | arr[i];
5244 for (i = 8, val = 0; i < 16; i++)
5245 val = (val << 8) | arr[i];
5246 return immed_double_const (val, high, TImode);
5247 }
5248 if (mode == SFmode)
5249 {
5250 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5251 val = trunc_int_for_mode (val, SImode);
171b6d22 5252 return hwint_to_const_double (SFmode, val);
644459d0 5253 }
5254 if (mode == DFmode)
5255 {
1f915911 5256 for (i = 0, val = 0; i < 8; i++)
5257 val = (val << 8) | arr[i];
171b6d22 5258 return hwint_to_const_double (DFmode, val);
644459d0 5259 }
5260
5261 if (!VECTOR_MODE_P (mode))
5262 abort ();
5263
5264 units = GET_MODE_NUNITS (mode);
5265 size = GET_MODE_UNIT_SIZE (mode);
5266 inner_mode = GET_MODE_INNER (mode);
5267 v = rtvec_alloc (units);
5268
5269 for (k = i = 0; i < units; ++i)
5270 {
5271 val = 0;
5272 for (j = 0; j < size; j++, k++)
5273 val = (val << 8) | arr[k];
5274
5275 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5276 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5277 else
5278 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5279 }
5280 if (k > 16)
5281 abort ();
5282
5283 return gen_rtx_CONST_VECTOR (mode, v);
5284}
5285
5286static void
5287reloc_diagnostic (rtx x)
5288{
712d2297 5289 tree decl = 0;
644459d0 5290 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5291 return;
5292
5293 if (GET_CODE (x) == SYMBOL_REF)
5294 decl = SYMBOL_REF_DECL (x);
5295 else if (GET_CODE (x) == CONST
5296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5297 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5298
5299 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5300 if (decl && !DECL_P (decl))
5301 decl = 0;
5302
644459d0 5303 /* The decl could be a string constant. */
5304 if (decl && DECL_P (decl))
712d2297 5305 {
5306 location_t loc;
5307 /* We use last_assemble_variable_decl to get line information. It's
5308 not always going to be right and might not even be close, but will
5309 be right for the more common cases. */
5310 if (!last_assemble_variable_decl || in_section == ctors_section)
5311 loc = DECL_SOURCE_LOCATION (decl);
5312 else
5313 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5314
712d2297 5315 if (TARGET_WARN_RELOC)
5316 warning_at (loc, 0,
5317 "creating run-time relocation for %qD", decl);
5318 else
5319 error_at (loc,
5320 "creating run-time relocation for %qD", decl);
5321 }
5322 else
5323 {
5324 if (TARGET_WARN_RELOC)
5325 warning_at (input_location, 0, "creating run-time relocation");
5326 else
5327 error_at (input_location, "creating run-time relocation");
5328 }
644459d0 5329}
5330
5331/* Hook into assemble_integer so we can generate an error for run-time
5332 relocations. The SPU ABI disallows them. */
5333static bool
5334spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5335{
5336 /* By default run-time relocations aren't supported, but we allow them
 5337 in case users support them in their own run-time loader. And we provide
5338 a warning for those users that don't. */
5339 if ((GET_CODE (x) == SYMBOL_REF)
5340 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5341 reloc_diagnostic (x);
5342
5343 return default_assemble_integer (x, size, aligned_p);
5344}
5345
5346static void
5347spu_asm_globalize_label (FILE * file, const char *name)
5348{
5349 fputs ("\t.global\t", file);
5350 assemble_name (file, name);
5351 fputs ("\n", file);
5352}
5353
5354static bool
f529eb25 5355spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5356 bool speed ATTRIBUTE_UNUSED)
644459d0 5357{
5358 enum machine_mode mode = GET_MODE (x);
5359 int cost = COSTS_N_INSNS (2);
5360
5361 /* Folding to a CONST_VECTOR will use extra space but there might
5362 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5363 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5364 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5365 because this cost will only be compared against a single insn.
5366 if (code == CONST_VECTOR)
5367 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5368 */
5369
5370 /* Use defaults for float operations. Not accurate but good enough. */
5371 if (mode == DFmode)
5372 {
5373 *total = COSTS_N_INSNS (13);
5374 return true;
5375 }
5376 if (mode == SFmode)
5377 {
5378 *total = COSTS_N_INSNS (6);
5379 return true;
5380 }
5381 switch (code)
5382 {
5383 case CONST_INT:
5384 if (satisfies_constraint_K (x))
5385 *total = 0;
5386 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5387 *total = COSTS_N_INSNS (1);
5388 else
5389 *total = COSTS_N_INSNS (3);
5390 return true;
5391
5392 case CONST:
5393 *total = COSTS_N_INSNS (3);
5394 return true;
5395
5396 case LABEL_REF:
5397 case SYMBOL_REF:
5398 *total = COSTS_N_INSNS (0);
5399 return true;
5400
5401 case CONST_DOUBLE:
5402 *total = COSTS_N_INSNS (5);
5403 return true;
5404
5405 case FLOAT_EXTEND:
5406 case FLOAT_TRUNCATE:
5407 case FLOAT:
5408 case UNSIGNED_FLOAT:
5409 case FIX:
5410 case UNSIGNED_FIX:
5411 *total = COSTS_N_INSNS (7);
5412 return true;
5413
5414 case PLUS:
5415 if (mode == TImode)
5416 {
5417 *total = COSTS_N_INSNS (9);
5418 return true;
5419 }
5420 break;
5421
5422 case MULT:
5423 cost =
5424 GET_CODE (XEXP (x, 0)) ==
5425 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5426 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5427 {
5428 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5429 {
5430 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5431 cost = COSTS_N_INSNS (14);
5432 if ((val & 0xffff) == 0)
5433 cost = COSTS_N_INSNS (9);
5434 else if (val > 0 && val < 0x10000)
5435 cost = COSTS_N_INSNS (11);
5436 }
5437 }
5438 *total = cost;
5439 return true;
5440 case DIV:
5441 case UDIV:
5442 case MOD:
5443 case UMOD:
5444 *total = COSTS_N_INSNS (20);
5445 return true;
5446 case ROTATE:
5447 case ROTATERT:
5448 case ASHIFT:
5449 case ASHIFTRT:
5450 case LSHIFTRT:
5451 *total = COSTS_N_INSNS (4);
5452 return true;
5453 case UNSPEC:
5454 if (XINT (x, 1) == UNSPEC_CONVERT)
5455 *total = COSTS_N_INSNS (0);
5456 else
5457 *total = COSTS_N_INSNS (4);
5458 return true;
5459 }
5460 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5461 if (GET_MODE_CLASS (mode) == MODE_INT
5462 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5463 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5464 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5465 *total = cost;
5466 return true;
5467}
5468
1bd43494 5469static enum machine_mode
5470spu_unwind_word_mode (void)
644459d0 5471{
1bd43494 5472 return SImode;
644459d0 5473}
5474
5475/* Decide whether we can make a sibling call to a function. DECL is the
5476 declaration of the function being targeted by the call and EXP is the
5477 CALL_EXPR representing the call. */
5478static bool
5479spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5480{
5481 return decl && !TARGET_LARGE_MEM;
5482}
5483
5484/* We need to correctly update the back chain pointer and the Available
 5485 Stack Size (which is in the second slot of the sp register). */
5486void
5487spu_allocate_stack (rtx op0, rtx op1)
5488{
5489 HOST_WIDE_INT v;
5490 rtx chain = gen_reg_rtx (V4SImode);
5491 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5492 rtx sp = gen_reg_rtx (V4SImode);
5493 rtx splatted = gen_reg_rtx (V4SImode);
5494 rtx pat = gen_reg_rtx (TImode);
5495
5496 /* copy the back chain so we can save it back again. */
5497 emit_move_insn (chain, stack_bot);
5498
5499 op1 = force_reg (SImode, op1);
5500
5501 v = 0x1020300010203ll;
5502 emit_move_insn (pat, immed_double_const (v, v, TImode));
5503 emit_insn (gen_shufb (splatted, op1, op1, pat));
5504
5505 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5506 emit_insn (gen_subv4si3 (sp, sp, splatted));
5507
5508 if (flag_stack_check)
5509 {
5510 rtx avail = gen_reg_rtx(SImode);
5511 rtx result = gen_reg_rtx(SImode);
5512 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5513 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5514 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5515 }
5516
5517 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5518
5519 emit_move_insn (stack_bot, chain);
5520
5521 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5522}
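
/* A sketch of what the shuffle above does, assuming the usual shufb
   control-byte numbering (0x00-0x0f select bytes of the first source):
   the TImode pattern is the byte sequence 00 01 02 03 repeated four
   times, so SPLATTED ends up with the requested size from OP1 in every
   word slot.  The single subv4si then updates both the stack pointer
   in slot 0 and the Available Stack Size in slot 1 at once.  */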
5523
5524void
5525spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5526{
5527 static unsigned char arr[16] =
5528 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5529 rtx temp = gen_reg_rtx (SImode);
5530 rtx temp2 = gen_reg_rtx (SImode);
5531 rtx temp3 = gen_reg_rtx (V4SImode);
5532 rtx temp4 = gen_reg_rtx (V4SImode);
5533 rtx pat = gen_reg_rtx (TImode);
5534 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5535
5536 /* Restore the backchain from the first word, sp from the second. */
5537 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5538 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5539
5540 emit_move_insn (pat, array_to_constant (TImode, arr));
5541
5542 /* Compute Available Stack Size for sp */
5543 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5544 emit_insn (gen_shufb (temp3, temp, temp, pat));
5545
5546 /* Compute Available Stack Size for back chain */
5547 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5548 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5549 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5550
5551 emit_insn (gen_addv4si3 (sp, sp, temp3));
5552 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5553}
5554
5555static void
5556spu_init_libfuncs (void)
5557{
5558 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5559 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5560 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5561 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5562 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5563 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5564 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5565 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5566 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5567 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5568 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5569
5570 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5571 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5572
5573 set_optab_libfunc (smul_optab, TImode, "__multi3");
5574 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5575 set_optab_libfunc (smod_optab, TImode, "__modti3");
5576 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5577 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5578 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5579}
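
/* Illustrative sketch of the effect of the registrations above: with
   the DImode optabs pointed at the libgcc names, a plain 64-bit
   division in user code such as

     long long div64 (long long a, long long b) { return a / b; }

   is expanded as a call to __divdi3 instead of inline code, and the
   TImode registrations do the same for 128-bit arithmetic.  */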
5580
5581/* Make a subreg, stripping any existing subreg. We could possibly just
5582 call simplify_subreg, but in this case we know what we want. */
5583rtx
5584spu_gen_subreg (enum machine_mode mode, rtx x)
5585{
5586 if (GET_CODE (x) == SUBREG)
5587 x = SUBREG_REG (x);
5588 if (GET_MODE (x) == mode)
5589 return x;
5590 return gen_rtx_SUBREG (mode, x, 0);
5591}
5592
5593static bool
fb80456a 5594spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5595{
5596 return (TYPE_MODE (type) == BLKmode
5597 && ((type) == 0
5598 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5599 || int_size_in_bytes (type) >
5600 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5601}
5602\f
5603/* Create the built-in types and functions */
5604
c2233b46 5605enum spu_function_code
5606{
5607#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5608#include "spu-builtins.def"
5609#undef DEF_BUILTIN
5610 NUM_SPU_BUILTINS
5611};
5612
5613extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5614
644459d0 5615struct spu_builtin_description spu_builtins[] = {
5616#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5617 {fcode, icode, name, type, params},
644459d0 5618#include "spu-builtins.def"
5619#undef DEF_BUILTIN
5620};
5621
0c5c4d59 5622static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5623
5624/* Returns the spu builtin decl for CODE. */
e6925042 5625
5626static tree
5627spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5628{
5629 if (code >= NUM_SPU_BUILTINS)
5630 return error_mark_node;
5631
0c5c4d59 5632 return spu_builtin_decls[code];
e6925042 5633}
5634
5635
644459d0 5636static void
5637spu_init_builtins (void)
5638{
5639 struct spu_builtin_description *d;
5640 unsigned int i;
5641
5642 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5643 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5644 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5645 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5646 V4SF_type_node = build_vector_type (float_type_node, 4);
5647 V2DF_type_node = build_vector_type (double_type_node, 2);
5648
5649 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5650 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5651 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5652 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5653
c4ecce0c 5654 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5655
5656 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5657 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5658 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5659 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5660 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5661 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5662 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5663 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5664 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5665 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5666 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5667 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5668
5669 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5670 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5671 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5672 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5673 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5674 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5675 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5676 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5677
5678 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5679 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5680
5681 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5682
5683 spu_builtin_types[SPU_BTI_PTR] =
5684 build_pointer_type (build_qualified_type
5685 (void_type_node,
5686 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5687
5688 /* For each builtin we build a new prototype. The tree code will make
5689 sure nodes are shared. */
5690 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5691 {
5692 tree p;
5693 char name[64]; /* build_function will make a copy. */
5694 int parm;
5695
5696 if (d->name == 0)
5697 continue;
5698
5dfbd18f 5699 /* Find last parm. */
644459d0 5700 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5701 ;
644459d0 5702
5703 p = void_list_node;
5704 while (parm > 1)
5705 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5706
5707 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5708
5709 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5710 spu_builtin_decls[i] =
644459d0 5711 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5712 NULL, NULL_TREE);
a76866d3 5713 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5714 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5715
5716 /* These builtins don't throw. */
0c5c4d59 5717 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5718 }
5719}
5720
cf31d486 5721void
5722spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5723{
5724 static unsigned char arr[16] =
5725 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5726
5727 rtx temp = gen_reg_rtx (Pmode);
5728 rtx temp2 = gen_reg_rtx (V4SImode);
5729 rtx temp3 = gen_reg_rtx (V4SImode);
5730 rtx pat = gen_reg_rtx (TImode);
5731 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5732
5733 emit_move_insn (pat, array_to_constant (TImode, arr));
5734
5735 /* Restore the sp. */
5736 emit_move_insn (temp, op1);
5737 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5738
5739 /* Compute available stack size for sp. */
5740 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5741 emit_insn (gen_shufb (temp3, temp, temp, pat));
5742
5743 emit_insn (gen_addv4si3 (sp, sp, temp3));
5744 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5745}
5746
644459d0 5747int
5748spu_safe_dma (HOST_WIDE_INT channel)
5749{
006e4b96 5750 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5751}
5752
5753void
5754spu_builtin_splats (rtx ops[])
5755{
5756 enum machine_mode mode = GET_MODE (ops[0]);
5757 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5758 {
5759 unsigned char arr[16];
5760 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5761 emit_move_insn (ops[0], array_to_constant (mode, arr));
5762 }
644459d0 5763 else
5764 {
5765 rtx reg = gen_reg_rtx (TImode);
5766 rtx shuf;
5767 if (GET_CODE (ops[1]) != REG
5768 && GET_CODE (ops[1]) != SUBREG)
5769 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5770 switch (mode)
5771 {
5772 case V2DImode:
5773 case V2DFmode:
5774 shuf =
5775 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5776 TImode);
5777 break;
5778 case V4SImode:
5779 case V4SFmode:
5780 shuf =
5781 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5782 TImode);
5783 break;
5784 case V8HImode:
5785 shuf =
5786 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5787 TImode);
5788 break;
5789 case V16QImode:
5790 shuf =
5791 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5792 TImode);
5793 break;
5794 default:
5795 abort ();
5796 }
5797 emit_move_insn (reg, shuf);
5798 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5799 }
5800}
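
/* Worked example of the shuffle constants above (illustrative, assuming
   the usual shufb byte numbering): for V4SImode the pattern is
   00 01 02 03 repeated four times, so shufb copies the preferred-slot
   word of OPS[1] into every element.  The V8HImode pattern repeats
   bytes 02 03, the halfword preferred slot, and the V16QImode pattern
   of all 03 bytes replicates the byte in the scalar preferred slot.  */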
5801
5802void
5803spu_builtin_extract (rtx ops[])
5804{
5805 enum machine_mode mode;
5806 rtx rot, from, tmp;
5807
5808 mode = GET_MODE (ops[1]);
5809
5810 if (GET_CODE (ops[2]) == CONST_INT)
5811 {
5812 switch (mode)
5813 {
5814 case V16QImode:
5815 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5816 break;
5817 case V8HImode:
5818 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5819 break;
5820 case V4SFmode:
5821 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5822 break;
5823 case V4SImode:
5824 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5825 break;
5826 case V2DImode:
5827 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5828 break;
5829 case V2DFmode:
5830 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5831 break;
5832 default:
5833 abort ();
5834 }
5835 return;
5836 }
5837
5838 from = spu_gen_subreg (TImode, ops[1]);
5839 rot = gen_reg_rtx (TImode);
5840 tmp = gen_reg_rtx (SImode);
5841
5842 switch (mode)
5843 {
5844 case V16QImode:
5845 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5846 break;
5847 case V8HImode:
5848 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5849 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5850 break;
5851 case V4SFmode:
5852 case V4SImode:
5853 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5854 break;
5855 case V2DImode:
5856 case V2DFmode:
5857 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5858 break;
5859 default:
5860 abort ();
5861 }
5862 emit_insn (gen_rotqby_ti (rot, from, tmp));
5863
5864 emit_insn (gen_spu_convert (ops[0], rot));
5865}
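
/* Rough sketch of the variable-index path above: the rotate count is
   chosen so that the requested element lands in the scalar preferred
   slot.  For a V4SImode source and index I, TMP = 4*I and rotqby
   rotates the quadword left by that many bytes, moving bytes
   4*I..4*I+3 down to bytes 0..3.  For V16QImode, TMP = I - 3 places
   byte I at offset 3, the preferred slot for a byte value.  */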
5866
5867void
5868spu_builtin_insert (rtx ops[])
5869{
5870 enum machine_mode mode = GET_MODE (ops[0]);
5871 enum machine_mode imode = GET_MODE_INNER (mode);
5872 rtx mask = gen_reg_rtx (TImode);
5873 rtx offset;
5874
5875 if (GET_CODE (ops[3]) == CONST_INT)
5876 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5877 else
5878 {
5879 offset = gen_reg_rtx (SImode);
5880 emit_insn (gen_mulsi3
5881 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5882 }
5883 emit_insn (gen_cpat
5884 (mask, stack_pointer_rtx, offset,
5885 GEN_INT (GET_MODE_SIZE (imode))));
5886 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5887}
5888
5889void
5890spu_builtin_promote (rtx ops[])
5891{
5892 enum machine_mode mode, imode;
5893 rtx rot, from, offset;
5894 HOST_WIDE_INT pos;
5895
5896 mode = GET_MODE (ops[0]);
5897 imode = GET_MODE_INNER (mode);
5898
5899 from = gen_reg_rtx (TImode);
5900 rot = spu_gen_subreg (TImode, ops[0]);
5901
5902 emit_insn (gen_spu_convert (from, ops[1]));
5903
5904 if (GET_CODE (ops[2]) == CONST_INT)
5905 {
5906 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5907 if (GET_MODE_SIZE (imode) < 4)
5908 pos += 4 - GET_MODE_SIZE (imode);
5909 offset = GEN_INT (pos & 15);
5910 }
5911 else
5912 {
5913 offset = gen_reg_rtx (SImode);
5914 switch (mode)
5915 {
5916 case V16QImode:
5917 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5918 break;
5919 case V8HImode:
5920 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5921 emit_insn (gen_addsi3 (offset, offset, offset));
5922 break;
5923 case V4SFmode:
5924 case V4SImode:
5925 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5926 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5927 break;
5928 case V2DImode:
5929 case V2DFmode:
5930 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5931 break;
5932 default:
5933 abort ();
5934 }
5935 }
5936 emit_insn (gen_rotqby_ti (rot, from, offset));
5937}
5938
e96f2783 5939static void
5940spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5941{
e96f2783 5942 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5943 rtx shuf = gen_reg_rtx (V4SImode);
5944 rtx insn = gen_reg_rtx (V4SImode);
5945 rtx shufc;
5946 rtx insnc;
5947 rtx mem;
5948
5949 fnaddr = force_reg (SImode, fnaddr);
5950 cxt = force_reg (SImode, cxt);
5951
5952 if (TARGET_LARGE_MEM)
5953 {
5954 rtx rotl = gen_reg_rtx (V4SImode);
5955 rtx mask = gen_reg_rtx (V4SImode);
5956 rtx bi = gen_reg_rtx (SImode);
e96f2783 5957 static unsigned char const shufa[16] = {
644459d0 5958 2, 3, 0, 1, 18, 19, 16, 17,
5959 0, 1, 2, 3, 16, 17, 18, 19
5960 };
e96f2783 5961 static unsigned char const insna[16] = {
644459d0 5962 0x41, 0, 0, 79,
5963 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5964 0x60, 0x80, 0, 79,
5965 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5966 };
5967
5968 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5969 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5970
5971 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5972 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5973 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5974 emit_insn (gen_selb (insn, insnc, rotl, mask));
5975
e96f2783 5976 mem = adjust_address (m_tramp, V4SImode, 0);
5977 emit_move_insn (mem, insn);
644459d0 5978
5979 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5980 mem = adjust_address (m_tramp, Pmode, 16);
5981 emit_move_insn (mem, bi);
644459d0 5982 }
5983 else
5984 {
5985 rtx scxt = gen_reg_rtx (SImode);
5986 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5987 static unsigned char const insna[16] = {
644459d0 5988 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5989 0x30, 0, 0, 0,
5990 0, 0, 0, 0,
5991 0, 0, 0, 0
5992 };
5993
5994 shufc = gen_reg_rtx (TImode);
5995 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5996
5997 /* By or'ing all of cxt with the ila opcode we are assuming cxt
 5998	 fits in 18 bits and the last 4 are zeros.  This will be true if
 5999	 the stack pointer is initialized to 0x3fff0 at program start;
6000 otherwise the ila instruction will be garbage. */
6001
6002 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6003 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6004 emit_insn (gen_cpat
6005 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6006 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6007 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6008
e96f2783 6009 mem = adjust_address (m_tramp, V4SImode, 0);
6010 emit_move_insn (mem, insn);
644459d0 6011 }
6012 emit_insn (gen_sync ());
6013}
6014
6015void
6016spu_expand_sign_extend (rtx ops[])
6017{
6018 unsigned char arr[16];
6019 rtx pat = gen_reg_rtx (TImode);
6020 rtx sign, c;
6021 int i, last;
6022 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6023 if (GET_MODE (ops[1]) == QImode)
6024 {
6025 sign = gen_reg_rtx (HImode);
6026 emit_insn (gen_extendqihi2 (sign, ops[1]));
6027 for (i = 0; i < 16; i++)
6028 arr[i] = 0x12;
6029 arr[last] = 0x13;
6030 }
6031 else
6032 {
6033 for (i = 0; i < 16; i++)
6034 arr[i] = 0x10;
6035 switch (GET_MODE (ops[1]))
6036 {
6037 case HImode:
6038 sign = gen_reg_rtx (SImode);
6039 emit_insn (gen_extendhisi2 (sign, ops[1]));
6040 arr[last] = 0x03;
6041 arr[last - 1] = 0x02;
6042 break;
6043 case SImode:
6044 sign = gen_reg_rtx (SImode);
6045 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6046 for (i = 0; i < 4; i++)
6047 arr[last - i] = 3 - i;
6048 break;
6049 case DImode:
6050 sign = gen_reg_rtx (SImode);
6051 c = gen_reg_rtx (SImode);
6052 emit_insn (gen_spu_convert (c, ops[1]));
6053 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6054 for (i = 0; i < 8; i++)
6055 arr[last - i] = 7 - i;
6056 break;
6057 default:
6058 abort ();
6059 }
6060 }
6061 emit_move_insn (pat, array_to_constant (TImode, arr));
6062 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6063}
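
/* Editorial sketch of the shuffle control bytes above, assuming the
   usual shufb numbering (0x00-0x0f select from the first source,
   0x10-0x1f from the second): for an SImode to DImode extension the
   array becomes 10 10 10 10 00 01 02 03 ..., so bytes 0-3 of the
   result are copies of byte 0 of the sign word (0x00 or 0xff) and
   bytes 4-7 are the original value, leaving the sign-extended
   doubleword in the preferred slot.  */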
6064
6065/* expand vector initialization. If there are any constant parts,
6066 load constant parts first. Then load any non-constant parts. */
6067void
6068spu_expand_vector_init (rtx target, rtx vals)
6069{
6070 enum machine_mode mode = GET_MODE (target);
6071 int n_elts = GET_MODE_NUNITS (mode);
6072 int n_var = 0;
6073 bool all_same = true;
790c536c 6074 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6075 int i;
6076
6077 first = XVECEXP (vals, 0, 0);
6078 for (i = 0; i < n_elts; ++i)
6079 {
6080 x = XVECEXP (vals, 0, i);
e442af0b 6081 if (!(CONST_INT_P (x)
6082 || GET_CODE (x) == CONST_DOUBLE
6083 || GET_CODE (x) == CONST_FIXED))
644459d0 6084 ++n_var;
6085 else
6086 {
6087 if (first_constant == NULL_RTX)
6088 first_constant = x;
6089 }
6090 if (i > 0 && !rtx_equal_p (x, first))
6091 all_same = false;
6092 }
6093
6094 /* if all elements are the same, use splats to repeat elements */
6095 if (all_same)
6096 {
6097 if (!CONSTANT_P (first)
6098 && !register_operand (first, GET_MODE (x)))
6099 first = force_reg (GET_MODE (first), first);
6100 emit_insn (gen_spu_splats (target, first));
6101 return;
6102 }
6103
6104 /* load constant parts */
6105 if (n_var != n_elts)
6106 {
6107 if (n_var == 0)
6108 {
6109 emit_move_insn (target,
6110 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6111 }
6112 else
6113 {
6114 rtx constant_parts_rtx = copy_rtx (vals);
6115
6116 gcc_assert (first_constant != NULL_RTX);
 6117	  /* Fill empty slots with the first constant; this increases
6118 our chance of using splats in the recursive call below. */
6119 for (i = 0; i < n_elts; ++i)
e442af0b 6120 {
6121 x = XVECEXP (constant_parts_rtx, 0, i);
6122 if (!(CONST_INT_P (x)
6123 || GET_CODE (x) == CONST_DOUBLE
6124 || GET_CODE (x) == CONST_FIXED))
6125 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6126 }
644459d0 6127
6128 spu_expand_vector_init (target, constant_parts_rtx);
6129 }
6130 }
6131
6132 /* load variable parts */
6133 if (n_var != 0)
6134 {
6135 rtx insert_operands[4];
6136
6137 insert_operands[0] = target;
6138 insert_operands[2] = target;
6139 for (i = 0; i < n_elts; ++i)
6140 {
6141 x = XVECEXP (vals, 0, i);
e442af0b 6142 if (!(CONST_INT_P (x)
6143 || GET_CODE (x) == CONST_DOUBLE
6144 || GET_CODE (x) == CONST_FIXED))
644459d0 6145 {
6146 if (!register_operand (x, GET_MODE (x)))
6147 x = force_reg (GET_MODE (x), x);
6148 insert_operands[1] = x;
6149 insert_operands[3] = GEN_INT (i);
6150 spu_builtin_insert (insert_operands);
6151 }
6152 }
6153 }
6154}
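
/* Worked example (illustrative): initializing a V4SImode vector with
   { x, 1, 2, 3 }, where x is not constant, first loads the constant
   vector { 1, 1, 2, 3 } -- the empty slot is filled with the first
   constant to improve the odds of a splat -- and then overwrites
   element 0 with x via spu_builtin_insert.  If all four elements were
   x, a single spu_splats would be emitted instead.  */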
6352eedf 6155
5474166e 6156/* Return the insn index of the vector compare instruction for the given
 6157   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available. */
6158
6159static int
6160get_vec_cmp_insn (enum rtx_code code,
6161 enum machine_mode dest_mode,
6162 enum machine_mode op_mode)
6163
6164{
6165 switch (code)
6166 {
6167 case EQ:
6168 if (dest_mode == V16QImode && op_mode == V16QImode)
6169 return CODE_FOR_ceq_v16qi;
6170 if (dest_mode == V8HImode && op_mode == V8HImode)
6171 return CODE_FOR_ceq_v8hi;
6172 if (dest_mode == V4SImode && op_mode == V4SImode)
6173 return CODE_FOR_ceq_v4si;
6174 if (dest_mode == V4SImode && op_mode == V4SFmode)
6175 return CODE_FOR_ceq_v4sf;
6176 if (dest_mode == V2DImode && op_mode == V2DFmode)
6177 return CODE_FOR_ceq_v2df;
6178 break;
6179 case GT:
6180 if (dest_mode == V16QImode && op_mode == V16QImode)
6181 return CODE_FOR_cgt_v16qi;
6182 if (dest_mode == V8HImode && op_mode == V8HImode)
6183 return CODE_FOR_cgt_v8hi;
6184 if (dest_mode == V4SImode && op_mode == V4SImode)
6185 return CODE_FOR_cgt_v4si;
6186 if (dest_mode == V4SImode && op_mode == V4SFmode)
6187 return CODE_FOR_cgt_v4sf;
6188 if (dest_mode == V2DImode && op_mode == V2DFmode)
6189 return CODE_FOR_cgt_v2df;
6190 break;
6191 case GTU:
6192 if (dest_mode == V16QImode && op_mode == V16QImode)
6193 return CODE_FOR_clgt_v16qi;
6194 if (dest_mode == V8HImode && op_mode == V8HImode)
6195 return CODE_FOR_clgt_v8hi;
6196 if (dest_mode == V4SImode && op_mode == V4SImode)
6197 return CODE_FOR_clgt_v4si;
6198 break;
6199 default:
6200 break;
6201 }
6202 return -1;
6203}
6204
6205/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 6206   DMODE is the expected destination mode.  This is a recursive function. */
6207
6208static rtx
6209spu_emit_vector_compare (enum rtx_code rcode,
6210 rtx op0, rtx op1,
6211 enum machine_mode dmode)
6212{
6213 int vec_cmp_insn;
6214 rtx mask;
6215 enum machine_mode dest_mode;
6216 enum machine_mode op_mode = GET_MODE (op1);
6217
6218 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6219
 6220  /* Floating point vector compare instructions use destination V4SImode.
 6221     Double floating point vector compare instructions use destination V2DImode.
 6222     Move the destination to the appropriate mode later. */
6223 if (dmode == V4SFmode)
6224 dest_mode = V4SImode;
6225 else if (dmode == V2DFmode)
6226 dest_mode = V2DImode;
6227 else
6228 dest_mode = dmode;
6229
6230 mask = gen_reg_rtx (dest_mode);
6231 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6232
6233 if (vec_cmp_insn == -1)
6234 {
6235 bool swap_operands = false;
6236 bool try_again = false;
6237 switch (rcode)
6238 {
6239 case LT:
6240 rcode = GT;
6241 swap_operands = true;
6242 try_again = true;
6243 break;
6244 case LTU:
6245 rcode = GTU;
6246 swap_operands = true;
6247 try_again = true;
6248 break;
6249 case NE:
6250 /* Treat A != B as ~(A==B). */
6251 {
6252 enum insn_code nor_code;
6253 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6254 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6255 gcc_assert (nor_code != CODE_FOR_nothing);
6256 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6257 if (dmode != dest_mode)
6258 {
6259 rtx temp = gen_reg_rtx (dest_mode);
6260 convert_move (temp, mask, 0);
6261 return temp;
6262 }
6263 return mask;
6264 }
6265 break;
6266 case GE:
6267 case GEU:
6268 case LE:
6269 case LEU:
6270 /* Try GT/GTU/LT/LTU OR EQ */
6271 {
6272 rtx c_rtx, eq_rtx;
6273 enum insn_code ior_code;
6274 enum rtx_code new_code;
6275
6276 switch (rcode)
6277 {
6278 case GE: new_code = GT; break;
6279 case GEU: new_code = GTU; break;
6280 case LE: new_code = LT; break;
6281 case LEU: new_code = LTU; break;
6282 default:
6283 gcc_unreachable ();
6284 }
6285
6286 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6287 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6288
d6bf3b14 6289 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6290 gcc_assert (ior_code != CODE_FOR_nothing);
6291 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6292 if (dmode != dest_mode)
6293 {
6294 rtx temp = gen_reg_rtx (dest_mode);
6295 convert_move (temp, mask, 0);
6296 return temp;
6297 }
6298 return mask;
6299 }
6300 break;
6301 default:
6302 gcc_unreachable ();
6303 }
6304
6305 /* You only get two chances. */
6306 if (try_again)
6307 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6308
6309 gcc_assert (vec_cmp_insn != -1);
6310
6311 if (swap_operands)
6312 {
6313 rtx tmp;
6314 tmp = op0;
6315 op0 = op1;
6316 op1 = tmp;
6317 }
6318 }
6319
6320 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6321 if (dmode != dest_mode)
6322 {
6323 rtx temp = gen_reg_rtx (dest_mode);
6324 convert_move (temp, mask, 0);
6325 return temp;
6326 }
6327 return mask;
6328}
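
/* Sketch of the fallback cases above: comparisons with no direct insn
   are rewritten in terms of the ones that exist.  For example, a
   V4SImode "a <= b" is emitted as (a < b) | (a == b), where the LT is
   itself handled by swapping the operands of CGT, and "a != b" is
   emitted as the one's complement of the CEQ result.  */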
6329
6330
6331/* Emit vector conditional expression.
 6332   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
6333 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6334
6335int
6336spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6337 rtx cond, rtx cc_op0, rtx cc_op1)
6338{
6339 enum machine_mode dest_mode = GET_MODE (dest);
6340 enum rtx_code rcode = GET_CODE (cond);
6341 rtx mask;
6342
6343 /* Get the vector mask for the given relational operations. */
6344 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6345
6346 emit_insn(gen_selb (dest, op2, op1, mask));
6347
6348 return 1;
6349}
6350
6352eedf 6351static rtx
6352spu_force_reg (enum machine_mode mode, rtx op)
6353{
6354 rtx x, r;
6355 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6356 {
6357 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6358 || GET_MODE (op) == BLKmode)
6359 return force_reg (mode, convert_to_mode (mode, op, 0));
6360 abort ();
6361 }
6362
6363 r = force_reg (GET_MODE (op), op);
6364 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6365 {
6366 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6367 if (x)
6368 return x;
6369 }
6370
6371 x = gen_reg_rtx (mode);
6372 emit_insn (gen_spu_convert (x, r));
6373 return x;
6374}
6375
6376static void
6377spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6378{
6379 HOST_WIDE_INT v = 0;
6380 int lsbits;
6381 /* Check the range of immediate operands. */
6382 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6383 {
6384 int range = p - SPU_BTI_7;
5df189be 6385
6386 if (!CONSTANT_P (op))
6352eedf 6387 error ("%s expects an integer literal in the range [%d, %d].",
6388 d->name,
6389 spu_builtin_range[range].low, spu_builtin_range[range].high);
6390
6391 if (GET_CODE (op) == CONST
6392 && (GET_CODE (XEXP (op, 0)) == PLUS
6393 || GET_CODE (XEXP (op, 0)) == MINUS))
6394 {
6395 v = INTVAL (XEXP (XEXP (op, 0), 1));
6396 op = XEXP (XEXP (op, 0), 0);
6397 }
6398 else if (GET_CODE (op) == CONST_INT)
6399 v = INTVAL (op);
5df189be 6400 else if (GET_CODE (op) == CONST_VECTOR
6401 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6402 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6403
6404 /* The default for v is 0 which is valid in every range. */
6405 if (v < spu_builtin_range[range].low
6406 || v > spu_builtin_range[range].high)
6407 error ("%s expects an integer literal in the range [%d, %d]. ("
6408 HOST_WIDE_INT_PRINT_DEC ")",
6409 d->name,
6410 spu_builtin_range[range].low, spu_builtin_range[range].high,
6411 v);
6352eedf 6412
6413 switch (p)
6414 {
6415 case SPU_BTI_S10_4:
6416 lsbits = 4;
6417 break;
6418 case SPU_BTI_U16_2:
 6419	  /* This is only used in lqa and stqa.  Even though the insns
6420 encode 16 bits of the address (all but the 2 least
6421 significant), only 14 bits are used because it is masked to
 6422	     be 16-byte aligned. */
6423 lsbits = 4;
6424 break;
6425 case SPU_BTI_S16_2:
6426 /* This is used for lqr and stqr. */
6427 lsbits = 2;
6428 break;
6429 default:
6430 lsbits = 0;
6431 }
6432
6433 if (GET_CODE (op) == LABEL_REF
6434 || (GET_CODE (op) == SYMBOL_REF
6435 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6436 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6437 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6438 d->name);
6439 }
6440}
6441
6442
70ca06f8 6443static int
5df189be 6444expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6445 rtx target, rtx ops[])
6446{
bc620c5c 6447 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6448 int i = 0, a;
6352eedf 6449
6450 /* Expand the arguments into rtl. */
6451
6452 if (d->parm[0] != SPU_BTI_VOID)
6453 ops[i++] = target;
6454
70ca06f8 6455 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6456 {
5df189be 6457 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6458 if (arg == 0)
6459 abort ();
b9c74b4d 6460 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6461 }
70ca06f8 6462
6463 /* The insn pattern may have additional operands (SCRATCH).
6464 Return the number of actual non-SCRATCH operands. */
6465 gcc_assert (i <= insn_data[icode].n_operands);
6466 return i;
6352eedf 6467}
6468
6469static rtx
6470spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6471 tree exp, rtx target)
6352eedf 6472{
6473 rtx pat;
6474 rtx ops[8];
bc620c5c 6475 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6476 enum machine_mode mode, tmode;
6477 int i, p;
70ca06f8 6478 int n_operands;
6352eedf 6479 tree return_type;
6480
6481 /* Set up ops[] with values from arglist. */
70ca06f8 6482 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6483
6484 /* Handle the target operand which must be operand 0. */
6485 i = 0;
6486 if (d->parm[0] != SPU_BTI_VOID)
6487 {
6488
 6489       /* We prefer the mode specified for the match_operand; otherwise
6490 use the mode from the builtin function prototype. */
6491 tmode = insn_data[d->icode].operand[0].mode;
6492 if (tmode == VOIDmode)
6493 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6494
 6495       /* Try to use target because not using it can lead to extra copies,
 6496          and when we are using all of the registers extra copies lead
6497 to extra spills. */
6498 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6499 ops[0] = target;
6500 else
6501 target = ops[0] = gen_reg_rtx (tmode);
6502
6503 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6504 abort ();
6505
6506 i++;
6507 }
6508
a76866d3 6509 if (d->fcode == SPU_MASK_FOR_LOAD)
6510 {
6511 enum machine_mode mode = insn_data[icode].operand[1].mode;
6512 tree arg;
6513 rtx addr, op, pat;
6514
6515 /* get addr */
5df189be 6516 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6517 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6518 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6519 addr = memory_address (mode, op);
6520
6521 /* negate addr */
6522 op = gen_reg_rtx (GET_MODE (addr));
6523 emit_insn (gen_rtx_SET (VOIDmode, op,
6524 gen_rtx_NEG (GET_MODE (addr), addr)));
6525 op = gen_rtx_MEM (mode, op);
6526
6527 pat = GEN_FCN (icode) (target, op);
6528 if (!pat)
6529 return 0;
6530 emit_insn (pat);
6531 return target;
6532 }
6533
6352eedf 6534  /* Ignore align_hint, but still expand its args in case they have
6535 side effects. */
6536 if (icode == CODE_FOR_spu_align_hint)
6537 return 0;
6538
6539 /* Handle the rest of the operands. */
70ca06f8 6540 for (p = 1; i < n_operands; i++, p++)
6352eedf 6541 {
6542 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6543 mode = insn_data[d->icode].operand[i].mode;
6544 else
6545 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6546
6547 /* mode can be VOIDmode here for labels */
6548
6549 /* For specific intrinsics with an immediate operand, e.g.,
6550 si_ai(), we sometimes need to convert the scalar argument to a
6551 vector argument by splatting the scalar. */
6552 if (VECTOR_MODE_P (mode)
6553 && (GET_CODE (ops[i]) == CONST_INT
6554 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6555 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6556 {
6557 if (GET_CODE (ops[i]) == CONST_INT)
6558 ops[i] = spu_const (mode, INTVAL (ops[i]));
6559 else
6560 {
6561 rtx reg = gen_reg_rtx (mode);
6562 enum machine_mode imode = GET_MODE_INNER (mode);
6563 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6564 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6565 if (imode != GET_MODE (ops[i]))
6566 ops[i] = convert_to_mode (imode, ops[i],
6567 TYPE_UNSIGNED (spu_builtin_types
6568 [d->parm[i]]));
6569 emit_insn (gen_spu_splats (reg, ops[i]));
6570 ops[i] = reg;
6571 }
6572 }
6573
5df189be 6574 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6575
6352eedf 6576 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6577 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6578 }
6579
70ca06f8 6580 switch (n_operands)
6352eedf 6581 {
6582 case 0:
6583 pat = GEN_FCN (icode) (0);
6584 break;
6585 case 1:
6586 pat = GEN_FCN (icode) (ops[0]);
6587 break;
6588 case 2:
6589 pat = GEN_FCN (icode) (ops[0], ops[1]);
6590 break;
6591 case 3:
6592 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6593 break;
6594 case 4:
6595 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6596 break;
6597 case 5:
6598 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6599 break;
6600 case 6:
6601 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6602 break;
6603 default:
6604 abort ();
6605 }
6606
6607 if (!pat)
6608 abort ();
6609
6610 if (d->type == B_CALL || d->type == B_BISLED)
6611 emit_call_insn (pat);
6612 else if (d->type == B_JUMP)
6613 {
6614 emit_jump_insn (pat);
6615 emit_barrier ();
6616 }
6617 else
6618 emit_insn (pat);
6619
6620 return_type = spu_builtin_types[d->parm[0]];
6621 if (d->parm[0] != SPU_BTI_VOID
6622 && GET_MODE (target) != TYPE_MODE (return_type))
6623 {
 6624      /* target is the return value.  It should always have the mode of
6625 the builtin function prototype. */
6626 target = spu_force_reg (TYPE_MODE (return_type), target);
6627 }
6628
6629 return target;
6630}
6631
6632rtx
6633spu_expand_builtin (tree exp,
6634 rtx target,
6635 rtx subtarget ATTRIBUTE_UNUSED,
6636 enum machine_mode mode ATTRIBUTE_UNUSED,
6637 int ignore ATTRIBUTE_UNUSED)
6638{
5df189be 6639 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6640 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6641 struct spu_builtin_description *d;
6642
6643 if (fcode < NUM_SPU_BUILTINS)
6644 {
6645 d = &spu_builtins[fcode];
6646
5df189be 6647 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6648 }
6649 abort ();
6650}
6651
e99f512d 6652/* Implement targetm.vectorize.builtin_mul_widen_even. */
6653static tree
6654spu_builtin_mul_widen_even (tree type)
6655{
e99f512d 6656 switch (TYPE_MODE (type))
6657 {
6658 case V8HImode:
6659 if (TYPE_UNSIGNED (type))
0c5c4d59 6660 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6661 else
0c5c4d59 6662 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6663 break;
6664 default:
6665 return NULL_TREE;
6666 }
6667}
6668
6669/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6670static tree
6671spu_builtin_mul_widen_odd (tree type)
6672{
6673 switch (TYPE_MODE (type))
6674 {
6675 case V8HImode:
6676 if (TYPE_UNSIGNED (type))
0c5c4d59 6677 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6678 else
0c5c4d59 6679 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6680 break;
6681 default:
6682 return NULL_TREE;
6683 }
6684}
6685
a76866d3 6686/* Implement targetm.vectorize.builtin_mask_for_load. */
6687static tree
6688spu_builtin_mask_for_load (void)
6689{
0c5c4d59 6690 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6691}
5df189be 6692
a28df51d 6693/* Implement targetm.vectorize.builtin_vectorization_cost. */
6694static int
0822b158 6695spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6696 tree vectype ATTRIBUTE_UNUSED,
6697 int misalign ATTRIBUTE_UNUSED)
559093aa 6698{
6699 switch (type_of_cost)
6700 {
6701 case scalar_stmt:
6702 case vector_stmt:
6703 case vector_load:
6704 case vector_store:
6705 case vec_to_scalar:
6706 case scalar_to_vec:
6707 case cond_branch_not_taken:
6708 case vec_perm:
6709 return 1;
6710
6711 case scalar_store:
6712 return 10;
6713
6714 case scalar_load:
6715 /* Load + rotate. */
6716 return 2;
6717
6718 case unaligned_load:
6719 return 2;
6720
6721 case cond_branch_taken:
6722 return 6;
6723
6724 default:
6725 gcc_unreachable ();
6726 }
a28df51d 6727}
6728
0e87db76 6729/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6730   after applying N iterations.  This routine does not determine how many
 6731   iterations are required to reach the desired alignment. */
6732
6733static bool
a9f1838b 6734spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6735{
6736 if (is_packed)
6737 return false;
6738
6739 /* All other types are naturally aligned. */
6740 return true;
6741}
6742
a0515226 6743/* Implement targetm.vectorize.builtin_vec_perm. */
6744tree
6745spu_builtin_vec_perm (tree type, tree *mask_element_type)
6746{
a0515226 6747 *mask_element_type = unsigned_char_type_node;
6748
6749 switch (TYPE_MODE (type))
6750 {
6751 case V16QImode:
6752 if (TYPE_UNSIGNED (type))
0c5c4d59 6753 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6754 else
0c5c4d59 6755 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6756
6757 case V8HImode:
6758 if (TYPE_UNSIGNED (type))
0c5c4d59 6759 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6760 else
0c5c4d59 6761 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6762
6763 case V4SImode:
6764 if (TYPE_UNSIGNED (type))
0c5c4d59 6765 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6766 else
0c5c4d59 6767 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6768
6769 case V2DImode:
6770 if (TYPE_UNSIGNED (type))
0c5c4d59 6771 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6772 else
0c5c4d59 6773 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6774
6775 case V4SFmode:
0c5c4d59 6776 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6777
6778 case V2DFmode:
0c5c4d59 6779 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6780
6781 default:
6782 return NULL_TREE;
6783 }
a0515226 6784}
6785
6cf5579e 6786/* Return the appropriate mode for a named address pointer. */
6787static enum machine_mode
6788spu_addr_space_pointer_mode (addr_space_t addrspace)
6789{
6790 switch (addrspace)
6791 {
6792 case ADDR_SPACE_GENERIC:
6793 return ptr_mode;
6794 case ADDR_SPACE_EA:
6795 return EAmode;
6796 default:
6797 gcc_unreachable ();
6798 }
6799}
6800
6801/* Return the appropriate mode for a named address address. */
6802static enum machine_mode
6803spu_addr_space_address_mode (addr_space_t addrspace)
6804{
6805 switch (addrspace)
6806 {
6807 case ADDR_SPACE_GENERIC:
6808 return Pmode;
6809 case ADDR_SPACE_EA:
6810 return EAmode;
6811 default:
6812 gcc_unreachable ();
6813 }
6814}
6815
6816/* Determine if one named address space is a subset of another. */
6817
6818static bool
6819spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6820{
6821 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6822 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6823
6824 if (subset == superset)
6825 return true;
6826
6827 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6828 being subsets but instead as disjoint address spaces. */
6829 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6830 return false;
6831
6832 else
6833 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6834}
6835
6836/* Convert from one address space to another. */
6837static rtx
6838spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6839{
6840 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6841 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6842
6843 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6844 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6845
6846 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6847 {
6848 rtx result, ls;
6849
6850 ls = gen_const_mem (DImode,
6851 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6852 set_mem_align (ls, 128);
6853
6854 result = gen_reg_rtx (Pmode);
6855 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6856 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6857 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6858 ls, const0_rtx, Pmode, 1);
6859
6860 emit_insn (gen_subsi3 (result, op, ls));
6861
6862 return result;
6863 }
6864
6865 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6866 {
6867 rtx result, ls;
6868
6869 ls = gen_const_mem (DImode,
6870 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6871 set_mem_align (ls, 128);
6872
6873 result = gen_reg_rtx (EAmode);
6874 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6875 op = force_reg (Pmode, op);
6876 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6877 ls, const0_rtx, EAmode, 1);
6878 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6879
6880 if (EAmode == SImode)
6881 emit_insn (gen_addsi3 (result, op, ls));
6882 else
6883 emit_insn (gen_adddi3 (result, op, ls));
6884
6885 return result;
6886 }
6887
6888 else
6889 gcc_unreachable ();
6890}
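
/* Illustrative use of the conversions above (a sketch; the __ea
   qualifier spelling is assumed from the SPU named address space
   support):

     __ea int *ep = ...;
     int *lp = (int *) ep;    -- subtracts __ea_local_store from EP

   The conditional move keeps a null pointer null instead of turning
   it into a negative local-store offset; the opposite cast adds the
   base back in.  */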
6891
6892
d52fd16a 6893/* Count the total number of instructions in each pipe and return the
6894 maximum, which is used as the Minimum Iteration Interval (MII)
 6895   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
 6896   -2 means the instruction can go in either pipe0 or pipe1. */
6897static int
6898spu_sms_res_mii (struct ddg *g)
6899{
6900 int i;
6901 unsigned t[4] = {0, 0, 0, 0};
6902
6903 for (i = 0; i < g->num_nodes; i++)
6904 {
6905 rtx insn = g->nodes[i].insn;
6906 int p = get_pipe (insn) + 2;
6907
6908 assert (p >= 0);
6909 assert (p < 4);
6910
6911 t[p]++;
6912 if (dump_file && INSN_P (insn))
6913 fprintf (dump_file, "i%d %s %d %d\n",
6914 INSN_UID (insn),
6915 insn_data[INSN_CODE(insn)].name,
6916 p, t[p]);
6917 }
6918 if (dump_file)
6919 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6920
6921 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6922}
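
/* Worked example (illustrative): with 6 instructions that can issue in
   either pipe, 2 pipe0 instructions and 3 pipe1 instructions, the
   counts are t = { 6, 0, 2, 3 }, so the result is
   MAX ((6 + 2 + 3 + 1) / 2, MAX (2, 3)) = 6, i.e. a minimum iteration
   interval of 6 cycles.  */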
6923
6924
5df189be 6925void
6926spu_init_expanders (void)
9d98604b 6927{
5df189be 6928 if (cfun)
9d98604b 6929 {
6930 rtx r0, r1;
 6931      /* The hard frame pointer register is only 128-bit aligned when
6932 frame_pointer_needed is true. We don't know that until we're
6933 expanding the prologue. */
6934 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6935
6936 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6937 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6938 to be treated as aligned, so generate them here. */
6939 r0 = gen_reg_rtx (SImode);
6940 r1 = gen_reg_rtx (SImode);
6941 mark_reg_pointer (r0, 128);
6942 mark_reg_pointer (r1, 128);
6943 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6944 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6945 }
ea32e033 6946}
6947
6948static enum machine_mode
6949spu_libgcc_cmp_return_mode (void)
6950{
6951
 6952/* For SPU, word mode is TImode, so it is better to use SImode
6953 for compare returns. */
6954 return SImode;
6955}
6956
6957static enum machine_mode
6958spu_libgcc_shift_count_mode (void)
6959{
 6960/* For SPU, word mode is TImode, so it is better to use SImode
6961 for shift counts. */
6962 return SImode;
6963}
5a976006 6964
6965/* An early place to adjust some flags after GCC has finished processing
 6966   them. */
6967static void
6968asm_file_start (void)
6969{
6970 /* Variable tracking should be run after all optimizations which
6971 change order of insns. It also needs a valid CFG. */
6972 spu_flag_var_tracking = flag_var_tracking;
6973 flag_var_tracking = 0;
6974
6975 default_file_start ();
6976}
6977
a08dfd55 6978/* Implement targetm.section_type_flags. */
6979static unsigned int
6980spu_section_type_flags (tree decl, const char *name, int reloc)
6981{
6982 /* .toe needs to have type @nobits. */
6983 if (strcmp (name, ".toe") == 0)
6984 return SECTION_BSS;
6cf5579e 6985 /* Don't load _ea into the current address space. */
6986 if (strcmp (name, "._ea") == 0)
6987 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6988 return default_section_type_flags (decl, name, reloc);
6989}
c2233b46 6990
6cf5579e 6991/* Implement targetm.select_section. */
6992static section *
6993spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6994{
6995 /* Variables and constants defined in the __ea address space
6996 go into a special section named "._ea". */
6997 if (TREE_TYPE (decl) != error_mark_node
6998 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6999 {
7000 /* We might get called with string constants, but get_named_section
7001 doesn't like them as they are not DECLs. Also, we need to set
7002 flags in that case. */
7003 if (!DECL_P (decl))
7004 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7005
7006 return get_named_section (decl, "._ea", reloc);
7007 }
7008
7009 return default_elf_select_section (decl, reloc, align);
7010}
7011
7012/* Implement targetm.unique_section. */
7013static void
7014spu_unique_section (tree decl, int reloc)
7015{
7016 /* We don't support unique section names in the __ea address
7017 space for now. */
7018 if (TREE_TYPE (decl) != error_mark_node
7019 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7020 return;
7021
7022 default_unique_section (decl, reloc);
7023}
7024
56c7bfc2 7025/* Generate a constant or register which contains 2^SCALE. We assume
7026 the result is valid for MODE. Currently, MODE must be V4SFmode and
7027 SCALE must be SImode. */
7028rtx
7029spu_gen_exp2 (enum machine_mode mode, rtx scale)
7030{
7031 gcc_assert (mode == V4SFmode);
7032 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7033 if (GET_CODE (scale) != CONST_INT)
7034 {
7035 /* unsigned int exp = (127 + scale) << 23;
7036 __vector float m = (__vector float) spu_splats (exp); */
7037 rtx reg = force_reg (SImode, scale);
7038 rtx exp = gen_reg_rtx (SImode);
7039 rtx mul = gen_reg_rtx (mode);
7040 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7041 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7042 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7043 return mul;
7044 }
7045 else
7046 {
7047 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7048 unsigned char arr[16];
7049 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7050 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7051 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7052 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7053 return array_to_constant (mode, arr);
7054 }
7055}
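
/* Worked example of the constant path above (illustrative): for
   SCALE = 3 the biased exponent is 127 + 3 = 130, producing the bytes
   0x41 0x00 0x00 0x00 in every word, i.e. the V4SFmode constant
   { 8.0f, 8.0f, 8.0f, 8.0f }, since an IEEE single with exponent
   field 130 and a zero mantissa is 1.0 * 2^3.  */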
7056
9d98604b 7057/* After reload, just change the convert into a move instruction
7058 or a dead instruction. */
7059void
7060spu_split_convert (rtx ops[])
7061{
7062 if (REGNO (ops[0]) == REGNO (ops[1]))
7063 emit_note (NOTE_INSN_DELETED);
7064 else
7065 {
7066 /* Use TImode always as this might help hard reg copyprop. */
7067 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7068 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7069 emit_insn (gen_move_insn (op0, op1));
7070 }
7071}
7072
b3878a6c 7073void
7074spu_function_profiler (FILE * file, int labelno)
7075{
7076 fprintf (file, "# profile\n");
7077 fprintf (file, "brsl $75, _mcount\n");
7078}
7079
c2233b46 7080#include "gt-spu.h"