cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
146
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
152static void spu_init_builtins (void);
e6925042 153static tree spu_builtin_decl (unsigned, bool);
644459d0 154static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
155static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 156static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 157static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
158 bool, addr_space_t);
644459d0 159static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
160static rtx get_pic_reg (void);
161static int need_to_save_reg (int regno, int saving);
162static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
163static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
165 rtx scratch);
166static void emit_nop_for_insn (rtx insn);
167static bool insn_clobbers_hbr (rtx insn);
168static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 169 int distance, sbitmap blocks);
5474166e 170static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
171 enum machine_mode dmode);
644459d0 172static rtx get_branch_target (rtx branch);
644459d0 173static void spu_machine_dependent_reorg (void);
174static int spu_sched_issue_rate (void);
175static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
176 int can_issue_more);
177static int get_pipe (rtx insn);
644459d0 178static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 179static void spu_sched_init_global (FILE *, int, int);
180static void spu_sched_init (FILE *, int, int);
181static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 182static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
186 int flags,
187 unsigned char *no_add_attrs);
188static int spu_naked_function_p (tree func);
fb80456a 189static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
190 const_tree type, unsigned char named);
644459d0 191static tree spu_build_builtin_va_list (void);
8a58ed0a 192static void spu_va_start (tree, rtx);
75a70cf9 193static tree spu_gimplify_va_arg_expr (tree valist, tree type,
194 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 195static int store_with_one_insn_p (rtx mem);
644459d0 196static int mem_is_padded_component_ref (rtx x);
9d98604b 197static int reg_aligned_for_addr (rtx x);
644459d0 198static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
199static void spu_asm_globalize_label (FILE * file, const char *name);
200static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 201 int *total, bool speed);
644459d0 202static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
203static void spu_init_libfuncs (void);
fb80456a 204static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 205static void fix_range (const char *);
69ced2d6 206static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 207static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 208static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
209 addr_space_t);
e99f512d 210static tree spu_builtin_mul_widen_even (tree);
211static tree spu_builtin_mul_widen_odd (tree);
a76866d3 212static tree spu_builtin_mask_for_load (void);
a28df51d 213static int spu_builtin_vectorization_cost (bool);
a9f1838b 214static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 215static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 216static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
217static enum machine_mode spu_addr_space_address_mode (addr_space_t);
218static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
219static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 220static int spu_sms_res_mii (struct ddg *g);
5a976006 221static void asm_file_start (void);
a08dfd55 222static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 223static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
224static void spu_unique_section (tree, int);
9d98604b 225static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 226static void spu_trampoline_init (rtx, tree, rtx);
644459d0 227
228extern const char *reg_names[];
644459d0 229
5474166e 230/* Which instruction set architecture to use. */
231int spu_arch;
232/* Which cpu are we tuning for. */
233int spu_tune;
234
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
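/* With the default of 2 nops this is (8 insns * 4 bytes) - (2 * 4) = 24 bytes.  */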
241int spu_hint_dist = (8*4) - (2*4);
242
243/* Determines whether we run variable tracking in machine dependent
244 reorganization. */
245static int spu_flag_var_tracking;
246
644459d0 247enum spu_immediate {
248 SPU_NONE,
249 SPU_IL,
250 SPU_ILA,
251 SPU_ILH,
252 SPU_ILHU,
253 SPU_ORI,
254 SPU_ORHI,
255 SPU_ORBI,
99369027 256 SPU_IOHL
644459d0 257};
dea01258 258enum immediate_class
259{
260 IC_POOL, /* constant pool */
261 IC_IL1, /* one il* instruction */
262 IC_IL2, /* both ilhu and iohl instructions */
263 IC_IL1s, /* one il* instruction */
264 IC_IL2s, /* both ilhu and iohl instructions */
265 IC_FSMBI, /* the fsmbi instruction */
266 IC_CPAT, /* one of the c*d instructions */
5df189be 267 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 268};
644459d0 269
270static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
271static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 272static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
273static enum immediate_class classify_immediate (rtx op,
274 enum machine_mode mode);
644459d0 275
1bd43494 276static enum machine_mode spu_unwind_word_mode (void);
277
ea32e033 278static enum machine_mode
279spu_libgcc_cmp_return_mode (void);
280
281static enum machine_mode
282spu_libgcc_shift_count_mode (void);
6cf5579e 283
284/* Pointer mode for __ea references. */
285#define EAmode (spu_ea_model != 32 ? DImode : SImode)
286
ef51d1e3 287\f
288/* Table of machine attributes. */
289static const struct attribute_spec spu_attribute_table[] =
290{
291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
292 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
293 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
294 { NULL, 0, 0, false, false, false, NULL }
295};
644459d0 296\f
297/* TARGET overrides. */
298
6cf5579e 299#undef TARGET_ADDR_SPACE_POINTER_MODE
300#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
301
302#undef TARGET_ADDR_SPACE_ADDRESS_MODE
303#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
304
305#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
306#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
307 spu_addr_space_legitimate_address_p
308
309#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
310#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
311
312#undef TARGET_ADDR_SPACE_SUBSET_P
313#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
314
315#undef TARGET_ADDR_SPACE_CONVERT
316#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
317
644459d0 318#undef TARGET_INIT_BUILTINS
319#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 320#undef TARGET_BUILTIN_DECL
321#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 322
644459d0 323#undef TARGET_EXPAND_BUILTIN
324#define TARGET_EXPAND_BUILTIN spu_expand_builtin
325
1bd43494 326#undef TARGET_UNWIND_WORD_MODE
327#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 328
41e3a0c7 329#undef TARGET_LEGITIMIZE_ADDRESS
330#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
331
6cf5579e 332/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
333 and .quad for the debugger. When it is known that the assembler is fixed,
334 these can be removed. */
335#undef TARGET_ASM_UNALIGNED_SI_OP
336#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
337
338#undef TARGET_ASM_ALIGNED_DI_OP
339#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
340
644459d0 341/* The .8byte directive doesn't seem to work well for a 32 bit
342 architecture. */
343#undef TARGET_ASM_UNALIGNED_DI_OP
344#define TARGET_ASM_UNALIGNED_DI_OP NULL
345
346#undef TARGET_RTX_COSTS
347#define TARGET_RTX_COSTS spu_rtx_costs
348
349#undef TARGET_ADDRESS_COST
f529eb25 350#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 351
352#undef TARGET_SCHED_ISSUE_RATE
353#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
354
5a976006 355#undef TARGET_SCHED_INIT_GLOBAL
356#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
357
358#undef TARGET_SCHED_INIT
359#define TARGET_SCHED_INIT spu_sched_init
360
644459d0 361#undef TARGET_SCHED_VARIABLE_ISSUE
362#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
363
5a976006 364#undef TARGET_SCHED_REORDER
365#define TARGET_SCHED_REORDER spu_sched_reorder
366
367#undef TARGET_SCHED_REORDER2
368#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 369
370#undef TARGET_SCHED_ADJUST_COST
371#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
372
644459d0 373#undef TARGET_ATTRIBUTE_TABLE
374#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
375
376#undef TARGET_ASM_INTEGER
377#define TARGET_ASM_INTEGER spu_assemble_integer
378
379#undef TARGET_SCALAR_MODE_SUPPORTED_P
380#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
381
382#undef TARGET_VECTOR_MODE_SUPPORTED_P
383#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
384
385#undef TARGET_FUNCTION_OK_FOR_SIBCALL
386#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
387
388#undef TARGET_ASM_GLOBALIZE_LABEL
389#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
390
391#undef TARGET_PASS_BY_REFERENCE
392#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
393
394#undef TARGET_MUST_PASS_IN_STACK
395#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
396
397#undef TARGET_BUILD_BUILTIN_VA_LIST
398#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
399
8a58ed0a 400#undef TARGET_EXPAND_BUILTIN_VA_START
401#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
402
644459d0 403#undef TARGET_SETUP_INCOMING_VARARGS
404#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
405
406#undef TARGET_MACHINE_DEPENDENT_REORG
407#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
408
409#undef TARGET_GIMPLIFY_VA_ARG_EXPR
410#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
411
412#undef TARGET_DEFAULT_TARGET_FLAGS
413#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
414
415#undef TARGET_INIT_LIBFUNCS
416#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
417
418#undef TARGET_RETURN_IN_MEMORY
419#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
420
69ced2d6 421#undef TARGET_ENCODE_SECTION_INFO
422#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
423
e99f512d 424#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
425#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
426
427#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
428#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
429
a76866d3 430#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
431#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
432
a28df51d 433#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
434#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
435
0e87db76 436#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
437#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
438
a0515226 439#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
440#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
441
ea32e033 442#undef TARGET_LIBGCC_CMP_RETURN_MODE
443#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
444
445#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
446#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
447
d52fd16a 448#undef TARGET_SCHED_SMS_RES_MII
449#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
450
5a976006 451#undef TARGET_ASM_FILE_START
452#define TARGET_ASM_FILE_START asm_file_start
453
a08dfd55 454#undef TARGET_SECTION_TYPE_FLAGS
455#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
456
6cf5579e 457#undef TARGET_ASM_SELECT_SECTION
458#define TARGET_ASM_SELECT_SECTION spu_select_section
459
460#undef TARGET_ASM_UNIQUE_SECTION
461#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
462
fd50b071 463#undef TARGET_LEGITIMATE_ADDRESS_P
464#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
465
e96f2783 466#undef TARGET_TRAMPOLINE_INIT
467#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
468
644459d0 469struct gcc_target targetm = TARGET_INITIALIZER;
470
5df189be 471void
472spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
473{
5df189be 474 /* Override some of the default param values. With so many registers
475 larger values are better for these params. */
476 MAX_PENDING_LIST_LENGTH = 128;
477
478 /* With so many registers this is better on by default. */
479 flag_rename_registers = 1;
480}
481
644459d0 482/* Sometimes certain combinations of command options do not make sense
483 on a particular target machine. You can define a macro
484 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
485 executed once just after all the command options have been parsed. */
486void
487spu_override_options (void)
488{
  /* Small loops will be completely peeled at -O3.  For SPU it is more
     important to keep code small by default.  */
491 if (!flag_unroll_loops && !flag_peel_loops
492 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
493 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
494
644459d0 495 flag_omit_frame_pointer = 1;
496
  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
644459d0 498 if (align_functions < 8)
499 align_functions = 8;
c7b91b14 500
5a976006 501 spu_hint_dist = 8*4 - spu_max_nops*4;
502 if (spu_hint_dist < 0)
503 spu_hint_dist = 0;
504
c7b91b14 505 if (spu_fixed_range_string)
506 fix_range (spu_fixed_range_string);
5474166e 507
508 /* Determine processor architectural level. */
509 if (spu_arch_string)
510 {
511 if (strcmp (&spu_arch_string[0], "cell") == 0)
512 spu_arch = PROCESSOR_CELL;
513 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
514 spu_arch = PROCESSOR_CELLEDP;
515 else
516 error ("Unknown architecture '%s'", &spu_arch_string[0]);
517 }
518
519 /* Determine processor to tune for. */
520 if (spu_tune_string)
521 {
522 if (strcmp (&spu_tune_string[0], "cell") == 0)
523 spu_tune = PROCESSOR_CELL;
524 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
525 spu_tune = PROCESSOR_CELLEDP;
526 else
        error ("Unknown tuning target '%s'", &spu_tune_string[0]);
528 }
98bbec1e 529
13684256 530 /* Change defaults according to the processor architecture. */
531 if (spu_arch == PROCESSOR_CELLEDP)
532 {
533 /* If no command line option has been otherwise specified, change
534 the default to -mno-safe-hints on celledp -- only the original
535 Cell/B.E. processors require this workaround. */
536 if (!(target_flags_explicit & MASK_SAFE_HINTS))
537 target_flags &= ~MASK_SAFE_HINTS;
538 }
539
98bbec1e 540 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 541}
542\f
543/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
544 struct attribute_spec.handler. */
545
644459d0 546/* True if MODE is valid for the target. By "valid", we mean able to
547 be manipulated in non-trivial ways. In particular, this means all
548 the arithmetic is supported. */
549static bool
550spu_scalar_mode_supported_p (enum machine_mode mode)
551{
552 switch (mode)
553 {
554 case QImode:
555 case HImode:
556 case SImode:
557 case SFmode:
558 case DImode:
559 case TImode:
560 case DFmode:
561 return true;
562
563 default:
564 return false;
565 }
566}
567
568/* Similarly for vector modes. "Supported" here is less strict. At
569 least some operations are supported; need to check optabs or builtins
570 for further details. */
571static bool
572spu_vector_mode_supported_p (enum machine_mode mode)
573{
574 switch (mode)
575 {
576 case V16QImode:
577 case V8HImode:
578 case V4SImode:
579 case V2DImode:
580 case V4SFmode:
581 case V2DFmode:
582 return true;
583
584 default:
585 return false;
586 }
587}
588
589/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
590 least significant bytes of the outer mode. This function returns
591 TRUE for the SUBREG's where this is correct. */
592int
593valid_subreg (rtx op)
594{
595 enum machine_mode om = GET_MODE (op);
596 enum machine_mode im = GET_MODE (SUBREG_REG (op));
597 return om != VOIDmode && im != VOIDmode
598 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 599 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
600 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 601}
602
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
644459d0 605static rtx
606adjust_operand (rtx op, HOST_WIDE_INT * start)
607{
608 enum machine_mode mode;
609 int op_size;
38aca5eb 610 /* Strip any paradoxical SUBREG. */
611 if (GET_CODE (op) == SUBREG
612 && (GET_MODE_BITSIZE (GET_MODE (op))
613 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 614 {
615 if (start)
616 *start -=
617 GET_MODE_BITSIZE (GET_MODE (op)) -
618 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
619 op = SUBREG_REG (op);
620 }
  /* If it is smaller than SI, widen it to 32 bits; a SUBREG is added below.  */
622 op_size = GET_MODE_BITSIZE (GET_MODE (op));
623 if (op_size < 32)
624 {
625 if (start)
626 *start += 32 - op_size;
627 op_size = 32;
628 }
629 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
630 mode = mode_for_size (op_size, MODE_INT, 0);
631 if (mode != GET_MODE (op))
632 op = gen_rtx_SUBREG (mode, op, 0);
633 return op;
634}
635
636void
637spu_expand_extv (rtx ops[], int unsignedp)
638{
9d98604b 639 rtx dst = ops[0], src = ops[1];
644459d0 640 HOST_WIDE_INT width = INTVAL (ops[2]);
641 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 642 HOST_WIDE_INT align_mask;
643 rtx s0, s1, mask, r0;
644459d0 644
9d98604b 645 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 646
9d98604b 647 if (MEM_P (src))
644459d0 648 {
9d98604b 649 /* First, determine if we need 1 TImode load or 2. We need only 1
650 if the bits being extracted do not cross the alignment boundary
651 as determined by the MEM and its address. */
652
653 align_mask = -MEM_ALIGN (src);
654 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 655 {
9d98604b 656 /* Alignment is sufficient for 1 load. */
657 s0 = gen_reg_rtx (TImode);
658 r0 = spu_expand_load (s0, 0, src, start / 8);
659 start &= 7;
660 if (r0)
661 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 662 }
9d98604b 663 else
664 {
665 /* Need 2 loads. */
666 s0 = gen_reg_rtx (TImode);
667 s1 = gen_reg_rtx (TImode);
668 r0 = spu_expand_load (s0, s1, src, start / 8);
669 start &= 7;
670
671 gcc_assert (start + width <= 128);
672 if (r0)
673 {
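              /* Rotate both loaded quadwords into place and merge them with
                 selb, using an all-ones mask shifted by the rotate amount to
                 pick which load each byte comes from.  */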
674 rtx r1 = gen_reg_rtx (SImode);
675 mask = gen_reg_rtx (TImode);
676 emit_move_insn (mask, GEN_INT (-1));
677 emit_insn (gen_rotqby_ti (s0, s0, r0));
678 emit_insn (gen_rotqby_ti (s1, s1, r0));
679 if (GET_CODE (r0) == CONST_INT)
680 r1 = GEN_INT (INTVAL (r0) & 15);
681 else
682 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
683 emit_insn (gen_shlqby_ti (mask, mask, r1));
684 emit_insn (gen_selb (s0, s1, s0, mask));
685 }
686 }
687
688 }
689 else if (GET_CODE (src) == SUBREG)
690 {
691 rtx r = SUBREG_REG (src);
692 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
693 s0 = gen_reg_rtx (TImode);
694 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
695 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
696 else
697 emit_move_insn (s0, src);
698 }
699 else
700 {
701 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
702 s0 = gen_reg_rtx (TImode);
703 emit_move_insn (s0, src);
644459d0 704 }
705
9d98604b 706 /* Now s0 is TImode and contains the bits to extract at start. */
707
708 if (start)
709 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
710
711 if (128 - width)
644459d0 712 {
9d98604b 713 tree c = build_int_cst (NULL_TREE, 128 - width);
714 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 715 }
716
9d98604b 717 emit_move_insn (dst, s0);
644459d0 718}
719
720void
721spu_expand_insv (rtx ops[])
722{
723 HOST_WIDE_INT width = INTVAL (ops[1]);
724 HOST_WIDE_INT start = INTVAL (ops[2]);
725 HOST_WIDE_INT maskbits;
726 enum machine_mode dst_mode, src_mode;
727 rtx dst = ops[0], src = ops[3];
728 int dst_size, src_size;
729 rtx mask;
730 rtx shift_reg;
731 int shift;
732
733
734 if (GET_CODE (ops[0]) == MEM)
735 dst = gen_reg_rtx (TImode);
736 else
737 dst = adjust_operand (dst, &start);
738 dst_mode = GET_MODE (dst);
739 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
740
741 if (CONSTANT_P (src))
742 {
743 enum machine_mode m =
744 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
745 src = force_reg (m, convert_to_mode (m, src, 0));
746 }
747 src = adjust_operand (src, 0);
748 src_mode = GET_MODE (src);
749 src_size = GET_MODE_BITSIZE (GET_MODE (src));
750
751 mask = gen_reg_rtx (dst_mode);
752 shift_reg = gen_reg_rtx (dst_mode);
753 shift = dst_size - start - width;
754
755 /* It's not safe to use subreg here because the compiler assumes
756 that the SUBREG_REG is right justified in the SUBREG. */
757 convert_move (shift_reg, src, 1);
758
759 if (shift > 0)
760 {
761 switch (dst_mode)
762 {
763 case SImode:
764 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
765 break;
766 case DImode:
767 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
768 break;
769 case TImode:
770 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
771 break;
772 default:
773 abort ();
774 }
775 }
776 else if (shift < 0)
777 abort ();
778
779 switch (dst_size)
780 {
781 case 32:
782 maskbits = (-1ll << (32 - width - start));
783 if (start)
784 maskbits += (1ll << (32 - start));
785 emit_move_insn (mask, GEN_INT (maskbits));
786 break;
787 case 64:
788 maskbits = (-1ll << (64 - width - start));
789 if (start)
790 maskbits += (1ll << (64 - start));
791 emit_move_insn (mask, GEN_INT (maskbits));
792 break;
793 case 128:
794 {
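        /* Build a byte array whose bits are 1 exactly over the field
           [start, start + width), then turn it into a TImode constant.  */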
795 unsigned char arr[16];
796 int i = start / 8;
797 memset (arr, 0, sizeof (arr));
798 arr[i] = 0xff >> (start & 7);
799 for (i++; i <= (start + width - 1) / 8; i++)
800 arr[i] = 0xff;
801 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
802 emit_move_insn (mask, array_to_constant (TImode, arr));
803 }
804 break;
805 default:
806 abort ();
807 }
808 if (GET_CODE (ops[0]) == MEM)
809 {
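      /* The destination is memory: rotate the new bits and the mask into
         position within the enclosing quadword, then read-modify-write the
         aligned quadword (and the following one if the field spills over).  */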
644459d0 810 rtx low = gen_reg_rtx (SImode);
644459d0 811 rtx rotl = gen_reg_rtx (SImode);
812 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 813 rtx addr;
814 rtx addr0;
815 rtx addr1;
644459d0 816 rtx mem;
817
9d98604b 818 addr = force_reg (Pmode, XEXP (ops[0], 0));
819 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 820 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
821 emit_insn (gen_negsi2 (rotl, low));
822 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
823 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 824 mem = change_address (ops[0], TImode, addr0);
644459d0 825 set_mem_alias_set (mem, 0);
826 emit_move_insn (dst, mem);
827 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 828 if (start + width > MEM_ALIGN (ops[0]))
829 {
830 rtx shl = gen_reg_rtx (SImode);
831 rtx mask1 = gen_reg_rtx (TImode);
832 rtx dst1 = gen_reg_rtx (TImode);
833 rtx mem1;
9d98604b 834 addr1 = plus_constant (addr, 16);
835 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 836 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
837 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 838 mem1 = change_address (ops[0], TImode, addr1);
644459d0 839 set_mem_alias_set (mem1, 0);
840 emit_move_insn (dst1, mem1);
841 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
842 emit_move_insn (mem1, dst1);
843 }
9d98604b 844 emit_move_insn (mem, dst);
644459d0 845 }
846 else
71cd778d 847 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 848}
849
850
851int
852spu_expand_block_move (rtx ops[])
853{
854 HOST_WIDE_INT bytes, align, offset;
855 rtx src, dst, sreg, dreg, target;
856 int i;
857 if (GET_CODE (ops[2]) != CONST_INT
858 || GET_CODE (ops[3]) != CONST_INT
48eb4342 859 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 860 return 0;
861
862 bytes = INTVAL (ops[2]);
863 align = INTVAL (ops[3]);
864
865 if (bytes <= 0)
866 return 1;
867
868 dst = ops[0];
869 src = ops[1];
870
871 if (align == 16)
872 {
873 for (offset = 0; offset + 16 <= bytes; offset += 16)
874 {
875 dst = adjust_address (ops[0], V16QImode, offset);
876 src = adjust_address (ops[1], V16QImode, offset);
877 emit_move_insn (dst, src);
878 }
879 if (offset < bytes)
880 {
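          /* Copy the remaining (fewer than 16) bytes: build a byte mask
             selecting the source bytes still to be copied and merge them
             into the destination quadword with selb.  */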
881 rtx mask;
882 unsigned char arr[16] = { 0 };
883 for (i = 0; i < bytes - offset; i++)
884 arr[i] = 0xff;
885 dst = adjust_address (ops[0], V16QImode, offset);
886 src = adjust_address (ops[1], V16QImode, offset);
887 mask = gen_reg_rtx (V16QImode);
888 sreg = gen_reg_rtx (V16QImode);
889 dreg = gen_reg_rtx (V16QImode);
890 target = gen_reg_rtx (V16QImode);
891 emit_move_insn (mask, array_to_constant (V16QImode, arr));
892 emit_move_insn (dreg, dst);
893 emit_move_insn (sreg, src);
894 emit_insn (gen_selb (target, dreg, sreg, mask));
895 emit_move_insn (dst, target);
896 }
897 return 1;
898 }
899 return 0;
900}
901
902enum spu_comp_code
903{ SPU_EQ, SPU_GT, SPU_GTU };
904
5474166e 905int spu_comp_icode[12][3] = {
906 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
907 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
908 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
909 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
910 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
911 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
912 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
913 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
914 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
915 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
916 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
917 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 918};
919
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
925void
74f4459c 926spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 927{
928 int reverse_compare = 0;
929 int reverse_test = 0;
5d70b918 930 rtx compare_result, eq_result;
931 rtx comp_rtx, eq_rtx;
644459d0 932 enum machine_mode comp_mode;
933 enum machine_mode op_mode;
b9c74b4d 934 enum spu_comp_code scode, eq_code;
935 enum insn_code ior_code;
74f4459c 936 enum rtx_code code = GET_CODE (cmp);
937 rtx op0 = XEXP (cmp, 0);
938 rtx op1 = XEXP (cmp, 1);
644459d0 939 int index;
5d70b918 940 int eq_test = 0;
644459d0 941
74f4459c 942 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 943 and so on, to keep the constant in operand 1. */
74f4459c 944 if (GET_CODE (op1) == CONST_INT)
644459d0 945 {
74f4459c 946 HOST_WIDE_INT val = INTVAL (op1) - 1;
947 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 948 switch (code)
949 {
950 case GE:
74f4459c 951 op1 = GEN_INT (val);
644459d0 952 code = GT;
953 break;
954 case LT:
74f4459c 955 op1 = GEN_INT (val);
644459d0 956 code = LE;
957 break;
958 case GEU:
74f4459c 959 op1 = GEN_INT (val);
644459d0 960 code = GTU;
961 break;
962 case LTU:
74f4459c 963 op1 = GEN_INT (val);
644459d0 964 code = LEU;
965 break;
966 default:
967 break;
968 }
969 }
970
5d70b918 971 comp_mode = SImode;
74f4459c 972 op_mode = GET_MODE (op0);
5d70b918 973
644459d0 974 switch (code)
975 {
976 case GE:
644459d0 977 scode = SPU_GT;
07027691 978 if (HONOR_NANS (op_mode))
5d70b918 979 {
980 reverse_compare = 0;
981 reverse_test = 0;
982 eq_test = 1;
983 eq_code = SPU_EQ;
984 }
985 else
986 {
987 reverse_compare = 1;
988 reverse_test = 1;
989 }
644459d0 990 break;
991 case LE:
644459d0 992 scode = SPU_GT;
07027691 993 if (HONOR_NANS (op_mode))
5d70b918 994 {
995 reverse_compare = 1;
996 reverse_test = 0;
997 eq_test = 1;
998 eq_code = SPU_EQ;
999 }
1000 else
1001 {
1002 reverse_compare = 0;
1003 reverse_test = 1;
1004 }
644459d0 1005 break;
1006 case LT:
1007 reverse_compare = 1;
1008 reverse_test = 0;
1009 scode = SPU_GT;
1010 break;
1011 case GEU:
1012 reverse_compare = 1;
1013 reverse_test = 1;
1014 scode = SPU_GTU;
1015 break;
1016 case LEU:
1017 reverse_compare = 0;
1018 reverse_test = 1;
1019 scode = SPU_GTU;
1020 break;
1021 case LTU:
1022 reverse_compare = 1;
1023 reverse_test = 0;
1024 scode = SPU_GTU;
1025 break;
1026 case NE:
1027 reverse_compare = 0;
1028 reverse_test = 1;
1029 scode = SPU_EQ;
1030 break;
1031
1032 case EQ:
1033 scode = SPU_EQ;
1034 break;
1035 case GT:
1036 scode = SPU_GT;
1037 break;
1038 case GTU:
1039 scode = SPU_GTU;
1040 break;
1041 default:
1042 scode = SPU_EQ;
1043 break;
1044 }
1045
644459d0 1046 switch (op_mode)
1047 {
1048 case QImode:
1049 index = 0;
1050 comp_mode = QImode;
1051 break;
1052 case HImode:
1053 index = 1;
1054 comp_mode = HImode;
1055 break;
1056 case SImode:
1057 index = 2;
1058 break;
1059 case DImode:
1060 index = 3;
1061 break;
1062 case TImode:
1063 index = 4;
1064 break;
1065 case SFmode:
1066 index = 5;
1067 break;
1068 case DFmode:
1069 index = 6;
1070 break;
1071 case V16QImode:
5474166e 1072 index = 7;
1073 comp_mode = op_mode;
1074 break;
644459d0 1075 case V8HImode:
5474166e 1076 index = 8;
1077 comp_mode = op_mode;
1078 break;
644459d0 1079 case V4SImode:
5474166e 1080 index = 9;
1081 comp_mode = op_mode;
1082 break;
644459d0 1083 case V4SFmode:
5474166e 1084 index = 10;
1085 comp_mode = V4SImode;
1086 break;
644459d0 1087 case V2DFmode:
5474166e 1088 index = 11;
1089 comp_mode = V2DImode;
644459d0 1090 break;
5474166e 1091 case V2DImode:
644459d0 1092 default:
1093 abort ();
1094 }
1095
74f4459c 1096 if (GET_MODE (op1) == DFmode
07027691 1097 && (scode != SPU_GT && scode != SPU_EQ))
1098 abort ();
644459d0 1099
74f4459c 1100 if (is_set == 0 && op1 == const0_rtx
1101 && (GET_MODE (op0) == SImode
1102 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1103 {
1104 /* Don't need to set a register with the result when we are
1105 comparing against zero and branching. */
1106 reverse_test = !reverse_test;
74f4459c 1107 compare_result = op0;
644459d0 1108 }
1109 else
1110 {
1111 compare_result = gen_reg_rtx (comp_mode);
1112
1113 if (reverse_compare)
1114 {
74f4459c 1115 rtx t = op1;
1116 op1 = op0;
1117 op0 = t;
644459d0 1118 }
1119
1120 if (spu_comp_icode[index][scode] == 0)
1121 abort ();
1122
1123 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1124 (op0, op_mode))
1125 op0 = force_reg (op_mode, op0);
644459d0 1126 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1127 (op1, op_mode))
1128 op1 = force_reg (op_mode, op1);
644459d0 1129 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1130 op0, op1);
644459d0 1131 if (comp_rtx == 0)
1132 abort ();
1133 emit_insn (comp_rtx);
1134
5d70b918 1135 if (eq_test)
1136 {
1137 eq_result = gen_reg_rtx (comp_mode);
1138 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1139 op0, op1);
5d70b918 1140 if (eq_rtx == 0)
1141 abort ();
1142 emit_insn (eq_rtx);
1143 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1144 gcc_assert (ior_code != CODE_FOR_nothing);
1145 emit_insn (GEN_FCN (ior_code)
1146 (compare_result, compare_result, eq_result));
1147 }
644459d0 1148 }
1149
1150 if (is_set == 0)
1151 {
1152 rtx bcomp;
1153 rtx loc_ref;
1154
1155 /* We don't have branch on QI compare insns, so we convert the
1156 QI compare result to a HI result. */
1157 if (comp_mode == QImode)
1158 {
1159 rtx old_res = compare_result;
1160 compare_result = gen_reg_rtx (HImode);
1161 comp_mode = HImode;
1162 emit_insn (gen_extendqihi2 (compare_result, old_res));
1163 }
1164
1165 if (reverse_test)
1166 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1167 else
1168 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1169
74f4459c 1170 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1171 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1172 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1173 loc_ref, pc_rtx)));
1174 }
1175 else if (is_set == 2)
1176 {
74f4459c 1177 rtx target = operands[0];
644459d0 1178 int compare_size = GET_MODE_BITSIZE (comp_mode);
1179 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1180 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1181 rtx select_mask;
1182 rtx op_t = operands[2];
1183 rtx op_f = operands[3];
1184
1185 /* The result of the comparison can be SI, HI or QI mode. Create a
1186 mask based on that result. */
1187 if (target_size > compare_size)
1188 {
1189 select_mask = gen_reg_rtx (mode);
1190 emit_insn (gen_extend_compare (select_mask, compare_result));
1191 }
1192 else if (target_size < compare_size)
1193 select_mask =
1194 gen_rtx_SUBREG (mode, compare_result,
1195 (compare_size - target_size) / BITS_PER_UNIT);
1196 else if (comp_mode != mode)
1197 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1198 else
1199 select_mask = compare_result;
1200
1201 if (GET_MODE (target) != GET_MODE (op_t)
1202 || GET_MODE (target) != GET_MODE (op_f))
1203 abort ();
1204
1205 if (reverse_test)
1206 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1207 else
1208 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1209 }
1210 else
1211 {
74f4459c 1212 rtx target = operands[0];
644459d0 1213 if (reverse_test)
1214 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1215 gen_rtx_NOT (comp_mode, compare_result)));
1216 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1217 emit_insn (gen_extendhisi2 (target, compare_result));
1218 else if (GET_MODE (target) == SImode
1219 && GET_MODE (compare_result) == QImode)
1220 emit_insn (gen_extend_compare (target, compare_result));
1221 else
1222 emit_move_insn (target, compare_result);
1223 }
1224}
1225
1226HOST_WIDE_INT
1227const_double_to_hwint (rtx x)
1228{
1229 HOST_WIDE_INT val;
1230 REAL_VALUE_TYPE rv;
1231 if (GET_MODE (x) == SFmode)
1232 {
1233 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1234 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1235 }
1236 else if (GET_MODE (x) == DFmode)
1237 {
1238 long l[2];
1239 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1240 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1241 val = l[0];
1242 val = (val << 32) | (l[1] & 0xffffffff);
1243 }
1244 else
1245 abort ();
1246 return val;
1247}
1248
1249rtx
1250hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1251{
1252 long tv[2];
1253 REAL_VALUE_TYPE rv;
1254 gcc_assert (mode == SFmode || mode == DFmode);
1255
1256 if (mode == SFmode)
1257 tv[0] = (v << 32) >> 32;
1258 else if (mode == DFmode)
1259 {
1260 tv[1] = (v << 32) >> 32;
1261 tv[0] = v >> 32;
1262 }
1263 real_from_target (&rv, tv, mode);
1264 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1265}
1266
1267void
1268print_operand_address (FILE * file, register rtx addr)
1269{
1270 rtx reg;
1271 rtx offset;
1272
e04cf423 1273 if (GET_CODE (addr) == AND
1274 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1275 && INTVAL (XEXP (addr, 1)) == -16)
1276 addr = XEXP (addr, 0);
1277
644459d0 1278 switch (GET_CODE (addr))
1279 {
1280 case REG:
1281 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1282 break;
1283
1284 case PLUS:
1285 reg = XEXP (addr, 0);
1286 offset = XEXP (addr, 1);
1287 if (GET_CODE (offset) == REG)
1288 {
1289 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1290 reg_names[REGNO (offset)]);
1291 }
1292 else if (GET_CODE (offset) == CONST_INT)
1293 {
1294 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1295 INTVAL (offset), reg_names[REGNO (reg)]);
1296 }
1297 else
1298 abort ();
1299 break;
1300
1301 case CONST:
1302 case LABEL_REF:
1303 case SYMBOL_REF:
1304 case CONST_INT:
1305 output_addr_const (file, addr);
1306 break;
1307
1308 default:
1309 debug_rtx (addr);
1310 abort ();
1311 }
1312}
1313
1314void
1315print_operand (FILE * file, rtx x, int code)
1316{
1317 enum machine_mode mode = GET_MODE (x);
1318 HOST_WIDE_INT val;
1319 unsigned char arr[16];
1320 int xcode = GET_CODE (x);
dea01258 1321 int i, info;
644459d0 1322 if (GET_MODE (x) == VOIDmode)
1323 switch (code)
1324 {
644459d0 1325 case 'L': /* 128 bits, signed */
1326 case 'm': /* 128 bits, signed */
1327 case 'T': /* 128 bits, signed */
1328 case 't': /* 128 bits, signed */
1329 mode = TImode;
1330 break;
644459d0 1331 case 'K': /* 64 bits, signed */
1332 case 'k': /* 64 bits, signed */
1333 case 'D': /* 64 bits, signed */
1334 case 'd': /* 64 bits, signed */
1335 mode = DImode;
1336 break;
644459d0 1337 case 'J': /* 32 bits, signed */
1338 case 'j': /* 32 bits, signed */
1339 case 's': /* 32 bits, signed */
1340 case 'S': /* 32 bits, signed */
1341 mode = SImode;
1342 break;
1343 }
1344 switch (code)
1345 {
1346
1347 case 'j': /* 32 bits, signed */
1348 case 'k': /* 64 bits, signed */
1349 case 'm': /* 128 bits, signed */
1350 if (xcode == CONST_INT
1351 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1352 {
1353 gcc_assert (logical_immediate_p (x, mode));
1354 constant_to_array (mode, x, arr);
1355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1356 val = trunc_int_for_mode (val, SImode);
1357 switch (which_logical_immediate (val))
1358 {
1359 case SPU_ORI:
1360 break;
1361 case SPU_ORHI:
1362 fprintf (file, "h");
1363 break;
1364 case SPU_ORBI:
1365 fprintf (file, "b");
1366 break;
1367 default:
1368 gcc_unreachable();
1369 }
1370 }
1371 else
1372 gcc_unreachable();
1373 return;
1374
1375 case 'J': /* 32 bits, signed */
1376 case 'K': /* 64 bits, signed */
1377 case 'L': /* 128 bits, signed */
1378 if (xcode == CONST_INT
1379 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1380 {
1381 gcc_assert (logical_immediate_p (x, mode)
1382 || iohl_immediate_p (x, mode));
1383 constant_to_array (mode, x, arr);
1384 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1385 val = trunc_int_for_mode (val, SImode);
1386 switch (which_logical_immediate (val))
1387 {
1388 case SPU_ORI:
1389 case SPU_IOHL:
1390 break;
1391 case SPU_ORHI:
1392 val = trunc_int_for_mode (val, HImode);
1393 break;
1394 case SPU_ORBI:
1395 val = trunc_int_for_mode (val, QImode);
1396 break;
1397 default:
1398 gcc_unreachable();
1399 }
1400 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1401 }
1402 else
1403 gcc_unreachable();
1404 return;
1405
1406 case 't': /* 128 bits, signed */
1407 case 'd': /* 64 bits, signed */
1408 case 's': /* 32 bits, signed */
dea01258 1409 if (CONSTANT_P (x))
644459d0 1410 {
dea01258 1411 enum immediate_class c = classify_immediate (x, mode);
1412 switch (c)
1413 {
1414 case IC_IL1:
1415 constant_to_array (mode, x, arr);
1416 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1417 val = trunc_int_for_mode (val, SImode);
1418 switch (which_immediate_load (val))
1419 {
1420 case SPU_IL:
1421 break;
1422 case SPU_ILA:
1423 fprintf (file, "a");
1424 break;
1425 case SPU_ILH:
1426 fprintf (file, "h");
1427 break;
1428 case SPU_ILHU:
1429 fprintf (file, "hu");
1430 break;
1431 default:
1432 gcc_unreachable ();
1433 }
1434 break;
1435 case IC_CPAT:
1436 constant_to_array (mode, x, arr);
1437 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1438 if (info == 1)
1439 fprintf (file, "b");
1440 else if (info == 2)
1441 fprintf (file, "h");
1442 else if (info == 4)
1443 fprintf (file, "w");
1444 else if (info == 8)
1445 fprintf (file, "d");
1446 break;
1447 case IC_IL1s:
1448 if (xcode == CONST_VECTOR)
1449 {
1450 x = CONST_VECTOR_ELT (x, 0);
1451 xcode = GET_CODE (x);
1452 }
1453 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1454 fprintf (file, "a");
1455 else if (xcode == HIGH)
1456 fprintf (file, "hu");
1457 break;
1458 case IC_FSMBI:
5df189be 1459 case IC_FSMBI2:
dea01258 1460 case IC_IL2:
1461 case IC_IL2s:
1462 case IC_POOL:
1463 abort ();
1464 }
644459d0 1465 }
644459d0 1466 else
1467 gcc_unreachable ();
1468 return;
1469
1470 case 'T': /* 128 bits, signed */
1471 case 'D': /* 64 bits, signed */
1472 case 'S': /* 32 bits, signed */
dea01258 1473 if (CONSTANT_P (x))
644459d0 1474 {
dea01258 1475 enum immediate_class c = classify_immediate (x, mode);
1476 switch (c)
644459d0 1477 {
dea01258 1478 case IC_IL1:
1479 constant_to_array (mode, x, arr);
1480 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1481 val = trunc_int_for_mode (val, SImode);
1482 switch (which_immediate_load (val))
1483 {
1484 case SPU_IL:
1485 case SPU_ILA:
1486 break;
1487 case SPU_ILH:
1488 case SPU_ILHU:
1489 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1490 break;
1491 default:
1492 gcc_unreachable ();
1493 }
1494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1495 break;
1496 case IC_FSMBI:
1497 constant_to_array (mode, x, arr);
1498 val = 0;
1499 for (i = 0; i < 16; i++)
1500 {
1501 val <<= 1;
1502 val |= arr[i] & 1;
1503 }
1504 print_operand (file, GEN_INT (val), 0);
1505 break;
1506 case IC_CPAT:
1507 constant_to_array (mode, x, arr);
1508 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1510 break;
dea01258 1511 case IC_IL1s:
dea01258 1512 if (xcode == HIGH)
5df189be 1513 x = XEXP (x, 0);
1514 if (GET_CODE (x) == CONST_VECTOR)
1515 x = CONST_VECTOR_ELT (x, 0);
1516 output_addr_const (file, x);
1517 if (xcode == HIGH)
1518 fprintf (file, "@h");
644459d0 1519 break;
dea01258 1520 case IC_IL2:
1521 case IC_IL2s:
5df189be 1522 case IC_FSMBI2:
dea01258 1523 case IC_POOL:
1524 abort ();
644459d0 1525 }
c8befdb9 1526 }
644459d0 1527 else
1528 gcc_unreachable ();
1529 return;
1530
644459d0 1531 case 'C':
1532 if (xcode == CONST_INT)
1533 {
          /* Only the 4 least significant bits are relevant for generating
             control word instructions.  */
1536 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1537 return;
1538 }
1539 break;
1540
1541 case 'M': /* print code for c*d */
1542 if (GET_CODE (x) == CONST_INT)
1543 switch (INTVAL (x))
1544 {
1545 case 1:
1546 fprintf (file, "b");
1547 break;
1548 case 2:
1549 fprintf (file, "h");
1550 break;
1551 case 4:
1552 fprintf (file, "w");
1553 break;
1554 case 8:
1555 fprintf (file, "d");
1556 break;
1557 default:
1558 gcc_unreachable();
1559 }
1560 else
1561 gcc_unreachable();
1562 return;
1563
1564 case 'N': /* Negate the operand */
1565 if (xcode == CONST_INT)
1566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1567 else if (xcode == CONST_VECTOR)
1568 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1569 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1570 return;
1571
1572 case 'I': /* enable/disable interrupts */
1573 if (xcode == CONST_INT)
1574 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1575 return;
1576
1577 case 'b': /* branch modifiers */
1578 if (xcode == REG)
1579 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1580 else if (COMPARISON_P (x))
1581 fprintf (file, "%s", xcode == NE ? "n" : "");
1582 return;
1583
1584 case 'i': /* indirect call */
1585 if (xcode == MEM)
1586 {
1587 if (GET_CODE (XEXP (x, 0)) == REG)
1588 /* Used in indirect function calls. */
1589 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1590 else
1591 output_address (XEXP (x, 0));
1592 }
1593 return;
1594
1595 case 'p': /* load/store */
1596 if (xcode == MEM)
1597 {
1598 x = XEXP (x, 0);
1599 xcode = GET_CODE (x);
1600 }
e04cf423 1601 if (xcode == AND)
1602 {
1603 x = XEXP (x, 0);
1604 xcode = GET_CODE (x);
1605 }
644459d0 1606 if (xcode == REG)
1607 fprintf (file, "d");
1608 else if (xcode == CONST_INT)
1609 fprintf (file, "a");
1610 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1611 fprintf (file, "r");
1612 else if (xcode == PLUS || xcode == LO_SUM)
1613 {
1614 if (GET_CODE (XEXP (x, 1)) == REG)
1615 fprintf (file, "x");
1616 else
1617 fprintf (file, "d");
1618 }
1619 return;
1620
5df189be 1621 case 'e':
1622 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1623 val &= 0x7;
1624 output_addr_const (file, GEN_INT (val));
1625 return;
1626
1627 case 'f':
1628 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1629 val &= 0x1f;
1630 output_addr_const (file, GEN_INT (val));
1631 return;
1632
1633 case 'g':
1634 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1635 val &= 0x3f;
1636 output_addr_const (file, GEN_INT (val));
1637 return;
1638
1639 case 'h':
1640 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1641 val = (val >> 3) & 0x1f;
1642 output_addr_const (file, GEN_INT (val));
1643 return;
1644
1645 case 'E':
1646 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1647 val = -val;
1648 val &= 0x7;
1649 output_addr_const (file, GEN_INT (val));
1650 return;
1651
1652 case 'F':
1653 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1654 val = -val;
1655 val &= 0x1f;
1656 output_addr_const (file, GEN_INT (val));
1657 return;
1658
1659 case 'G':
1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661 val = -val;
1662 val &= 0x3f;
1663 output_addr_const (file, GEN_INT (val));
1664 return;
1665
1666 case 'H':
1667 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1668 val = -(val & -8ll);
1669 val = (val >> 3) & 0x1f;
1670 output_addr_const (file, GEN_INT (val));
1671 return;
1672
56c7bfc2 1673 case 'v':
1674 case 'w':
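      /* Print what is, for a single-precision constant, its unbiased
         exponent; 'w' prints the negated value.  */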
1675 constant_to_array (mode, x, arr);
1676 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1677 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1678 return;
1679
644459d0 1680 case 0:
1681 if (xcode == REG)
1682 fprintf (file, "%s", reg_names[REGNO (x)]);
1683 else if (xcode == MEM)
1684 output_address (XEXP (x, 0));
1685 else if (xcode == CONST_VECTOR)
dea01258 1686 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1687 else
1688 output_addr_const (file, x);
1689 return;
1690
f6a0d06f 1691 /* unused letters
56c7bfc2 1692 o qr u yz
5df189be 1693 AB OPQR UVWXYZ */
644459d0 1694 default:
1695 output_operand_lossage ("invalid %%xn code");
1696 }
1697 gcc_unreachable ();
1698}
1699
1700extern char call_used_regs[];
644459d0 1701
1702/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1703 caller saved register. For leaf functions it is more efficient to
1704 use a volatile register because we won't need to save and restore the
1705 pic register. This routine is only valid after register allocation
1706 is completed, so we can pick an unused register. */
1707static rtx
1708get_pic_reg (void)
1709{
1710 rtx pic_reg = pic_offset_table_rtx;
1711 if (!reload_completed && !reload_in_progress)
1712 abort ();
87a95921 1713 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1714 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1715 return pic_reg;
1716}
1717
5df189be 1718/* Split constant addresses to handle cases that are too large.
1719 Add in the pic register when in PIC mode.
1720 Split immediates that require more than 1 instruction. */
dea01258 1721int
1722spu_split_immediate (rtx * ops)
c8befdb9 1723{
dea01258 1724 enum machine_mode mode = GET_MODE (ops[0]);
1725 enum immediate_class c = classify_immediate (ops[1], mode);
1726
1727 switch (c)
c8befdb9 1728 {
dea01258 1729 case IC_IL2:
1730 {
1731 unsigned char arrhi[16];
1732 unsigned char arrlo[16];
98bbec1e 1733 rtx to, temp, hi, lo;
dea01258 1734 int i;
98bbec1e 1735 enum machine_mode imode = mode;
1736 /* We need to do reals as ints because the constant used in the
1737 IOR might not be a legitimate real constant. */
1738 imode = int_mode_for_mode (mode);
dea01258 1739 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1740 if (imode != mode)
1741 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1742 else
1743 to = ops[0];
1744 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
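        /* Split each 32-bit word into its high halfword (loaded first, as
           with ilhu) and its low halfword (merged in by the IOR, as with
           iohl).  */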
dea01258 1745 for (i = 0; i < 16; i += 4)
1746 {
1747 arrlo[i + 2] = arrhi[i + 2];
1748 arrlo[i + 3] = arrhi[i + 3];
1749 arrlo[i + 0] = arrlo[i + 1] = 0;
1750 arrhi[i + 2] = arrhi[i + 3] = 0;
1751 }
98bbec1e 1752 hi = array_to_constant (imode, arrhi);
1753 lo = array_to_constant (imode, arrlo);
1754 emit_move_insn (temp, hi);
dea01258 1755 emit_insn (gen_rtx_SET
98bbec1e 1756 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1757 return 1;
1758 }
5df189be 1759 case IC_FSMBI2:
1760 {
1761 unsigned char arr_fsmbi[16];
1762 unsigned char arr_andbi[16];
1763 rtx to, reg_fsmbi, reg_and;
1764 int i;
1765 enum machine_mode imode = mode;
1766 /* We need to do reals as ints because the constant used in the
1767 * AND might not be a legitimate real constant. */
1768 imode = int_mode_for_mode (mode);
1769 constant_to_array (mode, ops[1], arr_fsmbi);
1770 if (imode != mode)
1771 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1772 else
1773 to = ops[0];
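        /* The constant's bytes are either 0 or a single common value V:
           materialize a 0x00/0xff byte mask (an fsmbi pattern) and AND it
           with V replicated into every byte.  */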
1774 for (i = 0; i < 16; i++)
1775 if (arr_fsmbi[i] != 0)
1776 {
1777 arr_andbi[0] = arr_fsmbi[i];
1778 arr_fsmbi[i] = 0xff;
1779 }
1780 for (i = 1; i < 16; i++)
1781 arr_andbi[i] = arr_andbi[0];
1782 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1783 reg_and = array_to_constant (imode, arr_andbi);
1784 emit_move_insn (to, reg_fsmbi);
1785 emit_insn (gen_rtx_SET
1786 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1787 return 1;
1788 }
dea01258 1789 case IC_POOL:
1790 if (reload_in_progress || reload_completed)
1791 {
1792 rtx mem = force_const_mem (mode, ops[1]);
1793 if (TARGET_LARGE_MEM)
1794 {
1795 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1796 emit_move_insn (addr, XEXP (mem, 0));
1797 mem = replace_equiv_address (mem, addr);
1798 }
1799 emit_move_insn (ops[0], mem);
1800 return 1;
1801 }
1802 break;
1803 case IC_IL1s:
1804 case IC_IL2s:
1805 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1806 {
1807 if (c == IC_IL2s)
1808 {
5df189be 1809 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1810 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1811 }
1812 else if (flag_pic)
1813 emit_insn (gen_pic (ops[0], ops[1]));
1814 if (flag_pic)
1815 {
1816 rtx pic_reg = get_pic_reg ();
1817 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1818 crtl->uses_pic_offset_table = 1;
dea01258 1819 }
1820 return flag_pic || c == IC_IL2s;
1821 }
1822 break;
1823 case IC_IL1:
1824 case IC_FSMBI:
1825 case IC_CPAT:
1826 break;
c8befdb9 1827 }
dea01258 1828 return 0;
c8befdb9 1829}
1830
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to avoid allocating it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
1837static int
1838need_to_save_reg (int regno, int saving)
1839{
3072d30e 1840 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1841 return 1;
1842 if (flag_pic
1843 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1844 && (!saving || crtl->uses_pic_offset_table)
644459d0 1845 && (!saving
3072d30e 1846 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1847 return 1;
1848 return 0;
1849}
1850
1851/* This function is only correct starting with local register
1852 allocation */
1853int
1854spu_saved_regs_size (void)
1855{
1856 int reg_save_size = 0;
1857 int regno;
1858
1859 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1860 if (need_to_save_reg (regno, 0))
1861 reg_save_size += 0x10;
1862 return reg_save_size;
1863}
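/* For example, a function that must save three registers reserves
   3 * 0x10 = 48 bytes here; frame_emit_store below writes each register
   out as a full 16-byte quadword.  */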
1864
1865static rtx
1866frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1867{
1868 rtx reg = gen_rtx_REG (V4SImode, regno);
1869 rtx mem =
1870 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1871 return emit_insn (gen_movv4si (mem, reg));
1872}
1873
1874static rtx
1875frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1876{
1877 rtx reg = gen_rtx_REG (V4SImode, regno);
1878 rtx mem =
1879 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1880 return emit_insn (gen_movv4si (reg, mem));
1881}
1882
1883/* This happens after reload, so we need to expand it. */
1884static rtx
1885frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1886{
1887 rtx insn;
1888 if (satisfies_constraint_K (GEN_INT (imm)))
1889 {
1890 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1891 }
1892 else
1893 {
3072d30e 1894 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1895 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1896 if (REGNO (src) == REGNO (scratch))
1897 abort ();
1898 }
644459d0 1899 return insn;
1900}
1901
1902/* Return nonzero if this function is known to have a null epilogue. */
1903
1904int
1905direct_return (void)
1906{
1907 if (reload_completed)
1908 {
1909 if (cfun->static_chain_decl == 0
1910 && (spu_saved_regs_size ()
1911 + get_frame_size ()
abe32cce 1912 + crtl->outgoing_args_size
1913 + crtl->args.pretend_args_size == 0)
644459d0 1914 && current_function_is_leaf)
1915 return 1;
1916 }
1917 return 0;
1918}
1919
1920/*
1921 The stack frame looks like this:
1922 +-------------+
1923 | incoming |
a8e019fa 1924 | args |
1925 AP -> +-------------+
644459d0 1926 | $lr save |
1927 +-------------+
1928 prev SP | back chain |
1929 +-------------+
1930 | var args |
abe32cce 1931 | reg save | crtl->args.pretend_args_size bytes
644459d0 1932 +-------------+
1933 | ... |
1934 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1935 FP -> +-------------+
644459d0 1936 | ... |
a8e019fa 1937 | vars | get_frame_size() bytes
1938 HFP -> +-------------+
644459d0 1939 | ... |
1940 | outgoing |
abe32cce 1941 | args | crtl->outgoing_args_size bytes
644459d0 1942 +-------------+
1943 | $lr of next |
1944 | frame |
1945 +-------------+
a8e019fa 1946 | back chain |
1947 SP -> +-------------+
644459d0 1948
1949*/
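/* As a worked example (the numbers are hypothetical): a non-leaf function
   with get_frame_size () == 48, two registers to save (2 * 16 = 32 bytes),
   16 bytes of outgoing args and no pretend args gets
   total_size = 48 + 32 + 16 + STACK_POINTER_OFFSET below; because that is
   <= 2000, the old $sp is stored as the back chain at -total_size first
   and the stack pointer is adjusted afterwards.  */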
1950void
1951spu_expand_prologue (void)
1952{
1953 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1954 HOST_WIDE_INT total_size;
1955 HOST_WIDE_INT saved_regs_size;
1956 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1957 rtx scratch_reg_0, scratch_reg_1;
1958 rtx insn, real;
1959
1960 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1961 the "toplevel" insn chain. */
1962 emit_note (NOTE_INSN_DELETED);
1963
1964 if (flag_pic && optimize == 0)
18d50ae6 1965 crtl->uses_pic_offset_table = 1;
644459d0 1966
1967 if (spu_naked_function_p (current_function_decl))
1968 return;
1969
1970 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1971 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1972
1973 saved_regs_size = spu_saved_regs_size ();
1974 total_size = size + saved_regs_size
abe32cce 1975 + crtl->outgoing_args_size
1976 + crtl->args.pretend_args_size;
644459d0 1977
1978 if (!current_function_is_leaf
18d50ae6 1979 || cfun->calls_alloca || total_size > 0)
644459d0 1980 total_size += STACK_POINTER_OFFSET;
1981
1982 /* Save this first because code after this might use the link
1983 register as a scratch register. */
1984 if (!current_function_is_leaf)
1985 {
1986 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1987 RTX_FRAME_RELATED_P (insn) = 1;
1988 }
1989
1990 if (total_size > 0)
1991 {
abe32cce 1992 offset = -crtl->args.pretend_args_size;
644459d0 1993 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1994 if (need_to_save_reg (regno, 1))
1995 {
1996 offset -= 16;
1997 insn = frame_emit_store (regno, sp_reg, offset);
1998 RTX_FRAME_RELATED_P (insn) = 1;
1999 }
2000 }
2001
18d50ae6 2002 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2003 {
2004 rtx pic_reg = get_pic_reg ();
2005 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2006 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2007 }
2008
2009 if (total_size > 0)
2010 {
2011 if (flag_stack_check)
2012 {
d819917f 2013 /* We compare against total_size-1 because
644459d0 2014 ($sp >= total_size) <=> ($sp > total_size-1) */
2015 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2016 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2017 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2018 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2019 {
2020 emit_move_insn (scratch_v4si, size_v4si);
2021 size_v4si = scratch_v4si;
2022 }
2023 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2024 emit_insn (gen_vec_extractv4si
2025 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2026 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2027 }
2028
2029 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2030 the value of the previous $sp because we save it as the back
2031 chain. */
2032 if (total_size <= 2000)
2033 {
2034 /* In this case we save the back chain first. */
2035 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2036 insn =
2037 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2038 }
644459d0 2039 else
2040 {
2041 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2042 insn =
2043 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2044 }
2045 RTX_FRAME_RELATED_P (insn) = 1;
2046 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2047 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2048
2049 if (total_size > 2000)
2050 {
2051 /* Save the back chain ptr */
2052 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2053 }
2054
2055 if (frame_pointer_needed)
2056 {
2057 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2058 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2059 + crtl->outgoing_args_size;
644459d0 2060 /* Set the new frame_pointer */
d8dfeb55 2061 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2062 RTX_FRAME_RELATED_P (insn) = 1;
2063 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2064 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2065 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2066 }
2067 }
2068
2069 emit_note (NOTE_INSN_DELETED);
2070}
2071
2072void
2073spu_expand_epilogue (bool sibcall_p)
2074{
2075 int size = get_frame_size (), offset, regno;
2076 HOST_WIDE_INT saved_regs_size, total_size;
2077 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2078 rtx jump, scratch_reg_0;
2079
2080 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2081 the "toplevel" insn chain. */
2082 emit_note (NOTE_INSN_DELETED);
2083
2084 if (spu_naked_function_p (current_function_decl))
2085 return;
2086
2087 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2088
2089 saved_regs_size = spu_saved_regs_size ();
2090 total_size = size + saved_regs_size
abe32cce 2091 + crtl->outgoing_args_size
2092 + crtl->args.pretend_args_size;
644459d0 2093
2094 if (!current_function_is_leaf
18d50ae6 2095 || cfun->calls_alloca || total_size > 0)
644459d0 2096 total_size += STACK_POINTER_OFFSET;
2097
2098 if (total_size > 0)
2099 {
18d50ae6 2100 if (cfun->calls_alloca)
644459d0 2101 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2102 else
2103 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2104
2105
2106 if (saved_regs_size > 0)
2107 {
abe32cce 2108 offset = -crtl->args.pretend_args_size;
644459d0 2109 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2110 if (need_to_save_reg (regno, 1))
2111 {
2112 offset -= 0x10;
2113 frame_emit_load (regno, sp_reg, offset);
2114 }
2115 }
2116 }
2117
2118 if (!current_function_is_leaf)
2119 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2120
2121 if (!sibcall_p)
2122 {
18b42941 2123 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2124 jump = emit_jump_insn (gen__return ());
2125 emit_barrier_after (jump);
2126 }
2127
2128 emit_note (NOTE_INSN_DELETED);
2129}
2130
2131rtx
2132spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2133{
2134 if (count != 0)
2135 return 0;
2136 /* This is inefficient because it ends up copying to a save-register
2137 which then gets saved even though $lr has already been saved. But
2138 it does generate better code for leaf functions and we don't need
2139 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2140 used for __builtin_return_address anyway, so maybe we don't care if
2141 it's inefficient. */
2142 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2143}
2144\f
2145
2146/* Given VAL, generate a constant appropriate for MODE.
2147 If MODE is a vector mode, every element will be VAL.
2148 For TImode, VAL will be zero extended to 128 bits. */
2149rtx
2150spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2151{
2152 rtx inner;
2153 rtvec v;
2154 int units, i;
2155
2156 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2157 || GET_MODE_CLASS (mode) == MODE_FLOAT
2158 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2159 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2160
2161 if (GET_MODE_CLASS (mode) == MODE_INT)
2162 return immed_double_const (val, 0, mode);
2163
2164 /* val is the bit representation of the float */
2165 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2166 return hwint_to_const_double (mode, val);
2167
2168 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2169 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2170 else
2171 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2172
2173 units = GET_MODE_NUNITS (mode);
2174
2175 v = rtvec_alloc (units);
2176
2177 for (i = 0; i < units; ++i)
2178 RTVEC_ELT (v, i) = inner;
2179
2180 return gen_rtx_CONST_VECTOR (mode, v);
2181}
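/* For example, spu_const (V4SImode, 1) returns a CONST_VECTOR whose four
   SImode elements are all (const_int 1); for V4SFmode, VAL is taken as
   the bit pattern of each SFmode element.  */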
644459d0 2182
5474166e 2183/* Create a MODE vector constant from 4 ints. */
2184rtx
2185spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2186{
2187 unsigned char arr[16];
2188 arr[0] = (a >> 24) & 0xff;
2189 arr[1] = (a >> 16) & 0xff;
2190 arr[2] = (a >> 8) & 0xff;
2191 arr[3] = (a >> 0) & 0xff;
2192 arr[4] = (b >> 24) & 0xff;
2193 arr[5] = (b >> 16) & 0xff;
2194 arr[6] = (b >> 8) & 0xff;
2195 arr[7] = (b >> 0) & 0xff;
2196 arr[8] = (c >> 24) & 0xff;
2197 arr[9] = (c >> 16) & 0xff;
2198 arr[10] = (c >> 8) & 0xff;
2199 arr[11] = (c >> 0) & 0xff;
2200 arr[12] = (d >> 24) & 0xff;
2201 arr[13] = (d >> 16) & 0xff;
2202 arr[14] = (d >> 8) & 0xff;
2203 arr[15] = (d >> 0) & 0xff;
 2204 return array_to_constant (mode, arr);
2205}
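/* The four ints are laid out big-endian, one per word, so for example
   spu_const_from_ints (V4SImode, 0x01020304, 0x05060708, 0x090a0b0c,
   0x0d0e0f10) yields the byte sequence 0x01, 0x02, ..., 0x10.  */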
5a976006 2206\f
2207/* branch hint stuff */
5474166e 2208
644459d0 2209/* An array of these is used to propagate hints to predecessor blocks. */
2210struct spu_bb_info
2211{
5a976006 2212 rtx prop_jump; /* propagated from another block */
2213 int bb_index; /* the original block. */
644459d0 2214};
5a976006 2215static struct spu_bb_info *spu_bb_info;
644459d0 2216
5a976006 2217#define STOP_HINT_P(INSN) \
2218 (GET_CODE(INSN) == CALL_INSN \
2219 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2220 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2221
2222/* 1 when RTX is a hinted branch or its target. We keep track of
2223 what has been hinted so the safe-hint code can test it easily. */
2224#define HINTED_P(RTX) \
2225 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2226
2227/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2228#define SCHED_ON_EVEN_P(RTX) \
2229 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2230
2231/* Emit a nop for INSN such that the two will dual issue. This assumes
2232 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2233 We check for TImode to handle a MULTI1 insn which has dual issued its
2234 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2235 ADDR_VEC insns. */
2236static void
2237emit_nop_for_insn (rtx insn)
644459d0 2238{
5a976006 2239 int p;
2240 rtx new_insn;
2241 p = get_pipe (insn);
2242 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2243 new_insn = emit_insn_after (gen_lnop (), insn);
2244 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2245 {
5a976006 2246 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2247 PUT_MODE (new_insn, TImode);
2248 PUT_MODE (insn, VOIDmode);
2249 }
2250 else
2251 new_insn = emit_insn_after (gen_lnop (), insn);
2252 recog_memoized (new_insn);
2253}
2254
2255/* Insert nops in basic blocks to meet dual issue alignment
2256 requirements. Also make sure hbrp and hint instructions are at least
2257 one cycle apart, possibly inserting a nop. */
2258static void
 2259 pad_bb (void)
2260{
2261 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2262 int length;
2263 int addr;
2264
2265 /* This sets up INSN_ADDRESSES. */
2266 shorten_branches (get_insns ());
2267
2268 /* Keep track of length added by nops. */
2269 length = 0;
2270
2271 prev_insn = 0;
2272 insn = get_insns ();
2273 if (!active_insn_p (insn))
2274 insn = next_active_insn (insn);
2275 for (; insn; insn = next_insn)
2276 {
2277 next_insn = next_active_insn (insn);
2278 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2279 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2280 {
5a976006 2281 if (hbr_insn)
2282 {
2283 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2284 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2285 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2286 || (a1 - a0 == 4))
2287 {
2288 prev_insn = emit_insn_before (gen_lnop (), insn);
2289 PUT_MODE (prev_insn, GET_MODE (insn));
2290 PUT_MODE (insn, TImode);
2291 length += 4;
2292 }
2293 }
2294 hbr_insn = insn;
2295 }
2296 if (INSN_CODE (insn) == CODE_FOR_blockage)
2297 {
2298 if (GET_MODE (insn) == TImode)
2299 PUT_MODE (next_insn, TImode);
2300 insn = next_insn;
2301 next_insn = next_active_insn (insn);
2302 }
2303 addr = INSN_ADDRESSES (INSN_UID (insn));
2304 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2305 {
2306 if (((addr + length) & 7) != 0)
2307 {
2308 emit_nop_for_insn (prev_insn);
2309 length += 4;
2310 }
644459d0 2311 }
5a976006 2312 else if (GET_MODE (insn) == TImode
2313 && ((next_insn && GET_MODE (next_insn) != TImode)
2314 || get_attr_type (insn) == TYPE_MULTI0)
2315 && ((addr + length) & 7) != 0)
2316 {
2317 /* prev_insn will always be set because the first insn is
2318 always 8-byte aligned. */
2319 emit_nop_for_insn (prev_insn);
2320 length += 4;
2321 }
2322 prev_insn = insn;
644459d0 2323 }
644459d0 2324}
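/* For example, if a call marked SCHED_ON_EVEN_P would otherwise land at
   an address that is 4 mod 8, a 4-byte nop or lnop is emitted after the
   preceding insn so the call moves up to the next 8-byte boundary.  */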
2325
5a976006 2326\f
2327/* Routines for branch hints. */
2328
644459d0 2329static void
5a976006 2330spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2331 int distance, sbitmap blocks)
644459d0 2332{
5a976006 2333 rtx branch_label = 0;
2334 rtx hint;
2335 rtx insn;
2336 rtx table;
644459d0 2337
2338 if (before == 0 || branch == 0 || target == 0)
2339 return;
2340
5a976006 2341 /* While scheduling we require hints to be no further than 600 bytes, so
2342 we need to enforce that here too */
644459d0 2343 if (distance > 600)
2344 return;
2345
5a976006 2346 /* If BEFORE is a basic block note, emit the hint after the note. */
2347 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2348 before = NEXT_INSN (before);
644459d0 2349
2350 branch_label = gen_label_rtx ();
2351 LABEL_NUSES (branch_label)++;
2352 LABEL_PRESERVE_P (branch_label) = 1;
2353 insn = emit_label_before (branch_label, branch);
2354 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2355 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2356
2357 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2358 recog_memoized (hint);
2359 HINTED_P (branch) = 1;
644459d0 2360
5a976006 2361 if (GET_CODE (target) == LABEL_REF)
2362 HINTED_P (XEXP (target, 0)) = 1;
2363 else if (tablejump_p (branch, 0, &table))
644459d0 2364 {
5a976006 2365 rtvec vec;
2366 int j;
2367 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2368 vec = XVEC (PATTERN (table), 0);
2369 else
2370 vec = XVEC (PATTERN (table), 1);
2371 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2372 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2373 }
5a976006 2374
2375 if (distance >= 588)
644459d0 2376 {
5a976006 2377 /* Make sure the hint isn't scheduled any earlier than this point,
 2378 which could make it too far for the branch offset to fit. */
2379 recog_memoized (emit_insn_before (gen_blockage (), hint));
2380 }
2381 else if (distance <= 8 * 4)
2382 {
2383 /* To guarantee at least 8 insns between the hint and branch we
2384 insert nops. */
2385 int d;
2386 for (d = distance; d < 8 * 4; d += 4)
2387 {
2388 insn =
2389 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2390 recog_memoized (insn);
2391 }
2392
2393 /* Make sure any nops inserted aren't scheduled before the hint. */
2394 recog_memoized (emit_insn_after (gen_blockage (), hint));
2395
2396 /* Make sure any nops inserted aren't scheduled after the call. */
2397 if (CALL_P (branch) && distance < 8 * 4)
2398 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2399 }
644459d0 2400}
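/* For example, a hint placed 20 bytes (5 insns) before its branch gets
   (8 * 4 - 20) / 4 = 3 nopn insns inserted after it, guaranteeing at
   least 8 insns between hint and branch; at distances of 588 or more a
   blockage is emitted before the hint so scheduling cannot move it any
   further from the branch.  */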
2401
2402/* Returns 0 if we don't want a hint for this branch. Otherwise return
2403 the rtx for the branch target. */
2404static rtx
2405get_branch_target (rtx branch)
2406{
2407 if (GET_CODE (branch) == JUMP_INSN)
2408 {
2409 rtx set, src;
2410
2411 /* Return statements */
2412 if (GET_CODE (PATTERN (branch)) == RETURN)
2413 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2414
2415 /* jump table */
2416 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2417 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2418 return 0;
2419
fcc31b99 2420 /* ASM GOTOs. */
604157f6 2421 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2422 return NULL;
2423
644459d0 2424 set = single_set (branch);
2425 src = SET_SRC (set);
2426 if (GET_CODE (SET_DEST (set)) != PC)
2427 abort ();
2428
2429 if (GET_CODE (src) == IF_THEN_ELSE)
2430 {
2431 rtx lab = 0;
2432 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2433 if (note)
2434 {
2435 /* If the more probable case is not a fall through, then
2436 try a branch hint. */
2437 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2438 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2439 && GET_CODE (XEXP (src, 1)) != PC)
2440 lab = XEXP (src, 1);
2441 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2442 && GET_CODE (XEXP (src, 2)) != PC)
2443 lab = XEXP (src, 2);
2444 }
2445 if (lab)
2446 {
2447 if (GET_CODE (lab) == RETURN)
2448 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2449 return lab;
2450 }
2451 return 0;
2452 }
2453
2454 return src;
2455 }
2456 else if (GET_CODE (branch) == CALL_INSN)
2457 {
2458 rtx call;
2459 /* All of our call patterns are in a PARALLEL and the CALL is
2460 the first pattern in the PARALLEL. */
2461 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2462 abort ();
2463 call = XVECEXP (PATTERN (branch), 0, 0);
2464 if (GET_CODE (call) == SET)
2465 call = SET_SRC (call);
2466 if (GET_CODE (call) != CALL)
2467 abort ();
2468 return XEXP (XEXP (call, 0), 0);
2469 }
2470 return 0;
2471}
2472
5a976006 2473/* The special $hbr register is used to prevent the insn scheduler from
2474 moving hbr insns across instructions which invalidate them. It
2475 should only be used in a clobber, and this function searches for
2476 insns which clobber it. */
2477static bool
2478insn_clobbers_hbr (rtx insn)
2479{
2480 if (INSN_P (insn)
2481 && GET_CODE (PATTERN (insn)) == PARALLEL)
2482 {
2483 rtx parallel = PATTERN (insn);
2484 rtx clobber;
2485 int j;
2486 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2487 {
2488 clobber = XVECEXP (parallel, 0, j);
2489 if (GET_CODE (clobber) == CLOBBER
2490 && GET_CODE (XEXP (clobber, 0)) == REG
2491 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2492 return 1;
2493 }
2494 }
2495 return 0;
2496}
2497
2498/* Search up to 32 insns starting at FIRST:
2499 - at any kind of hinted branch, just return
2500 - at any unconditional branch in the first 15 insns, just return
2501 - at a call or indirect branch, after the first 15 insns, force it to
2502 an even address and return
2503 - at any unconditional branch, after the first 15 insns, force it to
2504 an even address.
 2505 At the end of the search, insert an hbrp within 4 insns of FIRST,
2506 and an hbrp within 16 instructions of FIRST.
2507 */
644459d0 2508static void
5a976006 2509insert_hbrp_for_ilb_runout (rtx first)
644459d0 2510{
5a976006 2511 rtx insn, before_4 = 0, before_16 = 0;
2512 int addr = 0, length, first_addr = -1;
2513 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2514 int insert_lnop_after = 0;
2515 for (insn = first; insn; insn = NEXT_INSN (insn))
2516 if (INSN_P (insn))
2517 {
2518 if (first_addr == -1)
2519 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2520 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2521 length = get_attr_length (insn);
2522
2523 if (before_4 == 0 && addr + length >= 4 * 4)
2524 before_4 = insn;
2525 /* We test for 14 instructions because the first hbrp will add
2526 up to 2 instructions. */
2527 if (before_16 == 0 && addr + length >= 14 * 4)
2528 before_16 = insn;
2529
2530 if (INSN_CODE (insn) == CODE_FOR_hbr)
2531 {
2532 /* Make sure an hbrp is at least 2 cycles away from a hint.
2533 Insert an lnop after the hbrp when necessary. */
2534 if (before_4 == 0 && addr > 0)
2535 {
2536 before_4 = insn;
2537 insert_lnop_after |= 1;
2538 }
2539 else if (before_4 && addr <= 4 * 4)
2540 insert_lnop_after |= 1;
2541 if (before_16 == 0 && addr > 10 * 4)
2542 {
2543 before_16 = insn;
2544 insert_lnop_after |= 2;
2545 }
2546 else if (before_16 && addr <= 14 * 4)
2547 insert_lnop_after |= 2;
2548 }
644459d0 2549
5a976006 2550 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2551 {
2552 if (addr < hbrp_addr0)
2553 hbrp_addr0 = addr;
2554 else if (addr < hbrp_addr1)
2555 hbrp_addr1 = addr;
2556 }
644459d0 2557
5a976006 2558 if (CALL_P (insn) || JUMP_P (insn))
2559 {
2560 if (HINTED_P (insn))
2561 return;
2562
2563 /* Any branch after the first 15 insns should be on an even
2564 address to avoid a special case branch. There might be
2565 some nops and/or hbrps inserted, so we test after 10
2566 insns. */
2567 if (addr > 10 * 4)
2568 SCHED_ON_EVEN_P (insn) = 1;
2569 }
644459d0 2570
5a976006 2571 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2572 return;
2573
2574
2575 if (addr + length >= 32 * 4)
644459d0 2576 {
5a976006 2577 gcc_assert (before_4 && before_16);
2578 if (hbrp_addr0 > 4 * 4)
644459d0 2579 {
5a976006 2580 insn =
2581 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2582 recog_memoized (insn);
2583 INSN_ADDRESSES_NEW (insn,
2584 INSN_ADDRESSES (INSN_UID (before_4)));
2585 PUT_MODE (insn, GET_MODE (before_4));
2586 PUT_MODE (before_4, TImode);
2587 if (insert_lnop_after & 1)
644459d0 2588 {
5a976006 2589 insn = emit_insn_before (gen_lnop (), before_4);
2590 recog_memoized (insn);
2591 INSN_ADDRESSES_NEW (insn,
2592 INSN_ADDRESSES (INSN_UID (before_4)));
2593 PUT_MODE (insn, TImode);
644459d0 2594 }
644459d0 2595 }
5a976006 2596 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2597 && hbrp_addr1 > 16 * 4)
644459d0 2598 {
5a976006 2599 insn =
2600 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2601 recog_memoized (insn);
2602 INSN_ADDRESSES_NEW (insn,
2603 INSN_ADDRESSES (INSN_UID (before_16)));
2604 PUT_MODE (insn, GET_MODE (before_16));
2605 PUT_MODE (before_16, TImode);
2606 if (insert_lnop_after & 2)
644459d0 2607 {
5a976006 2608 insn = emit_insn_before (gen_lnop (), before_16);
2609 recog_memoized (insn);
2610 INSN_ADDRESSES_NEW (insn,
2611 INSN_ADDRESSES (INSN_UID
2612 (before_16)));
2613 PUT_MODE (insn, TImode);
644459d0 2614 }
2615 }
5a976006 2616 return;
644459d0 2617 }
644459d0 2618 }
5a976006 2619 else if (BARRIER_P (insn))
2620 return;
644459d0 2621
644459d0 2622}
5a976006 2623
2624/* The SPU might hang when it executes 48 inline instructions after a
2625 hinted branch jumps to its hinted target. The beginning of a
2626 function and the return from a call might have been hinted, and must
2627 be handled as well. To prevent a hang we insert 2 hbrps. The first
2628 should be within 6 insns of the branch target. The second should be
2629 within 22 insns of the branch target. When determining if hbrps are
 2630 necessary, we look for only 32 inline instructions, because up to
 2631 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2632 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2633static void
5a976006 2634insert_hbrp (void)
644459d0 2635{
5a976006 2636 rtx insn;
2637 if (TARGET_SAFE_HINTS)
644459d0 2638 {
5a976006 2639 shorten_branches (get_insns ());
2640 /* Insert hbrp at beginning of function */
2641 insn = next_active_insn (get_insns ());
2642 if (insn)
2643 insert_hbrp_for_ilb_runout (insn);
2644 /* Insert hbrp after hinted targets. */
2645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2647 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2648 }
644459d0 2649}
2650
5a976006 2651static int in_spu_reorg;
2652
2653/* Insert branch hints. There are no branch optimizations after this
2654 pass, so it's safe to set our branch hints now. */
644459d0 2655static void
5a976006 2656spu_machine_dependent_reorg (void)
644459d0 2657{
5a976006 2658 sbitmap blocks;
2659 basic_block bb;
2660 rtx branch, insn;
2661 rtx branch_target = 0;
2662 int branch_addr = 0, insn_addr, required_dist = 0;
2663 int i;
2664 unsigned int j;
644459d0 2665
5a976006 2666 if (!TARGET_BRANCH_HINTS || optimize == 0)
2667 {
2668 /* We still do it for unoptimized code because an external
2669 function might have hinted a call or return. */
2670 insert_hbrp ();
2671 pad_bb ();
2672 return;
2673 }
644459d0 2674
5a976006 2675 blocks = sbitmap_alloc (last_basic_block);
2676 sbitmap_zero (blocks);
644459d0 2677
5a976006 2678 in_spu_reorg = 1;
2679 compute_bb_for_insn ();
2680
2681 compact_blocks ();
2682
2683 spu_bb_info =
2684 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2685 sizeof (struct spu_bb_info));
2686
2687 /* We need exact insn addresses and lengths. */
2688 shorten_branches (get_insns ());
2689
2690 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2691 {
5a976006 2692 bb = BASIC_BLOCK (i);
2693 branch = 0;
2694 if (spu_bb_info[i].prop_jump)
644459d0 2695 {
5a976006 2696 branch = spu_bb_info[i].prop_jump;
2697 branch_target = get_branch_target (branch);
2698 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2699 required_dist = spu_hint_dist;
2700 }
2701 /* Search from end of a block to beginning. In this loop, find
 2702 jumps which need a branch hint and emit the hint only when:
2703 - it's an indirect branch and we're at the insn which sets
2704 the register
2705 - we're at an insn that will invalidate the hint. e.g., a
2706 call, another hint insn, inline asm that clobbers $hbr, and
2707 some inlined operations (divmodsi4). Don't consider jumps
2708 because they are only at the end of a block and are
2709 considered when we are deciding whether to propagate
2710 - we're getting too far away from the branch. The hbr insns
2711 only have a signed 10 bit offset
2712 We go back as far as possible so the branch will be considered
2713 for propagation when we get to the beginning of the block. */
2714 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2715 {
2716 if (INSN_P (insn))
2717 {
2718 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2719 if (branch
2720 && ((GET_CODE (branch_target) == REG
2721 && set_of (branch_target, insn) != NULL_RTX)
2722 || insn_clobbers_hbr (insn)
2723 || branch_addr - insn_addr > 600))
2724 {
2725 rtx next = NEXT_INSN (insn);
2726 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2727 if (insn != BB_END (bb)
2728 && branch_addr - next_addr >= required_dist)
2729 {
2730 if (dump_file)
2731 fprintf (dump_file,
2732 "hint for %i in block %i before %i\n",
2733 INSN_UID (branch), bb->index,
2734 INSN_UID (next));
2735 spu_emit_branch_hint (next, branch, branch_target,
2736 branch_addr - next_addr, blocks);
2737 }
2738 branch = 0;
2739 }
2740
2741 /* JUMP_P will only be true at the end of a block. When
2742 branch is already set it means we've previously decided
2743 to propagate a hint for that branch into this block. */
2744 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2745 {
2746 branch = 0;
2747 if ((branch_target = get_branch_target (insn)))
2748 {
2749 branch = insn;
2750 branch_addr = insn_addr;
2751 required_dist = spu_hint_dist;
2752 }
2753 }
2754 }
2755 if (insn == BB_HEAD (bb))
2756 break;
2757 }
2758
2759 if (branch)
2760 {
2761 /* If we haven't emitted a hint for this branch yet, it might
2762 be profitable to emit it in one of the predecessor blocks,
2763 especially for loops. */
2764 rtx bbend;
2765 basic_block prev = 0, prop = 0, prev2 = 0;
2766 int loop_exit = 0, simple_loop = 0;
2767 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2768
2769 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2770 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2771 prev = EDGE_PRED (bb, j)->src;
2772 else
2773 prev2 = EDGE_PRED (bb, j)->src;
2774
2775 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2776 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2777 loop_exit = 1;
2778 else if (EDGE_SUCC (bb, j)->dest == bb)
2779 simple_loop = 1;
2780
2781 /* If this branch is a loop exit then propagate to previous
2782 fallthru block. This catches the cases when it is a simple
2783 loop or when there is an initial branch into the loop. */
2784 if (prev && (loop_exit || simple_loop)
2785 && prev->loop_depth <= bb->loop_depth)
2786 prop = prev;
2787
 2788 /* If there is only one adjacent predecessor, don't propagate
2789 outside this loop. This loop_depth test isn't perfect, but
2790 I'm not sure the loop_father member is valid at this point. */
2791 else if (prev && single_pred_p (bb)
2792 && prev->loop_depth == bb->loop_depth)
2793 prop = prev;
2794
2795 /* If this is the JOIN block of a simple IF-THEN then
 2796 propagate the hint to the HEADER block. */
2797 else if (prev && prev2
2798 && EDGE_COUNT (bb->preds) == 2
2799 && EDGE_COUNT (prev->preds) == 1
2800 && EDGE_PRED (prev, 0)->src == prev2
2801 && prev2->loop_depth == bb->loop_depth
2802 && GET_CODE (branch_target) != REG)
2803 prop = prev;
2804
2805 /* Don't propagate when:
2806 - this is a simple loop and the hint would be too far
2807 - this is not a simple loop and there are 16 insns in
2808 this block already
2809 - the predecessor block ends in a branch that will be
2810 hinted
2811 - the predecessor block ends in an insn that invalidates
2812 the hint */
2813 if (prop
2814 && prop->index >= 0
2815 && (bbend = BB_END (prop))
2816 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2817 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2818 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2819 {
2820 if (dump_file)
2821 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2822 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2823 bb->index, prop->index, bb->loop_depth,
2824 INSN_UID (branch), loop_exit, simple_loop,
2825 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2826
2827 spu_bb_info[prop->index].prop_jump = branch;
2828 spu_bb_info[prop->index].bb_index = i;
2829 }
2830 else if (branch_addr - next_addr >= required_dist)
2831 {
2832 if (dump_file)
2833 fprintf (dump_file, "hint for %i in block %i before %i\n",
2834 INSN_UID (branch), bb->index,
2835 INSN_UID (NEXT_INSN (insn)));
2836 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2837 branch_addr - next_addr, blocks);
2838 }
2839 branch = 0;
644459d0 2840 }
644459d0 2841 }
5a976006 2842 free (spu_bb_info);
644459d0 2843
5a976006 2844 if (!sbitmap_empty_p (blocks))
2845 find_many_sub_basic_blocks (blocks);
2846
2847 /* We have to schedule to make sure alignment is ok. */
2848 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2849
2850 /* The hints need to be scheduled, so call it again. */
2851 schedule_insns ();
2852
2853 insert_hbrp ();
2854
2855 pad_bb ();
2856
8f1d58ad 2857 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2858 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2859 {
2860 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2861 between its branch label and the branch. We don't move the
2862 label because GCC expects it at the beginning of the block. */
2863 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2864 rtx label_ref = XVECEXP (unspec, 0, 0);
2865 rtx label = XEXP (label_ref, 0);
2866 rtx branch;
2867 int offset = 0;
2868 for (branch = NEXT_INSN (label);
2869 !JUMP_P (branch) && !CALL_P (branch);
2870 branch = NEXT_INSN (branch))
2871 if (NONJUMP_INSN_P (branch))
2872 offset += get_attr_length (branch);
2873 if (offset > 0)
2874 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2875 }
5a976006 2876
2877 if (spu_flag_var_tracking)
644459d0 2878 {
5a976006 2879 df_analyze ();
2880 timevar_push (TV_VAR_TRACKING);
2881 variable_tracking_main ();
2882 timevar_pop (TV_VAR_TRACKING);
2883 df_finish_pass (false);
644459d0 2884 }
5a976006 2885
2886 free_bb_for_insn ();
2887
2888 in_spu_reorg = 0;
644459d0 2889}
2890\f
2891
2892/* Insn scheduling routines, primarily for dual issue. */
2893static int
2894spu_sched_issue_rate (void)
2895{
2896 return 2;
2897}
2898
2899static int
5a976006 2900uses_ls_unit(rtx insn)
644459d0 2901{
5a976006 2902 rtx set = single_set (insn);
2903 if (set != 0
2904 && (GET_CODE (SET_DEST (set)) == MEM
2905 || GET_CODE (SET_SRC (set)) == MEM))
2906 return 1;
2907 return 0;
644459d0 2908}
2909
2910static int
2911get_pipe (rtx insn)
2912{
2913 enum attr_type t;
2914 /* Handle inline asm */
2915 if (INSN_CODE (insn) == -1)
2916 return -1;
2917 t = get_attr_type (insn);
2918 switch (t)
2919 {
2920 case TYPE_CONVERT:
2921 return -2;
2922 case TYPE_MULTI0:
2923 return -1;
2924
2925 case TYPE_FX2:
2926 case TYPE_FX3:
2927 case TYPE_SPR:
2928 case TYPE_NOP:
2929 case TYPE_FXB:
2930 case TYPE_FPD:
2931 case TYPE_FP6:
2932 case TYPE_FP7:
644459d0 2933 return 0;
2934
2935 case TYPE_LNOP:
2936 case TYPE_SHUF:
2937 case TYPE_LOAD:
2938 case TYPE_STORE:
2939 case TYPE_BR:
2940 case TYPE_MULTI1:
2941 case TYPE_HBR:
5a976006 2942 case TYPE_IPREFETCH:
644459d0 2943 return 1;
2944 default:
2945 abort ();
2946 }
2947}
2948
5a976006 2949
2950/* haifa-sched.c has a static variable that keeps track of the current
2951 cycle. It is passed to spu_sched_reorder, and we record it here for
2952 use by spu_sched_variable_issue. It won't be accurate if the
 2953 scheduler updates its clock_var between the two calls. */
2954static int clock_var;
2955
2956/* This is used to keep track of insn alignment. Set to 0 at the
2957 beginning of each block and increased by the "length" attr of each
2958 insn scheduled. */
2959static int spu_sched_length;
2960
2961/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2962 ready list appropriately in spu_sched_reorder(). */
2963static int pipe0_clock;
2964static int pipe1_clock;
2965
2966static int prev_clock_var;
2967
2968static int prev_priority;
2969
2970/* The SPU needs to load the next ilb sometime during the execution of
2971 the previous ilb. There is a potential conflict if every cycle has a
2972 load or store. To avoid the conflict we make sure the load/store
2973 unit is free for at least one cycle during the execution of insns in
2974 the previous ilb. */
2975static int spu_ls_first;
2976static int prev_ls_clock;
2977
2978static void
2979spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2980 int max_ready ATTRIBUTE_UNUSED)
2981{
2982 spu_sched_length = 0;
2983}
2984
2985static void
2986spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2987 int max_ready ATTRIBUTE_UNUSED)
2988{
2989 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2990 {
2991 /* When any block might be at least 8-byte aligned, assume they
2992 will all be at least 8-byte aligned to make sure dual issue
2993 works out correctly. */
2994 spu_sched_length = 0;
2995 }
2996 spu_ls_first = INT_MAX;
2997 clock_var = -1;
2998 prev_ls_clock = -1;
2999 pipe0_clock = -1;
3000 pipe1_clock = -1;
3001 prev_clock_var = -1;
3002 prev_priority = -1;
3003}
3004
644459d0 3005static int
5a976006 3006spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3007 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3008{
5a976006 3009 int len;
3010 int p;
644459d0 3011 if (GET_CODE (PATTERN (insn)) == USE
3012 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3013 || (len = get_attr_length (insn)) == 0)
3014 return more;
3015
3016 spu_sched_length += len;
3017
3018 /* Reset on inline asm */
3019 if (INSN_CODE (insn) == -1)
3020 {
3021 spu_ls_first = INT_MAX;
3022 pipe0_clock = -1;
3023 pipe1_clock = -1;
3024 return 0;
3025 }
3026 p = get_pipe (insn);
3027 if (p == 0)
3028 pipe0_clock = clock_var;
3029 else
3030 pipe1_clock = clock_var;
3031
3032 if (in_spu_reorg)
3033 {
3034 if (clock_var - prev_ls_clock > 1
3035 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3036 spu_ls_first = INT_MAX;
3037 if (uses_ls_unit (insn))
3038 {
3039 if (spu_ls_first == INT_MAX)
3040 spu_ls_first = spu_sched_length;
3041 prev_ls_clock = clock_var;
3042 }
3043
3044 /* The scheduler hasn't inserted the nop, but we will later on.
3045 Include those nops in spu_sched_length. */
3046 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3047 spu_sched_length += 4;
3048 prev_clock_var = clock_var;
3049
3050 /* more is -1 when called from spu_sched_reorder for new insns
3051 that don't have INSN_PRIORITY */
3052 if (more >= 0)
3053 prev_priority = INSN_PRIORITY (insn);
3054 }
3055
 3056 /* Always try issuing more insns. spu_sched_reorder will decide
3057 when the cycle should be advanced. */
3058 return 1;
3059}
3060
3061/* This function is called for both TARGET_SCHED_REORDER and
3062 TARGET_SCHED_REORDER2. */
3063static int
3064spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3065 rtx *ready, int *nreadyp, int clock)
3066{
3067 int i, nready = *nreadyp;
3068 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3069 rtx insn;
3070
3071 clock_var = clock;
3072
3073 if (nready <= 0 || pipe1_clock >= clock)
3074 return 0;
3075
3076 /* Find any rtl insns that don't generate assembly insns and schedule
3077 them first. */
3078 for (i = nready - 1; i >= 0; i--)
3079 {
3080 insn = ready[i];
3081 if (INSN_CODE (insn) == -1
3082 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3083 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3084 {
3085 ready[i] = ready[nready - 1];
3086 ready[nready - 1] = insn;
3087 return 1;
3088 }
3089 }
3090
3091 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3092 for (i = 0; i < nready; i++)
3093 if (INSN_CODE (ready[i]) != -1)
3094 {
3095 insn = ready[i];
3096 switch (get_attr_type (insn))
3097 {
3098 default:
3099 case TYPE_MULTI0:
3100 case TYPE_CONVERT:
3101 case TYPE_FX2:
3102 case TYPE_FX3:
3103 case TYPE_SPR:
3104 case TYPE_NOP:
3105 case TYPE_FXB:
3106 case TYPE_FPD:
3107 case TYPE_FP6:
3108 case TYPE_FP7:
3109 pipe_0 = i;
3110 break;
3111 case TYPE_LOAD:
3112 case TYPE_STORE:
3113 pipe_ls = i;
3114 case TYPE_LNOP:
3115 case TYPE_SHUF:
3116 case TYPE_BR:
3117 case TYPE_MULTI1:
3118 case TYPE_HBR:
3119 pipe_1 = i;
3120 break;
3121 case TYPE_IPREFETCH:
3122 pipe_hbrp = i;
3123 break;
3124 }
3125 }
3126
3127 /* In the first scheduling phase, schedule loads and stores together
3128 to increase the chance they will get merged during postreload CSE. */
3129 if (!reload_completed && pipe_ls >= 0)
3130 {
3131 insn = ready[pipe_ls];
3132 ready[pipe_ls] = ready[nready - 1];
3133 ready[nready - 1] = insn;
3134 return 1;
3135 }
3136
3137 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3138 if (pipe_hbrp >= 0)
3139 pipe_1 = pipe_hbrp;
3140
3141 /* When we have loads/stores in every cycle of the last 15 insns and
3142 we are about to schedule another load/store, emit an hbrp insn
3143 instead. */
3144 if (in_spu_reorg
3145 && spu_sched_length - spu_ls_first >= 4 * 15
3146 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3147 {
3148 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3149 recog_memoized (insn);
3150 if (pipe0_clock < clock)
3151 PUT_MODE (insn, TImode);
3152 spu_sched_variable_issue (file, verbose, insn, -1);
3153 return 0;
3154 }
3155
3156 /* In general, we want to emit nops to increase dual issue, but dual
3157 issue isn't faster when one of the insns could be scheduled later
 3158 without affecting the critical path. We look at INSN_PRIORITY to
 3159 make a good guess, but it isn't perfect, so -mdual-nops=n can be
 3160 used to adjust it. */
3161 if (in_spu_reorg && spu_dual_nops < 10)
3162 {
 3163 /* When we are at an even address and we are not issuing nops to
3164 improve scheduling then we need to advance the cycle. */
3165 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3166 && (spu_dual_nops == 0
3167 || (pipe_1 != -1
3168 && prev_priority >
3169 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3170 return 0;
3171
3172 /* When at an odd address, schedule the highest priority insn
3173 without considering pipeline. */
3174 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3175 && (spu_dual_nops == 0
3176 || (prev_priority >
3177 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3178 return 1;
3179 }
3180
3181
3182 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3183 pipe0 insn in the ready list, schedule it. */
3184 if (pipe0_clock < clock && pipe_0 >= 0)
3185 schedule_i = pipe_0;
3186
3187 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3188 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3189 else
3190 schedule_i = pipe_1;
3191
3192 if (schedule_i > -1)
3193 {
3194 insn = ready[schedule_i];
3195 ready[schedule_i] = ready[nready - 1];
3196 ready[nready - 1] = insn;
3197 return 1;
3198 }
3199 return 0;
644459d0 3200}
3201
3202/* INSN is dependent on DEP_INSN. */
3203static int
5a976006 3204spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3205{
5a976006 3206 rtx set;
3207
3208 /* The blockage pattern is used to prevent instructions from being
3209 moved across it and has no cost. */
3210 if (INSN_CODE (insn) == CODE_FOR_blockage
3211 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3212 return 0;
3213
9d98604b 3214 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3215 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3216 return 0;
3217
3218 /* Make sure hbrps are spread out. */
3219 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3220 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3221 return 8;
3222
3223 /* Make sure hints and hbrps are 2 cycles apart. */
3224 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3225 || INSN_CODE (insn) == CODE_FOR_hbr)
3226 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3227 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3228 return 2;
3229
3230 /* An hbrp has no real dependency on other insns. */
3231 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3232 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3233 return 0;
3234
3235 /* Assuming that it is unlikely an argument register will be used in
3236 the first cycle of the called function, we reduce the cost for
3237 slightly better scheduling of dep_insn. When not hinted, the
3238 mispredicted branch would hide the cost as well. */
3239 if (CALL_P (insn))
3240 {
3241 rtx target = get_branch_target (insn);
3242 if (GET_CODE (target) != REG || !set_of (target, insn))
3243 return cost - 2;
3244 return cost;
3245 }
3246
3247 /* And when returning from a function, let's assume the return values
3248 are completed sooner too. */
3249 if (CALL_P (dep_insn))
644459d0 3250 return cost - 2;
5a976006 3251
 3252 /* Make sure an instruction that loads from the back chain is scheduled
3253 away from the return instruction so a hint is more likely to get
3254 issued. */
3255 if (INSN_CODE (insn) == CODE_FOR__return
3256 && (set = single_set (dep_insn))
3257 && GET_CODE (SET_DEST (set)) == REG
3258 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3259 return 20;
3260
644459d0 3261 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3262 scheduler makes every insn in a block anti-dependent on the final
3263 jump_insn. We adjust here so higher cost insns will get scheduled
3264 earlier. */
5a976006 3265 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3266 return insn_cost (dep_insn) - 3;
5a976006 3267
644459d0 3268 return cost;
3269}
3270\f
3271/* Create a CONST_DOUBLE from a string. */
3272struct rtx_def *
3273spu_float_const (const char *string, enum machine_mode mode)
3274{
3275 REAL_VALUE_TYPE value;
3276 value = REAL_VALUE_ATOF (string, mode);
3277 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3278}
3279
644459d0 3280int
3281spu_constant_address_p (rtx x)
3282{
3283 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3284 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3285 || GET_CODE (x) == HIGH);
3286}
3287
3288static enum spu_immediate
3289which_immediate_load (HOST_WIDE_INT val)
3290{
3291 gcc_assert (val == trunc_int_for_mode (val, SImode));
3292
3293 if (val >= -0x8000 && val <= 0x7fff)
3294 return SPU_IL;
3295 if (val >= 0 && val <= 0x3ffff)
3296 return SPU_ILA;
3297 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3298 return SPU_ILH;
3299 if ((val & 0xffff) == 0)
3300 return SPU_ILHU;
3301
3302 return SPU_NONE;
3303}
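/* A few examples: 0x1234 is loadable with il (SPU_IL), 0x12345 with ila
   (SPU_ILA), 0x00120012 with ilh (SPU_ILH, both halfwords equal), and
   0x12340000 with ilhu (SPU_ILHU); 0x12345678 returns SPU_NONE and needs
   an ilhu/iohl pair.  */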
3304
dea01258 3305/* Return true when OP can be loaded by one of the il instructions, or
3306 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3307int
3308immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3309{
3310 if (CONSTANT_P (op))
3311 {
3312 enum immediate_class c = classify_immediate (op, mode);
5df189be 3313 return c == IC_IL1 || c == IC_IL1s
3072d30e 3314 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3315 }
3316 return 0;
3317}
3318
 3319 /* Return true if the first SIZE bytes of ARR are a constant that can be
3320 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3321 represent the size and offset of the instruction to use. */
3322static int
 3323 cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
3324{
3325 int cpat, run, i, start;
3326 cpat = 1;
3327 run = 0;
3328 start = -1;
3329 for (i = 0; i < size && cpat; i++)
3330 if (arr[i] != i+16)
3331 {
3332 if (!run)
3333 {
3334 start = i;
3335 if (arr[i] == 3)
3336 run = 1;
3337 else if (arr[i] == 2 && arr[i+1] == 3)
3338 run = 2;
3339 else if (arr[i] == 0)
3340 {
3341 while (arr[i+run] == run && i+run < 16)
3342 run++;
3343 if (run != 4 && run != 8)
3344 cpat = 0;
3345 }
3346 else
3347 cpat = 0;
3348 if ((i & (run-1)) != 0)
3349 cpat = 0;
3350 i += run;
3351 }
3352 else
3353 cpat = 0;
3354 }
b01a6dc3 3355 if (cpat && (run || size < 16))
dea01258 3356 {
3357 if (run == 0)
3358 run = 1;
3359 if (prun)
3360 *prun = run;
3361 if (pstart)
3362 *pstart = start == -1 ? 16-run : start;
3363 return 1;
3364 }
3365 return 0;
3366}
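/* As an illustration, the byte pattern
   { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }
   is accepted with *prun == 4 and *pstart == 4: it differs from the
   identity pattern (i + 16) only in a 4-byte run { 0,1,2,3 } at a
   word-aligned offset, the kind of control word cwd generates.  */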
3367
3368/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3369 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3370static enum immediate_class
3371classify_immediate (rtx op, enum machine_mode mode)
644459d0 3372{
3373 HOST_WIDE_INT val;
3374 unsigned char arr[16];
5df189be 3375 int i, j, repeated, fsmbi, repeat;
dea01258 3376
3377 gcc_assert (CONSTANT_P (op));
3378
644459d0 3379 if (GET_MODE (op) != VOIDmode)
3380 mode = GET_MODE (op);
3381
dea01258 3382 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3383 if (!flag_pic
3384 && mode == V4SImode
dea01258 3385 && GET_CODE (op) == CONST_VECTOR
3386 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3387 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3388 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3389 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3390 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3391 op = CONST_VECTOR_ELT (op, 0);
644459d0 3392
dea01258 3393 switch (GET_CODE (op))
3394 {
3395 case SYMBOL_REF:
3396 case LABEL_REF:
3397 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3398
dea01258 3399 case CONST:
0cfc65d4 3400 /* We can never know if the resulting address fits in 18 bits and can be
3401 loaded with ila. For now, assume the address will not overflow if
3402 the displacement is "small" (fits 'K' constraint). */
3403 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3404 {
3405 rtx sym = XEXP (XEXP (op, 0), 0);
3406 rtx cst = XEXP (XEXP (op, 0), 1);
3407
3408 if (GET_CODE (sym) == SYMBOL_REF
3409 && GET_CODE (cst) == CONST_INT
3410 && satisfies_constraint_K (cst))
3411 return IC_IL1s;
3412 }
3413 return IC_IL2s;
644459d0 3414
dea01258 3415 case HIGH:
3416 return IC_IL1s;
3417
3418 case CONST_VECTOR:
3419 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3420 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3421 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3422 return IC_POOL;
3423 /* Fall through. */
3424
3425 case CONST_INT:
3426 case CONST_DOUBLE:
3427 constant_to_array (mode, op, arr);
644459d0 3428
dea01258 3429 /* Check that each 4-byte slot is identical. */
3430 repeated = 1;
3431 for (i = 4; i < 16; i += 4)
3432 for (j = 0; j < 4; j++)
3433 if (arr[j] != arr[i + j])
3434 repeated = 0;
3435
3436 if (repeated)
3437 {
3438 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3439 val = trunc_int_for_mode (val, SImode);
3440
3441 if (which_immediate_load (val) != SPU_NONE)
3442 return IC_IL1;
3443 }
3444
3445 /* Any mode of 2 bytes or smaller can be loaded with an il
3446 instruction. */
3447 gcc_assert (GET_MODE_SIZE (mode) > 2);
3448
3449 fsmbi = 1;
5df189be 3450 repeat = 0;
dea01258 3451 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3452 if (arr[i] != 0 && repeat == 0)
3453 repeat = arr[i];
3454 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3455 fsmbi = 0;
3456 if (fsmbi)
5df189be 3457 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3458
3459 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3460 return IC_CPAT;
3461
3462 if (repeated)
3463 return IC_IL2;
3464
3465 return IC_POOL;
3466 default:
3467 break;
3468 }
3469 gcc_unreachable ();
644459d0 3470}
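/* For example, the V4SImode constant { 0xff000000, 0xff0000, 0xff00, 0xff }
   has bytes that are all either 0x00 or 0xff, so it is IC_FSMBI; the same
   pattern built from 0x7f instead of 0xff is IC_FSMBI2 and is expanded as
   an fsmbi of the 0xff mask followed by an and (see the IC_FSMBI2 case
   above).  */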
3471
3472static enum spu_immediate
3473which_logical_immediate (HOST_WIDE_INT val)
3474{
3475 gcc_assert (val == trunc_int_for_mode (val, SImode));
3476
3477 if (val >= -0x200 && val <= 0x1ff)
3478 return SPU_ORI;
3479 if (val >= 0 && val <= 0xffff)
3480 return SPU_IOHL;
3481 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3482 {
3483 val = trunc_int_for_mode (val, HImode);
3484 if (val >= -0x200 && val <= 0x1ff)
3485 return SPU_ORHI;
3486 if ((val & 0xff) == ((val >> 8) & 0xff))
3487 {
3488 val = trunc_int_for_mode (val, QImode);
3489 if (val >= -0x200 && val <= 0x1ff)
3490 return SPU_ORBI;
3491 }
3492 }
3493 return SPU_NONE;
3494}
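/* For example: 0x1ff maps to SPU_ORI, 0x1234 to SPU_IOHL, 0x00120012 to
   SPU_ORHI (both halfwords equal and within the 10-bit immediate range),
   0x80808080 to SPU_ORBI, and 0x12345678 to SPU_NONE.  */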
3495
5df189be 3496/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3497 CONST_DOUBLEs. */
3498static int
3499const_vector_immediate_p (rtx x)
3500{
3501 int i;
3502 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3503 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3504 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3505 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3506 return 0;
3507 return 1;
3508}
3509
644459d0 3510int
3511logical_immediate_p (rtx op, enum machine_mode mode)
3512{
3513 HOST_WIDE_INT val;
3514 unsigned char arr[16];
3515 int i, j;
3516
3517 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3518 || GET_CODE (op) == CONST_VECTOR);
3519
5df189be 3520 if (GET_CODE (op) == CONST_VECTOR
3521 && !const_vector_immediate_p (op))
3522 return 0;
3523
644459d0 3524 if (GET_MODE (op) != VOIDmode)
3525 mode = GET_MODE (op);
3526
3527 constant_to_array (mode, op, arr);
3528
3529 /* Check that bytes are repeated. */
3530 for (i = 4; i < 16; i += 4)
3531 for (j = 0; j < 4; j++)
3532 if (arr[j] != arr[i + j])
3533 return 0;
3534
3535 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3536 val = trunc_int_for_mode (val, SImode);
3537
3538 i = which_logical_immediate (val);
3539 return i != SPU_NONE && i != SPU_IOHL;
3540}
3541
3542int
3543iohl_immediate_p (rtx op, enum machine_mode mode)
3544{
3545 HOST_WIDE_INT val;
3546 unsigned char arr[16];
3547 int i, j;
3548
3549 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3550 || GET_CODE (op) == CONST_VECTOR);
3551
5df189be 3552 if (GET_CODE (op) == CONST_VECTOR
3553 && !const_vector_immediate_p (op))
3554 return 0;
3555
644459d0 3556 if (GET_MODE (op) != VOIDmode)
3557 mode = GET_MODE (op);
3558
3559 constant_to_array (mode, op, arr);
3560
3561 /* Check that bytes are repeated. */
3562 for (i = 4; i < 16; i += 4)
3563 for (j = 0; j < 4; j++)
3564 if (arr[j] != arr[i + j])
3565 return 0;
3566
3567 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3568 val = trunc_int_for_mode (val, SImode);
3569
3570 return val >= 0 && val <= 0xffff;
3571}
3572
3573int
3574arith_immediate_p (rtx op, enum machine_mode mode,
3575 HOST_WIDE_INT low, HOST_WIDE_INT high)
3576{
3577 HOST_WIDE_INT val;
3578 unsigned char arr[16];
3579 int bytes, i, j;
3580
3581 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3582 || GET_CODE (op) == CONST_VECTOR);
3583
5df189be 3584 if (GET_CODE (op) == CONST_VECTOR
3585 && !const_vector_immediate_p (op))
3586 return 0;
3587
644459d0 3588 if (GET_MODE (op) != VOIDmode)
3589 mode = GET_MODE (op);
3590
3591 constant_to_array (mode, op, arr);
3592
3593 if (VECTOR_MODE_P (mode))
3594 mode = GET_MODE_INNER (mode);
3595
3596 bytes = GET_MODE_SIZE (mode);
3597 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3598
3599 /* Check that bytes are repeated. */
3600 for (i = bytes; i < 16; i += bytes)
3601 for (j = 0; j < bytes; j++)
3602 if (arr[j] != arr[i + j])
3603 return 0;
3604
3605 val = arr[0];
3606 for (j = 1; j < bytes; j++)
3607 val = (val << 8) | arr[j];
3608
3609 val = trunc_int_for_mode (val, mode);
3610
3611 return val >= low && val <= high;
3612}
3613
56c7bfc2 3614/* TRUE when op is an immediate and an exact power of 2, and given that
3615 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3616 all entries must be the same. */
3617bool
3618exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3619{
3620 enum machine_mode int_mode;
3621 HOST_WIDE_INT val;
3622 unsigned char arr[16];
3623 int bytes, i, j;
3624
3625 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3626 || GET_CODE (op) == CONST_VECTOR);
3627
3628 if (GET_CODE (op) == CONST_VECTOR
3629 && !const_vector_immediate_p (op))
3630 return 0;
3631
3632 if (GET_MODE (op) != VOIDmode)
3633 mode = GET_MODE (op);
3634
3635 constant_to_array (mode, op, arr);
3636
3637 if (VECTOR_MODE_P (mode))
3638 mode = GET_MODE_INNER (mode);
3639
3640 bytes = GET_MODE_SIZE (mode);
3641 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3642
3643 /* Check that bytes are repeated. */
3644 for (i = bytes; i < 16; i += bytes)
3645 for (j = 0; j < bytes; j++)
3646 if (arr[j] != arr[i + j])
3647 return 0;
3648
3649 val = arr[0];
3650 for (j = 1; j < bytes; j++)
3651 val = (val << 8) | arr[j];
3652
3653 val = trunc_int_for_mode (val, int_mode);
3654
3655 /* Currently, we only handle SFmode */
3656 gcc_assert (mode == SFmode);
3657 if (mode == SFmode)
3658 {
3659 int exp = (val >> 23) - 127;
3660 return val > 0 && (val & 0x007fffff) == 0
3661 && exp >= low && exp <= high;
3662 }
3663 return FALSE;
3664}
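/* For instance, the SFmode constant 2.0f has the bit pattern 0x40000000:
   its mantissa bits are zero and its unbiased exponent is
   (0x40000000 >> 23) - 127 = 1, so it is accepted whenever
   LOW <= 1 <= HIGH.  */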
3665
6cf5579e 3666/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3667
3668static int
3669ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3670{
3671 rtx x = *px;
3672 tree decl;
3673
3674 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3675 {
3676 rtx plus = XEXP (x, 0);
3677 rtx op0 = XEXP (plus, 0);
3678 rtx op1 = XEXP (plus, 1);
3679 if (GET_CODE (op1) == CONST_INT)
3680 x = op0;
3681 }
3682
3683 return (GET_CODE (x) == SYMBOL_REF
3684 && (decl = SYMBOL_REF_DECL (x)) != 0
3685 && TREE_CODE (decl) == VAR_DECL
3686 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3687}
3688
644459d0 3689/* We accept:
5b865faf 3690 - any 32-bit constant (SImode, SFmode)
644459d0 3691 - any constant that can be generated with fsmbi (any mode)
5b865faf 3692 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3693 (DImode, DFmode)
5b865faf 3694 - a 128-bit constant where the four 32-bit words match. */
644459d0 3695int
3696spu_legitimate_constant_p (rtx x)
3697{
5df189be 3698 if (GET_CODE (x) == HIGH)
3699 x = XEXP (x, 0);
6cf5579e 3700
3701 /* Reject any __ea qualified reference. These can't appear in
3702 instructions but must be forced to the constant pool. */
3703 if (for_each_rtx (&x, ea_symbol_ref, 0))
3704 return 0;
3705
644459d0 3706 /* V4SI with all identical symbols is valid. */
5df189be 3707 if (!flag_pic
3708 && GET_MODE (x) == V4SImode
644459d0 3709 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3710 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3711 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3712 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3713 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3714 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3715
5df189be 3716 if (GET_CODE (x) == CONST_VECTOR
3717 && !const_vector_immediate_p (x))
3718 return 0;
644459d0 3719 return 1;
3720}
3721
3722/* Valid addresses are:
3723 - symbol_ref, label_ref, const
3724 - reg
9d98604b 3725 - reg + const_int, where const_int is 16 byte aligned
644459d0 3726 - reg + reg, alignment doesn't matter
3727 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3728 ignore the 4 least significant bits of the const. We only care about
3729 16 byte modes because the expand phase will change all smaller MEM
3730 references to TImode. */
3731static bool
3732spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3733 rtx x, bool reg_ok_strict)
644459d0 3734{
9d98604b 3735 int aligned = GET_MODE_SIZE (mode) >= 16;
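 /* A 16-byte access may present its address as (and X -16); strip the
 low-bits mask and validate the underlying address. */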
3736 if (aligned
3737 && GET_CODE (x) == AND
644459d0 3738 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3739 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3740 x = XEXP (x, 0);
3741 switch (GET_CODE (x))
3742 {
644459d0 3743 case LABEL_REF:
6cf5579e 3744 return !TARGET_LARGE_MEM;
3745
9d98604b 3746 case SYMBOL_REF:
644459d0 3747 case CONST:
6cf5579e 3748 /* Keep __ea references until reload so that spu_expand_mov can see them
3749 in MEMs. */
3750 if (ea_symbol_ref (&x, 0))
3751 return !reload_in_progress && !reload_completed;
9d98604b 3752 return !TARGET_LARGE_MEM;
644459d0 3753
3754 case CONST_INT:
3755 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3756
3757 case SUBREG:
3758 x = XEXP (x, 0);
9d98604b 3759 if (!REG_P (x))
 3760 return 0;
644459d0 3761
3762 case REG:
3763 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3764
3765 case PLUS:
3766 case LO_SUM:
3767 {
3768 rtx op0 = XEXP (x, 0);
3769 rtx op1 = XEXP (x, 1);
3770 if (GET_CODE (op0) == SUBREG)
3771 op0 = XEXP (op0, 0);
3772 if (GET_CODE (op1) == SUBREG)
3773 op1 = XEXP (op1, 0);
644459d0 3774 if (GET_CODE (op0) == REG
3775 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3776 && GET_CODE (op1) == CONST_INT
3777 && INTVAL (op1) >= -0x2000
3778 && INTVAL (op1) <= 0x1fff
9d98604b 3779 && (!aligned || (INTVAL (op1) & 15) == 0))
3780 return TRUE;
644459d0 3781 if (GET_CODE (op0) == REG
3782 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3783 && GET_CODE (op1) == REG
3784 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3785 return TRUE;
644459d0 3786 }
3787 break;
3788
3789 default:
3790 break;
3791 }
9d98604b 3792 return FALSE;
644459d0 3793}
3794
6cf5579e 3795/* Like spu_legitimate_address_p, except with named addresses. */
3796static bool
3797spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3798 bool reg_ok_strict, addr_space_t as)
3799{
3800 if (as == ADDR_SPACE_EA)
3801 return (REG_P (x) && (GET_MODE (x) == EAmode));
3802
3803 else if (as != ADDR_SPACE_GENERIC)
3804 gcc_unreachable ();
3805
3806 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3807}
3808
644459d0 3809/* When the address is reg + const_int, force the const_int into a
fa7637bd 3810 register. */
644459d0 3811rtx
3812spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3813 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3814{
3815 rtx op0, op1;
3816 /* Make sure both operands are registers. */
3817 if (GET_CODE (x) == PLUS)
3818 {
3819 op0 = XEXP (x, 0);
3820 op1 = XEXP (x, 1);
3821 if (ALIGNED_SYMBOL_REF_P (op0))
3822 {
3823 op0 = force_reg (Pmode, op0);
3824 mark_reg_pointer (op0, 128);
3825 }
3826 else if (GET_CODE (op0) != REG)
3827 op0 = force_reg (Pmode, op0);
3828 if (ALIGNED_SYMBOL_REF_P (op1))
3829 {
3830 op1 = force_reg (Pmode, op1);
3831 mark_reg_pointer (op1, 128);
3832 }
3833 else if (GET_CODE (op1) != REG)
3834 op1 = force_reg (Pmode, op1);
3835 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3836 }
41e3a0c7 3837 return x;
644459d0 3838}
3839
6cf5579e 3840/* Like spu_legitimate_address, except with named address support. */
3841static rtx
3842spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3843 addr_space_t as)
3844{
3845 if (as != ADDR_SPACE_GENERIC)
3846 return x;
3847
3848 return spu_legitimize_address (x, oldx, mode);
3849}
3850
644459d0 3851/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3852 struct attribute_spec.handler. */
3853static tree
3854spu_handle_fndecl_attribute (tree * node,
3855 tree name,
3856 tree args ATTRIBUTE_UNUSED,
3857 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3858{
3859 if (TREE_CODE (*node) != FUNCTION_DECL)
3860 {
67a779df 3861 warning (0, "%qE attribute only applies to functions",
3862 name);
644459d0 3863 *no_add_attrs = true;
3864 }
3865
3866 return NULL_TREE;
3867}
3868
3869/* Handle the "vector" attribute. */
3870static tree
3871spu_handle_vector_attribute (tree * node, tree name,
3872 tree args ATTRIBUTE_UNUSED,
3873 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3874{
3875 tree type = *node, result = NULL_TREE;
3876 enum machine_mode mode;
3877 int unsigned_p;
3878
3879 while (POINTER_TYPE_P (type)
3880 || TREE_CODE (type) == FUNCTION_TYPE
3881 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3882 type = TREE_TYPE (type);
3883
3884 mode = TYPE_MODE (type);
3885
3886 unsigned_p = TYPE_UNSIGNED (type);
3887 switch (mode)
3888 {
3889 case DImode:
3890 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3891 break;
3892 case SImode:
3893 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3894 break;
3895 case HImode:
3896 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3897 break;
3898 case QImode:
3899 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3900 break;
3901 case SFmode:
3902 result = V4SF_type_node;
3903 break;
3904 case DFmode:
3905 result = V2DF_type_node;
3906 break;
3907 default:
3908 break;
3909 }
3910
3911 /* Propagate qualifiers attached to the element type
3912 onto the vector type. */
3913 if (result && result != type && TYPE_QUALS (type))
3914 result = build_qualified_type (result, TYPE_QUALS (type));
3915
3916 *no_add_attrs = true; /* No need to hang on to the attribute. */
3917
3918 if (!result)
67a779df 3919 warning (0, "%qE attribute ignored", name);
644459d0 3920 else
d991e6e8 3921 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3922
3923 return NULL_TREE;
3924}
3925
f2b32076 3926/* Return nonzero if FUNC is a naked function. */
644459d0 3927static int
3928spu_naked_function_p (tree func)
3929{
3930 tree a;
3931
3932 if (TREE_CODE (func) != FUNCTION_DECL)
3933 abort ();
3934
3935 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3936 return a != NULL_TREE;
3937}
3938
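/* Editor note (added comment, assuming the frame layout implied by the
   arithmetic below): return the offset to add when eliminating register FROM
   in favor of register TO, accounting for saved registers, the local frame,
   the outgoing argument area, and the STACK_POINTER_OFFSET area reserved
   whenever a frame is needed. */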
3939int
3940spu_initial_elimination_offset (int from, int to)
3941{
3942 int saved_regs_size = spu_saved_regs_size ();
3943 int sp_offset = 0;
abe32cce 3944 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3945 || get_frame_size () || saved_regs_size)
3946 sp_offset = STACK_POINTER_OFFSET;
3947 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3948 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3949 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3950 return get_frame_size ();
644459d0 3951 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3952 return sp_offset + crtl->outgoing_args_size
644459d0 3953 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3954 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3955 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3956 else
3957 gcc_unreachable ();
644459d0 3958}
3959
3960rtx
fb80456a 3961spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3962{
3963 enum machine_mode mode = TYPE_MODE (type);
3964 int byte_size = ((mode == BLKmode)
3965 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3966
3967 /* Make sure small structs are left justified in a register. */
3968 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3969 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3970 {
3971 enum machine_mode smode;
3972 rtvec v;
3973 int i;
3974 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3975 int n = byte_size / UNITS_PER_WORD;
3976 v = rtvec_alloc (nregs);
3977 for (i = 0; i < n; i++)
3978 {
3979 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3980 gen_rtx_REG (TImode,
3981 FIRST_RETURN_REGNUM
3982 + i),
3983 GEN_INT (UNITS_PER_WORD * i));
3984 byte_size -= UNITS_PER_WORD;
3985 }
3986
3987 if (n < nregs)
3988 {
3989 if (byte_size < 4)
3990 byte_size = 4;
3991 smode =
3992 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3993 RTVEC_ELT (v, n) =
3994 gen_rtx_EXPR_LIST (VOIDmode,
3995 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3996 GEN_INT (UNITS_PER_WORD * n));
3997 }
3998 return gen_rtx_PARALLEL (mode, v);
3999 }
4000 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4001}
4002
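/* Editor note (added comment, derived from the code below): return the
   register (or register PARALLEL) in which to pass an argument of MODE and
   TYPE, or 0 to pass it on the stack. CUM is the number of argument register
   slots already used. */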
4003rtx
4004spu_function_arg (CUMULATIVE_ARGS cum,
4005 enum machine_mode mode,
4006 tree type, int named ATTRIBUTE_UNUSED)
4007{
4008 int byte_size;
4009
4010 if (cum >= MAX_REGISTER_ARGS)
4011 return 0;
4012
4013 byte_size = ((mode == BLKmode)
4014 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4015
4016 /* The ABI does not allow parameters to be passed partially in
4017 reg and partially in stack. */
4018 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4019 return 0;
4020
4021 /* Make sure small structs are left justified in a register. */
4022 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4023 && byte_size < UNITS_PER_WORD && byte_size > 0)
4024 {
4025 enum machine_mode smode;
4026 rtx gr_reg;
4027 if (byte_size < 4)
4028 byte_size = 4;
4029 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4030 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4031 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4032 const0_rtx);
4033 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4034 }
4035 else
4036 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4037}
4038
4039/* Variable sized types are passed by reference. */
4040static bool
4041spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4042 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4043 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4044{
4045 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4046}
4047\f
4048
4049/* Var args. */
4050
4051/* Create and return the va_list datatype.
4052
4053 On SPU, va_list is an array type equivalent to
4054
4055 typedef struct __va_list_tag
4056 {
4057 void *__args __attribute__((__aligned(16)));
4058 void *__skip __attribute__((__aligned(16)));
4059
4060 } va_list[1];
4061
fa7637bd 4062 where __args points to the arg that will be returned by the next
644459d0 4063 va_arg(), and __skip points to the previous stack frame such that
4064 when __args == __skip we should advance __args by 32 bytes. */
4065static tree
4066spu_build_builtin_va_list (void)
4067{
4068 tree f_args, f_skip, record, type_decl;
4069 bool owp;
4070
4071 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4072
4073 type_decl =
54e46243 4074 build_decl (BUILTINS_LOCATION,
4075 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4076
54e46243 4077 f_args = build_decl (BUILTINS_LOCATION,
4078 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4079 f_skip = build_decl (BUILTINS_LOCATION,
4080 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4081
4082 DECL_FIELD_CONTEXT (f_args) = record;
4083 DECL_ALIGN (f_args) = 128;
4084 DECL_USER_ALIGN (f_args) = 1;
4085
4086 DECL_FIELD_CONTEXT (f_skip) = record;
4087 DECL_ALIGN (f_skip) = 128;
4088 DECL_USER_ALIGN (f_skip) = 1;
4089
4090 TREE_CHAIN (record) = type_decl;
4091 TYPE_NAME (record) = type_decl;
4092 TYPE_FIELDS (record) = f_args;
4093 TREE_CHAIN (f_args) = f_skip;
4094
 4095 /* We know this is being padded and we want it to be. It is an internal
4096 type so hide the warnings from the user. */
4097 owp = warn_padded;
4098 warn_padded = false;
4099
4100 layout_type (record);
4101
4102 warn_padded = owp;
4103
4104 /* The correct type is an array type of one element. */
4105 return build_array_type (record, build_index_type (size_zero_node));
4106}
4107
4108/* Implement va_start by filling the va_list structure VALIST.
4109 NEXTARG points to the first anonymous stack argument.
4110
4111 The following global variables are used to initialize
4112 the va_list structure:
4113
abe32cce 4114 crtl->args.info;
644459d0 4115 the CUMULATIVE_ARGS for this function
4116
abe32cce 4117 crtl->args.arg_offset_rtx:
644459d0 4118 holds the offset of the first anonymous stack argument
4119 (relative to the virtual arg pointer). */
4120
8a58ed0a 4121static void
644459d0 4122spu_va_start (tree valist, rtx nextarg)
4123{
4124 tree f_args, f_skip;
4125 tree args, skip, t;
4126
4127 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4128 f_skip = TREE_CHAIN (f_args);
4129
4130 valist = build_va_arg_indirect_ref (valist);
4131 args =
4132 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4133 skip =
4134 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4135
4136 /* Find the __args area. */
4137 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4138 if (crtl->args.pretend_args_size > 0)
0de36bdb 4139 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4140 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4141 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4142 TREE_SIDE_EFFECTS (t) = 1;
4143 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4144
4145 /* Find the __skip area. */
4146 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4147 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4148 size_int (crtl->args.pretend_args_size
0de36bdb 4149 - STACK_POINTER_OFFSET));
75a70cf9 4150 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4151 TREE_SIDE_EFFECTS (t) = 1;
4152 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4153}
4154
4155/* Gimplify va_arg by updating the va_list structure
4156 VALIST as required to retrieve an argument of type
4157 TYPE, and returning that argument.
4158
4159 ret = va_arg(VALIST, TYPE);
4160
4161 generates code equivalent to:
4162
4163 paddedsize = (sizeof(TYPE) + 15) & -16;
4164 if (VALIST.__args + paddedsize > VALIST.__skip
4165 && VALIST.__args <= VALIST.__skip)
4166 addr = VALIST.__skip + 32;
4167 else
4168 addr = VALIST.__args;
4169 VALIST.__args = addr + paddedsize;
4170 ret = *(TYPE *)addr;
4171 */
4172static tree
75a70cf9 4173spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4174 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4175{
4176 tree f_args, f_skip;
4177 tree args, skip;
4178 HOST_WIDE_INT size, rsize;
4179 tree paddedsize, addr, tmp;
4180 bool pass_by_reference_p;
4181
4182 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4183 f_skip = TREE_CHAIN (f_args);
4184
4185 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4186 args =
4187 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4188 skip =
4189 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4190
4191 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4192
4193 /* if an object is dynamically sized, a pointer to it is passed
4194 instead of the object itself. */
4195 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4196 false);
4197 if (pass_by_reference_p)
4198 type = build_pointer_type (type);
4199 size = int_size_in_bytes (type);
4200 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4201
4202 /* build conditional expression to calculate addr. The expression
4203 will be gimplified later. */
0de36bdb 4204 paddedsize = size_int (rsize);
75a70cf9 4205 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4206 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4207 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4208 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4209 unshare_expr (skip)));
644459d0 4210
4211 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4212 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4213 size_int (32)), unshare_expr (args));
644459d0 4214
75a70cf9 4215 gimplify_assign (addr, tmp, pre_p);
644459d0 4216
4217 /* update VALIST.__args */
0de36bdb 4218 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4219 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4220
8115f0af 4221 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4222 addr);
644459d0 4223
4224 if (pass_by_reference_p)
4225 addr = build_va_arg_indirect_ref (addr);
4226
4227 return build_va_arg_indirect_ref (addr);
4228}
4229
4230/* Save parameter registers starting with the register that corresponds
4231 to the first unnamed parameters. If the first unnamed parameter is
4232 in the stack then save no registers. Set pretend_args_size to the
4233 amount of space needed to save the registers. */
4234void
4235spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4236 tree type, int *pretend_size, int no_rtl)
4237{
4238 if (!no_rtl)
4239 {
4240 rtx tmp;
4241 int regno;
4242 int offset;
4243 int ncum = *cum;
4244
 4245 /* cum currently points to the last named argument; we want to
4246 start at the next argument. */
4247 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4248
4249 offset = -STACK_POINTER_OFFSET;
4250 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4251 {
4252 tmp = gen_frame_mem (V4SImode,
4253 plus_constant (virtual_incoming_args_rtx,
4254 offset));
4255 emit_move_insn (tmp,
4256 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4257 offset += 16;
4258 }
4259 *pretend_size = offset + STACK_POINTER_OFFSET;
4260 }
4261}
4262\f
4263void
4264spu_conditional_register_usage (void)
4265{
4266 if (flag_pic)
4267 {
4268 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4269 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4270 }
644459d0 4271}
4272
9d98604b 4273/* This is called any time we inspect the alignment of a register for
4274 addresses. */
644459d0 4275static int
9d98604b 4276reg_aligned_for_addr (rtx x)
644459d0 4277{
9d98604b 4278 int regno =
4279 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4280 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4281}
4282
69ced2d6 4283/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4284 into its SYMBOL_REF_FLAGS. */
4285static void
4286spu_encode_section_info (tree decl, rtx rtl, int first)
4287{
4288 default_encode_section_info (decl, rtl, first);
4289
4290 /* If a variable has a forced alignment to < 16 bytes, mark it with
4291 SYMBOL_FLAG_ALIGN1. */
4292 if (TREE_CODE (decl) == VAR_DECL
4293 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4294 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4295}
4296
644459d0 4297/* Return TRUE if we are certain the mem refers to a complete object
4298 which is both 16-byte aligned and padded to a 16-byte boundary. This
4299 would make it safe to store with a single instruction.
4300 We guarantee the alignment and padding for static objects by aligning
4301 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4302 FIXME: We currently cannot guarantee this for objects on the stack
4303 because assign_parm_setup_stack calls assign_stack_local with the
4304 alignment of the parameter mode and in that case the alignment never
4305 gets adjusted by LOCAL_ALIGNMENT. */
4306static int
4307store_with_one_insn_p (rtx mem)
4308{
9d98604b 4309 enum machine_mode mode = GET_MODE (mem);
644459d0 4310 rtx addr = XEXP (mem, 0);
9d98604b 4311 if (mode == BLKmode)
644459d0 4312 return 0;
9d98604b 4313 if (GET_MODE_SIZE (mode) >= 16)
4314 return 1;
644459d0 4315 /* Only static objects. */
4316 if (GET_CODE (addr) == SYMBOL_REF)
4317 {
4318 /* We use the associated declaration to make sure the access is
fa7637bd 4319 referring to the whole object.
644459d0 4320 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4321 if it is necessary. Will there be cases where one exists, and
4322 the other does not? Will there be cases where both exist, but
4323 have different types? */
4324 tree decl = MEM_EXPR (mem);
4325 if (decl
4326 && TREE_CODE (decl) == VAR_DECL
4327 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4328 return 1;
4329 decl = SYMBOL_REF_DECL (addr);
4330 if (decl
4331 && TREE_CODE (decl) == VAR_DECL
4332 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4333 return 1;
4334 }
4335 return 0;
4336}
4337
9d98604b 4338/* Return 1 when the address is not valid for a simple load and store as
4339 required by the '_mov*' patterns. We could make this less strict
 4340 for loads, but we prefer MEMs to look the same so they are more
4341 likely to be merged. */
4342static int
4343address_needs_split (rtx mem)
4344{
4345 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4346 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4347 || !(store_with_one_insn_p (mem)
4348 || mem_is_padded_component_ref (mem))))
4349 return 1;
4350
4351 return 0;
4352}
4353
6cf5579e 4354static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4355static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4356static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4357
4358/* MEM is known to be an __ea qualified memory access. Emit a call to
4359 fetch the ppu memory to local store, and return its address in local
4360 store. */
4361
4362static void
4363ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4364{
4365 if (is_store)
4366 {
4367 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4368 if (!cache_fetch_dirty)
4369 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4370 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4371 2, ea_addr, EAmode, ndirty, SImode);
4372 }
4373 else
4374 {
4375 if (!cache_fetch)
4376 cache_fetch = init_one_libfunc ("__cache_fetch");
4377 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4378 1, ea_addr, EAmode);
4379 }
4380}
4381
4382/* Like ea_load_store, but do the cache tag comparison and, for stores,
4383 dirty bit marking, inline.
4384
4385 The cache control data structure is an array of
4386
4387 struct __cache_tag_array
4388 {
4389 unsigned int tag_lo[4];
4390 unsigned int tag_hi[4];
4391 void *data_pointer[4];
4392 int reserved[4];
4393 vector unsigned short dirty_bits[4];
4394 } */
4395
4396static void
4397ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4398{
4399 rtx ea_addr_si;
4400 HOST_WIDE_INT v;
4401 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4402 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4403 rtx index_mask = gen_reg_rtx (SImode);
4404 rtx tag_arr = gen_reg_rtx (Pmode);
4405 rtx splat_mask = gen_reg_rtx (TImode);
4406 rtx splat = gen_reg_rtx (V4SImode);
4407 rtx splat_hi = NULL_RTX;
4408 rtx tag_index = gen_reg_rtx (Pmode);
4409 rtx block_off = gen_reg_rtx (SImode);
4410 rtx tag_addr = gen_reg_rtx (Pmode);
4411 rtx tag = gen_reg_rtx (V4SImode);
4412 rtx cache_tag = gen_reg_rtx (V4SImode);
4413 rtx cache_tag_hi = NULL_RTX;
4414 rtx cache_ptrs = gen_reg_rtx (TImode);
4415 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4416 rtx tag_equal = gen_reg_rtx (V4SImode);
4417 rtx tag_equal_hi = NULL_RTX;
4418 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4419 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4420 rtx eq_index = gen_reg_rtx (SImode);
4421 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4422
4423 if (spu_ea_model != 32)
4424 {
4425 splat_hi = gen_reg_rtx (V4SImode);
4426 cache_tag_hi = gen_reg_rtx (V4SImode);
4427 tag_equal_hi = gen_reg_rtx (V4SImode);
4428 }
4429
4430 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4431 emit_move_insn (tag_arr, tag_arr_sym);
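 /* Shuffle pattern of bytes { 0,1,2,3, 0,1,2,3, ... }: used with shufb it
 replicates the first word of its first operand into all four slots. */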
4432 v = 0x0001020300010203LL;
4433 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4434 ea_addr_si = ea_addr;
4435 if (spu_ea_model != 32)
4436 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4437
4438 /* tag_index = ea_addr & (tag_array_size - 128) */
4439 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4440
4441 /* splat ea_addr to all 4 slots. */
4442 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4443 /* Similarly for high 32 bits of ea_addr. */
4444 if (spu_ea_model != 32)
4445 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4446
4447 /* block_off = ea_addr & 127 */
4448 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4449
4450 /* tag_addr = tag_arr + tag_index */
4451 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4452
4453 /* Read cache tags. */
4454 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4455 if (spu_ea_model != 32)
4456 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4457 plus_constant (tag_addr, 16)));
4458
4459 /* tag = ea_addr & -128 */
4460 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4461
4462 /* Read all four cache data pointers. */
4463 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4464 plus_constant (tag_addr, 32)));
4465
4466 /* Compare tags. */
4467 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4468 if (spu_ea_model != 32)
4469 {
4470 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4471 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4472 }
4473
4474 /* At most one of the tags compare equal, so tag_equal has one
4475 32-bit slot set to all 1's, with the other slots all zero.
4476 gbb picks off low bit from each byte in the 128-bit registers,
4477 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4478 we have a hit. */
4479 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4480 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4481
4482 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4483 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4484
 4485 /* This allows us to rotate the corresponding cache data pointer into
 4486 slot 0 (the rotate amount is eq_index mod 16 bytes). */
4487 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4488 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4489
4490 /* Add block offset to form final data address. */
4491 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4492
4493 /* Check that we did hit. */
4494 hit_label = gen_label_rtx ();
4495 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4496 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4497 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4498 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4499 hit_ref, pc_rtx)));
4500 /* Say that this branch is very likely to happen. */
4501 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4502 REG_NOTES (insn)
4503 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn));
4504
4505 ea_load_store (mem, is_store, ea_addr, data_addr);
4506 cont_label = gen_label_rtx ();
4507 emit_jump_insn (gen_jump (cont_label));
4508 emit_barrier ();
4509
4510 emit_label (hit_label);
4511
4512 if (is_store)
4513 {
4514 HOST_WIDE_INT v_hi;
4515 rtx dirty_bits = gen_reg_rtx (TImode);
4516 rtx dirty_off = gen_reg_rtx (SImode);
4517 rtx dirty_128 = gen_reg_rtx (TImode);
4518 rtx neg_block_off = gen_reg_rtx (SImode);
4519
4520 /* Set up mask with one dirty bit per byte of the mem we are
4521 writing, starting from top bit. */
4522 v_hi = v = -1;
4523 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4524 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4525 {
4526 v_hi = v;
4527 v = 0;
4528 }
4529 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4530
4531 /* Form index into cache dirty_bits. eq_index is one of
4532 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4533 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4534 offset to each of the four dirty_bits elements. */
4535 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4536
4537 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4538
4539 /* Rotate bit mask to proper bit. */
4540 emit_insn (gen_negsi2 (neg_block_off, block_off));
4541 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4542 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4543
4544 /* Or in the new dirty bits. */
4545 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4546
4547 /* Store. */
4548 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4549 }
4550
4551 emit_label (cont_label);
4552}
4553
4554static rtx
4555expand_ea_mem (rtx mem, bool is_store)
4556{
4557 rtx ea_addr;
4558 rtx data_addr = gen_reg_rtx (Pmode);
4559 rtx new_mem;
4560
4561 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4562 if (optimize_size || optimize == 0)
4563 ea_load_store (mem, is_store, ea_addr, data_addr);
4564 else
4565 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4566
4567 if (ea_alias_set == -1)
4568 ea_alias_set = new_alias_set ();
4569
4570 /* We generate a new MEM RTX to refer to the copy of the data
4571 in the cache. We do not copy memory attributes (except the
4572 alignment) from the original MEM, as they may no longer apply
4573 to the cache copy. */
4574 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4575 set_mem_alias_set (new_mem, ea_alias_set);
4576 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4577
4578 return new_mem;
4579}
4580
644459d0 4581int
4582spu_expand_mov (rtx * ops, enum machine_mode mode)
4583{
4584 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4585 abort ();
4586
4587 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4588 {
4589 rtx from = SUBREG_REG (ops[1]);
8d72495d 4590 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4591
4592 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4593 && GET_MODE_CLASS (imode) == MODE_INT
4594 && subreg_lowpart_p (ops[1]));
4595
4596 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4597 imode = SImode;
4598 if (imode != GET_MODE (from))
4599 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4600
4601 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4602 {
99bdde56 4603 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4604 emit_insn (GEN_FCN (icode) (ops[0], from));
4605 }
4606 else
4607 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4608 return 1;
4609 }
4610
4611 /* At least one of the operands needs to be a register. */
4612 if ((reload_in_progress | reload_completed) == 0
4613 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4614 {
4615 rtx temp = force_reg (mode, ops[1]);
4616 emit_move_insn (ops[0], temp);
4617 return 1;
4618 }
4619 if (reload_in_progress || reload_completed)
4620 {
dea01258 4621 if (CONSTANT_P (ops[1]))
4622 return spu_split_immediate (ops);
644459d0 4623 return 0;
4624 }
9d98604b 4625
4626 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4627 extend them. */
4628 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4629 {
9d98604b 4630 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4631 if (val != INTVAL (ops[1]))
644459d0 4632 {
9d98604b 4633 emit_move_insn (ops[0], GEN_INT (val));
4634 return 1;
644459d0 4635 }
4636 }
9d98604b 4637 if (MEM_P (ops[0]))
6cf5579e 4638 {
4639 if (MEM_ADDR_SPACE (ops[0]))
4640 ops[0] = expand_ea_mem (ops[0], true);
4641 return spu_split_store (ops);
4642 }
9d98604b 4643 if (MEM_P (ops[1]))
6cf5579e 4644 {
4645 if (MEM_ADDR_SPACE (ops[1]))
4646 ops[1] = expand_ea_mem (ops[1], false);
4647 return spu_split_load (ops);
4648 }
9d98604b 4649
644459d0 4650 return 0;
4651}
4652
9d98604b 4653static void
4654spu_convert_move (rtx dst, rtx src)
644459d0 4655{
9d98604b 4656 enum machine_mode mode = GET_MODE (dst);
4657 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4658 rtx reg;
4659 gcc_assert (GET_MODE (src) == TImode);
4660 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
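 /* The value occupies the left-most bytes of the TImode source (its preferred
 slot), so shift it down into the low part of INT_MODE: by 64 bits for
 DImode, by 96 bits for SImode and smaller. */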
4661 emit_insn (gen_rtx_SET (VOIDmode, reg,
4662 gen_rtx_TRUNCATE (int_mode,
4663 gen_rtx_LSHIFTRT (TImode, src,
4664 GEN_INT (int_mode == DImode ? 64 : 96)))));
4665 if (int_mode != mode)
4666 {
4667 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4668 emit_move_insn (dst, reg);
4669 }
4670}
644459d0 4671
9d98604b 4672/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4673 the address from SRC and SRC+16. Return a REG or CONST_INT that
4674 specifies how many bytes to rotate the loaded registers, plus any
4675 extra from EXTRA_ROTQBY. The address and rotate amounts are
4676 normalized to improve merging of loads and rotate computations. */
4677static rtx
4678spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4679{
4680 rtx addr = XEXP (src, 0);
4681 rtx p0, p1, rot, addr0, addr1;
4682 int rot_amt;
644459d0 4683
4684 rot = 0;
4685 rot_amt = 0;
9d98604b 4686
4687 if (MEM_ALIGN (src) >= 128)
4688 /* Address is already aligned; simply perform a TImode load. */ ;
4689 else if (GET_CODE (addr) == PLUS)
644459d0 4690 {
4691 /* 8 cases:
4692 aligned reg + aligned reg => lqx
4693 aligned reg + unaligned reg => lqx, rotqby
4694 aligned reg + aligned const => lqd
4695 aligned reg + unaligned const => lqd, rotqbyi
4696 unaligned reg + aligned reg => lqx, rotqby
4697 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4698 unaligned reg + aligned const => lqd, rotqby
4699 unaligned reg + unaligned const -> not allowed by legitimate address
4700 */
4701 p0 = XEXP (addr, 0);
4702 p1 = XEXP (addr, 1);
9d98604b 4703 if (!reg_aligned_for_addr (p0))
644459d0 4704 {
9d98604b 4705 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4706 {
9d98604b 4707 rot = gen_reg_rtx (SImode);
4708 emit_insn (gen_addsi3 (rot, p0, p1));
4709 }
4710 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4711 {
4712 if (INTVAL (p1) > 0
4713 && REG_POINTER (p0)
4714 && INTVAL (p1) * BITS_PER_UNIT
4715 < REGNO_POINTER_ALIGN (REGNO (p0)))
4716 {
4717 rot = gen_reg_rtx (SImode);
4718 emit_insn (gen_addsi3 (rot, p0, p1));
4719 addr = p0;
4720 }
4721 else
4722 {
4723 rtx x = gen_reg_rtx (SImode);
4724 emit_move_insn (x, p1);
4725 if (!spu_arith_operand (p1, SImode))
4726 p1 = x;
4727 rot = gen_reg_rtx (SImode);
4728 emit_insn (gen_addsi3 (rot, p0, p1));
4729 addr = gen_rtx_PLUS (Pmode, p0, x);
4730 }
644459d0 4731 }
4732 else
4733 rot = p0;
4734 }
4735 else
4736 {
4737 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4738 {
4739 rot_amt = INTVAL (p1) & 15;
9d98604b 4740 if (INTVAL (p1) & -16)
4741 {
4742 p1 = GEN_INT (INTVAL (p1) & -16);
4743 addr = gen_rtx_PLUS (SImode, p0, p1);
4744 }
4745 else
4746 addr = p0;
644459d0 4747 }
9d98604b 4748 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4749 rot = p1;
4750 }
4751 }
9d98604b 4752 else if (REG_P (addr))
644459d0 4753 {
9d98604b 4754 if (!reg_aligned_for_addr (addr))
644459d0 4755 rot = addr;
4756 }
4757 else if (GET_CODE (addr) == CONST)
4758 {
4759 if (GET_CODE (XEXP (addr, 0)) == PLUS
4760 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4761 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4762 {
4763 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4764 if (rot_amt & -16)
4765 addr = gen_rtx_CONST (Pmode,
4766 gen_rtx_PLUS (Pmode,
4767 XEXP (XEXP (addr, 0), 0),
4768 GEN_INT (rot_amt & -16)));
4769 else
4770 addr = XEXP (XEXP (addr, 0), 0);
4771 }
4772 else
9d98604b 4773 {
4774 rot = gen_reg_rtx (Pmode);
4775 emit_move_insn (rot, addr);
4776 }
644459d0 4777 }
4778 else if (GET_CODE (addr) == CONST_INT)
4779 {
4780 rot_amt = INTVAL (addr);
4781 addr = GEN_INT (rot_amt & -16);
4782 }
4783 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4784 {
4785 rot = gen_reg_rtx (Pmode);
4786 emit_move_insn (rot, addr);
4787 }
644459d0 4788
9d98604b 4789 rot_amt += extra_rotby;
644459d0 4790
4791 rot_amt &= 15;
4792
4793 if (rot && rot_amt)
4794 {
9d98604b 4795 rtx x = gen_reg_rtx (SImode);
4796 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4797 rot = x;
644459d0 4798 rot_amt = 0;
4799 }
9d98604b 4800 if (!rot && rot_amt)
4801 rot = GEN_INT (rot_amt);
4802
4803 addr0 = copy_rtx (addr);
4804 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4805 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4806
4807 if (dst1)
4808 {
4809 addr1 = plus_constant (copy_rtx (addr), 16);
4810 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4811 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4812 }
644459d0 4813
9d98604b 4814 return rot;
4815}
4816
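/* Editor note (added comment, derived from the code below): split a load of a
   mode narrower than a quadword into a TImode load plus a rotate into the
   preferred slot. Return 1 when this routine emitted the sequence itself,
   0 when the caller should emit an ordinary move. */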
4817int
4818spu_split_load (rtx * ops)
4819{
4820 enum machine_mode mode = GET_MODE (ops[0]);
4821 rtx addr, load, rot;
4822 int rot_amt;
644459d0 4823
9d98604b 4824 if (GET_MODE_SIZE (mode) >= 16)
4825 return 0;
644459d0 4826
9d98604b 4827 addr = XEXP (ops[1], 0);
4828 gcc_assert (GET_CODE (addr) != AND);
4829
4830 if (!address_needs_split (ops[1]))
4831 {
4832 ops[1] = change_address (ops[1], TImode, addr);
4833 load = gen_reg_rtx (TImode);
4834 emit_insn (gen__movti (load, ops[1]));
4835 spu_convert_move (ops[0], load);
4836 return 1;
4837 }
4838
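 /* Sub-word values belong in the right-most bytes of the preferred word
 (byte 3 for QImode, bytes 2-3 for HImode), so reduce the rotate amount
 by 4 - SIZE bytes. */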
4839 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4840
4841 load = gen_reg_rtx (TImode);
4842 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4843
4844 if (rot)
4845 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4846
9d98604b 4847 spu_convert_move (ops[0], load);
4848 return 1;
644459d0 4849}
4850
9d98604b 4851int
644459d0 4852spu_split_store (rtx * ops)
4853{
4854 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4855 rtx reg;
644459d0 4856 rtx addr, p0, p1, p1_lo, smem;
4857 int aform;
4858 int scalar;
4859
9d98604b 4860 if (GET_MODE_SIZE (mode) >= 16)
4861 return 0;
4862
644459d0 4863 addr = XEXP (ops[0], 0);
9d98604b 4864 gcc_assert (GET_CODE (addr) != AND);
4865
4866 if (!address_needs_split (ops[0]))
4867 {
4868 reg = gen_reg_rtx (TImode);
4869 emit_insn (gen_spu_convert (reg, ops[1]));
4870 ops[0] = change_address (ops[0], TImode, addr);
4871 emit_move_insn (ops[0], reg);
4872 return 1;
4873 }
644459d0 4874
4875 if (GET_CODE (addr) == PLUS)
4876 {
4877 /* 8 cases:
4878 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4879 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4880 aligned reg + aligned const => lqd, c?d, shuf, stqx
4881 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4882 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4883 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4884 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4885 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4886 */
4887 aform = 0;
4888 p0 = XEXP (addr, 0);
4889 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4890 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4891 {
4892 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4893 if (reg_aligned_for_addr (p0))
4894 {
4895 p1 = GEN_INT (INTVAL (p1) & -16);
4896 if (p1 == const0_rtx)
4897 addr = p0;
4898 else
4899 addr = gen_rtx_PLUS (SImode, p0, p1);
4900 }
4901 else
4902 {
4903 rtx x = gen_reg_rtx (SImode);
4904 emit_move_insn (x, p1);
4905 addr = gen_rtx_PLUS (SImode, p0, x);
4906 }
644459d0 4907 }
4908 }
9d98604b 4909 else if (REG_P (addr))
644459d0 4910 {
4911 aform = 0;
4912 p0 = addr;
4913 p1 = p1_lo = const0_rtx;
4914 }
4915 else
4916 {
4917 aform = 1;
4918 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4919 p1 = 0; /* aform doesn't use p1 */
4920 p1_lo = addr;
4921 if (ALIGNED_SYMBOL_REF_P (addr))
4922 p1_lo = const0_rtx;
9d98604b 4923 else if (GET_CODE (addr) == CONST
4924 && GET_CODE (XEXP (addr, 0)) == PLUS
4925 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4926 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4927 {
9d98604b 4928 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4929 if ((v & -16) != 0)
4930 addr = gen_rtx_CONST (Pmode,
4931 gen_rtx_PLUS (Pmode,
4932 XEXP (XEXP (addr, 0), 0),
4933 GEN_INT (v & -16)));
4934 else
4935 addr = XEXP (XEXP (addr, 0), 0);
4936 p1_lo = GEN_INT (v & 15);
644459d0 4937 }
4938 else if (GET_CODE (addr) == CONST_INT)
4939 {
4940 p1_lo = GEN_INT (INTVAL (addr) & 15);
4941 addr = GEN_INT (INTVAL (addr) & -16);
4942 }
9d98604b 4943 else
4944 {
4945 p1_lo = gen_reg_rtx (SImode);
4946 emit_move_insn (p1_lo, addr);
4947 }
644459d0 4948 }
4949
9d98604b 4950 reg = gen_reg_rtx (TImode);
e04cf423 4951
644459d0 4952 scalar = store_with_one_insn_p (ops[0]);
4953 if (!scalar)
4954 {
 4955 /* We could copy the flags from the ops[0] MEM to lmem here.
4956 We don't because we want this load to be optimized away if
4957 possible, and copying the flags will prevent that in certain
4958 cases, e.g. consider the volatile flag. */
4959
9d98604b 4960 rtx pat = gen_reg_rtx (TImode);
e04cf423 4961 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4962 set_mem_alias_set (lmem, 0);
4963 emit_insn (gen_movti (reg, lmem));
644459d0 4964
9d98604b 4965 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4966 p0 = stack_pointer_rtx;
4967 if (!p1_lo)
4968 p1_lo = const0_rtx;
4969
4970 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4971 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4972 }
644459d0 4973 else
4974 {
4975 if (GET_CODE (ops[1]) == REG)
4976 emit_insn (gen_spu_convert (reg, ops[1]));
4977 else if (GET_CODE (ops[1]) == SUBREG)
4978 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4979 else
4980 abort ();
4981 }
4982
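 /* A scalar QI/HI value sits in the low bytes of the preferred word; shift it
 left so the bytes being stored line up with the start of the aligned
 quadword that holds the object. */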
4983 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4984 emit_insn (gen_ashlti3
4985 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4986
9d98604b 4987 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4988 /* We can't use the previous alias set because the memory has changed
4989 size and can potentially overlap objects of other types. */
4990 set_mem_alias_set (smem, 0);
4991
e04cf423 4992 emit_insn (gen_movti (smem, reg));
9d98604b 4993 return 1;
644459d0 4994}
4995
4996/* Return TRUE if X is MEM which is a struct member reference
4997 and the member can safely be loaded and stored with a single
4998 instruction because it is padded. */
4999static int
5000mem_is_padded_component_ref (rtx x)
5001{
5002 tree t = MEM_EXPR (x);
5003 tree r;
5004 if (!t || TREE_CODE (t) != COMPONENT_REF)
5005 return 0;
5006 t = TREE_OPERAND (t, 1);
5007 if (!t || TREE_CODE (t) != FIELD_DECL
5008 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5009 return 0;
5010 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5011 r = DECL_FIELD_CONTEXT (t);
5012 if (!r || TREE_CODE (r) != RECORD_TYPE)
5013 return 0;
5014 /* Make sure they are the same mode */
5015 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5016 return 0;
5017 /* If there are no following fields then the field alignment assures
fa7637bd 5018 the structure is padded to the alignment which means this field is
5019 padded too. */
644459d0 5020 if (TREE_CHAIN (t) == 0)
5021 return 1;
5022 /* If the following field is also aligned then this field will be
5023 padded. */
5024 t = TREE_CHAIN (t);
5025 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5026 return 1;
5027 return 0;
5028}
5029
c7b91b14 5030/* Parse the -mfixed-range= option string. */
5031static void
5032fix_range (const char *const_str)
5033{
5034 int i, first, last;
5035 char *str, *dash, *comma;
5036
 5037 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5038 REG2 are either register names or register numbers. The effect
5039 of this option is to mark the registers in the range from REG1 to
5040 REG2 as ``fixed'' so they won't be used by the compiler. */
5041
5042 i = strlen (const_str);
5043 str = (char *) alloca (i + 1);
5044 memcpy (str, const_str, i + 1);
5045
5046 while (1)
5047 {
5048 dash = strchr (str, '-');
5049 if (!dash)
5050 {
5051 warning (0, "value of -mfixed-range must have form REG1-REG2");
5052 return;
5053 }
5054 *dash = '\0';
5055 comma = strchr (dash + 1, ',');
5056 if (comma)
5057 *comma = '\0';
5058
5059 first = decode_reg_name (str);
5060 if (first < 0)
5061 {
5062 warning (0, "unknown register name: %s", str);
5063 return;
5064 }
5065
5066 last = decode_reg_name (dash + 1);
5067 if (last < 0)
5068 {
5069 warning (0, "unknown register name: %s", dash + 1);
5070 return;
5071 }
5072
5073 *dash = '-';
5074
5075 if (first > last)
5076 {
5077 warning (0, "%s-%s is an empty range", str, dash + 1);
5078 return;
5079 }
5080
5081 for (i = first; i <= last; ++i)
5082 fixed_regs[i] = call_used_regs[i] = 1;
5083
5084 if (!comma)
5085 break;
5086
5087 *comma = ',';
5088 str = comma + 1;
5089 }
5090}
5091
644459d0 5092/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5093 can be generated using the fsmbi instruction. */
5094int
5095fsmbi_const_p (rtx x)
5096{
dea01258 5097 if (CONSTANT_P (x))
5098 {
5df189be 5099 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5100 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5101 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5102 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5103 }
5104 return 0;
5105}
5106
5107/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5108 can be generated using the cbd, chd, cwd or cdd instruction. */
5109int
5110cpat_const_p (rtx x, enum machine_mode mode)
5111{
5112 if (CONSTANT_P (x))
5113 {
5114 enum immediate_class c = classify_immediate (x, mode);
5115 return c == IC_CPAT;
5116 }
5117 return 0;
5118}
644459d0 5119
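/* Editor note (added comment, inferred from the code below): build, at compile
   time, the shuffle-control constant that a cbd/chd/cwd/cdd instruction would
   generate for the given base (ops[1]), offset (ops[2]) and element size
   (ops[3]), or return 0 if it cannot be determined. */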
dea01258 5120rtx
5121gen_cpat_const (rtx * ops)
5122{
5123 unsigned char dst[16];
5124 int i, offset, shift, isize;
5125 if (GET_CODE (ops[3]) != CONST_INT
5126 || GET_CODE (ops[2]) != CONST_INT
5127 || (GET_CODE (ops[1]) != CONST_INT
5128 && GET_CODE (ops[1]) != REG))
5129 return 0;
5130 if (GET_CODE (ops[1]) == REG
5131 && (!REG_POINTER (ops[1])
5132 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5133 return 0;
644459d0 5134
5135 for (i = 0; i < 16; i++)
dea01258 5136 dst[i] = i + 16;
5137 isize = INTVAL (ops[3]);
5138 if (isize == 1)
5139 shift = 3;
5140 else if (isize == 2)
5141 shift = 2;
5142 else
5143 shift = 0;
5144 offset = (INTVAL (ops[2]) +
5145 (GET_CODE (ops[1]) ==
5146 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5147 for (i = 0; i < isize; i++)
5148 dst[offset + i] = i + shift;
5149 return array_to_constant (TImode, dst);
644459d0 5150}
5151
5152/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5153 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5154 than 16 bytes, the value is repeated across the rest of the array. */
5155void
5156constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5157{
5158 HOST_WIDE_INT val;
5159 int i, j, first;
5160
5161 memset (arr, 0, 16);
5162 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5163 if (GET_CODE (x) == CONST_INT
5164 || (GET_CODE (x) == CONST_DOUBLE
5165 && (mode == SFmode || mode == DFmode)))
5166 {
5167 gcc_assert (mode != VOIDmode && mode != BLKmode);
5168
5169 if (GET_CODE (x) == CONST_DOUBLE)
5170 val = const_double_to_hwint (x);
5171 else
5172 val = INTVAL (x);
5173 first = GET_MODE_SIZE (mode) - 1;
5174 for (i = first; i >= 0; i--)
5175 {
5176 arr[i] = val & 0xff;
5177 val >>= 8;
5178 }
5179 /* Splat the constant across the whole array. */
5180 for (j = 0, i = first + 1; i < 16; i++)
5181 {
5182 arr[i] = arr[j];
5183 j = (j == first) ? 0 : j + 1;
5184 }
5185 }
5186 else if (GET_CODE (x) == CONST_DOUBLE)
5187 {
5188 val = CONST_DOUBLE_LOW (x);
5189 for (i = 15; i >= 8; i--)
5190 {
5191 arr[i] = val & 0xff;
5192 val >>= 8;
5193 }
5194 val = CONST_DOUBLE_HIGH (x);
5195 for (i = 7; i >= 0; i--)
5196 {
5197 arr[i] = val & 0xff;
5198 val >>= 8;
5199 }
5200 }
5201 else if (GET_CODE (x) == CONST_VECTOR)
5202 {
5203 int units;
5204 rtx elt;
5205 mode = GET_MODE_INNER (mode);
5206 units = CONST_VECTOR_NUNITS (x);
5207 for (i = 0; i < units; i++)
5208 {
5209 elt = CONST_VECTOR_ELT (x, i);
5210 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5211 {
5212 if (GET_CODE (elt) == CONST_DOUBLE)
5213 val = const_double_to_hwint (elt);
5214 else
5215 val = INTVAL (elt);
5216 first = GET_MODE_SIZE (mode) - 1;
5217 if (first + i * GET_MODE_SIZE (mode) > 16)
5218 abort ();
5219 for (j = first; j >= 0; j--)
5220 {
5221 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5222 val >>= 8;
5223 }
5224 }
5225 }
5226 }
5227 else
5228 gcc_unreachable();
5229}
5230
5231/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5232 smaller than 16 bytes, use the bytes that would represent that value
5233 in a register, e.g., for QImode return the value of arr[3]. */
5234rtx
e96f2783 5235array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5236{
5237 enum machine_mode inner_mode;
5238 rtvec v;
5239 int units, size, i, j, k;
5240 HOST_WIDE_INT val;
5241
5242 if (GET_MODE_CLASS (mode) == MODE_INT
5243 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5244 {
5245 j = GET_MODE_SIZE (mode);
5246 i = j < 4 ? 4 - j : 0;
5247 for (val = 0; i < j; i++)
5248 val = (val << 8) | arr[i];
5249 val = trunc_int_for_mode (val, mode);
5250 return GEN_INT (val);
5251 }
5252
5253 if (mode == TImode)
5254 {
5255 HOST_WIDE_INT high;
5256 for (i = high = 0; i < 8; i++)
5257 high = (high << 8) | arr[i];
5258 for (i = 8, val = 0; i < 16; i++)
5259 val = (val << 8) | arr[i];
5260 return immed_double_const (val, high, TImode);
5261 }
5262 if (mode == SFmode)
5263 {
5264 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5265 val = trunc_int_for_mode (val, SImode);
171b6d22 5266 return hwint_to_const_double (SFmode, val);
644459d0 5267 }
5268 if (mode == DFmode)
5269 {
1f915911 5270 for (i = 0, val = 0; i < 8; i++)
5271 val = (val << 8) | arr[i];
171b6d22 5272 return hwint_to_const_double (DFmode, val);
644459d0 5273 }
5274
5275 if (!VECTOR_MODE_P (mode))
5276 abort ();
5277
5278 units = GET_MODE_NUNITS (mode);
5279 size = GET_MODE_UNIT_SIZE (mode);
5280 inner_mode = GET_MODE_INNER (mode);
5281 v = rtvec_alloc (units);
5282
5283 for (k = i = 0; i < units; ++i)
5284 {
5285 val = 0;
5286 for (j = 0; j < size; j++, k++)
5287 val = (val << 8) | arr[k];
5288
5289 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5290 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5291 else
5292 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5293 }
5294 if (k > 16)
5295 abort ();
5296
5297 return gen_rtx_CONST_VECTOR (mode, v);
5298}
5299
5300static void
5301reloc_diagnostic (rtx x)
5302{
712d2297 5303 tree decl = 0;
644459d0 5304 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5305 return;
5306
5307 if (GET_CODE (x) == SYMBOL_REF)
5308 decl = SYMBOL_REF_DECL (x);
5309 else if (GET_CODE (x) == CONST
5310 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5311 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5312
5313 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5314 if (decl && !DECL_P (decl))
5315 decl = 0;
5316
644459d0 5317 /* The decl could be a string constant. */
5318 if (decl && DECL_P (decl))
712d2297 5319 {
5320 location_t loc;
5321 /* We use last_assemble_variable_decl to get line information. It's
5322 not always going to be right and might not even be close, but will
5323 be right for the more common cases. */
5324 if (!last_assemble_variable_decl || in_section == ctors_section)
5325 loc = DECL_SOURCE_LOCATION (decl);
5326 else
5327 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5328
712d2297 5329 if (TARGET_WARN_RELOC)
5330 warning_at (loc, 0,
5331 "creating run-time relocation for %qD", decl);
5332 else
5333 error_at (loc,
5334 "creating run-time relocation for %qD", decl);
5335 }
5336 else
5337 {
5338 if (TARGET_WARN_RELOC)
5339 warning_at (input_location, 0, "creating run-time relocation");
5340 else
5341 error_at (input_location, "creating run-time relocation");
5342 }
644459d0 5343}
5344
5345/* Hook into assemble_integer so we can generate an error for run-time
5346 relocations. The SPU ABI disallows them. */
5347static bool
5348spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5349{
5350 /* By default run-time relocations aren't supported, but we allow them
 5351 in case users support them in their own run-time loader, and we provide
5352 a warning for those users that don't. */
5353 if ((GET_CODE (x) == SYMBOL_REF)
5354 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5355 reloc_diagnostic (x);
5356
5357 return default_assemble_integer (x, size, aligned_p);
5358}
5359
5360static void
5361spu_asm_globalize_label (FILE * file, const char *name)
5362{
5363 fputs ("\t.global\t", file);
5364 assemble_name (file, name);
5365 fputs ("\n", file);
5366}
5367
5368static bool
f529eb25 5369spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5370 bool speed ATTRIBUTE_UNUSED)
644459d0 5371{
5372 enum machine_mode mode = GET_MODE (x);
5373 int cost = COSTS_N_INSNS (2);
5374
5375 /* Folding to a CONST_VECTOR will use extra space but there might
5376 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5377 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5378 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5379 because this cost will only be compared against a single insn.
5380 if (code == CONST_VECTOR)
5381 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5382 */
5383
5384 /* Use defaults for float operations. Not accurate but good enough. */
5385 if (mode == DFmode)
5386 {
5387 *total = COSTS_N_INSNS (13);
5388 return true;
5389 }
5390 if (mode == SFmode)
5391 {
5392 *total = COSTS_N_INSNS (6);
5393 return true;
5394 }
5395 switch (code)
5396 {
5397 case CONST_INT:
5398 if (satisfies_constraint_K (x))
5399 *total = 0;
5400 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5401 *total = COSTS_N_INSNS (1);
5402 else
5403 *total = COSTS_N_INSNS (3);
5404 return true;
5405
5406 case CONST:
5407 *total = COSTS_N_INSNS (3);
5408 return true;
5409
5410 case LABEL_REF:
5411 case SYMBOL_REF:
5412 *total = COSTS_N_INSNS (0);
5413 return true;
5414
5415 case CONST_DOUBLE:
5416 *total = COSTS_N_INSNS (5);
5417 return true;
5418
5419 case FLOAT_EXTEND:
5420 case FLOAT_TRUNCATE:
5421 case FLOAT:
5422 case UNSIGNED_FLOAT:
5423 case FIX:
5424 case UNSIGNED_FIX:
5425 *total = COSTS_N_INSNS (7);
5426 return true;
5427
5428 case PLUS:
5429 if (mode == TImode)
5430 {
5431 *total = COSTS_N_INSNS (9);
5432 return true;
5433 }
5434 break;
5435
5436 case MULT:
5437 cost =
5438 GET_CODE (XEXP (x, 0)) ==
5439 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5440 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5441 {
5442 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5443 {
5444 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5445 cost = COSTS_N_INSNS (14);
5446 if ((val & 0xffff) == 0)
5447 cost = COSTS_N_INSNS (9);
5448 else if (val > 0 && val < 0x10000)
5449 cost = COSTS_N_INSNS (11);
5450 }
5451 }
5452 *total = cost;
5453 return true;
5454 case DIV:
5455 case UDIV:
5456 case MOD:
5457 case UMOD:
5458 *total = COSTS_N_INSNS (20);
5459 return true;
5460 case ROTATE:
5461 case ROTATERT:
5462 case ASHIFT:
5463 case ASHIFTRT:
5464 case LSHIFTRT:
5465 *total = COSTS_N_INSNS (4);
5466 return true;
5467 case UNSPEC:
5468 if (XINT (x, 1) == UNSPEC_CONVERT)
5469 *total = COSTS_N_INSNS (0);
5470 else
5471 *total = COSTS_N_INSNS (4);
5472 return true;
5473 }
5474 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5475 if (GET_MODE_CLASS (mode) == MODE_INT
5476 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5477 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5478 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5479 *total = cost;
5480 return true;
5481}
5482
1bd43494 5483static enum machine_mode
5484spu_unwind_word_mode (void)
644459d0 5485{
1bd43494 5486 return SImode;
644459d0 5487}
5488
5489/* Decide whether we can make a sibling call to a function. DECL is the
5490 declaration of the function being targeted by the call and EXP is the
5491 CALL_EXPR representing the call. */
5492static bool
5493spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5494{
5495 return decl && !TARGET_LARGE_MEM;
5496}
5497
5498/* We need to correctly update the back chain pointer and the Available
 5499   Stack Size (which is in the second slot of the sp register). */
5500void
5501spu_allocate_stack (rtx op0, rtx op1)
5502{
5503 HOST_WIDE_INT v;
5504 rtx chain = gen_reg_rtx (V4SImode);
5505 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5506 rtx sp = gen_reg_rtx (V4SImode);
5507 rtx splatted = gen_reg_rtx (V4SImode);
5508 rtx pat = gen_reg_rtx (TImode);
5509
5510 /* copy the back chain so we can save it back again. */
5511 emit_move_insn (chain, stack_bot);
5512
5513 op1 = force_reg (SImode, op1);
5514
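  /* The shufb pattern below is bytes 00 01 02 03 repeated four times; it
     broadcasts the SImode allocation size in op1 to every word slot, so the
     vector subtract updates the stack pointer (slot 0) and the Available
     Stack Size (slot 1) in one operation.  */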
5515 v = 0x1020300010203ll;
5516 emit_move_insn (pat, immed_double_const (v, v, TImode));
5517 emit_insn (gen_shufb (splatted, op1, op1, pat));
5518
5519 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5520 emit_insn (gen_subv4si3 (sp, sp, splatted));
5521
5522 if (flag_stack_check)
5523 {
5524 rtx avail = gen_reg_rtx(SImode);
5525 rtx result = gen_reg_rtx(SImode);
5526 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5527 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5528 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5529 }
5530
5531 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5532
5533 emit_move_insn (stack_bot, chain);
5534
5535 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5536}
5537
5538void
5539spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5540{
5541 static unsigned char arr[16] =
5542 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
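  /* arr is a shufb pattern that broadcasts the word in the preferred slot
     to all four word slots of the result.  */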
5543 rtx temp = gen_reg_rtx (SImode);
5544 rtx temp2 = gen_reg_rtx (SImode);
5545 rtx temp3 = gen_reg_rtx (V4SImode);
5546 rtx temp4 = gen_reg_rtx (V4SImode);
5547 rtx pat = gen_reg_rtx (TImode);
5548 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5549
5550 /* Restore the backchain from the first word, sp from the second. */
5551 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5552 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5553
5554 emit_move_insn (pat, array_to_constant (TImode, arr));
5555
5556 /* Compute Available Stack Size for sp */
5557 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5558 emit_insn (gen_shufb (temp3, temp, temp, pat));
5559
5560 /* Compute Available Stack Size for back chain */
5561 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5562 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5563 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5564
5565 emit_insn (gen_addv4si3 (sp, sp, temp3));
5566 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5567}
5568
5569static void
5570spu_init_libfuncs (void)
5571{
5572 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5573 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5574 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5575 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5576 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5577 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5578 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5579 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5580 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5581 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5582 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5583
5584 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5585 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5586
5587 set_optab_libfunc (smul_optab, TImode, "__multi3");
5588 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5589 set_optab_libfunc (smod_optab, TImode, "__modti3");
5590 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5591 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5592 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5593}
5594
5595/* Make a subreg, stripping any existing subreg. We could possibly just
5596 call simplify_subreg, but in this case we know what we want. */
5597rtx
5598spu_gen_subreg (enum machine_mode mode, rtx x)
5599{
5600 if (GET_CODE (x) == SUBREG)
5601 x = SUBREG_REG (x);
5602 if (GET_MODE (x) == mode)
5603 return x;
5604 return gen_rtx_SUBREG (mode, x, 0);
5605}
5606
5607static bool
fb80456a 5608spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5609{
5610 return (TYPE_MODE (type) == BLKmode
5611 && ((type) == 0
5612 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5613 || int_size_in_bytes (type) >
5614 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5615}
5616\f
5617/* Create the built-in types and functions */
5618
c2233b46 5619enum spu_function_code
5620{
5621#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5622#include "spu-builtins.def"
5623#undef DEF_BUILTIN
5624 NUM_SPU_BUILTINS
5625};
5626
5627extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5628
644459d0 5629struct spu_builtin_description spu_builtins[] = {
5630#define DEF_BUILTIN(fcode, icode, name, type, params) \
5631 {fcode, icode, name, type, params, NULL_TREE},
5632#include "spu-builtins.def"
5633#undef DEF_BUILTIN
5634};
5635
e6925042 5636/* Returns the SPU builtin decl for CODE. */
5637
5638static tree
5639spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5640{
5641 if (code >= NUM_SPU_BUILTINS)
5642 return error_mark_node;
5643
5644 return spu_builtins[code].fndecl;
5645}
5646
5647
644459d0 5648static void
5649spu_init_builtins (void)
5650{
5651 struct spu_builtin_description *d;
5652 unsigned int i;
5653
5654 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5655 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5656 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5657 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5658 V4SF_type_node = build_vector_type (float_type_node, 4);
5659 V2DF_type_node = build_vector_type (double_type_node, 2);
5660
5661 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5662 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5663 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5664 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5665
c4ecce0c 5666 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5667
5668 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5669 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5670 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5671 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5672 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5673 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5674 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5675 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5676 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5677 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5678 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5679 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5680
5681 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5682 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5683 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5684 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5685 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5686 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5687 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5688 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5689
5690 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5691 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5692
5693 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5694
5695 spu_builtin_types[SPU_BTI_PTR] =
5696 build_pointer_type (build_qualified_type
5697 (void_type_node,
5698 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5699
5700 /* For each builtin we build a new prototype. The tree code will make
5701 sure nodes are shared. */
5702 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5703 {
5704 tree p;
 5705      char name[64];	/* add_builtin_function will make a copy. */
5706 int parm;
5707
5708 if (d->name == 0)
5709 continue;
5710
5dfbd18f 5711 /* Find last parm. */
644459d0 5712 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5713 ;
644459d0 5714
5715 p = void_list_node;
5716 while (parm > 1)
5717 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5718
5719 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5720
5721 sprintf (name, "__builtin_%s", d->name);
5722 d->fndecl =
5723 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5724 NULL, NULL_TREE);
a76866d3 5725 if (d->fcode == SPU_MASK_FOR_LOAD)
5726 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5727
5728 /* These builtins don't throw. */
5729 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5730 }
5731}
5732
cf31d486 5733void
5734spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5735{
5736 static unsigned char arr[16] =
5737 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5738
5739 rtx temp = gen_reg_rtx (Pmode);
5740 rtx temp2 = gen_reg_rtx (V4SImode);
5741 rtx temp3 = gen_reg_rtx (V4SImode);
5742 rtx pat = gen_reg_rtx (TImode);
5743 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5744
5745 emit_move_insn (pat, array_to_constant (TImode, arr));
5746
5747 /* Restore the sp. */
5748 emit_move_insn (temp, op1);
5749 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5750
5751 /* Compute available stack size for sp. */
5752 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5753 emit_insn (gen_shufb (temp3, temp, temp, pat));
5754
5755 emit_insn (gen_addv4si3 (sp, sp, temp3));
5756 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5757}
5758
644459d0 5759int
5760spu_safe_dma (HOST_WIDE_INT channel)
5761{
006e4b96 5762 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5763}
5764
5765void
5766spu_builtin_splats (rtx ops[])
5767{
5768 enum machine_mode mode = GET_MODE (ops[0]);
5769 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5770 {
5771 unsigned char arr[16];
5772 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5773 emit_move_insn (ops[0], array_to_constant (mode, arr));
5774 }
644459d0 5775 else
5776 {
5777 rtx reg = gen_reg_rtx (TImode);
5778 rtx shuf;
5779 if (GET_CODE (ops[1]) != REG
5780 && GET_CODE (ops[1]) != SUBREG)
5781 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
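      /* Each constant below is a shufb control word that replicates the
	 scalar element from its position in the preferred slot of ops[1]
	 across the whole quadword.  */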
5782 switch (mode)
5783 {
5784 case V2DImode:
5785 case V2DFmode:
5786 shuf =
5787 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5788 TImode);
5789 break;
5790 case V4SImode:
5791 case V4SFmode:
5792 shuf =
5793 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5794 TImode);
5795 break;
5796 case V8HImode:
5797 shuf =
5798 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5799 TImode);
5800 break;
5801 case V16QImode:
5802 shuf =
5803 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5804 TImode);
5805 break;
5806 default:
5807 abort ();
5808 }
5809 emit_move_insn (reg, shuf);
5810 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5811 }
5812}
5813
5814void
5815spu_builtin_extract (rtx ops[])
5816{
5817 enum machine_mode mode;
5818 rtx rot, from, tmp;
5819
5820 mode = GET_MODE (ops[1]);
5821
5822 if (GET_CODE (ops[2]) == CONST_INT)
5823 {
5824 switch (mode)
5825 {
5826 case V16QImode:
5827 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5828 break;
5829 case V8HImode:
5830 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5831 break;
5832 case V4SFmode:
5833 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5834 break;
5835 case V4SImode:
5836 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5837 break;
5838 case V2DImode:
5839 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5840 break;
5841 case V2DFmode:
5842 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5843 break;
5844 default:
5845 abort ();
5846 }
5847 return;
5848 }
5849
5850 from = spu_gen_subreg (TImode, ops[1]);
5851 rot = gen_reg_rtx (TImode);
5852 tmp = gen_reg_rtx (SImode);
5853
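  /* For a variable index, compute the byte rotate amount that brings
     element ops[2] into the scalar's preferred slot, then rotate the whole
     quadword with rotqby.  */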
5854 switch (mode)
5855 {
5856 case V16QImode:
5857 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5858 break;
5859 case V8HImode:
5860 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5861 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5862 break;
5863 case V4SFmode:
5864 case V4SImode:
5865 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5866 break;
5867 case V2DImode:
5868 case V2DFmode:
5869 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5870 break;
5871 default:
5872 abort ();
5873 }
5874 emit_insn (gen_rotqby_ti (rot, from, tmp));
5875
5876 emit_insn (gen_spu_convert (ops[0], rot));
5877}
5878
5879void
5880spu_builtin_insert (rtx ops[])
5881{
5882 enum machine_mode mode = GET_MODE (ops[0]);
5883 enum machine_mode imode = GET_MODE_INNER (mode);
5884 rtx mask = gen_reg_rtx (TImode);
5885 rtx offset;
5886
5887 if (GET_CODE (ops[3]) == CONST_INT)
5888 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5889 else
5890 {
5891 offset = gen_reg_rtx (SImode);
5892 emit_insn (gen_mulsi3
5893 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5894 }
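  /* cpat (cbd/chd/cwd/cdd) generates a shuffle control for inserting a
     GET_MODE_SIZE (imode) byte element at OFFSET.  The stack pointer is
     used as the base address only because it is known to be 16-byte
     aligned.  */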
5895 emit_insn (gen_cpat
5896 (mask, stack_pointer_rtx, offset,
5897 GEN_INT (GET_MODE_SIZE (imode))));
5898 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5899}
5900
5901void
5902spu_builtin_promote (rtx ops[])
5903{
5904 enum machine_mode mode, imode;
5905 rtx rot, from, offset;
5906 HOST_WIDE_INT pos;
5907
5908 mode = GET_MODE (ops[0]);
5909 imode = GET_MODE_INNER (mode);
5910
5911 from = gen_reg_rtx (TImode);
5912 rot = spu_gen_subreg (TImode, ops[0]);
5913
5914 emit_insn (gen_spu_convert (from, ops[1]));
5915
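  /* Compute the byte rotate that moves the scalar from its preferred slot
     to the byte position of element ops[2] in the result vector.  */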
5916 if (GET_CODE (ops[2]) == CONST_INT)
5917 {
5918 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5919 if (GET_MODE_SIZE (imode) < 4)
5920 pos += 4 - GET_MODE_SIZE (imode);
5921 offset = GEN_INT (pos & 15);
5922 }
5923 else
5924 {
5925 offset = gen_reg_rtx (SImode);
5926 switch (mode)
5927 {
5928 case V16QImode:
5929 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5930 break;
5931 case V8HImode:
5932 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5933 emit_insn (gen_addsi3 (offset, offset, offset));
5934 break;
5935 case V4SFmode:
5936 case V4SImode:
5937 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5938 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5939 break;
5940 case V2DImode:
5941 case V2DFmode:
5942 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5943 break;
5944 default:
5945 abort ();
5946 }
5947 }
5948 emit_insn (gen_rotqby_ti (rot, from, offset));
5949}
5950
e96f2783 5951static void
5952spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5953{
e96f2783 5954 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5955 rtx shuf = gen_reg_rtx (V4SImode);
5956 rtx insn = gen_reg_rtx (V4SImode);
5957 rtx shufc;
5958 rtx insnc;
5959 rtx mem;
5960
5961 fnaddr = force_reg (SImode, fnaddr);
5962 cxt = force_reg (SImode, cxt);
5963
5964 if (TARGET_LARGE_MEM)
5965 {
5966 rtx rotl = gen_reg_rtx (V4SImode);
5967 rtx mask = gen_reg_rtx (V4SImode);
5968 rtx bi = gen_reg_rtx (SImode);
e96f2783 5969 static unsigned char const shufa[16] = {
644459d0 5970 2, 3, 0, 1, 18, 19, 16, 17,
5971 0, 1, 2, 3, 16, 17, 18, 19
5972 };
e96f2783 5973 static unsigned char const insna[16] = {
644459d0 5974 0x41, 0, 0, 79,
5975 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5976 0x60, 0x80, 0, 79,
5977 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5978 };
5979
5980 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5981 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5982
5983 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5984 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5985 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5986 emit_insn (gen_selb (insn, insnc, rotl, mask));
5987
e96f2783 5988 mem = adjust_address (m_tramp, V4SImode, 0);
5989 emit_move_insn (mem, insn);
644459d0 5990
5991 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5992 mem = adjust_address (m_tramp, Pmode, 16);
5993 emit_move_insn (mem, bi);
644459d0 5994 }
5995 else
5996 {
5997 rtx scxt = gen_reg_rtx (SImode);
5998 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5999 static unsigned char const insna[16] = {
644459d0 6000 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6001 0x30, 0, 0, 0,
6002 0, 0, 0, 0,
6003 0, 0, 0, 0
6004 };
6005
6006 shufc = gen_reg_rtx (TImode);
6007 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6008
6009 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6010 fits 18 bits and the last 4 are zeros. This will be true if
6011 the stack pointer is initialized to 0x3fff0 at program start,
6012 otherwise the ila instruction will be garbage. */
6013
6014 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6015 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6016 emit_insn (gen_cpat
6017 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6018 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6019 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6020
e96f2783 6021 mem = adjust_address (m_tramp, V4SImode, 0);
6022 emit_move_insn (mem, insn);
644459d0 6023 }
6024 emit_insn (gen_sync ());
6025}
6026
6027void
6028spu_expand_sign_extend (rtx ops[])
6029{
6030 unsigned char arr[16];
6031 rtx pat = gen_reg_rtx (TImode);
6032 rtx sign, c;
6033 int i, last;
6034 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
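  /* Build a shufb control word: entries of the form 0x1N select bytes from
     SIGN (which holds the replicated sign bits), while low entries select
     the original value bytes of ops[1], assembling the sign-extended
     result.  */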
6035 if (GET_MODE (ops[1]) == QImode)
6036 {
6037 sign = gen_reg_rtx (HImode);
6038 emit_insn (gen_extendqihi2 (sign, ops[1]));
6039 for (i = 0; i < 16; i++)
6040 arr[i] = 0x12;
6041 arr[last] = 0x13;
6042 }
6043 else
6044 {
6045 for (i = 0; i < 16; i++)
6046 arr[i] = 0x10;
6047 switch (GET_MODE (ops[1]))
6048 {
6049 case HImode:
6050 sign = gen_reg_rtx (SImode);
6051 emit_insn (gen_extendhisi2 (sign, ops[1]));
6052 arr[last] = 0x03;
6053 arr[last - 1] = 0x02;
6054 break;
6055 case SImode:
6056 sign = gen_reg_rtx (SImode);
6057 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6058 for (i = 0; i < 4; i++)
6059 arr[last - i] = 3 - i;
6060 break;
6061 case DImode:
6062 sign = gen_reg_rtx (SImode);
6063 c = gen_reg_rtx (SImode);
6064 emit_insn (gen_spu_convert (c, ops[1]));
6065 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6066 for (i = 0; i < 8; i++)
6067 arr[last - i] = 7 - i;
6068 break;
6069 default:
6070 abort ();
6071 }
6072 }
6073 emit_move_insn (pat, array_to_constant (TImode, arr));
6074 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6075}
6076
 6077/* Expand vector initialization. If there are any constant parts,
 6078   load the constant parts first, then load any non-constant parts. */
6079void
6080spu_expand_vector_init (rtx target, rtx vals)
6081{
6082 enum machine_mode mode = GET_MODE (target);
6083 int n_elts = GET_MODE_NUNITS (mode);
6084 int n_var = 0;
6085 bool all_same = true;
790c536c 6086 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6087 int i;
6088
6089 first = XVECEXP (vals, 0, 0);
6090 for (i = 0; i < n_elts; ++i)
6091 {
6092 x = XVECEXP (vals, 0, i);
e442af0b 6093 if (!(CONST_INT_P (x)
6094 || GET_CODE (x) == CONST_DOUBLE
6095 || GET_CODE (x) == CONST_FIXED))
644459d0 6096 ++n_var;
6097 else
6098 {
6099 if (first_constant == NULL_RTX)
6100 first_constant = x;
6101 }
6102 if (i > 0 && !rtx_equal_p (x, first))
6103 all_same = false;
6104 }
6105
 6106  /* If all elements are the same, use splats to repeat the element. */
6107 if (all_same)
6108 {
6109 if (!CONSTANT_P (first)
6110 && !register_operand (first, GET_MODE (x)))
6111 first = force_reg (GET_MODE (first), first);
6112 emit_insn (gen_spu_splats (target, first));
6113 return;
6114 }
6115
6116 /* load constant parts */
6117 if (n_var != n_elts)
6118 {
6119 if (n_var == 0)
6120 {
6121 emit_move_insn (target,
6122 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6123 }
6124 else
6125 {
6126 rtx constant_parts_rtx = copy_rtx (vals);
6127
6128 gcc_assert (first_constant != NULL_RTX);
 6129	  /* Fill empty slots with the first constant; this increases
 6130	     our chance of using splats in the recursive call below. */
6131 for (i = 0; i < n_elts; ++i)
e442af0b 6132 {
6133 x = XVECEXP (constant_parts_rtx, 0, i);
6134 if (!(CONST_INT_P (x)
6135 || GET_CODE (x) == CONST_DOUBLE
6136 || GET_CODE (x) == CONST_FIXED))
6137 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6138 }
644459d0 6139
6140 spu_expand_vector_init (target, constant_parts_rtx);
6141 }
6142 }
6143
6144 /* load variable parts */
6145 if (n_var != 0)
6146 {
6147 rtx insert_operands[4];
6148
6149 insert_operands[0] = target;
6150 insert_operands[2] = target;
6151 for (i = 0; i < n_elts; ++i)
6152 {
6153 x = XVECEXP (vals, 0, i);
e442af0b 6154 if (!(CONST_INT_P (x)
6155 || GET_CODE (x) == CONST_DOUBLE
6156 || GET_CODE (x) == CONST_FIXED))
644459d0 6157 {
6158 if (!register_operand (x, GET_MODE (x)))
6159 x = force_reg (GET_MODE (x), x);
6160 insert_operands[1] = x;
6161 insert_operands[3] = GEN_INT (i);
6162 spu_builtin_insert (insert_operands);
6163 }
6164 }
6165 }
6166}
6352eedf 6167
5474166e 6168/* Return insn index for the vector compare instruction for given CODE,
6169 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6170
6171static int
6172get_vec_cmp_insn (enum rtx_code code,
6173 enum machine_mode dest_mode,
6174 enum machine_mode op_mode)
6175
6176{
6177 switch (code)
6178 {
6179 case EQ:
6180 if (dest_mode == V16QImode && op_mode == V16QImode)
6181 return CODE_FOR_ceq_v16qi;
6182 if (dest_mode == V8HImode && op_mode == V8HImode)
6183 return CODE_FOR_ceq_v8hi;
6184 if (dest_mode == V4SImode && op_mode == V4SImode)
6185 return CODE_FOR_ceq_v4si;
6186 if (dest_mode == V4SImode && op_mode == V4SFmode)
6187 return CODE_FOR_ceq_v4sf;
6188 if (dest_mode == V2DImode && op_mode == V2DFmode)
6189 return CODE_FOR_ceq_v2df;
6190 break;
6191 case GT:
6192 if (dest_mode == V16QImode && op_mode == V16QImode)
6193 return CODE_FOR_cgt_v16qi;
6194 if (dest_mode == V8HImode && op_mode == V8HImode)
6195 return CODE_FOR_cgt_v8hi;
6196 if (dest_mode == V4SImode && op_mode == V4SImode)
6197 return CODE_FOR_cgt_v4si;
6198 if (dest_mode == V4SImode && op_mode == V4SFmode)
6199 return CODE_FOR_cgt_v4sf;
6200 if (dest_mode == V2DImode && op_mode == V2DFmode)
6201 return CODE_FOR_cgt_v2df;
6202 break;
6203 case GTU:
6204 if (dest_mode == V16QImode && op_mode == V16QImode)
6205 return CODE_FOR_clgt_v16qi;
6206 if (dest_mode == V8HImode && op_mode == V8HImode)
6207 return CODE_FOR_clgt_v8hi;
6208 if (dest_mode == V4SImode && op_mode == V4SImode)
6209 return CODE_FOR_clgt_v4si;
6210 break;
6211 default:
6212 break;
6213 }
6214 return -1;
6215}
6216
6217/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6218 DMODE is expected destination mode. This is a recursive function. */
6219
6220static rtx
6221spu_emit_vector_compare (enum rtx_code rcode,
6222 rtx op0, rtx op1,
6223 enum machine_mode dmode)
6224{
6225 int vec_cmp_insn;
6226 rtx mask;
6227 enum machine_mode dest_mode;
6228 enum machine_mode op_mode = GET_MODE (op1);
6229
6230 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6231
 6232  /* Floating point vector compare instructions use destination V4SImode.
 6233     Double floating point vector compare instructions use destination V2DImode.
 6234     Move the destination to the appropriate mode later. */
6235 if (dmode == V4SFmode)
6236 dest_mode = V4SImode;
6237 else if (dmode == V2DFmode)
6238 dest_mode = V2DImode;
6239 else
6240 dest_mode = dmode;
6241
6242 mask = gen_reg_rtx (dest_mode);
6243 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6244
6245 if (vec_cmp_insn == -1)
6246 {
6247 bool swap_operands = false;
6248 bool try_again = false;
6249 switch (rcode)
6250 {
6251 case LT:
6252 rcode = GT;
6253 swap_operands = true;
6254 try_again = true;
6255 break;
6256 case LTU:
6257 rcode = GTU;
6258 swap_operands = true;
6259 try_again = true;
6260 break;
6261 case NE:
6262 /* Treat A != B as ~(A==B). */
6263 {
6264 enum insn_code nor_code;
6265 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 6266 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 6267 gcc_assert (nor_code != CODE_FOR_nothing);
6268 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6269 if (dmode != dest_mode)
6270 {
6271 rtx temp = gen_reg_rtx (dest_mode);
6272 convert_move (temp, mask, 0);
6273 return temp;
6274 }
6275 return mask;
6276 }
6277 break;
6278 case GE:
6279 case GEU:
6280 case LE:
6281 case LEU:
6282 /* Try GT/GTU/LT/LTU OR EQ */
6283 {
6284 rtx c_rtx, eq_rtx;
6285 enum insn_code ior_code;
6286 enum rtx_code new_code;
6287
6288 switch (rcode)
6289 {
6290 case GE: new_code = GT; break;
6291 case GEU: new_code = GTU; break;
6292 case LE: new_code = LT; break;
6293 case LEU: new_code = LTU; break;
6294 default:
6295 gcc_unreachable ();
6296 }
6297
6298 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6299 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6300
99bdde56 6301 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 6302 gcc_assert (ior_code != CODE_FOR_nothing);
6303 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6304 if (dmode != dest_mode)
6305 {
6306 rtx temp = gen_reg_rtx (dest_mode);
6307 convert_move (temp, mask, 0);
6308 return temp;
6309 }
6310 return mask;
6311 }
6312 break;
6313 default:
6314 gcc_unreachable ();
6315 }
6316
6317 /* You only get two chances. */
6318 if (try_again)
6319 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6320
6321 gcc_assert (vec_cmp_insn != -1);
6322
6323 if (swap_operands)
6324 {
6325 rtx tmp;
6326 tmp = op0;
6327 op0 = op1;
6328 op1 = tmp;
6329 }
6330 }
6331
6332 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6333 if (dmode != dest_mode)
6334 {
6335 rtx temp = gen_reg_rtx (dest_mode);
6336 convert_move (temp, mask, 0);
6337 return temp;
6338 }
6339 return mask;
6340}
6341
6342
6343/* Emit vector conditional expression.
6344 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6345 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6346
6347int
6348spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6349 rtx cond, rtx cc_op0, rtx cc_op1)
6350{
6351 enum machine_mode dest_mode = GET_MODE (dest);
6352 enum rtx_code rcode = GET_CODE (cond);
6353 rtx mask;
6354
6355 /* Get the vector mask for the given relational operations. */
6356 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6357
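  /* selb takes bits from op1 where the comparison mask is all ones (the
     condition is true) and from op2 where it is zero.  */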
6358 emit_insn(gen_selb (dest, op2, op1, mask));
6359
6360 return 1;
6361}
6362
6352eedf 6363static rtx
6364spu_force_reg (enum machine_mode mode, rtx op)
6365{
6366 rtx x, r;
6367 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6368 {
6369 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6370 || GET_MODE (op) == BLKmode)
6371 return force_reg (mode, convert_to_mode (mode, op, 0));
6372 abort ();
6373 }
6374
6375 r = force_reg (GET_MODE (op), op);
6376 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6377 {
6378 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6379 if (x)
6380 return x;
6381 }
6382
6383 x = gen_reg_rtx (mode);
6384 emit_insn (gen_spu_convert (x, r));
6385 return x;
6386}
6387
6388static void
6389spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6390{
6391 HOST_WIDE_INT v = 0;
6392 int lsbits;
6393 /* Check the range of immediate operands. */
6394 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6395 {
6396 int range = p - SPU_BTI_7;
5df189be 6397
6398 if (!CONSTANT_P (op))
6352eedf 6399 error ("%s expects an integer literal in the range [%d, %d].",
6400 d->name,
6401 spu_builtin_range[range].low, spu_builtin_range[range].high);
6402
6403 if (GET_CODE (op) == CONST
6404 && (GET_CODE (XEXP (op, 0)) == PLUS
6405 || GET_CODE (XEXP (op, 0)) == MINUS))
6406 {
6407 v = INTVAL (XEXP (XEXP (op, 0), 1));
6408 op = XEXP (XEXP (op, 0), 0);
6409 }
6410 else if (GET_CODE (op) == CONST_INT)
6411 v = INTVAL (op);
5df189be 6412 else if (GET_CODE (op) == CONST_VECTOR
6413 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6414 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6415
 6416      /* The default for v is 0, which is valid in every range. */
6417 if (v < spu_builtin_range[range].low
6418 || v > spu_builtin_range[range].high)
6419 error ("%s expects an integer literal in the range [%d, %d]. ("
6420 HOST_WIDE_INT_PRINT_DEC ")",
6421 d->name,
6422 spu_builtin_range[range].low, spu_builtin_range[range].high,
6423 v);
6352eedf 6424
6425 switch (p)
6426 {
6427 case SPU_BTI_S10_4:
6428 lsbits = 4;
6429 break;
6430 case SPU_BTI_U16_2:
 6431	  /* This is only used in lqa and stqa.  Even though the insns
6432 encode 16 bits of the address (all but the 2 least
6433 significant), only 14 bits are used because it is masked to
 6434	     be 16-byte aligned. */
6435 lsbits = 4;
6436 break;
6437 case SPU_BTI_S16_2:
6438 /* This is used for lqr and stqr. */
6439 lsbits = 2;
6440 break;
6441 default:
6442 lsbits = 0;
6443 }
6444
6445 if (GET_CODE (op) == LABEL_REF
6446 || (GET_CODE (op) == SYMBOL_REF
6447 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6448 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6449 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6450 d->name);
6451 }
6452}
6453
6454
70ca06f8 6455static int
5df189be 6456expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6457 rtx target, rtx ops[])
6458{
bc620c5c 6459 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6460 int i = 0, a;
6352eedf 6461
6462 /* Expand the arguments into rtl. */
6463
6464 if (d->parm[0] != SPU_BTI_VOID)
6465 ops[i++] = target;
6466
70ca06f8 6467 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6468 {
5df189be 6469 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6470 if (arg == 0)
6471 abort ();
b9c74b4d 6472 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6473 }
70ca06f8 6474
6475 /* The insn pattern may have additional operands (SCRATCH).
6476 Return the number of actual non-SCRATCH operands. */
6477 gcc_assert (i <= insn_data[icode].n_operands);
6478 return i;
6352eedf 6479}
6480
6481static rtx
6482spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6483 tree exp, rtx target)
6352eedf 6484{
6485 rtx pat;
6486 rtx ops[8];
bc620c5c 6487 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6488 enum machine_mode mode, tmode;
6489 int i, p;
70ca06f8 6490 int n_operands;
6352eedf 6491 tree return_type;
6492
6493 /* Set up ops[] with values from arglist. */
70ca06f8 6494 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6495
6496 /* Handle the target operand which must be operand 0. */
6497 i = 0;
6498 if (d->parm[0] != SPU_BTI_VOID)
6499 {
6500
6501 /* We prefer the mode specified for the match_operand otherwise
6502 use the mode from the builtin function prototype. */
6503 tmode = insn_data[d->icode].operand[0].mode;
6504 if (tmode == VOIDmode)
6505 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6506
6507 /* Try to use target because not using it can lead to extra copies
6508 and when we are using all of the registers extra copies leads
6509 to extra spills. */
6510 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6511 ops[0] = target;
6512 else
6513 target = ops[0] = gen_reg_rtx (tmode);
6514
6515 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6516 abort ();
6517
6518 i++;
6519 }
6520
a76866d3 6521 if (d->fcode == SPU_MASK_FOR_LOAD)
6522 {
6523 enum machine_mode mode = insn_data[icode].operand[1].mode;
6524 tree arg;
6525 rtx addr, op, pat;
6526
6527 /* get addr */
5df189be 6528 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6529 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6530 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6531 addr = memory_address (mode, op);
6532
6533 /* negate addr */
6534 op = gen_reg_rtx (GET_MODE (addr));
6535 emit_insn (gen_rtx_SET (VOIDmode, op,
6536 gen_rtx_NEG (GET_MODE (addr), addr)));
6537 op = gen_rtx_MEM (mode, op);
6538
6539 pat = GEN_FCN (icode) (target, op);
6540 if (!pat)
6541 return 0;
6542 emit_insn (pat);
6543 return target;
6544 }
6545
6352eedf 6546  /* Ignore align_hint, but still expand its args in case they have
6547 side effects. */
6548 if (icode == CODE_FOR_spu_align_hint)
6549 return 0;
6550
6551 /* Handle the rest of the operands. */
70ca06f8 6552 for (p = 1; i < n_operands; i++, p++)
6352eedf 6553 {
6554 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6555 mode = insn_data[d->icode].operand[i].mode;
6556 else
6557 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6558
6559 /* mode can be VOIDmode here for labels */
6560
6561 /* For specific intrinsics with an immediate operand, e.g.,
6562 si_ai(), we sometimes need to convert the scalar argument to a
6563 vector argument by splatting the scalar. */
6564 if (VECTOR_MODE_P (mode)
6565 && (GET_CODE (ops[i]) == CONST_INT
6566 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6567 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6568 {
6569 if (GET_CODE (ops[i]) == CONST_INT)
6570 ops[i] = spu_const (mode, INTVAL (ops[i]));
6571 else
6572 {
6573 rtx reg = gen_reg_rtx (mode);
6574 enum machine_mode imode = GET_MODE_INNER (mode);
6575 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6576 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6577 if (imode != GET_MODE (ops[i]))
6578 ops[i] = convert_to_mode (imode, ops[i],
6579 TYPE_UNSIGNED (spu_builtin_types
6580 [d->parm[i]]));
6581 emit_insn (gen_spu_splats (reg, ops[i]));
6582 ops[i] = reg;
6583 }
6584 }
6585
5df189be 6586 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6587
6352eedf 6588 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6589 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6590 }
6591
70ca06f8 6592 switch (n_operands)
6352eedf 6593 {
6594 case 0:
6595 pat = GEN_FCN (icode) (0);
6596 break;
6597 case 1:
6598 pat = GEN_FCN (icode) (ops[0]);
6599 break;
6600 case 2:
6601 pat = GEN_FCN (icode) (ops[0], ops[1]);
6602 break;
6603 case 3:
6604 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6605 break;
6606 case 4:
6607 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6608 break;
6609 case 5:
6610 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6611 break;
6612 case 6:
6613 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6614 break;
6615 default:
6616 abort ();
6617 }
6618
6619 if (!pat)
6620 abort ();
6621
6622 if (d->type == B_CALL || d->type == B_BISLED)
6623 emit_call_insn (pat);
6624 else if (d->type == B_JUMP)
6625 {
6626 emit_jump_insn (pat);
6627 emit_barrier ();
6628 }
6629 else
6630 emit_insn (pat);
6631
6632 return_type = spu_builtin_types[d->parm[0]];
6633 if (d->parm[0] != SPU_BTI_VOID
6634 && GET_MODE (target) != TYPE_MODE (return_type))
6635 {
 6636      /* target is the return value. It should always have the mode of
6637 the builtin function prototype. */
6638 target = spu_force_reg (TYPE_MODE (return_type), target);
6639 }
6640
6641 return target;
6642}
6643
6644rtx
6645spu_expand_builtin (tree exp,
6646 rtx target,
6647 rtx subtarget ATTRIBUTE_UNUSED,
6648 enum machine_mode mode ATTRIBUTE_UNUSED,
6649 int ignore ATTRIBUTE_UNUSED)
6650{
5df189be 6651 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6652 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6653 struct spu_builtin_description *d;
6654
6655 if (fcode < NUM_SPU_BUILTINS)
6656 {
6657 d = &spu_builtins[fcode];
6658
5df189be 6659 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6660 }
6661 abort ();
6662}
6663
e99f512d 6664/* Implement targetm.vectorize.builtin_mul_widen_even. */
6665static tree
6666spu_builtin_mul_widen_even (tree type)
6667{
e99f512d 6668 switch (TYPE_MODE (type))
6669 {
6670 case V8HImode:
6671 if (TYPE_UNSIGNED (type))
6672 return spu_builtins[SPU_MULE_0].fndecl;
6673 else
6674 return spu_builtins[SPU_MULE_1].fndecl;
6675 break;
6676 default:
6677 return NULL_TREE;
6678 }
6679}
6680
6681/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6682static tree
6683spu_builtin_mul_widen_odd (tree type)
6684{
6685 switch (TYPE_MODE (type))
6686 {
6687 case V8HImode:
6688 if (TYPE_UNSIGNED (type))
6689 return spu_builtins[SPU_MULO_1].fndecl;
6690 else
6691 return spu_builtins[SPU_MULO_0].fndecl;
6692 break;
6693 default:
6694 return NULL_TREE;
6695 }
6696}
6697
a76866d3 6698/* Implement targetm.vectorize.builtin_mask_for_load. */
6699static tree
6700spu_builtin_mask_for_load (void)
6701{
6702 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6703 gcc_assert (d);
6704 return d->fndecl;
6705}
5df189be 6706
a28df51d 6707/* Implement targetm.vectorize.builtin_vectorization_cost. */
6708static int
6709spu_builtin_vectorization_cost (bool runtime_test)
6710{
 6711  /* If the branch of the runtime test is taken, i.e. the vectorized
 6712     version is skipped, this incurs a misprediction cost (because the
6713 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6714 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6715 when the vectorized version is executed. */
6716 if (runtime_test)
6717 return -19;
6718 else
6719 return 0;
6720}
6721
0e87db76 6722/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6723   after applying N iterations.  This routine does not determine
 6724   how many iterations are required to reach the desired alignment. */
6725
6726static bool
a9f1838b 6727spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6728{
6729 if (is_packed)
6730 return false;
6731
6732 /* All other types are naturally aligned. */
6733 return true;
6734}
6735
a0515226 6736/* Implement targetm.vectorize.builtin_vec_perm. */
6737tree
6738spu_builtin_vec_perm (tree type, tree *mask_element_type)
6739{
6740 struct spu_builtin_description *d;
6741
6742 *mask_element_type = unsigned_char_type_node;
6743
6744 switch (TYPE_MODE (type))
6745 {
6746 case V16QImode:
6747 if (TYPE_UNSIGNED (type))
6748 d = &spu_builtins[SPU_SHUFFLE_0];
6749 else
6750 d = &spu_builtins[SPU_SHUFFLE_1];
6751 break;
6752
6753 case V8HImode:
6754 if (TYPE_UNSIGNED (type))
6755 d = &spu_builtins[SPU_SHUFFLE_2];
6756 else
6757 d = &spu_builtins[SPU_SHUFFLE_3];
6758 break;
6759
6760 case V4SImode:
6761 if (TYPE_UNSIGNED (type))
6762 d = &spu_builtins[SPU_SHUFFLE_4];
6763 else
6764 d = &spu_builtins[SPU_SHUFFLE_5];
6765 break;
6766
6767 case V2DImode:
6768 if (TYPE_UNSIGNED (type))
6769 d = &spu_builtins[SPU_SHUFFLE_6];
6770 else
6771 d = &spu_builtins[SPU_SHUFFLE_7];
6772 break;
6773
6774 case V4SFmode:
6775 d = &spu_builtins[SPU_SHUFFLE_8];
6776 break;
6777
6778 case V2DFmode:
6779 d = &spu_builtins[SPU_SHUFFLE_9];
6780 break;
6781
6782 default:
6783 return NULL_TREE;
6784 }
6785
6786 gcc_assert (d);
6787 return d->fndecl;
6788}
6789
6cf5579e 6790/* Return the appropriate mode for a named address pointer. */
6791static enum machine_mode
6792spu_addr_space_pointer_mode (addr_space_t addrspace)
6793{
6794 switch (addrspace)
6795 {
6796 case ADDR_SPACE_GENERIC:
6797 return ptr_mode;
6798 case ADDR_SPACE_EA:
6799 return EAmode;
6800 default:
6801 gcc_unreachable ();
6802 }
6803}
6804
6805/* Return the appropriate mode for a named address address. */
6806static enum machine_mode
6807spu_addr_space_address_mode (addr_space_t addrspace)
6808{
6809 switch (addrspace)
6810 {
6811 case ADDR_SPACE_GENERIC:
6812 return Pmode;
6813 case ADDR_SPACE_EA:
6814 return EAmode;
6815 default:
6816 gcc_unreachable ();
6817 }
6818}
6819
6820/* Determine if one named address space is a subset of another. */
6821
6822static bool
6823spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6824{
6825 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6826 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6827
6828 if (subset == superset)
6829 return true;
6830
6831 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6832 being subsets but instead as disjoint address spaces. */
6833 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6834 return false;
6835
6836 else
6837 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6838}
6839
6840/* Convert from one address space to another. */
6841static rtx
6842spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6843{
6844 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6845 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6846
6847 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6848 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6849
6850 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6851 {
6852 rtx result, ls;
6853
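      /* Convert an __ea pointer to a generic (local store) pointer by
	 subtracting the local store's effective address, read from
	 __ea_local_store.  The conditional move below substitutes 0 for the
	 base when OP is 0 so that a NULL pointer stays NULL.  */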
6854 ls = gen_const_mem (DImode,
6855 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6856 set_mem_align (ls, 128);
6857
6858 result = gen_reg_rtx (Pmode);
6859 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6860 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6861 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6862 ls, const0_rtx, Pmode, 1);
6863
6864 emit_insn (gen_subsi3 (result, op, ls));
6865
6866 return result;
6867 }
6868
6869 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6870 {
6871 rtx result, ls;
6872
6873 ls = gen_const_mem (DImode,
6874 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6875 set_mem_align (ls, 128);
6876
6877 result = gen_reg_rtx (EAmode);
6878 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6879 op = force_reg (Pmode, op);
6880 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6881 ls, const0_rtx, EAmode, 1);
6882 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6883
6884 if (EAmode == SImode)
6885 emit_insn (gen_addsi3 (result, op, ls));
6886 else
6887 emit_insn (gen_adddi3 (result, op, ls));
6888
6889 return result;
6890 }
6891
6892 else
6893 gcc_unreachable ();
6894}
6895
6896
d52fd16a 6897/* Count the total number of instructions in each pipe and return the
6898 maximum, which is used as the Minimum Iteration Interval (MII)
 6899   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
 6900   instructions that return -2 can go in either pipe0 or pipe1. */
6901static int
6902spu_sms_res_mii (struct ddg *g)
6903{
6904 int i;
6905 unsigned t[4] = {0, 0, 0, 0};
6906
6907 for (i = 0; i < g->num_nodes; i++)
6908 {
6909 rtx insn = g->nodes[i].insn;
6910 int p = get_pipe (insn) + 2;
6911
6912 assert (p >= 0);
6913 assert (p < 4);
6914
6915 t[p]++;
6916 if (dump_file && INSN_P (insn))
6917 fprintf (dump_file, "i%d %s %d %d\n",
6918 INSN_UID (insn),
6919 insn_data[INSN_CODE(insn)].name,
6920 p, t[p]);
6921 }
6922 if (dump_file)
6923 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6924
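  /* t[2] and t[3] count pipe0-only and pipe1-only instructions; t[0]
     counts those that may go to either pipe.  The MII must cover each
     dedicated pipe as well as half of everything that has to share the
     two pipes.  */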
6925 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6926}
6927
6928
5df189be 6929void
6930spu_init_expanders (void)
9d98604b 6931{
5df189be 6932 if (cfun)
9d98604b 6933 {
6934 rtx r0, r1;
 6935      /* The hard frame pointer register is only 128-bit aligned when
 6936         frame_pointer_needed is true.  We don't know that until we're
 6937         expanding the prologue. */
6938 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6939
6940 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6941 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6942 to be treated as aligned, so generate them here. */
6943 r0 = gen_reg_rtx (SImode);
6944 r1 = gen_reg_rtx (SImode);
6945 mark_reg_pointer (r0, 128);
6946 mark_reg_pointer (r1, 128);
6947 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6948 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6949 }
ea32e033 6950}
6951
6952static enum machine_mode
6953spu_libgcc_cmp_return_mode (void)
6954{
6955
 6956/* For the SPU, word_mode is TImode, so it is better to use SImode
 6957   for compare returns. */
6958 return SImode;
6959}
6960
6961static enum machine_mode
6962spu_libgcc_shift_count_mode (void)
6963{
 6964/* For the SPU, word_mode is TImode, so it is better to use SImode
 6965   for shift counts. */
6966 return SImode;
6967}
5a976006 6968
6969/* An early place to adjust some flags after GCC has finished processing
 6970   them. */
6971static void
6972asm_file_start (void)
6973{
6974 /* Variable tracking should be run after all optimizations which
6975 change order of insns. It also needs a valid CFG. */
6976 spu_flag_var_tracking = flag_var_tracking;
6977 flag_var_tracking = 0;
6978
6979 default_file_start ();
6980}
6981
a08dfd55 6982/* Implement targetm.section_type_flags. */
6983static unsigned int
6984spu_section_type_flags (tree decl, const char *name, int reloc)
6985{
6986 /* .toe needs to have type @nobits. */
6987 if (strcmp (name, ".toe") == 0)
6988 return SECTION_BSS;
6cf5579e 6989 /* Don't load _ea into the current address space. */
6990 if (strcmp (name, "._ea") == 0)
6991 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 6992 return default_section_type_flags (decl, name, reloc);
6993}
c2233b46 6994
6cf5579e 6995/* Implement targetm.select_section. */
6996static section *
6997spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6998{
6999 /* Variables and constants defined in the __ea address space
7000 go into a special section named "._ea". */
7001 if (TREE_TYPE (decl) != error_mark_node
7002 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7003 {
7004 /* We might get called with string constants, but get_named_section
7005 doesn't like them as they are not DECLs. Also, we need to set
7006 flags in that case. */
7007 if (!DECL_P (decl))
7008 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7009
7010 return get_named_section (decl, "._ea", reloc);
7011 }
7012
7013 return default_elf_select_section (decl, reloc, align);
7014}
7015
7016/* Implement targetm.unique_section. */
7017static void
7018spu_unique_section (tree decl, int reloc)
7019{
7020 /* We don't support unique section names in the __ea address
7021 space for now. */
7022 if (TREE_TYPE (decl) != error_mark_node
7023 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7024 return;
7025
7026 default_unique_section (decl, reloc);
7027}
7028
56c7bfc2 7029/* Generate a constant or register which contains 2^SCALE. We assume
7030 the result is valid for MODE. Currently, MODE must be V4SFmode and
7031 SCALE must be SImode. */
7032rtx
7033spu_gen_exp2 (enum machine_mode mode, rtx scale)
7034{
7035 gcc_assert (mode == V4SFmode);
7036 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7037 if (GET_CODE (scale) != CONST_INT)
7038 {
7039 /* unsigned int exp = (127 + scale) << 23;
7040 __vector float m = (__vector float) spu_splats (exp); */
7041 rtx reg = force_reg (SImode, scale);
7042 rtx exp = gen_reg_rtx (SImode);
7043 rtx mul = gen_reg_rtx (mode);
7044 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7045 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7046 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7047 return mul;
7048 }
7049 else
7050 {
7051 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7052 unsigned char arr[16];
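      /* Build the IEEE single-precision bit pattern of 2**scale for each
	 word: the biased exponent (127 + scale) occupies bits 30:23, so its
	 upper bits land in byte 0 and its lowest bit in the top of byte 1;
	 the mantissa bytes are zero.  */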
7053 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7054 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7055 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7056 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7057 return array_to_constant (mode, arr);
7058 }
7059}
7060
9d98604b 7061/* After reload, just change the convert into a move instruction
7062 or a dead instruction. */
7063void
7064spu_split_convert (rtx ops[])
7065{
7066 if (REGNO (ops[0]) == REGNO (ops[1]))
7067 emit_note (NOTE_INSN_DELETED);
7068 else
7069 {
7070 /* Use TImode always as this might help hard reg copyprop. */
7071 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7072 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7073 emit_insn (gen_move_insn (op0, op1));
7074 }
7075}
7076
c2233b46 7077#include "gt-spu.h"