gcc/config/spu/spu.c
7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
146
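/* Illustrative sketch only, not part of the original port: one way a
   value could be checked against the ranges above.  The helper name is
   hypothetical; the real argument checking for builtins is done
   elsewhere in this file.  */
static inline int
spu_value_in_builtin_range_p (HOST_WIDE_INT v, enum spu_builtin_type_index bti)
{
  /* The range table starts at SPU_BTI_7, so translate the index.  */
  struct spu_builtin_range *r = &spu_builtin_range[bti - SPU_BTI_7];
  return v >= r->low && v <= r->high;
}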
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
4c834714 152static void spu_option_override (void);
cc07c468 153static void spu_option_init_struct (struct gcc_options *opts);
686e2769 154static void spu_option_default_params (void);
644459d0 155static void spu_init_builtins (void);
e6925042 156static tree spu_builtin_decl (unsigned, bool);
b62e30b8 157static bool spu_scalar_mode_supported_p (enum machine_mode mode);
158static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 159static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 160static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
161 bool, addr_space_t);
644459d0 162static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
163static rtx get_pic_reg (void);
164static int need_to_save_reg (int regno, int saving);
165static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
166static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
167static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
168 rtx scratch);
169static void emit_nop_for_insn (rtx insn);
170static bool insn_clobbers_hbr (rtx insn);
171static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 172 int distance, sbitmap blocks);
5474166e 173static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
174 enum machine_mode dmode);
644459d0 175static rtx get_branch_target (rtx branch);
644459d0 176static void spu_machine_dependent_reorg (void);
177static int spu_sched_issue_rate (void);
178static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
179 int can_issue_more);
180static int get_pipe (rtx insn);
644459d0 181static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 182static void spu_sched_init_global (FILE *, int, int);
183static void spu_sched_init (FILE *, int, int);
184static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 185static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
186 int flags,
b62e30b8 187 bool *no_add_attrs);
644459d0 188static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
189 int flags,
b62e30b8 190 bool *no_add_attrs);
644459d0 191static int spu_naked_function_p (tree func);
b62e30b8 192static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
ee9034d4 194static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
196static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
197 const_tree type, bool named);
644459d0 198static tree spu_build_builtin_va_list (void);
8a58ed0a 199static void spu_va_start (tree, rtx);
75a70cf9 200static tree spu_gimplify_va_arg_expr (tree valist, tree type,
201 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 202static int store_with_one_insn_p (rtx mem);
644459d0 203static int mem_is_padded_component_ref (rtx x);
9d98604b 204static int reg_aligned_for_addr (rtx x);
644459d0 205static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
206static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 207static bool spu_rtx_costs (rtx x, int code, int outer_code,
208 int *total, bool speed);
209static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 210static void spu_init_libfuncs (void);
fb80456a 211static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 212static void fix_range (const char *);
69ced2d6 213static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 214static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 215static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
216 addr_space_t);
e99f512d 217static tree spu_builtin_mul_widen_even (tree);
218static tree spu_builtin_mul_widen_odd (tree);
a76866d3 219static tree spu_builtin_mask_for_load (void);
0822b158 220static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 221static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 222static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 223static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
224static enum machine_mode spu_addr_space_address_mode (addr_space_t);
225static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
226static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 227static int spu_sms_res_mii (struct ddg *g);
5a976006 228static void asm_file_start (void);
a08dfd55 229static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 230static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
231static void spu_unique_section (tree, int);
9d98604b 232static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 233static void spu_trampoline_init (rtx, tree, rtx);
644459d0 234
5474166e 235/* Which instruction set architecture to use. */
236int spu_arch;
 237/* Which CPU we are tuning for. */
238int spu_tune;
239
5a976006 240/* The hardware requires 8 insns between a hint and the branch it
 241 affects. This variable describes how many rtl instructions the
 242 compiler needs to see before inserting a hint, and then the compiler
 243 will insert enough nops to make it at least 8 insns. The default is
 244 for the compiler to allow up to 2 nops to be emitted. The nops are
 245 inserted in pairs, so we round down. */
246int spu_hint_dist = (8*4) - (2*4);
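/* Worked example of the default above (illustrative): 8 insns x 4 bytes
   = 32 bytes of separation are required; allowing up to 2 nops (2 x 4
   bytes) to be filled in leaves (8*4) - (2*4) = 24 bytes, i.e. the
   compiler must see at least 6 real insns in front of the branch before
   it will insert a hint.  */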
247
248/* Determines whether we run variable tracking in machine dependent
249 reorganization. */
250static int spu_flag_var_tracking;
251
644459d0 252enum spu_immediate {
253 SPU_NONE,
254 SPU_IL,
255 SPU_ILA,
256 SPU_ILH,
257 SPU_ILHU,
258 SPU_ORI,
259 SPU_ORHI,
260 SPU_ORBI,
99369027 261 SPU_IOHL
644459d0 262};
dea01258 263enum immediate_class
264{
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
5df189be 272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 273};
644459d0 274
275static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 277static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
644459d0 280
1bd43494 281static enum machine_mode spu_unwind_word_mode (void);
282
ea32e033 283static enum machine_mode
284spu_libgcc_cmp_return_mode (void);
285
286static enum machine_mode
287spu_libgcc_shift_count_mode (void);
6cf5579e 288
289/* Pointer mode for __ea references. */
290#define EAmode (spu_ea_model != 32 ? DImode : SImode)
291
ef51d1e3 292\f
293/* Table of machine attributes. */
294static const struct attribute_spec spu_attribute_table[] =
295{
296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
297 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
299 { NULL, 0, 0, false, false, false, NULL }
300};
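/* For illustration (hypothetical user-level code, a sketch only): the
   attributes registered above would be spelled along the lines of

       void handler (void) __attribute__ ((naked));

   for the declaration-level "naked" attribute, while "spu_vector"
   applies to types, matching the decl_req/type_req flags in the table.  */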
644459d0 301\f
302/* TARGET overrides. */
303
6cf5579e 304#undef TARGET_ADDR_SPACE_POINTER_MODE
305#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
306
307#undef TARGET_ADDR_SPACE_ADDRESS_MODE
308#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
309
310#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
313
314#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
316
317#undef TARGET_ADDR_SPACE_SUBSET_P
318#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
319
320#undef TARGET_ADDR_SPACE_CONVERT
321#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
322
644459d0 323#undef TARGET_INIT_BUILTINS
324#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 325#undef TARGET_BUILTIN_DECL
326#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 327
644459d0 328#undef TARGET_EXPAND_BUILTIN
329#define TARGET_EXPAND_BUILTIN spu_expand_builtin
330
1bd43494 331#undef TARGET_UNWIND_WORD_MODE
332#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 333
41e3a0c7 334#undef TARGET_LEGITIMIZE_ADDRESS
335#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
336
6cf5579e 337/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340#undef TARGET_ASM_UNALIGNED_SI_OP
341#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
342
343#undef TARGET_ASM_ALIGNED_DI_OP
344#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
345
644459d0 346/* The .8byte directive doesn't seem to work well for a 32 bit
347 architecture. */
348#undef TARGET_ASM_UNALIGNED_DI_OP
349#define TARGET_ASM_UNALIGNED_DI_OP NULL
350
351#undef TARGET_RTX_COSTS
352#define TARGET_RTX_COSTS spu_rtx_costs
353
354#undef TARGET_ADDRESS_COST
f529eb25 355#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 356
357#undef TARGET_SCHED_ISSUE_RATE
358#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
359
5a976006 360#undef TARGET_SCHED_INIT_GLOBAL
361#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
362
363#undef TARGET_SCHED_INIT
364#define TARGET_SCHED_INIT spu_sched_init
365
644459d0 366#undef TARGET_SCHED_VARIABLE_ISSUE
367#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
368
5a976006 369#undef TARGET_SCHED_REORDER
370#define TARGET_SCHED_REORDER spu_sched_reorder
371
372#undef TARGET_SCHED_REORDER2
373#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 374
375#undef TARGET_SCHED_ADJUST_COST
376#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
377
644459d0 378#undef TARGET_ATTRIBUTE_TABLE
379#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
380
381#undef TARGET_ASM_INTEGER
382#define TARGET_ASM_INTEGER spu_assemble_integer
383
384#undef TARGET_SCALAR_MODE_SUPPORTED_P
385#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
386
387#undef TARGET_VECTOR_MODE_SUPPORTED_P
388#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
389
390#undef TARGET_FUNCTION_OK_FOR_SIBCALL
391#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
392
393#undef TARGET_ASM_GLOBALIZE_LABEL
394#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
395
396#undef TARGET_PASS_BY_REFERENCE
397#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
398
ee9034d4 399#undef TARGET_FUNCTION_ARG
400#define TARGET_FUNCTION_ARG spu_function_arg
401
402#undef TARGET_FUNCTION_ARG_ADVANCE
403#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
404
644459d0 405#undef TARGET_MUST_PASS_IN_STACK
406#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
408#undef TARGET_BUILD_BUILTIN_VA_LIST
409#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
410
8a58ed0a 411#undef TARGET_EXPAND_BUILTIN_VA_START
412#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
413
644459d0 414#undef TARGET_SETUP_INCOMING_VARARGS
415#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
416
417#undef TARGET_MACHINE_DEPENDENT_REORG
418#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
419
420#undef TARGET_GIMPLIFY_VA_ARG_EXPR
421#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
422
423#undef TARGET_DEFAULT_TARGET_FLAGS
424#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
425
426#undef TARGET_INIT_LIBFUNCS
427#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
428
429#undef TARGET_RETURN_IN_MEMORY
430#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
431
69ced2d6 432#undef TARGET_ENCODE_SECTION_INFO
433#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
434
e99f512d 435#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
436#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
437
438#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
439#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
440
a76866d3 441#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
442#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
443
a28df51d 444#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
445#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
446
202d6e5f 447#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
448#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 449
a0515226 450#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
451#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
452
ea32e033 453#undef TARGET_LIBGCC_CMP_RETURN_MODE
454#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
455
456#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
457#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
458
d52fd16a 459#undef TARGET_SCHED_SMS_RES_MII
460#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
461
5a976006 462#undef TARGET_ASM_FILE_START
463#define TARGET_ASM_FILE_START asm_file_start
464
a08dfd55 465#undef TARGET_SECTION_TYPE_FLAGS
466#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
467
6cf5579e 468#undef TARGET_ASM_SELECT_SECTION
469#define TARGET_ASM_SELECT_SECTION spu_select_section
470
471#undef TARGET_ASM_UNIQUE_SECTION
472#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
473
fd50b071 474#undef TARGET_LEGITIMATE_ADDRESS_P
475#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
476
e96f2783 477#undef TARGET_TRAMPOLINE_INIT
478#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
479
4c834714 480#undef TARGET_OPTION_OVERRIDE
481#define TARGET_OPTION_OVERRIDE spu_option_override
482
cc07c468 483#undef TARGET_OPTION_INIT_STRUCT
484#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
02e53c17 485
686e2769 486#undef TARGET_OPTION_DEFAULT_PARAMS
487#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
488
ed6befa5 489#undef TARGET_EXCEPT_UNWIND_INFO
490#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
491
644459d0 492struct gcc_target targetm = TARGET_INITIALIZER;
493
02e53c17 494static void
cc07c468 495spu_option_init_struct (struct gcc_options *opts)
5df189be 496{
5df189be 497 /* With so many registers this is better on by default. */
cc07c468 498 opts->x_flag_rename_registers = 1;
5df189be 499}
500
686e2769 501/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
502static void
503spu_option_default_params (void)
504{
505 /* Override some of the default param values. With so many registers
506 larger values are better for these params. */
507 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
508}
509
4c834714 510/* Implement TARGET_OPTION_OVERRIDE. */
511static void
512spu_option_override (void)
644459d0 513{
14d408d9 514 /* Small loops will be unpeeled at -O3. For SPU it is more important
515 to keep code small by default. */
686e2769 516 if (!flag_unroll_loops && !flag_peel_loops)
56f280c4 517 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1,
518 global_options.x_param_values,
519 global_options_set.x_param_values);
14d408d9 520
644459d0 521 flag_omit_frame_pointer = 1;
522
5a976006 523 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 524 if (align_functions < 8)
525 align_functions = 8;
c7b91b14 526
5a976006 527 spu_hint_dist = 8*4 - spu_max_nops*4;
528 if (spu_hint_dist < 0)
529 spu_hint_dist = 0;
530
c7b91b14 531 if (spu_fixed_range_string)
532 fix_range (spu_fixed_range_string);
5474166e 533
534 /* Determine processor architectural level. */
535 if (spu_arch_string)
536 {
537 if (strcmp (&spu_arch_string[0], "cell") == 0)
538 spu_arch = PROCESSOR_CELL;
539 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
540 spu_arch = PROCESSOR_CELLEDP;
541 else
542 error ("Unknown architecture '%s'", &spu_arch_string[0]);
543 }
544
545 /* Determine processor to tune for. */
546 if (spu_tune_string)
547 {
548 if (strcmp (&spu_tune_string[0], "cell") == 0)
549 spu_tune = PROCESSOR_CELL;
550 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
551 spu_tune = PROCESSOR_CELLEDP;
552 else
553 error ("Unknown architecture '%s'", &spu_tune_string[0]);
554 }
98bbec1e 555
13684256 556 /* Change defaults according to the processor architecture. */
557 if (spu_arch == PROCESSOR_CELLEDP)
558 {
559 /* If no command line option has been otherwise specified, change
560 the default to -mno-safe-hints on celledp -- only the original
561 Cell/B.E. processors require this workaround. */
562 if (!(target_flags_explicit & MASK_SAFE_HINTS))
563 target_flags &= ~MASK_SAFE_HINTS;
564 }
565
98bbec1e 566 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 567}
568\f
569/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
570 struct attribute_spec.handler. */
571
644459d0 572/* True if MODE is valid for the target. By "valid", we mean able to
573 be manipulated in non-trivial ways. In particular, this means all
574 the arithmetic is supported. */
575static bool
576spu_scalar_mode_supported_p (enum machine_mode mode)
577{
578 switch (mode)
579 {
580 case QImode:
581 case HImode:
582 case SImode:
583 case SFmode:
584 case DImode:
585 case TImode:
586 case DFmode:
587 return true;
588
589 default:
590 return false;
591 }
592}
593
594/* Similarly for vector modes. "Supported" here is less strict. At
595 least some operations are supported; need to check optabs or builtins
596 for further details. */
597static bool
598spu_vector_mode_supported_p (enum machine_mode mode)
599{
600 switch (mode)
601 {
602 case V16QImode:
603 case V8HImode:
604 case V4SImode:
605 case V2DImode:
606 case V4SFmode:
607 case V2DFmode:
608 return true;
609
610 default:
611 return false;
612 }
613}
614
615/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
616 least significant bytes of the outer mode. This function returns
 617 TRUE for the SUBREGs where this is correct. */
618int
619valid_subreg (rtx op)
620{
621 enum machine_mode om = GET_MODE (op);
622 enum machine_mode im = GET_MODE (SUBREG_REG (op));
623 return om != VOIDmode && im != VOIDmode
624 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 625 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
626 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 627}
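/* For example (illustrative): (subreg:SI (reg:QI)) is valid because both
   modes are 4 bytes or smaller, and (subreg:TI (reg:V4SI)) is valid
   because both are 16 bytes, but (subreg:SI (reg:DI)) is not, since an
   8-byte inner mode paired with a 4-byte outer mode matches none of the
   cases above.  */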
628
 629/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 630 and adjust the start offset. */
644459d0 631static rtx
632adjust_operand (rtx op, HOST_WIDE_INT * start)
633{
634 enum machine_mode mode;
635 int op_size;
38aca5eb 636 /* Strip any paradoxical SUBREG. */
637 if (GET_CODE (op) == SUBREG
638 && (GET_MODE_BITSIZE (GET_MODE (op))
639 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 640 {
641 if (start)
642 *start -=
643 GET_MODE_BITSIZE (GET_MODE (op)) -
644 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
645 op = SUBREG_REG (op);
646 }
 647 /* If it is smaller than SI, widen it to SImode (a SUBREG is added below). */
648 op_size = GET_MODE_BITSIZE (GET_MODE (op));
649 if (op_size < 32)
650 {
651 if (start)
652 *start += 32 - op_size;
653 op_size = 32;
654 }
655 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
656 mode = mode_for_size (op_size, MODE_INT, 0);
657 if (mode != GET_MODE (op))
658 op = gen_rtx_SUBREG (mode, op, 0);
659 return op;
660}
661
662void
663spu_expand_extv (rtx ops[], int unsignedp)
664{
9d98604b 665 rtx dst = ops[0], src = ops[1];
644459d0 666 HOST_WIDE_INT width = INTVAL (ops[2]);
667 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 668 HOST_WIDE_INT align_mask;
669 rtx s0, s1, mask, r0;
644459d0 670
9d98604b 671 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 672
9d98604b 673 if (MEM_P (src))
644459d0 674 {
9d98604b 675 /* First, determine if we need 1 TImode load or 2. We need only 1
676 if the bits being extracted do not cross the alignment boundary
677 as determined by the MEM and its address. */
678
679 align_mask = -MEM_ALIGN (src);
680 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 681 {
9d98604b 682 /* Alignment is sufficient for 1 load. */
683 s0 = gen_reg_rtx (TImode);
684 r0 = spu_expand_load (s0, 0, src, start / 8);
685 start &= 7;
686 if (r0)
687 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 688 }
9d98604b 689 else
690 {
691 /* Need 2 loads. */
692 s0 = gen_reg_rtx (TImode);
693 s1 = gen_reg_rtx (TImode);
694 r0 = spu_expand_load (s0, s1, src, start / 8);
695 start &= 7;
696
697 gcc_assert (start + width <= 128);
698 if (r0)
699 {
700 rtx r1 = gen_reg_rtx (SImode);
701 mask = gen_reg_rtx (TImode);
702 emit_move_insn (mask, GEN_INT (-1));
703 emit_insn (gen_rotqby_ti (s0, s0, r0));
704 emit_insn (gen_rotqby_ti (s1, s1, r0));
705 if (GET_CODE (r0) == CONST_INT)
706 r1 = GEN_INT (INTVAL (r0) & 15);
707 else
708 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
709 emit_insn (gen_shlqby_ti (mask, mask, r1));
710 emit_insn (gen_selb (s0, s1, s0, mask));
711 }
712 }
713
714 }
715 else if (GET_CODE (src) == SUBREG)
716 {
717 rtx r = SUBREG_REG (src);
718 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
719 s0 = gen_reg_rtx (TImode);
720 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
721 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
722 else
723 emit_move_insn (s0, src);
724 }
725 else
726 {
727 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
728 s0 = gen_reg_rtx (TImode);
729 emit_move_insn (s0, src);
644459d0 730 }
731
9d98604b 732 /* Now s0 is TImode and contains the bits to extract at start. */
733
734 if (start)
735 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
736
737 if (128 - width)
644459d0 738 {
9d98604b 739 tree c = build_int_cst (NULL_TREE, 128 - width);
740 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 741 }
742
9d98604b 743 emit_move_insn (dst, s0);
644459d0 744}
745
746void
747spu_expand_insv (rtx ops[])
748{
749 HOST_WIDE_INT width = INTVAL (ops[1]);
750 HOST_WIDE_INT start = INTVAL (ops[2]);
751 HOST_WIDE_INT maskbits;
752 enum machine_mode dst_mode, src_mode;
753 rtx dst = ops[0], src = ops[3];
754 int dst_size, src_size;
755 rtx mask;
756 rtx shift_reg;
757 int shift;
758
759
760 if (GET_CODE (ops[0]) == MEM)
761 dst = gen_reg_rtx (TImode);
762 else
763 dst = adjust_operand (dst, &start);
764 dst_mode = GET_MODE (dst);
765 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
766
767 if (CONSTANT_P (src))
768 {
769 enum machine_mode m =
770 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
771 src = force_reg (m, convert_to_mode (m, src, 0));
772 }
773 src = adjust_operand (src, 0);
774 src_mode = GET_MODE (src);
775 src_size = GET_MODE_BITSIZE (GET_MODE (src));
776
777 mask = gen_reg_rtx (dst_mode);
778 shift_reg = gen_reg_rtx (dst_mode);
779 shift = dst_size - start - width;
780
781 /* It's not safe to use subreg here because the compiler assumes
782 that the SUBREG_REG is right justified in the SUBREG. */
783 convert_move (shift_reg, src, 1);
784
785 if (shift > 0)
786 {
787 switch (dst_mode)
788 {
789 case SImode:
790 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
791 break;
792 case DImode:
793 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
794 break;
795 case TImode:
796 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
797 break;
798 default:
799 abort ();
800 }
801 }
802 else if (shift < 0)
803 abort ();
804
805 switch (dst_size)
806 {
807 case 32:
808 maskbits = (-1ll << (32 - width - start));
809 if (start)
810 maskbits += (1ll << (32 - start));
811 emit_move_insn (mask, GEN_INT (maskbits));
812 break;
813 case 64:
814 maskbits = (-1ll << (64 - width - start));
815 if (start)
816 maskbits += (1ll << (64 - start));
817 emit_move_insn (mask, GEN_INT (maskbits));
818 break;
819 case 128:
820 {
821 unsigned char arr[16];
822 int i = start / 8;
823 memset (arr, 0, sizeof (arr));
824 arr[i] = 0xff >> (start & 7);
825 for (i++; i <= (start + width - 1) / 8; i++)
826 arr[i] = 0xff;
827 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
828 emit_move_insn (mask, array_to_constant (TImode, arr));
829 }
830 break;
831 default:
832 abort ();
833 }
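  /* Worked example (illustrative): inserting a field of width 8 starting
     at bit 4 (counted from the most significant bit) into an SImode
     destination gives maskbits = (-1ll << 20) + (1ll << 28) = 0x0ff00000,
     i.e. exactly the 8 destination bits of the field are set in the mask
     used by the selb below.  */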
834 if (GET_CODE (ops[0]) == MEM)
835 {
644459d0 836 rtx low = gen_reg_rtx (SImode);
644459d0 837 rtx rotl = gen_reg_rtx (SImode);
838 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 839 rtx addr;
840 rtx addr0;
841 rtx addr1;
644459d0 842 rtx mem;
843
9d98604b 844 addr = force_reg (Pmode, XEXP (ops[0], 0));
845 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 846 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
847 emit_insn (gen_negsi2 (rotl, low));
848 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
849 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 850 mem = change_address (ops[0], TImode, addr0);
644459d0 851 set_mem_alias_set (mem, 0);
852 emit_move_insn (dst, mem);
853 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 854 if (start + width > MEM_ALIGN (ops[0]))
855 {
856 rtx shl = gen_reg_rtx (SImode);
857 rtx mask1 = gen_reg_rtx (TImode);
858 rtx dst1 = gen_reg_rtx (TImode);
859 rtx mem1;
9d98604b 860 addr1 = plus_constant (addr, 16);
861 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 862 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
863 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 864 mem1 = change_address (ops[0], TImode, addr1);
644459d0 865 set_mem_alias_set (mem1, 0);
866 emit_move_insn (dst1, mem1);
867 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
868 emit_move_insn (mem1, dst1);
869 }
9d98604b 870 emit_move_insn (mem, dst);
644459d0 871 }
872 else
71cd778d 873 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 874}
875
876
877int
878spu_expand_block_move (rtx ops[])
879{
880 HOST_WIDE_INT bytes, align, offset;
881 rtx src, dst, sreg, dreg, target;
882 int i;
883 if (GET_CODE (ops[2]) != CONST_INT
884 || GET_CODE (ops[3]) != CONST_INT
48eb4342 885 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 886 return 0;
887
888 bytes = INTVAL (ops[2]);
889 align = INTVAL (ops[3]);
890
891 if (bytes <= 0)
892 return 1;
893
894 dst = ops[0];
895 src = ops[1];
896
897 if (align == 16)
898 {
899 for (offset = 0; offset + 16 <= bytes; offset += 16)
900 {
901 dst = adjust_address (ops[0], V16QImode, offset);
902 src = adjust_address (ops[1], V16QImode, offset);
903 emit_move_insn (dst, src);
904 }
905 if (offset < bytes)
906 {
907 rtx mask;
908 unsigned char arr[16] = { 0 };
909 for (i = 0; i < bytes - offset; i++)
910 arr[i] = 0xff;
911 dst = adjust_address (ops[0], V16QImode, offset);
912 src = adjust_address (ops[1], V16QImode, offset);
913 mask = gen_reg_rtx (V16QImode);
914 sreg = gen_reg_rtx (V16QImode);
915 dreg = gen_reg_rtx (V16QImode);
916 target = gen_reg_rtx (V16QImode);
917 emit_move_insn (mask, array_to_constant (V16QImode, arr));
918 emit_move_insn (dreg, dst);
919 emit_move_insn (sreg, src);
920 emit_insn (gen_selb (target, dreg, sreg, mask));
921 emit_move_insn (dst, target);
922 }
923 return 1;
924 }
925 return 0;
926}
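/* For example (illustrative sizes): a 20-byte copy with 16-byte-aligned
   operands is expanded above as one full V16QImode move for the first 16
   bytes, followed by a "selb" merge whose mask selects only the remaining
   4 bytes of the final quadword from the source.  */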
927
928enum spu_comp_code
929{ SPU_EQ, SPU_GT, SPU_GTU };
930
5474166e 931int spu_comp_icode[12][3] = {
932 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
933 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
934 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
935 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
936 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
937 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
938 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
939 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
940 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
941 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
942 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
943 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 944};
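/* For example (illustrative): an unsigned SImode greater-than compare
   uses row 2 of the table above (the SImode row) and column SPU_GTU,
   i.e. CODE_FOR_clgt_si; the floating-point rows have no unsigned
   variant, which is why their SPU_GTU column is 0.  */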
945
 946/* Generate a compare for CODE. Return a brand-new rtx that represents
 947 the result of the compare. GCC can figure this out too if we don't
 948 provide all variations of compares, but GCC always wants to use
 949 WORD_MODE, so we can generate better code in most cases if we do it
 950 ourselves. */
951void
74f4459c 952spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 953{
954 int reverse_compare = 0;
955 int reverse_test = 0;
5d70b918 956 rtx compare_result, eq_result;
957 rtx comp_rtx, eq_rtx;
644459d0 958 enum machine_mode comp_mode;
959 enum machine_mode op_mode;
b9c74b4d 960 enum spu_comp_code scode, eq_code;
961 enum insn_code ior_code;
74f4459c 962 enum rtx_code code = GET_CODE (cmp);
963 rtx op0 = XEXP (cmp, 0);
964 rtx op1 = XEXP (cmp, 1);
644459d0 965 int index;
5d70b918 966 int eq_test = 0;
644459d0 967
74f4459c 968 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 969 and so on, to keep the constant in operand 1. */
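  /* For example (illustrative values): (x >= 10) becomes (x > 9) and an
     unsigned (x < 10) becomes (x <=u 9).  */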
74f4459c 970 if (GET_CODE (op1) == CONST_INT)
644459d0 971 {
74f4459c 972 HOST_WIDE_INT val = INTVAL (op1) - 1;
973 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 974 switch (code)
975 {
976 case GE:
74f4459c 977 op1 = GEN_INT (val);
644459d0 978 code = GT;
979 break;
980 case LT:
74f4459c 981 op1 = GEN_INT (val);
644459d0 982 code = LE;
983 break;
984 case GEU:
74f4459c 985 op1 = GEN_INT (val);
644459d0 986 code = GTU;
987 break;
988 case LTU:
74f4459c 989 op1 = GEN_INT (val);
644459d0 990 code = LEU;
991 break;
992 default:
993 break;
994 }
995 }
996
5d70b918 997 comp_mode = SImode;
74f4459c 998 op_mode = GET_MODE (op0);
5d70b918 999
644459d0 1000 switch (code)
1001 {
1002 case GE:
644459d0 1003 scode = SPU_GT;
07027691 1004 if (HONOR_NANS (op_mode))
5d70b918 1005 {
1006 reverse_compare = 0;
1007 reverse_test = 0;
1008 eq_test = 1;
1009 eq_code = SPU_EQ;
1010 }
1011 else
1012 {
1013 reverse_compare = 1;
1014 reverse_test = 1;
1015 }
644459d0 1016 break;
1017 case LE:
644459d0 1018 scode = SPU_GT;
07027691 1019 if (HONOR_NANS (op_mode))
5d70b918 1020 {
1021 reverse_compare = 1;
1022 reverse_test = 0;
1023 eq_test = 1;
1024 eq_code = SPU_EQ;
1025 }
1026 else
1027 {
1028 reverse_compare = 0;
1029 reverse_test = 1;
1030 }
644459d0 1031 break;
1032 case LT:
1033 reverse_compare = 1;
1034 reverse_test = 0;
1035 scode = SPU_GT;
1036 break;
1037 case GEU:
1038 reverse_compare = 1;
1039 reverse_test = 1;
1040 scode = SPU_GTU;
1041 break;
1042 case LEU:
1043 reverse_compare = 0;
1044 reverse_test = 1;
1045 scode = SPU_GTU;
1046 break;
1047 case LTU:
1048 reverse_compare = 1;
1049 reverse_test = 0;
1050 scode = SPU_GTU;
1051 break;
1052 case NE:
1053 reverse_compare = 0;
1054 reverse_test = 1;
1055 scode = SPU_EQ;
1056 break;
1057
1058 case EQ:
1059 scode = SPU_EQ;
1060 break;
1061 case GT:
1062 scode = SPU_GT;
1063 break;
1064 case GTU:
1065 scode = SPU_GTU;
1066 break;
1067 default:
1068 scode = SPU_EQ;
1069 break;
1070 }
1071
644459d0 1072 switch (op_mode)
1073 {
1074 case QImode:
1075 index = 0;
1076 comp_mode = QImode;
1077 break;
1078 case HImode:
1079 index = 1;
1080 comp_mode = HImode;
1081 break;
1082 case SImode:
1083 index = 2;
1084 break;
1085 case DImode:
1086 index = 3;
1087 break;
1088 case TImode:
1089 index = 4;
1090 break;
1091 case SFmode:
1092 index = 5;
1093 break;
1094 case DFmode:
1095 index = 6;
1096 break;
1097 case V16QImode:
5474166e 1098 index = 7;
1099 comp_mode = op_mode;
1100 break;
644459d0 1101 case V8HImode:
5474166e 1102 index = 8;
1103 comp_mode = op_mode;
1104 break;
644459d0 1105 case V4SImode:
5474166e 1106 index = 9;
1107 comp_mode = op_mode;
1108 break;
644459d0 1109 case V4SFmode:
5474166e 1110 index = 10;
1111 comp_mode = V4SImode;
1112 break;
644459d0 1113 case V2DFmode:
5474166e 1114 index = 11;
1115 comp_mode = V2DImode;
644459d0 1116 break;
5474166e 1117 case V2DImode:
644459d0 1118 default:
1119 abort ();
1120 }
1121
74f4459c 1122 if (GET_MODE (op1) == DFmode
07027691 1123 && (scode != SPU_GT && scode != SPU_EQ))
1124 abort ();
644459d0 1125
74f4459c 1126 if (is_set == 0 && op1 == const0_rtx
1127 && (GET_MODE (op0) == SImode
1128 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1129 {
1130 /* Don't need to set a register with the result when we are
1131 comparing against zero and branching. */
1132 reverse_test = !reverse_test;
74f4459c 1133 compare_result = op0;
644459d0 1134 }
1135 else
1136 {
1137 compare_result = gen_reg_rtx (comp_mode);
1138
1139 if (reverse_compare)
1140 {
74f4459c 1141 rtx t = op1;
1142 op1 = op0;
1143 op0 = t;
644459d0 1144 }
1145
1146 if (spu_comp_icode[index][scode] == 0)
1147 abort ();
1148
1149 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1150 (op0, op_mode))
1151 op0 = force_reg (op_mode, op0);
644459d0 1152 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1153 (op1, op_mode))
1154 op1 = force_reg (op_mode, op1);
644459d0 1155 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1156 op0, op1);
644459d0 1157 if (comp_rtx == 0)
1158 abort ();
1159 emit_insn (comp_rtx);
1160
5d70b918 1161 if (eq_test)
1162 {
1163 eq_result = gen_reg_rtx (comp_mode);
1164 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1165 op0, op1);
5d70b918 1166 if (eq_rtx == 0)
1167 abort ();
1168 emit_insn (eq_rtx);
d6bf3b14 1169 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1170 gcc_assert (ior_code != CODE_FOR_nothing);
1171 emit_insn (GEN_FCN (ior_code)
1172 (compare_result, compare_result, eq_result));
1173 }
644459d0 1174 }
1175
1176 if (is_set == 0)
1177 {
1178 rtx bcomp;
1179 rtx loc_ref;
1180
1181 /* We don't have branch on QI compare insns, so we convert the
1182 QI compare result to a HI result. */
1183 if (comp_mode == QImode)
1184 {
1185 rtx old_res = compare_result;
1186 compare_result = gen_reg_rtx (HImode);
1187 comp_mode = HImode;
1188 emit_insn (gen_extendqihi2 (compare_result, old_res));
1189 }
1190
1191 if (reverse_test)
1192 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1193 else
1194 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1195
74f4459c 1196 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1197 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1198 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1199 loc_ref, pc_rtx)));
1200 }
1201 else if (is_set == 2)
1202 {
74f4459c 1203 rtx target = operands[0];
644459d0 1204 int compare_size = GET_MODE_BITSIZE (comp_mode);
1205 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1206 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1207 rtx select_mask;
1208 rtx op_t = operands[2];
1209 rtx op_f = operands[3];
1210
1211 /* The result of the comparison can be SI, HI or QI mode. Create a
1212 mask based on that result. */
1213 if (target_size > compare_size)
1214 {
1215 select_mask = gen_reg_rtx (mode);
1216 emit_insn (gen_extend_compare (select_mask, compare_result));
1217 }
1218 else if (target_size < compare_size)
1219 select_mask =
1220 gen_rtx_SUBREG (mode, compare_result,
1221 (compare_size - target_size) / BITS_PER_UNIT);
1222 else if (comp_mode != mode)
1223 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1224 else
1225 select_mask = compare_result;
1226
1227 if (GET_MODE (target) != GET_MODE (op_t)
1228 || GET_MODE (target) != GET_MODE (op_f))
1229 abort ();
1230
1231 if (reverse_test)
1232 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1233 else
1234 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1235 }
1236 else
1237 {
74f4459c 1238 rtx target = operands[0];
644459d0 1239 if (reverse_test)
1240 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1241 gen_rtx_NOT (comp_mode, compare_result)));
1242 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1243 emit_insn (gen_extendhisi2 (target, compare_result));
1244 else if (GET_MODE (target) == SImode
1245 && GET_MODE (compare_result) == QImode)
1246 emit_insn (gen_extend_compare (target, compare_result));
1247 else
1248 emit_move_insn (target, compare_result);
1249 }
1250}
1251
1252HOST_WIDE_INT
1253const_double_to_hwint (rtx x)
1254{
1255 HOST_WIDE_INT val;
1256 REAL_VALUE_TYPE rv;
1257 if (GET_MODE (x) == SFmode)
1258 {
1259 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1260 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1261 }
1262 else if (GET_MODE (x) == DFmode)
1263 {
1264 long l[2];
1265 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1266 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1267 val = l[0];
1268 val = (val << 32) | (l[1] & 0xffffffff);
1269 }
1270 else
1271 abort ();
1272 return val;
1273}
1274
1275rtx
1276hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1277{
1278 long tv[2];
1279 REAL_VALUE_TYPE rv;
1280 gcc_assert (mode == SFmode || mode == DFmode);
1281
1282 if (mode == SFmode)
1283 tv[0] = (v << 32) >> 32;
1284 else if (mode == DFmode)
1285 {
1286 tv[1] = (v << 32) >> 32;
1287 tv[0] = v >> 32;
1288 }
1289 real_from_target (&rv, tv, mode);
1290 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1291}
1292
1293void
1294print_operand_address (FILE * file, register rtx addr)
1295{
1296 rtx reg;
1297 rtx offset;
1298
e04cf423 1299 if (GET_CODE (addr) == AND
1300 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1301 && INTVAL (XEXP (addr, 1)) == -16)
1302 addr = XEXP (addr, 0);
1303
644459d0 1304 switch (GET_CODE (addr))
1305 {
1306 case REG:
1307 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1308 break;
1309
1310 case PLUS:
1311 reg = XEXP (addr, 0);
1312 offset = XEXP (addr, 1);
1313 if (GET_CODE (offset) == REG)
1314 {
1315 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1316 reg_names[REGNO (offset)]);
1317 }
1318 else if (GET_CODE (offset) == CONST_INT)
1319 {
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1321 INTVAL (offset), reg_names[REGNO (reg)]);
1322 }
1323 else
1324 abort ();
1325 break;
1326
1327 case CONST:
1328 case LABEL_REF:
1329 case SYMBOL_REF:
1330 case CONST_INT:
1331 output_addr_const (file, addr);
1332 break;
1333
1334 default:
1335 debug_rtx (addr);
1336 abort ();
1337 }
1338}
1339
1340void
1341print_operand (FILE * file, rtx x, int code)
1342{
1343 enum machine_mode mode = GET_MODE (x);
1344 HOST_WIDE_INT val;
1345 unsigned char arr[16];
1346 int xcode = GET_CODE (x);
dea01258 1347 int i, info;
644459d0 1348 if (GET_MODE (x) == VOIDmode)
1349 switch (code)
1350 {
644459d0 1351 case 'L': /* 128 bits, signed */
1352 case 'm': /* 128 bits, signed */
1353 case 'T': /* 128 bits, signed */
1354 case 't': /* 128 bits, signed */
1355 mode = TImode;
1356 break;
644459d0 1357 case 'K': /* 64 bits, signed */
1358 case 'k': /* 64 bits, signed */
1359 case 'D': /* 64 bits, signed */
1360 case 'd': /* 64 bits, signed */
1361 mode = DImode;
1362 break;
644459d0 1363 case 'J': /* 32 bits, signed */
1364 case 'j': /* 32 bits, signed */
1365 case 's': /* 32 bits, signed */
1366 case 'S': /* 32 bits, signed */
1367 mode = SImode;
1368 break;
1369 }
1370 switch (code)
1371 {
1372
1373 case 'j': /* 32 bits, signed */
1374 case 'k': /* 64 bits, signed */
1375 case 'm': /* 128 bits, signed */
1376 if (xcode == CONST_INT
1377 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1378 {
1379 gcc_assert (logical_immediate_p (x, mode));
1380 constant_to_array (mode, x, arr);
1381 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1382 val = trunc_int_for_mode (val, SImode);
1383 switch (which_logical_immediate (val))
1384 {
1385 case SPU_ORI:
1386 break;
1387 case SPU_ORHI:
1388 fprintf (file, "h");
1389 break;
1390 case SPU_ORBI:
1391 fprintf (file, "b");
1392 break;
1393 default:
1394 gcc_unreachable();
1395 }
1396 }
1397 else
1398 gcc_unreachable();
1399 return;
1400
1401 case 'J': /* 32 bits, signed */
1402 case 'K': /* 64 bits, signed */
1403 case 'L': /* 128 bits, signed */
1404 if (xcode == CONST_INT
1405 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1406 {
1407 gcc_assert (logical_immediate_p (x, mode)
1408 || iohl_immediate_p (x, mode));
1409 constant_to_array (mode, x, arr);
1410 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1411 val = trunc_int_for_mode (val, SImode);
1412 switch (which_logical_immediate (val))
1413 {
1414 case SPU_ORI:
1415 case SPU_IOHL:
1416 break;
1417 case SPU_ORHI:
1418 val = trunc_int_for_mode (val, HImode);
1419 break;
1420 case SPU_ORBI:
1421 val = trunc_int_for_mode (val, QImode);
1422 break;
1423 default:
1424 gcc_unreachable();
1425 }
1426 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1427 }
1428 else
1429 gcc_unreachable();
1430 return;
1431
1432 case 't': /* 128 bits, signed */
1433 case 'd': /* 64 bits, signed */
1434 case 's': /* 32 bits, signed */
dea01258 1435 if (CONSTANT_P (x))
644459d0 1436 {
dea01258 1437 enum immediate_class c = classify_immediate (x, mode);
1438 switch (c)
1439 {
1440 case IC_IL1:
1441 constant_to_array (mode, x, arr);
1442 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1443 val = trunc_int_for_mode (val, SImode);
1444 switch (which_immediate_load (val))
1445 {
1446 case SPU_IL:
1447 break;
1448 case SPU_ILA:
1449 fprintf (file, "a");
1450 break;
1451 case SPU_ILH:
1452 fprintf (file, "h");
1453 break;
1454 case SPU_ILHU:
1455 fprintf (file, "hu");
1456 break;
1457 default:
1458 gcc_unreachable ();
1459 }
1460 break;
1461 case IC_CPAT:
1462 constant_to_array (mode, x, arr);
1463 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1464 if (info == 1)
1465 fprintf (file, "b");
1466 else if (info == 2)
1467 fprintf (file, "h");
1468 else if (info == 4)
1469 fprintf (file, "w");
1470 else if (info == 8)
1471 fprintf (file, "d");
1472 break;
1473 case IC_IL1s:
1474 if (xcode == CONST_VECTOR)
1475 {
1476 x = CONST_VECTOR_ELT (x, 0);
1477 xcode = GET_CODE (x);
1478 }
1479 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1480 fprintf (file, "a");
1481 else if (xcode == HIGH)
1482 fprintf (file, "hu");
1483 break;
1484 case IC_FSMBI:
5df189be 1485 case IC_FSMBI2:
dea01258 1486 case IC_IL2:
1487 case IC_IL2s:
1488 case IC_POOL:
1489 abort ();
1490 }
644459d0 1491 }
644459d0 1492 else
1493 gcc_unreachable ();
1494 return;
1495
1496 case 'T': /* 128 bits, signed */
1497 case 'D': /* 64 bits, signed */
1498 case 'S': /* 32 bits, signed */
dea01258 1499 if (CONSTANT_P (x))
644459d0 1500 {
dea01258 1501 enum immediate_class c = classify_immediate (x, mode);
1502 switch (c)
644459d0 1503 {
dea01258 1504 case IC_IL1:
1505 constant_to_array (mode, x, arr);
1506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1507 val = trunc_int_for_mode (val, SImode);
1508 switch (which_immediate_load (val))
1509 {
1510 case SPU_IL:
1511 case SPU_ILA:
1512 break;
1513 case SPU_ILH:
1514 case SPU_ILHU:
1515 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1516 break;
1517 default:
1518 gcc_unreachable ();
1519 }
1520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1521 break;
1522 case IC_FSMBI:
1523 constant_to_array (mode, x, arr);
1524 val = 0;
1525 for (i = 0; i < 16; i++)
1526 {
1527 val <<= 1;
1528 val |= arr[i] & 1;
1529 }
1530 print_operand (file, GEN_INT (val), 0);
1531 break;
1532 case IC_CPAT:
1533 constant_to_array (mode, x, arr);
1534 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1536 break;
dea01258 1537 case IC_IL1s:
dea01258 1538 if (xcode == HIGH)
5df189be 1539 x = XEXP (x, 0);
1540 if (GET_CODE (x) == CONST_VECTOR)
1541 x = CONST_VECTOR_ELT (x, 0);
1542 output_addr_const (file, x);
1543 if (xcode == HIGH)
1544 fprintf (file, "@h");
644459d0 1545 break;
dea01258 1546 case IC_IL2:
1547 case IC_IL2s:
5df189be 1548 case IC_FSMBI2:
dea01258 1549 case IC_POOL:
1550 abort ();
644459d0 1551 }
c8befdb9 1552 }
644459d0 1553 else
1554 gcc_unreachable ();
1555 return;
1556
644459d0 1557 case 'C':
1558 if (xcode == CONST_INT)
1559 {
 1560 /* Only the 4 least significant bits are relevant for generating
 1561 control word instructions. */
1562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1563 return;
1564 }
1565 break;
1566
1567 case 'M': /* print code for c*d */
1568 if (GET_CODE (x) == CONST_INT)
1569 switch (INTVAL (x))
1570 {
1571 case 1:
1572 fprintf (file, "b");
1573 break;
1574 case 2:
1575 fprintf (file, "h");
1576 break;
1577 case 4:
1578 fprintf (file, "w");
1579 break;
1580 case 8:
1581 fprintf (file, "d");
1582 break;
1583 default:
1584 gcc_unreachable();
1585 }
1586 else
1587 gcc_unreachable();
1588 return;
1589
1590 case 'N': /* Negate the operand */
1591 if (xcode == CONST_INT)
1592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1593 else if (xcode == CONST_VECTOR)
1594 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1595 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1596 return;
1597
1598 case 'I': /* enable/disable interrupts */
1599 if (xcode == CONST_INT)
1600 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1601 return;
1602
1603 case 'b': /* branch modifiers */
1604 if (xcode == REG)
1605 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1606 else if (COMPARISON_P (x))
1607 fprintf (file, "%s", xcode == NE ? "n" : "");
1608 return;
1609
1610 case 'i': /* indirect call */
1611 if (xcode == MEM)
1612 {
1613 if (GET_CODE (XEXP (x, 0)) == REG)
1614 /* Used in indirect function calls. */
1615 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1616 else
1617 output_address (XEXP (x, 0));
1618 }
1619 return;
1620
1621 case 'p': /* load/store */
1622 if (xcode == MEM)
1623 {
1624 x = XEXP (x, 0);
1625 xcode = GET_CODE (x);
1626 }
e04cf423 1627 if (xcode == AND)
1628 {
1629 x = XEXP (x, 0);
1630 xcode = GET_CODE (x);
1631 }
644459d0 1632 if (xcode == REG)
1633 fprintf (file, "d");
1634 else if (xcode == CONST_INT)
1635 fprintf (file, "a");
1636 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1637 fprintf (file, "r");
1638 else if (xcode == PLUS || xcode == LO_SUM)
1639 {
1640 if (GET_CODE (XEXP (x, 1)) == REG)
1641 fprintf (file, "x");
1642 else
1643 fprintf (file, "d");
1644 }
1645 return;
1646
5df189be 1647 case 'e':
1648 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1649 val &= 0x7;
1650 output_addr_const (file, GEN_INT (val));
1651 return;
1652
1653 case 'f':
1654 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1655 val &= 0x1f;
1656 output_addr_const (file, GEN_INT (val));
1657 return;
1658
1659 case 'g':
1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661 val &= 0x3f;
1662 output_addr_const (file, GEN_INT (val));
1663 return;
1664
1665 case 'h':
1666 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1667 val = (val >> 3) & 0x1f;
1668 output_addr_const (file, GEN_INT (val));
1669 return;
1670
1671 case 'E':
1672 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1673 val = -val;
1674 val &= 0x7;
1675 output_addr_const (file, GEN_INT (val));
1676 return;
1677
1678 case 'F':
1679 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 val = -val;
1681 val &= 0x1f;
1682 output_addr_const (file, GEN_INT (val));
1683 return;
1684
1685 case 'G':
1686 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 val = -val;
1688 val &= 0x3f;
1689 output_addr_const (file, GEN_INT (val));
1690 return;
1691
1692 case 'H':
1693 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1694 val = -(val & -8ll);
1695 val = (val >> 3) & 0x1f;
1696 output_addr_const (file, GEN_INT (val));
1697 return;
1698
56c7bfc2 1699 case 'v':
1700 case 'w':
1701 constant_to_array (mode, x, arr);
1702 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1703 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1704 return;
1705
644459d0 1706 case 0:
1707 if (xcode == REG)
1708 fprintf (file, "%s", reg_names[REGNO (x)]);
1709 else if (xcode == MEM)
1710 output_address (XEXP (x, 0));
1711 else if (xcode == CONST_VECTOR)
dea01258 1712 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1713 else
1714 output_addr_const (file, x);
1715 return;
1716
f6a0d06f 1717 /* unused letters
56c7bfc2 1718 o qr u yz
5df189be 1719 AB OPQR UVWXYZ */
644459d0 1720 default:
1721 output_operand_lossage ("invalid %%xn code");
1722 }
1723 gcc_unreachable ();
1724}
1725
644459d0 1726/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1727 caller saved register. For leaf functions it is more efficient to
1728 use a volatile register because we won't need to save and restore the
1729 pic register. This routine is only valid after register allocation
1730 is completed, so we can pick an unused register. */
1731static rtx
1732get_pic_reg (void)
1733{
1734 rtx pic_reg = pic_offset_table_rtx;
1735 if (!reload_completed && !reload_in_progress)
1736 abort ();
87a95921 1737 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1738 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1739 return pic_reg;
1740}
1741
5df189be 1742/* Split constant addresses to handle cases that are too large.
1743 Add in the pic register when in PIC mode.
1744 Split immediates that require more than 1 instruction. */
dea01258 1745int
1746spu_split_immediate (rtx * ops)
c8befdb9 1747{
dea01258 1748 enum machine_mode mode = GET_MODE (ops[0]);
1749 enum immediate_class c = classify_immediate (ops[1], mode);
1750
1751 switch (c)
c8befdb9 1752 {
dea01258 1753 case IC_IL2:
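      /* Illustrative example: an SImode constant such as 0x12345678 in
         this class is loaded with an "ilhu" of the high halfword
         (0x12340000) followed by an "iohl" of the low halfword (0x5678),
         which is what the hi/lo split below produces.  */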
1754 {
1755 unsigned char arrhi[16];
1756 unsigned char arrlo[16];
98bbec1e 1757 rtx to, temp, hi, lo;
dea01258 1758 int i;
98bbec1e 1759 enum machine_mode imode = mode;
1760 /* We need to do reals as ints because the constant used in the
1761 IOR might not be a legitimate real constant. */
1762 imode = int_mode_for_mode (mode);
dea01258 1763 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1764 if (imode != mode)
1765 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1766 else
1767 to = ops[0];
1768 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1769 for (i = 0; i < 16; i += 4)
1770 {
1771 arrlo[i + 2] = arrhi[i + 2];
1772 arrlo[i + 3] = arrhi[i + 3];
1773 arrlo[i + 0] = arrlo[i + 1] = 0;
1774 arrhi[i + 2] = arrhi[i + 3] = 0;
1775 }
98bbec1e 1776 hi = array_to_constant (imode, arrhi);
1777 lo = array_to_constant (imode, arrlo);
1778 emit_move_insn (temp, hi);
dea01258 1779 emit_insn (gen_rtx_SET
98bbec1e 1780 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1781 return 1;
1782 }
5df189be 1783 case IC_FSMBI2:
1784 {
1785 unsigned char arr_fsmbi[16];
1786 unsigned char arr_andbi[16];
1787 rtx to, reg_fsmbi, reg_and;
1788 int i;
1789 enum machine_mode imode = mode;
1790 /* We need to do reals as ints because the constant used in the
1791 * AND might not be a legitimate real constant. */
1792 imode = int_mode_for_mode (mode);
1793 constant_to_array (mode, ops[1], arr_fsmbi);
1794 if (imode != mode)
1795 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1796 else
1797 to = ops[0];
1798 for (i = 0; i < 16; i++)
1799 if (arr_fsmbi[i] != 0)
1800 {
1801 arr_andbi[0] = arr_fsmbi[i];
1802 arr_fsmbi[i] = 0xff;
1803 }
1804 for (i = 1; i < 16; i++)
1805 arr_andbi[i] = arr_andbi[0];
1806 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1807 reg_and = array_to_constant (imode, arr_andbi);
1808 emit_move_insn (to, reg_fsmbi);
1809 emit_insn (gen_rtx_SET
1810 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1811 return 1;
1812 }
dea01258 1813 case IC_POOL:
1814 if (reload_in_progress || reload_completed)
1815 {
1816 rtx mem = force_const_mem (mode, ops[1]);
1817 if (TARGET_LARGE_MEM)
1818 {
1819 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1820 emit_move_insn (addr, XEXP (mem, 0));
1821 mem = replace_equiv_address (mem, addr);
1822 }
1823 emit_move_insn (ops[0], mem);
1824 return 1;
1825 }
1826 break;
1827 case IC_IL1s:
1828 case IC_IL2s:
1829 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1830 {
1831 if (c == IC_IL2s)
1832 {
5df189be 1833 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1834 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1835 }
1836 else if (flag_pic)
1837 emit_insn (gen_pic (ops[0], ops[1]));
1838 if (flag_pic)
1839 {
1840 rtx pic_reg = get_pic_reg ();
1841 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1842 crtl->uses_pic_offset_table = 1;
dea01258 1843 }
1844 return flag_pic || c == IC_IL2s;
1845 }
1846 break;
1847 case IC_IL1:
1848 case IC_FSMBI:
1849 case IC_CPAT:
1850 break;
c8befdb9 1851 }
dea01258 1852 return 0;
c8befdb9 1853}
1854
644459d0 1855/* SAVING is TRUE when we are generating the actual load and store
1856 instructions for REGNO. When determining the size of the stack
1857	   needed for saving registers we must allocate enough space for the
1858 worst case, because we don't always have the information early enough
1859 to not allocate it. But we can at least eliminate the actual loads
1860 and stores during the prologue/epilogue. */
1861static int
1862need_to_save_reg (int regno, int saving)
1863{
3072d30e 1864 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1865 return 1;
1866 if (flag_pic
1867 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1868 && (!saving || crtl->uses_pic_offset_table)
644459d0 1869 && (!saving
3072d30e 1870 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1871 return 1;
1872 return 0;
1873}
1874
1875/* This function is only correct starting with local register
1876	   allocation. */
1877int
1878spu_saved_regs_size (void)
1879{
1880 int reg_save_size = 0;
1881 int regno;
1882
1883 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1884 if (need_to_save_reg (regno, 0))
1885 reg_save_size += 0x10;
1886 return reg_save_size;
1887}
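
/* Illustrative example (not from the original source): every register
   that need_to_save_reg selects is stored as a full 16-byte quadword,
   so a function with, say, three live call-saved registers reserves
   3 * 0x10 = 0x30 bytes of save area here.  */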
1888
1889static rtx
1890frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1891{
1892 rtx reg = gen_rtx_REG (V4SImode, regno);
1893 rtx mem =
1894 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1895 return emit_insn (gen_movv4si (mem, reg));
1896}
1897
1898static rtx
1899frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1900{
1901 rtx reg = gen_rtx_REG (V4SImode, regno);
1902 rtx mem =
1903 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1904 return emit_insn (gen_movv4si (reg, mem));
1905}
1906
1907/* This happens after reload, so we need to expand it. */
1908static rtx
1909frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1910{
1911 rtx insn;
1912 if (satisfies_constraint_K (GEN_INT (imm)))
1913 {
1914 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1915 }
1916 else
1917 {
3072d30e 1918 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1919 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1920 if (REGNO (src) == REGNO (scratch))
1921 abort ();
1922 }
644459d0 1923 return insn;
1924}
1925
1926/* Return nonzero if this function is known to have a null epilogue. */
1927
1928int
1929direct_return (void)
1930{
1931 if (reload_completed)
1932 {
1933 if (cfun->static_chain_decl == 0
1934 && (spu_saved_regs_size ()
1935 + get_frame_size ()
abe32cce 1936 + crtl->outgoing_args_size
1937 + crtl->args.pretend_args_size == 0)
644459d0 1938 && current_function_is_leaf)
1939 return 1;
1940 }
1941 return 0;
1942}
1943
1944/*
1945 The stack frame looks like this:
1946 +-------------+
1947 | incoming |
a8e019fa 1948 | args |
1949 AP -> +-------------+
644459d0 1950 | $lr save |
1951 +-------------+
1952 prev SP | back chain |
1953 +-------------+
1954 | var args |
abe32cce 1955 | reg save | crtl->args.pretend_args_size bytes
644459d0 1956 +-------------+
1957 | ... |
1958 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1959 FP -> +-------------+
644459d0 1960 | ... |
a8e019fa 1961 | vars | get_frame_size() bytes
1962 HFP -> +-------------+
644459d0 1963 | ... |
1964 | outgoing |
abe32cce 1965 | args | crtl->outgoing_args_size bytes
644459d0 1966 +-------------+
1967 | $lr of next |
1968 | frame |
1969 +-------------+
a8e019fa 1970 | back chain |
1971 SP -> +-------------+
644459d0 1972
1973*/
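
/* Worked example (illustrative, with made-up numbers): a non-leaf
   function with 32 bytes of locals, two saved registers, no varargs
   save area and no outgoing argument area needs

       total_size = 32 + 2*16 + 0 + 0 + STACK_POINTER_OFFSET

   bytes.  The prologue below stores $lr at the old $sp + 16, saves the
   registers below the pretend args, writes the back chain at the new
   $sp, and, when a frame pointer is needed, points HFP at
   $sp + STACK_POINTER_OFFSET + outgoing args, matching the diagram
   above.  */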
1974void
1975spu_expand_prologue (void)
1976{
1977 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1978 HOST_WIDE_INT total_size;
1979 HOST_WIDE_INT saved_regs_size;
1980 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1981 rtx scratch_reg_0, scratch_reg_1;
1982 rtx insn, real;
1983
644459d0 1984 if (flag_pic && optimize == 0)
18d50ae6 1985 crtl->uses_pic_offset_table = 1;
644459d0 1986
1987 if (spu_naked_function_p (current_function_decl))
1988 return;
1989
1990 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1991 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1992
1993 saved_regs_size = spu_saved_regs_size ();
1994 total_size = size + saved_regs_size
abe32cce 1995 + crtl->outgoing_args_size
1996 + crtl->args.pretend_args_size;
644459d0 1997
1998 if (!current_function_is_leaf
18d50ae6 1999 || cfun->calls_alloca || total_size > 0)
644459d0 2000 total_size += STACK_POINTER_OFFSET;
2001
2002 /* Save this first because code after this might use the link
2003 register as a scratch register. */
2004 if (!current_function_is_leaf)
2005 {
2006 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2007 RTX_FRAME_RELATED_P (insn) = 1;
2008 }
2009
2010 if (total_size > 0)
2011 {
abe32cce 2012 offset = -crtl->args.pretend_args_size;
644459d0 2013 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2014 if (need_to_save_reg (regno, 1))
2015 {
2016 offset -= 16;
2017 insn = frame_emit_store (regno, sp_reg, offset);
2018 RTX_FRAME_RELATED_P (insn) = 1;
2019 }
2020 }
2021
18d50ae6 2022 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2023 {
2024 rtx pic_reg = get_pic_reg ();
2025 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2026 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2027 }
2028
2029 if (total_size > 0)
2030 {
2031 if (flag_stack_check)
2032 {
d819917f 2033 /* We compare against total_size-1 because
644459d0 2034 ($sp >= total_size) <=> ($sp > total_size-1) */
2035 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2036 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2037 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2038 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2039 {
2040 emit_move_insn (scratch_v4si, size_v4si);
2041 size_v4si = scratch_v4si;
2042 }
2043 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2044 emit_insn (gen_vec_extractv4si
2045 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2046 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2047 }
2048
2049 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2050 the value of the previous $sp because we save it as the back
2051 chain. */
2052 if (total_size <= 2000)
2053 {
2054 /* In this case we save the back chain first. */
2055 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2056 insn =
2057 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2058 }
644459d0 2059 else
2060 {
2061 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2062 insn =
2063 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2064 }
2065 RTX_FRAME_RELATED_P (insn) = 1;
2066 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2068
2069 if (total_size > 2000)
2070 {
2071 /* Save the back chain ptr */
2072 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2073 }
2074
2075 if (frame_pointer_needed)
2076 {
2077 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2078 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2079 + crtl->outgoing_args_size;
644459d0 2080 /* Set the new frame_pointer */
d8dfeb55 2081 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2082 RTX_FRAME_RELATED_P (insn) = 1;
2083 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2084 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2085 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2086 }
2087 }
2088
644459d0 2089}
2090
2091void
2092spu_expand_epilogue (bool sibcall_p)
2093{
2094 int size = get_frame_size (), offset, regno;
2095 HOST_WIDE_INT saved_regs_size, total_size;
2096 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2097 rtx jump, scratch_reg_0;
2098
644459d0 2099 if (spu_naked_function_p (current_function_decl))
2100 return;
2101
2102 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2103
2104 saved_regs_size = spu_saved_regs_size ();
2105 total_size = size + saved_regs_size
abe32cce 2106 + crtl->outgoing_args_size
2107 + crtl->args.pretend_args_size;
644459d0 2108
2109 if (!current_function_is_leaf
18d50ae6 2110 || cfun->calls_alloca || total_size > 0)
644459d0 2111 total_size += STACK_POINTER_OFFSET;
2112
2113 if (total_size > 0)
2114 {
18d50ae6 2115 if (cfun->calls_alloca)
644459d0 2116 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2117 else
2118 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2119
2120
2121 if (saved_regs_size > 0)
2122 {
abe32cce 2123 offset = -crtl->args.pretend_args_size;
644459d0 2124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2125 if (need_to_save_reg (regno, 1))
2126 {
2127 offset -= 0x10;
2128 frame_emit_load (regno, sp_reg, offset);
2129 }
2130 }
2131 }
2132
2133 if (!current_function_is_leaf)
2134 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2135
2136 if (!sibcall_p)
2137 {
18b42941 2138 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2139 jump = emit_jump_insn (gen__return ());
2140 emit_barrier_after (jump);
2141 }
2142
644459d0 2143}
2144
2145rtx
2146spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2147{
2148 if (count != 0)
2149 return 0;
2150 /* This is inefficient because it ends up copying to a save-register
2151 which then gets saved even though $lr has already been saved. But
2152 it does generate better code for leaf functions and we don't need
2153 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2154 used for __builtin_return_address anyway, so maybe we don't care if
2155 it's inefficient. */
2156 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2157}
2158\f
2159
2160/* Given VAL, generate a constant appropriate for MODE.
2161 If MODE is a vector mode, every element will be VAL.
2162 For TImode, VAL will be zero extended to 128 bits. */
2163rtx
2164spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2165{
2166 rtx inner;
2167 rtvec v;
2168 int units, i;
2169
2170 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2171 || GET_MODE_CLASS (mode) == MODE_FLOAT
2172 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2173 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2174
2175 if (GET_MODE_CLASS (mode) == MODE_INT)
2176 return immed_double_const (val, 0, mode);
2177
2178 /* val is the bit representation of the float */
2179 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2180 return hwint_to_const_double (mode, val);
2181
2182 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2183 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2184 else
2185 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2186
2187 units = GET_MODE_NUNITS (mode);
2188
2189 v = rtvec_alloc (units);
2190
2191 for (i = 0; i < units; ++i)
2192 RTVEC_ELT (v, i) = inner;
2193
2194 return gen_rtx_CONST_VECTOR (mode, v);
2195}
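
/* Usage illustration (not in the original source):
     spu_const (V4SImode, 7)        -> (const_vector:V4SI [(const_int 7) x4])
     spu_const (SImode, -1)         -> (const_int -1)
     spu_const (SFmode, 0x3f800000) -> the SFmode CONST_DOUBLE for 1.0f,
   since for float modes VAL is interpreted as the bit pattern.  */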
644459d0 2196
5474166e 2197/* Create a MODE vector constant from 4 ints. */
2198rtx
2199spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2200{
2201 unsigned char arr[16];
2202 arr[0] = (a >> 24) & 0xff;
2203 arr[1] = (a >> 16) & 0xff;
2204 arr[2] = (a >> 8) & 0xff;
2205 arr[3] = (a >> 0) & 0xff;
2206 arr[4] = (b >> 24) & 0xff;
2207 arr[5] = (b >> 16) & 0xff;
2208 arr[6] = (b >> 8) & 0xff;
2209 arr[7] = (b >> 0) & 0xff;
2210 arr[8] = (c >> 24) & 0xff;
2211 arr[9] = (c >> 16) & 0xff;
2212 arr[10] = (c >> 8) & 0xff;
2213 arr[11] = (c >> 0) & 0xff;
2214 arr[12] = (d >> 24) & 0xff;
2215 arr[13] = (d >> 16) & 0xff;
2216 arr[14] = (d >> 8) & 0xff;
2217 arr[15] = (d >> 0) & 0xff;
2218 return array_to_constant(mode, arr);
2219}
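
/* Illustrative example (not in the original source): each argument is
   stored most-significant byte first, so
     spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)
   builds the byte array 00 01 02 03 ... 0e 0f, i.e. the 16 bytes 0x00
   through 0x0f in order.  */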
5a976006 2220\f
2221/* branch hint stuff */
5474166e 2222
644459d0 2223/* An array of these is used to propagate hints to predecessor blocks. */
2224struct spu_bb_info
2225{
5a976006 2226 rtx prop_jump; /* propagated from another block */
2227 int bb_index; /* the original block. */
644459d0 2228};
5a976006 2229static struct spu_bb_info *spu_bb_info;
644459d0 2230
5a976006 2231#define STOP_HINT_P(INSN) \
2232 (GET_CODE(INSN) == CALL_INSN \
2233 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2234 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2235
2236/* 1 when RTX is a hinted branch or its target. We keep track of
2237 what has been hinted so the safe-hint code can test it easily. */
2238#define HINTED_P(RTX) \
2239 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2240
2241/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2242#define SCHED_ON_EVEN_P(RTX) \
2243 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2244
2245/* Emit a nop for INSN such that the two will dual issue. This assumes
2246 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2247 We check for TImode to handle a MULTI1 insn which has dual issued its
2248 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2249 ADDR_VEC insns. */
2250static void
2251emit_nop_for_insn (rtx insn)
644459d0 2252{
5a976006 2253 int p;
2254 rtx new_insn;
2255 p = get_pipe (insn);
2256 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2257 new_insn = emit_insn_after (gen_lnop (), insn);
2258 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2259 {
5a976006 2260 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2261 PUT_MODE (new_insn, TImode);
2262 PUT_MODE (insn, VOIDmode);
2263 }
2264 else
2265 new_insn = emit_insn_after (gen_lnop (), insn);
2266 recog_memoized (new_insn);
2267}
2268
2269/* Insert nops in basic blocks to meet dual issue alignment
2270 requirements. Also make sure hbrp and hint instructions are at least
2271 one cycle apart, possibly inserting a nop. */
2272static void
2273pad_bb(void)
2274{
2275 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2276 int length;
2277 int addr;
2278
2279 /* This sets up INSN_ADDRESSES. */
2280 shorten_branches (get_insns ());
2281
2282 /* Keep track of length added by nops. */
2283 length = 0;
2284
2285 prev_insn = 0;
2286 insn = get_insns ();
2287 if (!active_insn_p (insn))
2288 insn = next_active_insn (insn);
2289 for (; insn; insn = next_insn)
2290 {
2291 next_insn = next_active_insn (insn);
2292 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2293 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2294 {
5a976006 2295 if (hbr_insn)
2296 {
2297 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2298 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2299 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 || (a1 - a0 == 4))
2301 {
2302 prev_insn = emit_insn_before (gen_lnop (), insn);
2303 PUT_MODE (prev_insn, GET_MODE (insn));
2304 PUT_MODE (insn, TImode);
2305 length += 4;
2306 }
2307 }
2308 hbr_insn = insn;
2309 }
2310 if (INSN_CODE (insn) == CODE_FOR_blockage)
2311 {
2312 if (GET_MODE (insn) == TImode)
2313 PUT_MODE (next_insn, TImode);
2314 insn = next_insn;
2315 next_insn = next_active_insn (insn);
2316 }
2317 addr = INSN_ADDRESSES (INSN_UID (insn));
2318 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2319 {
2320 if (((addr + length) & 7) != 0)
2321 {
2322 emit_nop_for_insn (prev_insn);
2323 length += 4;
2324 }
644459d0 2325 }
5a976006 2326 else if (GET_MODE (insn) == TImode
2327 && ((next_insn && GET_MODE (next_insn) != TImode)
2328 || get_attr_type (insn) == TYPE_MULTI0)
2329 && ((addr + length) & 7) != 0)
2330 {
2331 /* prev_insn will always be set because the first insn is
2332 always 8-byte aligned. */
2333 emit_nop_for_insn (prev_insn);
2334 length += 4;
2335 }
2336 prev_insn = insn;
644459d0 2337 }
644459d0 2338}
2339
5a976006 2340\f
2341/* Routines for branch hints. */
2342
644459d0 2343static void
5a976006 2344spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2345 int distance, sbitmap blocks)
644459d0 2346{
5a976006 2347 rtx branch_label = 0;
2348 rtx hint;
2349 rtx insn;
2350 rtx table;
644459d0 2351
2352 if (before == 0 || branch == 0 || target == 0)
2353 return;
2354
5a976006 2355  /* While scheduling we require hints to be no further than 600 bytes
 2356	     from the branch, so we need to enforce that here too. */
644459d0 2357 if (distance > 600)
2358 return;
2359
5a976006 2360  /* If BEFORE is a basic block note, emit the hint after the note. */
37534923 2361 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2362 before = NEXT_INSN (before);
644459d0 2363
2364 branch_label = gen_label_rtx ();
2365 LABEL_NUSES (branch_label)++;
2366 LABEL_PRESERVE_P (branch_label) = 1;
2367 insn = emit_label_before (branch_label, branch);
2368 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2369 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2370
2371 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2372 recog_memoized (hint);
2373 HINTED_P (branch) = 1;
644459d0 2374
5a976006 2375 if (GET_CODE (target) == LABEL_REF)
2376 HINTED_P (XEXP (target, 0)) = 1;
2377 else if (tablejump_p (branch, 0, &table))
644459d0 2378 {
5a976006 2379 rtvec vec;
2380 int j;
2381 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2382 vec = XVEC (PATTERN (table), 0);
2383 else
2384 vec = XVEC (PATTERN (table), 1);
2385 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2386 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2387 }
5a976006 2388
2389 if (distance >= 588)
644459d0 2390 {
5a976006 2391 /* Make sure the hint isn't scheduled any earlier than this point,
 2392	      which could make it too far for the branch offset to fit. */
2393 recog_memoized (emit_insn_before (gen_blockage (), hint));
2394 }
2395 else if (distance <= 8 * 4)
2396 {
2397 /* To guarantee at least 8 insns between the hint and branch we
2398 insert nops. */
2399 int d;
2400 for (d = distance; d < 8 * 4; d += 4)
2401 {
2402 insn =
2403 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2404 recog_memoized (insn);
2405 }
2406
2407 /* Make sure any nops inserted aren't scheduled before the hint. */
2408 recog_memoized (emit_insn_after (gen_blockage (), hint));
2409
2410 /* Make sure any nops inserted aren't scheduled after the call. */
2411 if (CALL_P (branch) && distance < 8 * 4)
2412 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2413 }
644459d0 2414}
2415
2416/* Returns 0 if we don't want a hint for this branch. Otherwise return
2417 the rtx for the branch target. */
2418static rtx
2419get_branch_target (rtx branch)
2420{
2421 if (GET_CODE (branch) == JUMP_INSN)
2422 {
2423 rtx set, src;
2424
2425 /* Return statements */
2426 if (GET_CODE (PATTERN (branch)) == RETURN)
2427 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2428
2429 /* jump table */
2430 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2431 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2432 return 0;
2433
fcc31b99 2434 /* ASM GOTOs. */
604157f6 2435 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2436 return NULL;
2437
644459d0 2438 set = single_set (branch);
2439 src = SET_SRC (set);
2440 if (GET_CODE (SET_DEST (set)) != PC)
2441 abort ();
2442
2443 if (GET_CODE (src) == IF_THEN_ELSE)
2444 {
2445 rtx lab = 0;
2446 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2447 if (note)
2448 {
2449 /* If the more probable case is not a fall through, then
2450 try a branch hint. */
2451 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2452 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2453 && GET_CODE (XEXP (src, 1)) != PC)
2454 lab = XEXP (src, 1);
2455 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2456 && GET_CODE (XEXP (src, 2)) != PC)
2457 lab = XEXP (src, 2);
2458 }
2459 if (lab)
2460 {
2461 if (GET_CODE (lab) == RETURN)
2462 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2463 return lab;
2464 }
2465 return 0;
2466 }
2467
2468 return src;
2469 }
2470 else if (GET_CODE (branch) == CALL_INSN)
2471 {
2472 rtx call;
2473 /* All of our call patterns are in a PARALLEL and the CALL is
2474 the first pattern in the PARALLEL. */
2475 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2476 abort ();
2477 call = XVECEXP (PATTERN (branch), 0, 0);
2478 if (GET_CODE (call) == SET)
2479 call = SET_SRC (call);
2480 if (GET_CODE (call) != CALL)
2481 abort ();
2482 return XEXP (XEXP (call, 0), 0);
2483 }
2484 return 0;
2485}
2486
5a976006 2487/* The special $hbr register is used to prevent the insn scheduler from
2488 moving hbr insns across instructions which invalidate them. It
2489 should only be used in a clobber, and this function searches for
2490 insns which clobber it. */
2491static bool
2492insn_clobbers_hbr (rtx insn)
2493{
2494 if (INSN_P (insn)
2495 && GET_CODE (PATTERN (insn)) == PARALLEL)
2496 {
2497 rtx parallel = PATTERN (insn);
2498 rtx clobber;
2499 int j;
2500 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2501 {
2502 clobber = XVECEXP (parallel, 0, j);
2503 if (GET_CODE (clobber) == CLOBBER
2504 && GET_CODE (XEXP (clobber, 0)) == REG
2505 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2506 return 1;
2507 }
2508 }
2509 return 0;
2510}
2511
2512/* Search up to 32 insns starting at FIRST:
2513 - at any kind of hinted branch, just return
2514 - at any unconditional branch in the first 15 insns, just return
2515 - at a call or indirect branch, after the first 15 insns, force it to
2516 an even address and return
2517 - at any unconditional branch, after the first 15 insns, force it to
2518 an even address.
 2519	   At the end of the search, insert an hbrp within 4 insns of FIRST,
2520 and an hbrp within 16 instructions of FIRST.
2521 */
644459d0 2522static void
5a976006 2523insert_hbrp_for_ilb_runout (rtx first)
644459d0 2524{
5a976006 2525 rtx insn, before_4 = 0, before_16 = 0;
2526 int addr = 0, length, first_addr = -1;
2527 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2528 int insert_lnop_after = 0;
2529 for (insn = first; insn; insn = NEXT_INSN (insn))
2530 if (INSN_P (insn))
2531 {
2532 if (first_addr == -1)
2533 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2534 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2535 length = get_attr_length (insn);
2536
2537 if (before_4 == 0 && addr + length >= 4 * 4)
2538 before_4 = insn;
2539 /* We test for 14 instructions because the first hbrp will add
2540 up to 2 instructions. */
2541 if (before_16 == 0 && addr + length >= 14 * 4)
2542 before_16 = insn;
2543
2544 if (INSN_CODE (insn) == CODE_FOR_hbr)
2545 {
2546 /* Make sure an hbrp is at least 2 cycles away from a hint.
2547 Insert an lnop after the hbrp when necessary. */
2548 if (before_4 == 0 && addr > 0)
2549 {
2550 before_4 = insn;
2551 insert_lnop_after |= 1;
2552 }
2553 else if (before_4 && addr <= 4 * 4)
2554 insert_lnop_after |= 1;
2555 if (before_16 == 0 && addr > 10 * 4)
2556 {
2557 before_16 = insn;
2558 insert_lnop_after |= 2;
2559 }
2560 else if (before_16 && addr <= 14 * 4)
2561 insert_lnop_after |= 2;
2562 }
644459d0 2563
5a976006 2564 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2565 {
2566 if (addr < hbrp_addr0)
2567 hbrp_addr0 = addr;
2568 else if (addr < hbrp_addr1)
2569 hbrp_addr1 = addr;
2570 }
644459d0 2571
5a976006 2572 if (CALL_P (insn) || JUMP_P (insn))
2573 {
2574 if (HINTED_P (insn))
2575 return;
2576
2577 /* Any branch after the first 15 insns should be on an even
2578 address to avoid a special case branch. There might be
2579 some nops and/or hbrps inserted, so we test after 10
2580 insns. */
2581 if (addr > 10 * 4)
2582 SCHED_ON_EVEN_P (insn) = 1;
2583 }
644459d0 2584
5a976006 2585 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2586 return;
2587
2588
2589 if (addr + length >= 32 * 4)
644459d0 2590 {
5a976006 2591 gcc_assert (before_4 && before_16);
2592 if (hbrp_addr0 > 4 * 4)
644459d0 2593 {
5a976006 2594 insn =
2595 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2596 recog_memoized (insn);
2597 INSN_ADDRESSES_NEW (insn,
2598 INSN_ADDRESSES (INSN_UID (before_4)));
2599 PUT_MODE (insn, GET_MODE (before_4));
2600 PUT_MODE (before_4, TImode);
2601 if (insert_lnop_after & 1)
644459d0 2602 {
5a976006 2603 insn = emit_insn_before (gen_lnop (), before_4);
2604 recog_memoized (insn);
2605 INSN_ADDRESSES_NEW (insn,
2606 INSN_ADDRESSES (INSN_UID (before_4)));
2607 PUT_MODE (insn, TImode);
644459d0 2608 }
644459d0 2609 }
5a976006 2610 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2611 && hbrp_addr1 > 16 * 4)
644459d0 2612 {
5a976006 2613 insn =
2614 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2615 recog_memoized (insn);
2616 INSN_ADDRESSES_NEW (insn,
2617 INSN_ADDRESSES (INSN_UID (before_16)));
2618 PUT_MODE (insn, GET_MODE (before_16));
2619 PUT_MODE (before_16, TImode);
2620 if (insert_lnop_after & 2)
644459d0 2621 {
5a976006 2622 insn = emit_insn_before (gen_lnop (), before_16);
2623 recog_memoized (insn);
2624 INSN_ADDRESSES_NEW (insn,
2625 INSN_ADDRESSES (INSN_UID
2626 (before_16)));
2627 PUT_MODE (insn, TImode);
644459d0 2628 }
2629 }
5a976006 2630 return;
644459d0 2631 }
644459d0 2632 }
5a976006 2633 else if (BARRIER_P (insn))
2634 return;
644459d0 2635
644459d0 2636}
5a976006 2637
2638/* The SPU might hang when it executes 48 inline instructions after a
2639 hinted branch jumps to its hinted target. The beginning of a
2640 function and the return from a call might have been hinted, and must
2641 be handled as well. To prevent a hang we insert 2 hbrps. The first
2642 should be within 6 insns of the branch target. The second should be
2643 within 22 insns of the branch target. When determining if hbrps are
 2644	   necessary, we look for only 32 inline instructions, because up to
 2645	   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2646 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2647static void
5a976006 2648insert_hbrp (void)
644459d0 2649{
5a976006 2650 rtx insn;
2651 if (TARGET_SAFE_HINTS)
644459d0 2652 {
5a976006 2653 shorten_branches (get_insns ());
2654 /* Insert hbrp at beginning of function */
2655 insn = next_active_insn (get_insns ());
2656 if (insn)
2657 insert_hbrp_for_ilb_runout (insn);
2658 /* Insert hbrp after hinted targets. */
2659 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2660 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2661 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2662 }
644459d0 2663}
2664
5a976006 2665static int in_spu_reorg;
2666
2667/* Insert branch hints. There are no branch optimizations after this
2668 pass, so it's safe to set our branch hints now. */
644459d0 2669static void
5a976006 2670spu_machine_dependent_reorg (void)
644459d0 2671{
5a976006 2672 sbitmap blocks;
2673 basic_block bb;
2674 rtx branch, insn;
2675 rtx branch_target = 0;
2676 int branch_addr = 0, insn_addr, required_dist = 0;
2677 int i;
2678 unsigned int j;
644459d0 2679
5a976006 2680 if (!TARGET_BRANCH_HINTS || optimize == 0)
2681 {
2682 /* We still do it for unoptimized code because an external
2683 function might have hinted a call or return. */
2684 insert_hbrp ();
2685 pad_bb ();
2686 return;
2687 }
644459d0 2688
5a976006 2689 blocks = sbitmap_alloc (last_basic_block);
2690 sbitmap_zero (blocks);
644459d0 2691
5a976006 2692 in_spu_reorg = 1;
2693 compute_bb_for_insn ();
2694
2695 compact_blocks ();
2696
2697 spu_bb_info =
2698 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2699 sizeof (struct spu_bb_info));
2700
2701 /* We need exact insn addresses and lengths. */
2702 shorten_branches (get_insns ());
2703
2704 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2705 {
5a976006 2706 bb = BASIC_BLOCK (i);
2707 branch = 0;
2708 if (spu_bb_info[i].prop_jump)
644459d0 2709 {
5a976006 2710 branch = spu_bb_info[i].prop_jump;
2711 branch_target = get_branch_target (branch);
2712 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2713 required_dist = spu_hint_dist;
2714 }
2715 /* Search from end of a block to beginning. In this loop, find
 2716	    jumps which need a branch hint and emit the hint only when:
2717 - it's an indirect branch and we're at the insn which sets
2718 the register
2719 - we're at an insn that will invalidate the hint. e.g., a
2720 call, another hint insn, inline asm that clobbers $hbr, and
2721 some inlined operations (divmodsi4). Don't consider jumps
2722 because they are only at the end of a block and are
2723 considered when we are deciding whether to propagate
2724 - we're getting too far away from the branch. The hbr insns
2725 only have a signed 10 bit offset
2726 We go back as far as possible so the branch will be considered
2727 for propagation when we get to the beginning of the block. */
2728 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2729 {
2730 if (INSN_P (insn))
2731 {
2732 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2733 if (branch
2734 && ((GET_CODE (branch_target) == REG
2735 && set_of (branch_target, insn) != NULL_RTX)
2736 || insn_clobbers_hbr (insn)
2737 || branch_addr - insn_addr > 600))
2738 {
2739 rtx next = NEXT_INSN (insn);
2740 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2741 if (insn != BB_END (bb)
2742 && branch_addr - next_addr >= required_dist)
2743 {
2744 if (dump_file)
2745 fprintf (dump_file,
2746 "hint for %i in block %i before %i\n",
2747 INSN_UID (branch), bb->index,
2748 INSN_UID (next));
2749 spu_emit_branch_hint (next, branch, branch_target,
2750 branch_addr - next_addr, blocks);
2751 }
2752 branch = 0;
2753 }
2754
2755 /* JUMP_P will only be true at the end of a block. When
2756 branch is already set it means we've previously decided
2757 to propagate a hint for that branch into this block. */
2758 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2759 {
2760 branch = 0;
2761 if ((branch_target = get_branch_target (insn)))
2762 {
2763 branch = insn;
2764 branch_addr = insn_addr;
2765 required_dist = spu_hint_dist;
2766 }
2767 }
2768 }
2769 if (insn == BB_HEAD (bb))
2770 break;
2771 }
2772
2773 if (branch)
2774 {
2775 /* If we haven't emitted a hint for this branch yet, it might
2776 be profitable to emit it in one of the predecessor blocks,
2777 especially for loops. */
2778 rtx bbend;
2779 basic_block prev = 0, prop = 0, prev2 = 0;
2780 int loop_exit = 0, simple_loop = 0;
2781 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2782
2783 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2784 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2785 prev = EDGE_PRED (bb, j)->src;
2786 else
2787 prev2 = EDGE_PRED (bb, j)->src;
2788
2789 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2790 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2791 loop_exit = 1;
2792 else if (EDGE_SUCC (bb, j)->dest == bb)
2793 simple_loop = 1;
2794
2795 /* If this branch is a loop exit then propagate to previous
2796 fallthru block. This catches the cases when it is a simple
2797 loop or when there is an initial branch into the loop. */
2798 if (prev && (loop_exit || simple_loop)
2799 && prev->loop_depth <= bb->loop_depth)
2800 prop = prev;
2801
 2802	  /* If there is only one adjacent predecessor, propagate to it, but
 2803	     not outside this loop.  This loop_depth test isn't perfect, but
2804 I'm not sure the loop_father member is valid at this point. */
2805 else if (prev && single_pred_p (bb)
2806 && prev->loop_depth == bb->loop_depth)
2807 prop = prev;
2808
2809 /* If this is the JOIN block of a simple IF-THEN then
 2810	     propagate the hint to the HEADER block. */
2811 else if (prev && prev2
2812 && EDGE_COUNT (bb->preds) == 2
2813 && EDGE_COUNT (prev->preds) == 1
2814 && EDGE_PRED (prev, 0)->src == prev2
2815 && prev2->loop_depth == bb->loop_depth
2816 && GET_CODE (branch_target) != REG)
2817 prop = prev;
2818
2819 /* Don't propagate when:
2820 - this is a simple loop and the hint would be too far
2821 - this is not a simple loop and there are 16 insns in
2822 this block already
2823 - the predecessor block ends in a branch that will be
2824 hinted
2825 - the predecessor block ends in an insn that invalidates
2826 the hint */
2827 if (prop
2828 && prop->index >= 0
2829 && (bbend = BB_END (prop))
2830 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2831 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2832 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2833 {
2834 if (dump_file)
2835 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2836 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2837 bb->index, prop->index, bb->loop_depth,
2838 INSN_UID (branch), loop_exit, simple_loop,
2839 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2840
2841 spu_bb_info[prop->index].prop_jump = branch;
2842 spu_bb_info[prop->index].bb_index = i;
2843 }
2844 else if (branch_addr - next_addr >= required_dist)
2845 {
2846 if (dump_file)
2847 fprintf (dump_file, "hint for %i in block %i before %i\n",
2848 INSN_UID (branch), bb->index,
2849 INSN_UID (NEXT_INSN (insn)));
2850 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2851 branch_addr - next_addr, blocks);
2852 }
2853 branch = 0;
644459d0 2854 }
644459d0 2855 }
5a976006 2856 free (spu_bb_info);
644459d0 2857
5a976006 2858 if (!sbitmap_empty_p (blocks))
2859 find_many_sub_basic_blocks (blocks);
2860
2861 /* We have to schedule to make sure alignment is ok. */
2862 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2863
2864 /* The hints need to be scheduled, so call it again. */
2865 schedule_insns ();
2866
2867 insert_hbrp ();
2868
2869 pad_bb ();
2870
8f1d58ad 2871 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2872 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2873 {
2874 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2875	 between its branch label and the branch.  We don't move the
2876 label because GCC expects it at the beginning of the block. */
2877 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2878 rtx label_ref = XVECEXP (unspec, 0, 0);
2879 rtx label = XEXP (label_ref, 0);
2880 rtx branch;
2881 int offset = 0;
2882 for (branch = NEXT_INSN (label);
2883 !JUMP_P (branch) && !CALL_P (branch);
2884 branch = NEXT_INSN (branch))
2885 if (NONJUMP_INSN_P (branch))
2886 offset += get_attr_length (branch);
2887 if (offset > 0)
2888 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2889 }
5a976006 2890
2891 if (spu_flag_var_tracking)
644459d0 2892 {
5a976006 2893 df_analyze ();
2894 timevar_push (TV_VAR_TRACKING);
2895 variable_tracking_main ();
2896 timevar_pop (TV_VAR_TRACKING);
2897 df_finish_pass (false);
644459d0 2898 }
5a976006 2899
2900 free_bb_for_insn ();
2901
2902 in_spu_reorg = 0;
644459d0 2903}
2904\f
2905
2906/* Insn scheduling routines, primarily for dual issue. */
2907static int
2908spu_sched_issue_rate (void)
2909{
2910 return 2;
2911}
2912
2913static int
5a976006 2914uses_ls_unit(rtx insn)
644459d0 2915{
5a976006 2916 rtx set = single_set (insn);
2917 if (set != 0
2918 && (GET_CODE (SET_DEST (set)) == MEM
2919 || GET_CODE (SET_SRC (set)) == MEM))
2920 return 1;
2921 return 0;
644459d0 2922}
2923
2924static int
2925get_pipe (rtx insn)
2926{
2927 enum attr_type t;
2928 /* Handle inline asm */
2929 if (INSN_CODE (insn) == -1)
2930 return -1;
2931 t = get_attr_type (insn);
2932 switch (t)
2933 {
2934 case TYPE_CONVERT:
2935 return -2;
2936 case TYPE_MULTI0:
2937 return -1;
2938
2939 case TYPE_FX2:
2940 case TYPE_FX3:
2941 case TYPE_SPR:
2942 case TYPE_NOP:
2943 case TYPE_FXB:
2944 case TYPE_FPD:
2945 case TYPE_FP6:
2946 case TYPE_FP7:
644459d0 2947 return 0;
2948
2949 case TYPE_LNOP:
2950 case TYPE_SHUF:
2951 case TYPE_LOAD:
2952 case TYPE_STORE:
2953 case TYPE_BR:
2954 case TYPE_MULTI1:
2955 case TYPE_HBR:
5a976006 2956 case TYPE_IPREFETCH:
644459d0 2957 return 1;
2958 default:
2959 abort ();
2960 }
2961}
2962
5a976006 2963
2964/* haifa-sched.c has a static variable that keeps track of the current
2965 cycle. It is passed to spu_sched_reorder, and we record it here for
2966 use by spu_sched_variable_issue. It won't be accurate if the
 2967	   scheduler updates its clock_var between the two calls. */
2968static int clock_var;
2969
2970/* This is used to keep track of insn alignment. Set to 0 at the
2971 beginning of each block and increased by the "length" attr of each
2972 insn scheduled. */
2973static int spu_sched_length;
2974
2975/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2976 ready list appropriately in spu_sched_reorder(). */
2977static int pipe0_clock;
2978static int pipe1_clock;
2979
2980static int prev_clock_var;
2981
2982static int prev_priority;
2983
2984/* The SPU needs to load the next ilb sometime during the execution of
2985 the previous ilb. There is a potential conflict if every cycle has a
2986 load or store. To avoid the conflict we make sure the load/store
2987 unit is free for at least one cycle during the execution of insns in
2988 the previous ilb. */
2989static int spu_ls_first;
2990static int prev_ls_clock;
2991
2992static void
2993spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2994 int max_ready ATTRIBUTE_UNUSED)
2995{
2996 spu_sched_length = 0;
2997}
2998
2999static void
3000spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3001 int max_ready ATTRIBUTE_UNUSED)
3002{
3003 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3004 {
3005 /* When any block might be at least 8-byte aligned, assume they
3006 will all be at least 8-byte aligned to make sure dual issue
3007 works out correctly. */
3008 spu_sched_length = 0;
3009 }
3010 spu_ls_first = INT_MAX;
3011 clock_var = -1;
3012 prev_ls_clock = -1;
3013 pipe0_clock = -1;
3014 pipe1_clock = -1;
3015 prev_clock_var = -1;
3016 prev_priority = -1;
3017}
3018
644459d0 3019static int
5a976006 3020spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3021 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3022{
5a976006 3023 int len;
3024 int p;
644459d0 3025 if (GET_CODE (PATTERN (insn)) == USE
3026 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3027 || (len = get_attr_length (insn)) == 0)
3028 return more;
3029
3030 spu_sched_length += len;
3031
3032 /* Reset on inline asm */
3033 if (INSN_CODE (insn) == -1)
3034 {
3035 spu_ls_first = INT_MAX;
3036 pipe0_clock = -1;
3037 pipe1_clock = -1;
3038 return 0;
3039 }
3040 p = get_pipe (insn);
3041 if (p == 0)
3042 pipe0_clock = clock_var;
3043 else
3044 pipe1_clock = clock_var;
3045
3046 if (in_spu_reorg)
3047 {
3048 if (clock_var - prev_ls_clock > 1
3049 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3050 spu_ls_first = INT_MAX;
3051 if (uses_ls_unit (insn))
3052 {
3053 if (spu_ls_first == INT_MAX)
3054 spu_ls_first = spu_sched_length;
3055 prev_ls_clock = clock_var;
3056 }
3057
3058 /* The scheduler hasn't inserted the nop, but we will later on.
3059 Include those nops in spu_sched_length. */
3060 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3061 spu_sched_length += 4;
3062 prev_clock_var = clock_var;
3063
3064 /* more is -1 when called from spu_sched_reorder for new insns
3065 that don't have INSN_PRIORITY */
3066 if (more >= 0)
3067 prev_priority = INSN_PRIORITY (insn);
3068 }
3069
 3070	 /* Always try issuing more insns.  spu_sched_reorder will decide
3071 when the cycle should be advanced. */
3072 return 1;
3073}
3074
3075/* This function is called for both TARGET_SCHED_REORDER and
3076 TARGET_SCHED_REORDER2. */
3077static int
3078spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3079 rtx *ready, int *nreadyp, int clock)
3080{
3081 int i, nready = *nreadyp;
3082 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3083 rtx insn;
3084
3085 clock_var = clock;
3086
3087 if (nready <= 0 || pipe1_clock >= clock)
3088 return 0;
3089
3090 /* Find any rtl insns that don't generate assembly insns and schedule
3091 them first. */
3092 for (i = nready - 1; i >= 0; i--)
3093 {
3094 insn = ready[i];
3095 if (INSN_CODE (insn) == -1
3096 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3097 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3098 {
3099 ready[i] = ready[nready - 1];
3100 ready[nready - 1] = insn;
3101 return 1;
3102 }
3103 }
3104
3105 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3106 for (i = 0; i < nready; i++)
3107 if (INSN_CODE (ready[i]) != -1)
3108 {
3109 insn = ready[i];
3110 switch (get_attr_type (insn))
3111 {
3112 default:
3113 case TYPE_MULTI0:
3114 case TYPE_CONVERT:
3115 case TYPE_FX2:
3116 case TYPE_FX3:
3117 case TYPE_SPR:
3118 case TYPE_NOP:
3119 case TYPE_FXB:
3120 case TYPE_FPD:
3121 case TYPE_FP6:
3122 case TYPE_FP7:
3123 pipe_0 = i;
3124 break;
3125 case TYPE_LOAD:
3126 case TYPE_STORE:
3127 pipe_ls = i;
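	    /* Fall through: loads and stores also issue on pipe 1.  */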
3128 case TYPE_LNOP:
3129 case TYPE_SHUF:
3130 case TYPE_BR:
3131 case TYPE_MULTI1:
3132 case TYPE_HBR:
3133 pipe_1 = i;
3134 break;
3135 case TYPE_IPREFETCH:
3136 pipe_hbrp = i;
3137 break;
3138 }
3139 }
3140
3141 /* In the first scheduling phase, schedule loads and stores together
3142 to increase the chance they will get merged during postreload CSE. */
3143 if (!reload_completed && pipe_ls >= 0)
3144 {
3145 insn = ready[pipe_ls];
3146 ready[pipe_ls] = ready[nready - 1];
3147 ready[nready - 1] = insn;
3148 return 1;
3149 }
3150
3151 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3152 if (pipe_hbrp >= 0)
3153 pipe_1 = pipe_hbrp;
3154
3155 /* When we have loads/stores in every cycle of the last 15 insns and
3156 we are about to schedule another load/store, emit an hbrp insn
3157 instead. */
3158 if (in_spu_reorg
3159 && spu_sched_length - spu_ls_first >= 4 * 15
3160 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3161 {
3162 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3163 recog_memoized (insn);
3164 if (pipe0_clock < clock)
3165 PUT_MODE (insn, TImode);
3166 spu_sched_variable_issue (file, verbose, insn, -1);
3167 return 0;
3168 }
3169
3170 /* In general, we want to emit nops to increase dual issue, but dual
3171 issue isn't faster when one of the insns could be scheduled later
 3172	     without affecting the critical path.  We look at INSN_PRIORITY to
 3173	     make a good guess, but it isn't perfect so -mdual-nops=n can be
 3174	     used to tune it. */
3175 if (in_spu_reorg && spu_dual_nops < 10)
3176 {
 3177	     /* When we are at an even address and we are not issuing nops to
3178 improve scheduling then we need to advance the cycle. */
3179 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3180 && (spu_dual_nops == 0
3181 || (pipe_1 != -1
3182 && prev_priority >
3183 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3184 return 0;
3185
3186 /* When at an odd address, schedule the highest priority insn
3187 without considering pipeline. */
3188 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3189 && (spu_dual_nops == 0
3190 || (prev_priority >
3191 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3192 return 1;
3193 }
3194
3195
3196 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3197 pipe0 insn in the ready list, schedule it. */
3198 if (pipe0_clock < clock && pipe_0 >= 0)
3199 schedule_i = pipe_0;
3200
3201 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3202 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3203 else
3204 schedule_i = pipe_1;
3205
3206 if (schedule_i > -1)
3207 {
3208 insn = ready[schedule_i];
3209 ready[schedule_i] = ready[nready - 1];
3210 ready[nready - 1] = insn;
3211 return 1;
3212 }
3213 return 0;
644459d0 3214}
3215
3216/* INSN is dependent on DEP_INSN. */
3217static int
5a976006 3218spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3219{
5a976006 3220 rtx set;
3221
3222 /* The blockage pattern is used to prevent instructions from being
3223 moved across it and has no cost. */
3224 if (INSN_CODE (insn) == CODE_FOR_blockage
3225 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3226 return 0;
3227
9d98604b 3228 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3229 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3230 return 0;
3231
3232 /* Make sure hbrps are spread out. */
3233 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3234 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3235 return 8;
3236
3237 /* Make sure hints and hbrps are 2 cycles apart. */
3238 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3239 || INSN_CODE (insn) == CODE_FOR_hbr)
3240 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3241 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3242 return 2;
3243
3244 /* An hbrp has no real dependency on other insns. */
3245 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3246 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3247 return 0;
3248
3249 /* Assuming that it is unlikely an argument register will be used in
3250 the first cycle of the called function, we reduce the cost for
3251 slightly better scheduling of dep_insn. When not hinted, the
3252 mispredicted branch would hide the cost as well. */
3253 if (CALL_P (insn))
3254 {
3255 rtx target = get_branch_target (insn);
3256 if (GET_CODE (target) != REG || !set_of (target, insn))
3257 return cost - 2;
3258 return cost;
3259 }
3260
3261 /* And when returning from a function, let's assume the return values
3262 are completed sooner too. */
3263 if (CALL_P (dep_insn))
644459d0 3264 return cost - 2;
5a976006 3265
 3266	 /* Make sure an instruction that loads from the back chain is scheduled
3267 away from the return instruction so a hint is more likely to get
3268 issued. */
3269 if (INSN_CODE (insn) == CODE_FOR__return
3270 && (set = single_set (dep_insn))
3271 && GET_CODE (SET_DEST (set)) == REG
3272 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3273 return 20;
3274
644459d0 3275 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3276 scheduler makes every insn in a block anti-dependent on the final
3277 jump_insn. We adjust here so higher cost insns will get scheduled
3278 earlier. */
5a976006 3279 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3280 return insn_cost (dep_insn) - 3;
5a976006 3281
644459d0 3282 return cost;
3283}
3284\f
3285/* Create a CONST_DOUBLE from a string. */
3286struct rtx_def *
3287spu_float_const (const char *string, enum machine_mode mode)
3288{
3289 REAL_VALUE_TYPE value;
3290 value = REAL_VALUE_ATOF (string, mode);
3291 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3292}
3293
644459d0 3294int
3295spu_constant_address_p (rtx x)
3296{
3297 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3298 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3299 || GET_CODE (x) == HIGH);
3300}
3301
3302static enum spu_immediate
3303which_immediate_load (HOST_WIDE_INT val)
3304{
3305 gcc_assert (val == trunc_int_for_mode (val, SImode));
3306
3307 if (val >= -0x8000 && val <= 0x7fff)
3308 return SPU_IL;
3309 if (val >= 0 && val <= 0x3ffff)
3310 return SPU_ILA;
3311 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3312 return SPU_ILH;
3313 if ((val & 0xffff) == 0)
3314 return SPU_ILHU;
3315
3316 return SPU_NONE;
3317}
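
/* Examples (illustrative, not from the original source):
     0x1234     -> SPU_IL    (fits a signed 16-bit immediate)
     0x23456    -> SPU_ILA   (fits an unsigned 18-bit immediate)
     0x12341234 -> SPU_ILH   (both halfwords identical)
     0x56780000 -> SPU_ILHU  (low halfword is zero)
     0x12345678 -> SPU_NONE  (needs ilhu followed by iohl).  */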
3318
dea01258 3319/* Return true when OP can be loaded by one of the il instructions, or
3320 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3321int
3322immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3323{
3324 if (CONSTANT_P (op))
3325 {
3326 enum immediate_class c = classify_immediate (op, mode);
5df189be 3327 return c == IC_IL1 || c == IC_IL1s
3072d30e 3328 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3329 }
3330 return 0;
3331}
3332
3333/* Return true if the first SIZE bytes of arr is a constant that can be
3334 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3335 represent the size and offset of the instruction to use. */
3336static int
3337cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3338{
3339 int cpat, run, i, start;
3340 cpat = 1;
3341 run = 0;
3342 start = -1;
3343 for (i = 0; i < size && cpat; i++)
3344 if (arr[i] != i+16)
3345 {
3346 if (!run)
3347 {
3348 start = i;
3349 if (arr[i] == 3)
3350 run = 1;
3351 else if (arr[i] == 2 && arr[i+1] == 3)
3352 run = 2;
3353 else if (arr[i] == 0)
3354 {
3355 while (arr[i+run] == run && i+run < 16)
3356 run++;
3357 if (run != 4 && run != 8)
3358 cpat = 0;
3359 }
3360 else
3361 cpat = 0;
3362 if ((i & (run-1)) != 0)
3363 cpat = 0;
3364 i += run;
3365 }
3366 else
3367 cpat = 0;
3368 }
b01a6dc3 3369 if (cpat && (run || size < 16))
dea01258 3370 {
3371 if (run == 0)
3372 run = 1;
3373 if (prun)
3374 *prun = run;
3375 if (pstart)
3376 *pstart = start == -1 ? 16-run : start;
3377 return 1;
3378 }
3379 return 0;
3380}
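
/* Worked example (illustrative, not from the original source): the
   16-byte array
     10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f
   matches the control pattern generated by a cwd instruction: every
   byte is i+16 except a 4-byte run 00 01 02 03 starting at offset 4,
   so cpat_info returns 1 with *prun == 4 and *pstart == 4.  */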
3381
3382/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3383 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3384static enum immediate_class
3385classify_immediate (rtx op, enum machine_mode mode)
644459d0 3386{
3387 HOST_WIDE_INT val;
3388 unsigned char arr[16];
5df189be 3389 int i, j, repeated, fsmbi, repeat;
dea01258 3390
3391 gcc_assert (CONSTANT_P (op));
3392
644459d0 3393 if (GET_MODE (op) != VOIDmode)
3394 mode = GET_MODE (op);
3395
dea01258 3396 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3397 if (!flag_pic
3398 && mode == V4SImode
dea01258 3399 && GET_CODE (op) == CONST_VECTOR
3400 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3401 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3402 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3403 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3404 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3405 op = CONST_VECTOR_ELT (op, 0);
644459d0 3406
dea01258 3407 switch (GET_CODE (op))
3408 {
3409 case SYMBOL_REF:
3410 case LABEL_REF:
3411 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3412
dea01258 3413 case CONST:
0cfc65d4 3414 /* We can never know if the resulting address fits in 18 bits and can be
3415 loaded with ila. For now, assume the address will not overflow if
3416 the displacement is "small" (fits 'K' constraint). */
3417 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3418 {
3419 rtx sym = XEXP (XEXP (op, 0), 0);
3420 rtx cst = XEXP (XEXP (op, 0), 1);
3421
3422 if (GET_CODE (sym) == SYMBOL_REF
3423 && GET_CODE (cst) == CONST_INT
3424 && satisfies_constraint_K (cst))
3425 return IC_IL1s;
3426 }
3427 return IC_IL2s;
644459d0 3428
dea01258 3429 case HIGH:
3430 return IC_IL1s;
3431
3432 case CONST_VECTOR:
3433 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3434 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3435 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3436 return IC_POOL;
3437 /* Fall through. */
3438
3439 case CONST_INT:
3440 case CONST_DOUBLE:
3441 constant_to_array (mode, op, arr);
644459d0 3442
dea01258 3443 /* Check that each 4-byte slot is identical. */
3444 repeated = 1;
3445 for (i = 4; i < 16; i += 4)
3446 for (j = 0; j < 4; j++)
3447 if (arr[j] != arr[i + j])
3448 repeated = 0;
3449
3450 if (repeated)
3451 {
3452 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3453 val = trunc_int_for_mode (val, SImode);
3454
3455 if (which_immediate_load (val) != SPU_NONE)
3456 return IC_IL1;
3457 }
3458
3459 /* Any mode of 2 bytes or smaller can be loaded with an il
3460 instruction. */
3461 gcc_assert (GET_MODE_SIZE (mode) > 2);
3462
3463 fsmbi = 1;
5df189be 3464 repeat = 0;
dea01258 3465 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3466 if (arr[i] != 0 && repeat == 0)
3467 repeat = arr[i];
3468 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3469 fsmbi = 0;
3470 if (fsmbi)
5df189be 3471 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3472
3473 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3474 return IC_CPAT;
3475
3476 if (repeated)
3477 return IC_IL2;
3478
3479 return IC_POOL;
3480 default:
3481 break;
3482 }
3483 gcc_unreachable ();
644459d0 3484}
3485
3486static enum spu_immediate
3487which_logical_immediate (HOST_WIDE_INT val)
3488{
3489 gcc_assert (val == trunc_int_for_mode (val, SImode));
3490
3491 if (val >= -0x200 && val <= 0x1ff)
3492 return SPU_ORI;
3493 if (val >= 0 && val <= 0xffff)
3494 return SPU_IOHL;
3495 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3496 {
3497 val = trunc_int_for_mode (val, HImode);
3498 if (val >= -0x200 && val <= 0x1ff)
3499 return SPU_ORHI;
3500 if ((val & 0xff) == ((val >> 8) & 0xff))
3501 {
3502 val = trunc_int_for_mode (val, QImode);
3503 if (val >= -0x200 && val <= 0x1ff)
3504 return SPU_ORBI;
3505 }
3506 }
3507 return SPU_NONE;
3508}
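
/* Examples (illustrative, not from the original source):
     0x1ff      -> SPU_ORI   (fits the 10-bit signed immediate)
     0xabcd     -> SPU_IOHL  (16-bit unsigned, needs iohl)
     0x00050005 -> SPU_ORHI  (repeated halfword 0x0005)
     0x03030303 -> SPU_ORBI  (repeated byte 0x03).  */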
3509
5df189be 3510/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3511 CONST_DOUBLEs. */
3512static int
3513const_vector_immediate_p (rtx x)
3514{
3515 int i;
3516 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3517 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3518 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3519 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3520 return 0;
3521 return 1;
3522}
3523
644459d0 3524int
3525logical_immediate_p (rtx op, enum machine_mode mode)
3526{
3527 HOST_WIDE_INT val;
3528 unsigned char arr[16];
3529 int i, j;
3530
3531 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3532 || GET_CODE (op) == CONST_VECTOR);
3533
5df189be 3534 if (GET_CODE (op) == CONST_VECTOR
3535 && !const_vector_immediate_p (op))
3536 return 0;
3537
644459d0 3538 if (GET_MODE (op) != VOIDmode)
3539 mode = GET_MODE (op);
3540
3541 constant_to_array (mode, op, arr);
3542
3543 /* Check that bytes are repeated. */
3544 for (i = 4; i < 16; i += 4)
3545 for (j = 0; j < 4; j++)
3546 if (arr[j] != arr[i + j])
3547 return 0;
3548
3549 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3550 val = trunc_int_for_mode (val, SImode);
3551
3552 i = which_logical_immediate (val);
3553 return i != SPU_NONE && i != SPU_IOHL;
3554}
3555
3556int
3557iohl_immediate_p (rtx op, enum machine_mode mode)
3558{
3559 HOST_WIDE_INT val;
3560 unsigned char arr[16];
3561 int i, j;
3562
3563 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3564 || GET_CODE (op) == CONST_VECTOR);
3565
5df189be 3566 if (GET_CODE (op) == CONST_VECTOR
3567 && !const_vector_immediate_p (op))
3568 return 0;
3569
644459d0 3570 if (GET_MODE (op) != VOIDmode)
3571 mode = GET_MODE (op);
3572
3573 constant_to_array (mode, op, arr);
3574
3575 /* Check that bytes are repeated. */
3576 for (i = 4; i < 16; i += 4)
3577 for (j = 0; j < 4; j++)
3578 if (arr[j] != arr[i + j])
3579 return 0;
3580
3581 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3582 val = trunc_int_for_mode (val, SImode);
3583
3584 return val >= 0 && val <= 0xffff;
3585}
3586
3587int
3588arith_immediate_p (rtx op, enum machine_mode mode,
3589 HOST_WIDE_INT low, HOST_WIDE_INT high)
3590{
3591 HOST_WIDE_INT val;
3592 unsigned char arr[16];
3593 int bytes, i, j;
3594
3595 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3596 || GET_CODE (op) == CONST_VECTOR);
3597
5df189be 3598 if (GET_CODE (op) == CONST_VECTOR
3599 && !const_vector_immediate_p (op))
3600 return 0;
3601
644459d0 3602 if (GET_MODE (op) != VOIDmode)
3603 mode = GET_MODE (op);
3604
3605 constant_to_array (mode, op, arr);
3606
3607 if (VECTOR_MODE_P (mode))
3608 mode = GET_MODE_INNER (mode);
3609
3610 bytes = GET_MODE_SIZE (mode);
3611 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3612
3613 /* Check that bytes are repeated. */
3614 for (i = bytes; i < 16; i += bytes)
3615 for (j = 0; j < bytes; j++)
3616 if (arr[j] != arr[i + j])
3617 return 0;
3618
3619 val = arr[0];
3620 for (j = 1; j < bytes; j++)
3621 val = (val << 8) | arr[j];
3622
3623 val = trunc_int_for_mode (val, mode);
3624
3625 return val >= low && val <= high;
3626}
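
/* Illustrative example (not from the original source): for
   (const_vector:V8HI [(const_int 12) x8]) every 2-byte element repeats
   across the 16 bytes and the element value is 12, so
   arith_immediate_p (op, V8HImode, -16, 15) returns true.  */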
3627
56c7bfc2 3628/* TRUE when OP is an immediate that is an exact power of 2, i.e.
 3629	   OP == 2^scale with scale >= LOW && scale <= HIGH.  When OP is a vector,
3630 all entries must be the same. */
3631bool
3632exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3633{
3634 enum machine_mode int_mode;
3635 HOST_WIDE_INT val;
3636 unsigned char arr[16];
3637 int bytes, i, j;
3638
3639 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3640 || GET_CODE (op) == CONST_VECTOR);
3641
3642 if (GET_CODE (op) == CONST_VECTOR
3643 && !const_vector_immediate_p (op))
3644 return 0;
3645
3646 if (GET_MODE (op) != VOIDmode)
3647 mode = GET_MODE (op);
3648
3649 constant_to_array (mode, op, arr);
3650
3651 if (VECTOR_MODE_P (mode))
3652 mode = GET_MODE_INNER (mode);
3653
3654 bytes = GET_MODE_SIZE (mode);
3655 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3656
3657 /* Check that bytes are repeated. */
3658 for (i = bytes; i < 16; i += bytes)
3659 for (j = 0; j < bytes; j++)
3660 if (arr[j] != arr[i + j])
3661 return 0;
3662
3663 val = arr[0];
3664 for (j = 1; j < bytes; j++)
3665 val = (val << 8) | arr[j];
3666
3667 val = trunc_int_for_mode (val, int_mode);
3668
3669 /* Currently, we only handle SFmode */
3670 gcc_assert (mode == SFmode);
3671 if (mode == SFmode)
3672 {
3673 int exp = (val >> 23) - 127;
3674 return val > 0 && (val & 0x007fffff) == 0
3675 && exp >= low && exp <= high;
3676 }
3677 return FALSE;
3678}
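
/* Worked example (illustrative, not from the original source): the
   SFmode constant 2.0f has the bit pattern 0x40000000, so the mantissa
   bits (val & 0x007fffff) are zero and the exponent is
   (0x40000000 >> 23) - 127 = 1; exp2_immediate_p returns true whenever
   LOW <= 1 && 1 <= HIGH.  Likewise 0.5f (0x3f000000) gives scale -1.  */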
3679
6cf5579e 3680/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3681
3682static int
3683ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3684{
3685 rtx x = *px;
3686 tree decl;
3687
3688 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3689 {
3690 rtx plus = XEXP (x, 0);
3691 rtx op0 = XEXP (plus, 0);
3692 rtx op1 = XEXP (plus, 1);
3693 if (GET_CODE (op1) == CONST_INT)
3694 x = op0;
3695 }
3696
3697 return (GET_CODE (x) == SYMBOL_REF
3698 && (decl = SYMBOL_REF_DECL (x)) != 0
3699 && TREE_CODE (decl) == VAR_DECL
3700 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3701}
3702
644459d0 3703/* We accept:
5b865faf 3704 - any 32-bit constant (SImode, SFmode)
644459d0 3705 - any constant that can be generated with fsmbi (any mode)
5b865faf 3706 - a 64-bit constant where the high and low bits are identical
644459d0 3707 (DImode, DFmode)
5b865faf 3708 - a 128-bit constant where the four 32-bit words match. */
644459d0 3709int
3710spu_legitimate_constant_p (rtx x)
3711{
5df189be 3712 if (GET_CODE (x) == HIGH)
3713 x = XEXP (x, 0);
6cf5579e 3714
3715 /* Reject any __ea qualified reference. These can't appear in
3716 instructions but must be forced to the constant pool. */
3717 if (for_each_rtx (&x, ea_symbol_ref, 0))
3718 return 0;
3719
644459d0 3720 /* V4SI with all identical symbols is valid. */
5df189be 3721 if (!flag_pic
3722 && GET_MODE (x) == V4SImode
644459d0 3723 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3724 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3725 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3726 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3727 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3728 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3729
5df189be 3730 if (GET_CODE (x) == CONST_VECTOR
3731 && !const_vector_immediate_p (x))
3732 return 0;
644459d0 3733 return 1;
3734}
3735
 3736/* Valid addresses are:
3737 - symbol_ref, label_ref, const
3738 - reg
9d98604b 3739 - reg + const_int, where const_int is 16 byte aligned
644459d0 3740 - reg + reg, alignment doesn't matter
3741 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3742 ignore the 4 least significant bits of the const. We only care about
3743 16 byte modes because the expand phase will change all smaller MEM
3744 references to TImode. */
3745static bool
3746spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3747 rtx x, bool reg_ok_strict)
644459d0 3748{
9d98604b 3749 int aligned = GET_MODE_SIZE (mode) >= 16;
3750 if (aligned
3751 && GET_CODE (x) == AND
644459d0 3752 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3753 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3754 x = XEXP (x, 0);
3755 switch (GET_CODE (x))
3756 {
644459d0 3757 case LABEL_REF:
6cf5579e 3758 return !TARGET_LARGE_MEM;
3759
9d98604b 3760 case SYMBOL_REF:
644459d0 3761 case CONST:
6cf5579e 3762 /* Keep __ea references until reload so that spu_expand_mov can see them
3763 in MEMs. */
3764 if (ea_symbol_ref (&x, 0))
3765 return !reload_in_progress && !reload_completed;
9d98604b 3766 return !TARGET_LARGE_MEM;
644459d0 3767
3768 case CONST_INT:
3769 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3770
3771 case SUBREG:
3772 x = XEXP (x, 0);
9d98604b 3773 if (REG_P (x))
3774 return 0;
644459d0 3775
3776 case REG:
3777 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3778
3779 case PLUS:
3780 case LO_SUM:
3781 {
3782 rtx op0 = XEXP (x, 0);
3783 rtx op1 = XEXP (x, 1);
3784 if (GET_CODE (op0) == SUBREG)
3785 op0 = XEXP (op0, 0);
3786 if (GET_CODE (op1) == SUBREG)
3787 op1 = XEXP (op1, 0);
644459d0 3788 if (GET_CODE (op0) == REG
3789 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3790 && GET_CODE (op1) == CONST_INT
3791 && INTVAL (op1) >= -0x2000
3792 && INTVAL (op1) <= 0x1fff
9d98604b 3793 && (!aligned || (INTVAL (op1) & 15) == 0))
3794 return TRUE;
644459d0 3795 if (GET_CODE (op0) == REG
3796 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3797 && GET_CODE (op1) == REG
3798 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3799 return TRUE;
644459d0 3800 }
3801 break;
3802
3803 default:
3804 break;
3805 }
9d98604b 3806 return FALSE;
644459d0 3807}
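/* Worked examples for the aligned (16-byte access) case described above:
     reg            valid
     reg + 32       valid; the offset is a multiple of 16
     reg + 4        invalid; lqd/stqd would silently drop the low 4 bits
     reg + reg      valid; the alignment of the sum does not matter here  */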
3808
6cf5579e 3809/* Like spu_legitimate_address_p, except with named addresses. */
3810static bool
3811spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3812 bool reg_ok_strict, addr_space_t as)
3813{
3814 if (as == ADDR_SPACE_EA)
3815 return (REG_P (x) && (GET_MODE (x) == EAmode));
3816
3817 else if (as != ADDR_SPACE_GENERIC)
3818 gcc_unreachable ();
3819
3820 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3821}
3822
644459d0 3823/* When the address is reg + const_int, force the const_int into a
fa7637bd 3824 register. */
644459d0 3825rtx
3826spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3827 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3828{
3829 rtx op0, op1;
3830 /* Make sure both operands are registers. */
3831 if (GET_CODE (x) == PLUS)
3832 {
3833 op0 = XEXP (x, 0);
3834 op1 = XEXP (x, 1);
3835 if (ALIGNED_SYMBOL_REF_P (op0))
3836 {
3837 op0 = force_reg (Pmode, op0);
3838 mark_reg_pointer (op0, 128);
3839 }
3840 else if (GET_CODE (op0) != REG)
3841 op0 = force_reg (Pmode, op0);
3842 if (ALIGNED_SYMBOL_REF_P (op1))
3843 {
3844 op1 = force_reg (Pmode, op1);
3845 mark_reg_pointer (op1, 128);
3846 }
3847 else if (GET_CODE (op1) != REG)
3848 op1 = force_reg (Pmode, op1);
3849 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3850 }
41e3a0c7 3851 return x;
644459d0 3852}
3853
6cf5579e 3854/* Like spu_legitimize_address, except with named address support. */
3855static rtx
3856spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3857 addr_space_t as)
3858{
3859 if (as != ADDR_SPACE_GENERIC)
3860 return x;
3861
3862 return spu_legitimize_address (x, oldx, mode);
3863}
3864
644459d0 3865/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3866 struct attribute_spec.handler. */
3867static tree
3868spu_handle_fndecl_attribute (tree * node,
3869 tree name,
3870 tree args ATTRIBUTE_UNUSED,
3871 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3872{
3873 if (TREE_CODE (*node) != FUNCTION_DECL)
3874 {
67a779df 3875 warning (0, "%qE attribute only applies to functions",
3876 name);
644459d0 3877 *no_add_attrs = true;
3878 }
3879
3880 return NULL_TREE;
3881}
3882
3883/* Handle the "vector" attribute. */
3884static tree
3885spu_handle_vector_attribute (tree * node, tree name,
3886 tree args ATTRIBUTE_UNUSED,
3887 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3888{
3889 tree type = *node, result = NULL_TREE;
3890 enum machine_mode mode;
3891 int unsigned_p;
3892
3893 while (POINTER_TYPE_P (type)
3894 || TREE_CODE (type) == FUNCTION_TYPE
3895 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3896 type = TREE_TYPE (type);
3897
3898 mode = TYPE_MODE (type);
3899
3900 unsigned_p = TYPE_UNSIGNED (type);
3901 switch (mode)
3902 {
3903 case DImode:
3904 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3905 break;
3906 case SImode:
3907 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3908 break;
3909 case HImode:
3910 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3911 break;
3912 case QImode:
3913 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3914 break;
3915 case SFmode:
3916 result = V4SF_type_node;
3917 break;
3918 case DFmode:
3919 result = V2DF_type_node;
3920 break;
3921 default:
3922 break;
3923 }
3924
3925 /* Propagate qualifiers attached to the element type
3926 onto the vector type. */
3927 if (result && result != type && TYPE_QUALS (type))
3928 result = build_qualified_type (result, TYPE_QUALS (type));
3929
3930 *no_add_attrs = true; /* No need to hang on to the attribute. */
3931
3932 if (!result)
67a779df 3933 warning (0, "%qE attribute ignored", name);
644459d0 3934 else
d991e6e8 3935 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3936
3937 return NULL_TREE;
3938}
3939
f2b32076 3940/* Return nonzero if FUNC is a naked function. */
644459d0 3941static int
3942spu_naked_function_p (tree func)
3943{
3944 tree a;
3945
3946 if (TREE_CODE (func) != FUNCTION_DECL)
3947 abort ();
3948
3949 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3950 return a != NULL_TREE;
3951}
3952
3953int
3954spu_initial_elimination_offset (int from, int to)
3955{
3956 int saved_regs_size = spu_saved_regs_size ();
3957 int sp_offset = 0;
abe32cce 3958 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3959 || get_frame_size () || saved_regs_size)
3960 sp_offset = STACK_POINTER_OFFSET;
3961 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3962 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3963 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3964 return get_frame_size ();
644459d0 3965 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3966 return sp_offset + crtl->outgoing_args_size
644459d0 3967 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3968 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3969 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3970 else
3971 gcc_unreachable ();
644459d0 3972}
3973
3974rtx
fb80456a 3975spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3976{
3977 enum machine_mode mode = TYPE_MODE (type);
3978 int byte_size = ((mode == BLKmode)
3979 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3980
3981 /* Make sure small structs are left justified in a register. */
3982 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3983 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3984 {
3985 enum machine_mode smode;
3986 rtvec v;
3987 int i;
3988 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3989 int n = byte_size / UNITS_PER_WORD;
3990 v = rtvec_alloc (nregs);
3991 for (i = 0; i < n; i++)
3992 {
3993 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3994 gen_rtx_REG (TImode,
3995 FIRST_RETURN_REGNUM
3996 + i),
3997 GEN_INT (UNITS_PER_WORD * i));
3998 byte_size -= UNITS_PER_WORD;
3999 }
4000
4001 if (n < nregs)
4002 {
4003 if (byte_size < 4)
4004 byte_size = 4;
4005 smode =
4006 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4007 RTVEC_ELT (v, n) =
4008 gen_rtx_EXPR_LIST (VOIDmode,
4009 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4010 GEN_INT (UNITS_PER_WORD * n));
4011 }
4012 return gen_rtx_PARALLEL (mode, v);
4013 }
4014 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4015}
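/* Illustrative example: a 20-byte struct returned by value yields a
   PARALLEL of two pieces, a TImode piece in the first return register at
   offset 0 and an SImode piece in the next register at offset 16, keeping
   the aggregate left justified across the register pair.  */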
4016
ee9034d4 4017static rtx
4018spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4019 enum machine_mode mode,
ee9034d4 4020 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4021{
4022 int byte_size;
4023
4024 if (cum >= MAX_REGISTER_ARGS)
4025 return 0;
4026
4027 byte_size = ((mode == BLKmode)
4028 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4029
4030 /* The ABI does not allow parameters to be passed partially in
4031 reg and partially in stack. */
4032 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4033 return 0;
4034
4035 /* Make sure small structs are left justified in a register. */
4036 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4037 && byte_size < UNITS_PER_WORD && byte_size > 0)
4038 {
4039 enum machine_mode smode;
4040 rtx gr_reg;
4041 if (byte_size < 4)
4042 byte_size = 4;
4043 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4044 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4045 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4046 const0_rtx);
4047 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4048 }
4049 else
4050 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4051}
4052
ee9034d4 4053static void
4054spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4055 const_tree type, bool named ATTRIBUTE_UNUSED)
4056{
4057 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4058 ? 1
4059 : mode == BLKmode
4060 ? ((int_size_in_bytes (type) + 15) / 16)
4061 : mode == VOIDmode
4062 ? 1
4063 : HARD_REGNO_NREGS (cum, mode));
4064}
4065
644459d0 4066/* Variable sized types are passed by reference. */
4067static bool
4068spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4069 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4070 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4071{
4072 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4073}
4074\f
4075
4076/* Var args. */
4077
4078/* Create and return the va_list datatype.
4079
4080 On SPU, va_list is an array type equivalent to
4081
4082 typedef struct __va_list_tag
4083 {
4084 void *__args __attribute__((__aligned(16)));
4085 void *__skip __attribute__((__aligned(16)));
4086
4087 } va_list[1];
4088
fa7637bd 4089 where __args points to the arg that will be returned by the next
644459d0 4090 va_arg(), and __skip points to the previous stack frame such that
4091 when __args == __skip we should advance __args by 32 bytes. */
4092static tree
4093spu_build_builtin_va_list (void)
4094{
4095 tree f_args, f_skip, record, type_decl;
4096 bool owp;
4097
4098 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4099
4100 type_decl =
54e46243 4101 build_decl (BUILTINS_LOCATION,
4102 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4103
54e46243 4104 f_args = build_decl (BUILTINS_LOCATION,
4105 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4106 f_skip = build_decl (BUILTINS_LOCATION,
4107 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4108
4109 DECL_FIELD_CONTEXT (f_args) = record;
4110 DECL_ALIGN (f_args) = 128;
4111 DECL_USER_ALIGN (f_args) = 1;
4112
4113 DECL_FIELD_CONTEXT (f_skip) = record;
4114 DECL_ALIGN (f_skip) = 128;
4115 DECL_USER_ALIGN (f_skip) = 1;
4116
4117 TREE_CHAIN (record) = type_decl;
4118 TYPE_NAME (record) = type_decl;
4119 TYPE_FIELDS (record) = f_args;
1767a056 4120 DECL_CHAIN (f_args) = f_skip;
644459d0 4121
4122 /* We know this is being padded and we want it too. It is an internal
4123 type so hide the warnings from the user. */
4124 owp = warn_padded;
4125 warn_padded = false;
4126
4127 layout_type (record);
4128
4129 warn_padded = owp;
4130
4131 /* The correct type is an array type of one element. */
4132 return build_array_type (record, build_index_type (size_zero_node));
4133}
4134
4135/* Implement va_start by filling the va_list structure VALIST.
4136 NEXTARG points to the first anonymous stack argument.
4137
4138 The following global variables are used to initialize
4139 the va_list structure:
4140
abe32cce 4141 crtl->args.info;
644459d0 4142 the CUMULATIVE_ARGS for this function
4143
abe32cce 4144 crtl->args.arg_offset_rtx:
644459d0 4145 holds the offset of the first anonymous stack argument
4146 (relative to the virtual arg pointer). */
4147
8a58ed0a 4148static void
644459d0 4149spu_va_start (tree valist, rtx nextarg)
4150{
4151 tree f_args, f_skip;
4152 tree args, skip, t;
4153
4154 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4155 f_skip = DECL_CHAIN (f_args);
644459d0 4156
4157 valist = build_va_arg_indirect_ref (valist);
4158 args =
4159 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4160 skip =
4161 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4162
4163 /* Find the __args area. */
4164 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4165 if (crtl->args.pretend_args_size > 0)
0de36bdb 4166 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4167 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4168 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4169 TREE_SIDE_EFFECTS (t) = 1;
4170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4171
4172 /* Find the __skip area. */
4173 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4174 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4175 size_int (crtl->args.pretend_args_size
0de36bdb 4176 - STACK_POINTER_OFFSET));
75a70cf9 4177 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4178 TREE_SIDE_EFFECTS (t) = 1;
4179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4180}
4181
4182/* Gimplify va_arg by updating the va_list structure
4183 VALIST as required to retrieve an argument of type
4184 TYPE, and returning that argument.
4185
4186 ret = va_arg(VALIST, TYPE);
4187
4188 generates code equivalent to:
4189
4190 paddedsize = (sizeof(TYPE) + 15) & -16;
4191 if (VALIST.__args + paddedsize > VALIST.__skip
4192 && VALIST.__args <= VALIST.__skip)
4193 addr = VALIST.__skip + 32;
4194 else
4195 addr = VALIST.__args;
4196 VALIST.__args = addr + paddedsize;
4197 ret = *(TYPE *)addr;
4198 */
4199static tree
75a70cf9 4200spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4201 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4202{
4203 tree f_args, f_skip;
4204 tree args, skip;
4205 HOST_WIDE_INT size, rsize;
4206 tree paddedsize, addr, tmp;
4207 bool pass_by_reference_p;
4208
4209 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4210 f_skip = DECL_CHAIN (f_args);
644459d0 4211
182cf5a9 4212 valist = build_simple_mem_ref (valist);
644459d0 4213 args =
4214 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4215 skip =
4216 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4217
4218 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4219
4220 /* if an object is dynamically sized, a pointer to it is passed
4221 instead of the object itself. */
4222 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4223 false);
4224 if (pass_by_reference_p)
4225 type = build_pointer_type (type);
4226 size = int_size_in_bytes (type);
4227 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4228
4229 /* build conditional expression to calculate addr. The expression
4230 will be gimplified later. */
0de36bdb 4231 paddedsize = size_int (rsize);
75a70cf9 4232 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4233 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4234 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4235 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4236 unshare_expr (skip)));
644459d0 4237
4238 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4239 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4240 size_int (32)), unshare_expr (args));
644459d0 4241
75a70cf9 4242 gimplify_assign (addr, tmp, pre_p);
644459d0 4243
4244 /* update VALIST.__args */
0de36bdb 4245 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4246 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4247
8115f0af 4248 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4249 addr);
644459d0 4250
4251 if (pass_by_reference_p)
4252 addr = build_va_arg_indirect_ref (addr);
4253
4254 return build_va_arg_indirect_ref (addr);
4255}
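/* Illustrative sketch (hypothetical, standalone helper): the pointer
   arithmetic performed by the gimplified sequence above, written as plain
   C.  UNITS_PER_WORD is 16 here, so arguments are padded to a quadword. */
#if 0 /* example only */
static void *
va_arg_addr_example (void **args, void *skip, unsigned long size)
{
  unsigned long padded = (size + 15) & ~15UL;	/* quadword-pad the arg */
  char *addr = (char *) *args;
  /* Crossing from the register save area into the caller's frame skips
     the 32-byte hole described above.  */
  if (addr + padded > (char *) skip && addr <= (char *) skip)
    addr = (char *) skip + 32;
  *args = addr + padded;			/* advance __args */
  return addr;
}
#endif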
4256
4257/* Save parameter registers starting with the register that corresponds
 4258 to the first unnamed parameter. If the first unnamed parameter is
 4259 on the stack, then save no registers. Set pretend_args_size to the
4260 amount of space needed to save the registers. */
4261void
4262spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4263 tree type, int *pretend_size, int no_rtl)
4264{
4265 if (!no_rtl)
4266 {
4267 rtx tmp;
4268 int regno;
4269 int offset;
4270 int ncum = *cum;
4271
4272 /* cum currently points to the last named argument, we want to
4273 start at the next argument. */
ee9034d4 4274 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4275
4276 offset = -STACK_POINTER_OFFSET;
4277 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4278 {
4279 tmp = gen_frame_mem (V4SImode,
4280 plus_constant (virtual_incoming_args_rtx,
4281 offset));
4282 emit_move_insn (tmp,
4283 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4284 offset += 16;
4285 }
4286 *pretend_size = offset + STACK_POINTER_OFFSET;
4287 }
4288}
4289\f
4290void
4291spu_conditional_register_usage (void)
4292{
4293 if (flag_pic)
4294 {
4295 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4296 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4297 }
644459d0 4298}
4299
9d98604b 4300/* This is called any time we inspect the alignment of a register for
4301 addresses. */
644459d0 4302static int
9d98604b 4303reg_aligned_for_addr (rtx x)
644459d0 4304{
9d98604b 4305 int regno =
4306 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4307 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4308}
4309
69ced2d6 4310/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4311 into its SYMBOL_REF_FLAGS. */
4312static void
4313spu_encode_section_info (tree decl, rtx rtl, int first)
4314{
4315 default_encode_section_info (decl, rtl, first);
4316
4317 /* If a variable has a forced alignment to < 16 bytes, mark it with
4318 SYMBOL_FLAG_ALIGN1. */
4319 if (TREE_CODE (decl) == VAR_DECL
4320 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4321 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4322}
4323
644459d0 4324/* Return TRUE if we are certain the mem refers to a complete object
4325 which is both 16-byte aligned and padded to a 16-byte boundary. This
4326 would make it safe to store with a single instruction.
4327 We guarantee the alignment and padding for static objects by aligning
4328 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4329 FIXME: We currently cannot guarantee this for objects on the stack
4330 because assign_parm_setup_stack calls assign_stack_local with the
4331 alignment of the parameter mode and in that case the alignment never
4332 gets adjusted by LOCAL_ALIGNMENT. */
4333static int
4334store_with_one_insn_p (rtx mem)
4335{
9d98604b 4336 enum machine_mode mode = GET_MODE (mem);
644459d0 4337 rtx addr = XEXP (mem, 0);
9d98604b 4338 if (mode == BLKmode)
644459d0 4339 return 0;
9d98604b 4340 if (GET_MODE_SIZE (mode) >= 16)
4341 return 1;
644459d0 4342 /* Only static objects. */
4343 if (GET_CODE (addr) == SYMBOL_REF)
4344 {
4345 /* We use the associated declaration to make sure the access is
fa7637bd 4346 referring to the whole object.
644459d0 4347 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4348 if it is necessary. Will there be cases where one exists, and
4349 the other does not? Will there be cases where both exist, but
4350 have different types? */
4351 tree decl = MEM_EXPR (mem);
4352 if (decl
4353 && TREE_CODE (decl) == VAR_DECL
4354 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4355 return 1;
4356 decl = SYMBOL_REF_DECL (addr);
4357 if (decl
4358 && TREE_CODE (decl) == VAR_DECL
4359 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4360 return 1;
4361 }
4362 return 0;
4363}
4364
9d98604b 4365/* Return 1 when the address is not valid for a simple load and store as
4366 required by the '_mov*' patterns. We could make this less strict
4367 for loads, but we prefer mem's to look the same so they are more
4368 likely to be merged. */
4369static int
4370address_needs_split (rtx mem)
4371{
4372 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4373 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4374 || !(store_with_one_insn_p (mem)
4375 || mem_is_padded_component_ref (mem))))
4376 return 1;
4377
4378 return 0;
4379}
4380
6cf5579e 4381static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4382static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4383static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4384
4385/* MEM is known to be an __ea qualified memory access. Emit a call to
4386 fetch the ppu memory to local store, and return its address in local
4387 store. */
4388
4389static void
4390ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4391{
4392 if (is_store)
4393 {
4394 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4395 if (!cache_fetch_dirty)
4396 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4397 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4398 2, ea_addr, EAmode, ndirty, SImode);
4399 }
4400 else
4401 {
4402 if (!cache_fetch)
4403 cache_fetch = init_one_libfunc ("__cache_fetch");
4404 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4405 1, ea_addr, EAmode);
4406 }
4407}
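/* In other words (illustrative pseudo-calls), the emitted call is
   equivalent to one of
     data_addr = __cache_fetch_dirty (ea_addr, size_of_access);
     data_addr = __cache_fetch (ea_addr);
   for a store or a load respectively, with both runtime routines
   returning the local-store address of the cached data.  */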
4408
4409/* Like ea_load_store, but do the cache tag comparison and, for stores,
4410 dirty bit marking, inline.
4411
4412 The cache control data structure is an array of
4413
4414 struct __cache_tag_array
4415 {
4416 unsigned int tag_lo[4];
4417 unsigned int tag_hi[4];
4418 void *data_pointer[4];
4419 int reserved[4];
4420 vector unsigned short dirty_bits[4];
4421 } */
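/* Illustrative sketch (hypothetical, standalone helper): the scalar
   arithmetic behind the splatted vector operations below.  The cache
   works on 128-byte lines, so a given effective address is decomposed
   into a set offset, an offset within the line, and a line-aligned tag. */
#if 0 /* example only */
static void
ea_cache_index_example (unsigned int ea, unsigned int tag_array_size,
			unsigned int *tag_index, unsigned int *block_off,
			unsigned int *tag)
{
  *tag_index = ea & (tag_array_size - 128);	/* byte offset of the set */
  *block_off = ea & 127;			/* offset within the line */
  *tag = ea & ~127u;				/* line-aligned address */
}
#endif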
4422
4423static void
4424ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4425{
4426 rtx ea_addr_si;
4427 HOST_WIDE_INT v;
4428 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4429 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4430 rtx index_mask = gen_reg_rtx (SImode);
4431 rtx tag_arr = gen_reg_rtx (Pmode);
4432 rtx splat_mask = gen_reg_rtx (TImode);
4433 rtx splat = gen_reg_rtx (V4SImode);
4434 rtx splat_hi = NULL_RTX;
4435 rtx tag_index = gen_reg_rtx (Pmode);
4436 rtx block_off = gen_reg_rtx (SImode);
4437 rtx tag_addr = gen_reg_rtx (Pmode);
4438 rtx tag = gen_reg_rtx (V4SImode);
4439 rtx cache_tag = gen_reg_rtx (V4SImode);
4440 rtx cache_tag_hi = NULL_RTX;
4441 rtx cache_ptrs = gen_reg_rtx (TImode);
4442 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4443 rtx tag_equal = gen_reg_rtx (V4SImode);
4444 rtx tag_equal_hi = NULL_RTX;
4445 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4446 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4447 rtx eq_index = gen_reg_rtx (SImode);
4448 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4449
4450 if (spu_ea_model != 32)
4451 {
4452 splat_hi = gen_reg_rtx (V4SImode);
4453 cache_tag_hi = gen_reg_rtx (V4SImode);
4454 tag_equal_hi = gen_reg_rtx (V4SImode);
4455 }
4456
4457 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4458 emit_move_insn (tag_arr, tag_arr_sym);
4459 v = 0x0001020300010203LL;
4460 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4461 ea_addr_si = ea_addr;
4462 if (spu_ea_model != 32)
4463 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4464
4465 /* tag_index = ea_addr & (tag_array_size - 128) */
4466 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4467
4468 /* splat ea_addr to all 4 slots. */
4469 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4470 /* Similarly for high 32 bits of ea_addr. */
4471 if (spu_ea_model != 32)
4472 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4473
4474 /* block_off = ea_addr & 127 */
4475 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4476
4477 /* tag_addr = tag_arr + tag_index */
4478 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4479
4480 /* Read cache tags. */
4481 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4482 if (spu_ea_model != 32)
4483 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4484 plus_constant (tag_addr, 16)));
4485
4486 /* tag = ea_addr & -128 */
4487 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4488
4489 /* Read all four cache data pointers. */
4490 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4491 plus_constant (tag_addr, 32)));
4492
4493 /* Compare tags. */
4494 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4495 if (spu_ea_model != 32)
4496 {
4497 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4498 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4499 }
4500
4501 /* At most one of the tags compare equal, so tag_equal has one
4502 32-bit slot set to all 1's, with the other slots all zero.
4503 gbb picks off low bit from each byte in the 128-bit registers,
4504 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4505 we have a hit. */
4506 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4507 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4508
4509 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4510 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4511
 4512 /* This allows us to rotate the corresponding cache data pointer to
 4513    slot 0 (rotating by eq_index mod 16 bytes). */
4514 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4515 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4516
4517 /* Add block offset to form final data address. */
4518 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4519
4520 /* Check that we did hit. */
4521 hit_label = gen_label_rtx ();
4522 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4523 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4524 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4525 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4526 hit_ref, pc_rtx)));
4527 /* Say that this branch is very likely to happen. */
4528 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4529 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4530
4531 ea_load_store (mem, is_store, ea_addr, data_addr);
4532 cont_label = gen_label_rtx ();
4533 emit_jump_insn (gen_jump (cont_label));
4534 emit_barrier ();
4535
4536 emit_label (hit_label);
4537
4538 if (is_store)
4539 {
4540 HOST_WIDE_INT v_hi;
4541 rtx dirty_bits = gen_reg_rtx (TImode);
4542 rtx dirty_off = gen_reg_rtx (SImode);
4543 rtx dirty_128 = gen_reg_rtx (TImode);
4544 rtx neg_block_off = gen_reg_rtx (SImode);
4545
4546 /* Set up mask with one dirty bit per byte of the mem we are
4547 writing, starting from top bit. */
4548 v_hi = v = -1;
4549 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4550 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4551 {
4552 v_hi = v;
4553 v = 0;
4554 }
4555 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4556
4557 /* Form index into cache dirty_bits. eq_index is one of
4558 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4559 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4560 offset to each of the four dirty_bits elements. */
4561 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4562
4563 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4564
4565 /* Rotate bit mask to proper bit. */
4566 emit_insn (gen_negsi2 (neg_block_off, block_off));
4567 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4568 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4569
4570 /* Or in the new dirty bits. */
4571 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4572
4573 /* Store. */
4574 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4575 }
4576
4577 emit_label (cont_label);
4578}
4579
4580static rtx
4581expand_ea_mem (rtx mem, bool is_store)
4582{
4583 rtx ea_addr;
4584 rtx data_addr = gen_reg_rtx (Pmode);
4585 rtx new_mem;
4586
4587 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4588 if (optimize_size || optimize == 0)
4589 ea_load_store (mem, is_store, ea_addr, data_addr);
4590 else
4591 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4592
4593 if (ea_alias_set == -1)
4594 ea_alias_set = new_alias_set ();
4595
4596 /* We generate a new MEM RTX to refer to the copy of the data
4597 in the cache. We do not copy memory attributes (except the
4598 alignment) from the original MEM, as they may no longer apply
4599 to the cache copy. */
4600 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4601 set_mem_alias_set (new_mem, ea_alias_set);
4602 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4603
4604 return new_mem;
4605}
4606
644459d0 4607int
4608spu_expand_mov (rtx * ops, enum machine_mode mode)
4609{
4610 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4611 abort ();
4612
4613 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4614 {
4615 rtx from = SUBREG_REG (ops[1]);
8d72495d 4616 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4617
4618 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4619 && GET_MODE_CLASS (imode) == MODE_INT
4620 && subreg_lowpart_p (ops[1]));
4621
4622 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4623 imode = SImode;
4624 if (imode != GET_MODE (from))
4625 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4626
4627 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4628 {
d6bf3b14 4629 enum insn_code icode = convert_optab_handler (trunc_optab,
4630 mode, imode);
644459d0 4631 emit_insn (GEN_FCN (icode) (ops[0], from));
4632 }
4633 else
4634 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4635 return 1;
4636 }
4637
4638 /* At least one of the operands needs to be a register. */
4639 if ((reload_in_progress | reload_completed) == 0
4640 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4641 {
4642 rtx temp = force_reg (mode, ops[1]);
4643 emit_move_insn (ops[0], temp);
4644 return 1;
4645 }
4646 if (reload_in_progress || reload_completed)
4647 {
dea01258 4648 if (CONSTANT_P (ops[1]))
4649 return spu_split_immediate (ops);
644459d0 4650 return 0;
4651 }
9d98604b 4652
4653 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4654 extend them. */
4655 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4656 {
9d98604b 4657 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4658 if (val != INTVAL (ops[1]))
644459d0 4659 {
9d98604b 4660 emit_move_insn (ops[0], GEN_INT (val));
4661 return 1;
644459d0 4662 }
4663 }
9d98604b 4664 if (MEM_P (ops[0]))
6cf5579e 4665 {
4666 if (MEM_ADDR_SPACE (ops[0]))
4667 ops[0] = expand_ea_mem (ops[0], true);
4668 return spu_split_store (ops);
4669 }
9d98604b 4670 if (MEM_P (ops[1]))
6cf5579e 4671 {
4672 if (MEM_ADDR_SPACE (ops[1]))
4673 ops[1] = expand_ea_mem (ops[1], false);
4674 return spu_split_load (ops);
4675 }
9d98604b 4676
644459d0 4677 return 0;
4678}
4679
9d98604b 4680static void
4681spu_convert_move (rtx dst, rtx src)
644459d0 4682{
9d98604b 4683 enum machine_mode mode = GET_MODE (dst);
4684 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4685 rtx reg;
4686 gcc_assert (GET_MODE (src) == TImode);
4687 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4688 emit_insn (gen_rtx_SET (VOIDmode, reg,
4689 gen_rtx_TRUNCATE (int_mode,
4690 gen_rtx_LSHIFTRT (TImode, src,
4691 GEN_INT (int_mode == DImode ? 64 : 96)))));
4692 if (int_mode != mode)
4693 {
4694 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4695 emit_move_insn (dst, reg);
4696 }
4697}
644459d0 4698
9d98604b 4699/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4700 the address from SRC and SRC+16. Return a REG or CONST_INT that
4701 specifies how many bytes to rotate the loaded registers, plus any
4702 extra from EXTRA_ROTQBY. The address and rotate amounts are
4703 normalized to improve merging of loads and rotate computations. */
4704static rtx
4705spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4706{
4707 rtx addr = XEXP (src, 0);
4708 rtx p0, p1, rot, addr0, addr1;
4709 int rot_amt;
644459d0 4710
4711 rot = 0;
4712 rot_amt = 0;
9d98604b 4713
4714 if (MEM_ALIGN (src) >= 128)
4715 /* Address is already aligned; simply perform a TImode load. */ ;
4716 else if (GET_CODE (addr) == PLUS)
644459d0 4717 {
4718 /* 8 cases:
4719 aligned reg + aligned reg => lqx
4720 aligned reg + unaligned reg => lqx, rotqby
4721 aligned reg + aligned const => lqd
4722 aligned reg + unaligned const => lqd, rotqbyi
4723 unaligned reg + aligned reg => lqx, rotqby
4724 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4725 unaligned reg + aligned const => lqd, rotqby
4726 unaligned reg + unaligned const -> not allowed by legitimate address
4727 */
4728 p0 = XEXP (addr, 0);
4729 p1 = XEXP (addr, 1);
9d98604b 4730 if (!reg_aligned_for_addr (p0))
644459d0 4731 {
9d98604b 4732 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4733 {
9d98604b 4734 rot = gen_reg_rtx (SImode);
4735 emit_insn (gen_addsi3 (rot, p0, p1));
4736 }
4737 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4738 {
4739 if (INTVAL (p1) > 0
4740 && REG_POINTER (p0)
4741 && INTVAL (p1) * BITS_PER_UNIT
4742 < REGNO_POINTER_ALIGN (REGNO (p0)))
4743 {
4744 rot = gen_reg_rtx (SImode);
4745 emit_insn (gen_addsi3 (rot, p0, p1));
4746 addr = p0;
4747 }
4748 else
4749 {
4750 rtx x = gen_reg_rtx (SImode);
4751 emit_move_insn (x, p1);
4752 if (!spu_arith_operand (p1, SImode))
4753 p1 = x;
4754 rot = gen_reg_rtx (SImode);
4755 emit_insn (gen_addsi3 (rot, p0, p1));
4756 addr = gen_rtx_PLUS (Pmode, p0, x);
4757 }
644459d0 4758 }
4759 else
4760 rot = p0;
4761 }
4762 else
4763 {
4764 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4765 {
4766 rot_amt = INTVAL (p1) & 15;
9d98604b 4767 if (INTVAL (p1) & -16)
4768 {
4769 p1 = GEN_INT (INTVAL (p1) & -16);
4770 addr = gen_rtx_PLUS (SImode, p0, p1);
4771 }
4772 else
4773 addr = p0;
644459d0 4774 }
9d98604b 4775 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4776 rot = p1;
4777 }
4778 }
9d98604b 4779 else if (REG_P (addr))
644459d0 4780 {
9d98604b 4781 if (!reg_aligned_for_addr (addr))
644459d0 4782 rot = addr;
4783 }
4784 else if (GET_CODE (addr) == CONST)
4785 {
4786 if (GET_CODE (XEXP (addr, 0)) == PLUS
4787 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4788 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4789 {
4790 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4791 if (rot_amt & -16)
4792 addr = gen_rtx_CONST (Pmode,
4793 gen_rtx_PLUS (Pmode,
4794 XEXP (XEXP (addr, 0), 0),
4795 GEN_INT (rot_amt & -16)));
4796 else
4797 addr = XEXP (XEXP (addr, 0), 0);
4798 }
4799 else
9d98604b 4800 {
4801 rot = gen_reg_rtx (Pmode);
4802 emit_move_insn (rot, addr);
4803 }
644459d0 4804 }
4805 else if (GET_CODE (addr) == CONST_INT)
4806 {
4807 rot_amt = INTVAL (addr);
4808 addr = GEN_INT (rot_amt & -16);
4809 }
4810 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4811 {
4812 rot = gen_reg_rtx (Pmode);
4813 emit_move_insn (rot, addr);
4814 }
644459d0 4815
9d98604b 4816 rot_amt += extra_rotby;
644459d0 4817
4818 rot_amt &= 15;
4819
4820 if (rot && rot_amt)
4821 {
9d98604b 4822 rtx x = gen_reg_rtx (SImode);
4823 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4824 rot = x;
644459d0 4825 rot_amt = 0;
4826 }
9d98604b 4827 if (!rot && rot_amt)
4828 rot = GEN_INT (rot_amt);
4829
4830 addr0 = copy_rtx (addr);
4831 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4832 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4833
4834 if (dst1)
4835 {
4836 addr1 = plus_constant (copy_rtx (addr), 16);
4837 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4838 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4839 }
644459d0 4840
9d98604b 4841 return rot;
4842}
4843
4844int
4845spu_split_load (rtx * ops)
4846{
4847 enum machine_mode mode = GET_MODE (ops[0]);
4848 rtx addr, load, rot;
4849 int rot_amt;
644459d0 4850
9d98604b 4851 if (GET_MODE_SIZE (mode) >= 16)
4852 return 0;
644459d0 4853
9d98604b 4854 addr = XEXP (ops[1], 0);
4855 gcc_assert (GET_CODE (addr) != AND);
4856
4857 if (!address_needs_split (ops[1]))
4858 {
4859 ops[1] = change_address (ops[1], TImode, addr);
4860 load = gen_reg_rtx (TImode);
4861 emit_insn (gen__movti (load, ops[1]));
4862 spu_convert_move (ops[0], load);
4863 return 1;
4864 }
4865
4866 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4867
4868 load = gen_reg_rtx (TImode);
4869 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4870
4871 if (rot)
4872 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4873
9d98604b 4874 spu_convert_move (ops[0], load);
4875 return 1;
644459d0 4876}
4877
9d98604b 4878int
644459d0 4879spu_split_store (rtx * ops)
4880{
4881 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4882 rtx reg;
644459d0 4883 rtx addr, p0, p1, p1_lo, smem;
4884 int aform;
4885 int scalar;
4886
9d98604b 4887 if (GET_MODE_SIZE (mode) >= 16)
4888 return 0;
4889
644459d0 4890 addr = XEXP (ops[0], 0);
9d98604b 4891 gcc_assert (GET_CODE (addr) != AND);
4892
4893 if (!address_needs_split (ops[0]))
4894 {
4895 reg = gen_reg_rtx (TImode);
4896 emit_insn (gen_spu_convert (reg, ops[1]));
4897 ops[0] = change_address (ops[0], TImode, addr);
4898 emit_move_insn (ops[0], reg);
4899 return 1;
4900 }
644459d0 4901
4902 if (GET_CODE (addr) == PLUS)
4903 {
4904 /* 8 cases:
4905 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4906 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4907 aligned reg + aligned const => lqd, c?d, shuf, stqx
4908 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4909 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4910 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4911 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4912 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
644459d0 4913 */
4914 aform = 0;
4915 p0 = XEXP (addr, 0);
4916 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4917 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4918 {
4919 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4920 if (reg_aligned_for_addr (p0))
4921 {
4922 p1 = GEN_INT (INTVAL (p1) & -16);
4923 if (p1 == const0_rtx)
4924 addr = p0;
4925 else
4926 addr = gen_rtx_PLUS (SImode, p0, p1);
4927 }
4928 else
4929 {
4930 rtx x = gen_reg_rtx (SImode);
4931 emit_move_insn (x, p1);
4932 addr = gen_rtx_PLUS (SImode, p0, x);
4933 }
644459d0 4934 }
4935 }
9d98604b 4936 else if (REG_P (addr))
644459d0 4937 {
4938 aform = 0;
4939 p0 = addr;
4940 p1 = p1_lo = const0_rtx;
4941 }
4942 else
4943 {
4944 aform = 1;
4945 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4946 p1 = 0; /* aform doesn't use p1 */
4947 p1_lo = addr;
4948 if (ALIGNED_SYMBOL_REF_P (addr))
4949 p1_lo = const0_rtx;
9d98604b 4950 else if (GET_CODE (addr) == CONST
4951 && GET_CODE (XEXP (addr, 0)) == PLUS
4952 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4953 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4954 {
9d98604b 4955 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4956 if ((v & -16) != 0)
4957 addr = gen_rtx_CONST (Pmode,
4958 gen_rtx_PLUS (Pmode,
4959 XEXP (XEXP (addr, 0), 0),
4960 GEN_INT (v & -16)));
4961 else
4962 addr = XEXP (XEXP (addr, 0), 0);
4963 p1_lo = GEN_INT (v & 15);
644459d0 4964 }
4965 else if (GET_CODE (addr) == CONST_INT)
4966 {
4967 p1_lo = GEN_INT (INTVAL (addr) & 15);
4968 addr = GEN_INT (INTVAL (addr) & -16);
4969 }
9d98604b 4970 else
4971 {
4972 p1_lo = gen_reg_rtx (SImode);
4973 emit_move_insn (p1_lo, addr);
4974 }
644459d0 4975 }
4976
9d98604b 4977 reg = gen_reg_rtx (TImode);
e04cf423 4978
644459d0 4979 scalar = store_with_one_insn_p (ops[0]);
4980 if (!scalar)
4981 {
 4982 /* We could copy the flags from the ops[0] MEM to the new MEM here.
 4983 We don't because we want this load to be optimized away if
 4984 possible, and copying the flags will prevent that in certain
 4985 cases, e.g., consider the volatile flag. */
4986
9d98604b 4987 rtx pat = gen_reg_rtx (TImode);
e04cf423 4988 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4989 set_mem_alias_set (lmem, 0);
4990 emit_insn (gen_movti (reg, lmem));
644459d0 4991
9d98604b 4992 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4993 p0 = stack_pointer_rtx;
4994 if (!p1_lo)
4995 p1_lo = const0_rtx;
4996
4997 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4998 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4999 }
644459d0 5000 else
5001 {
5002 if (GET_CODE (ops[1]) == REG)
5003 emit_insn (gen_spu_convert (reg, ops[1]));
5004 else if (GET_CODE (ops[1]) == SUBREG)
5005 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5006 else
5007 abort ();
5008 }
5009
5010 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5011 emit_insn (gen_ashlti3
5012 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5013
9d98604b 5014 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5015 /* We can't use the previous alias set because the memory has changed
5016 size and can potentially overlap objects of other types. */
5017 set_mem_alias_set (smem, 0);
5018
e04cf423 5019 emit_insn (gen_movti (smem, reg));
9d98604b 5020 return 1;
644459d0 5021}
5022
5023/* Return TRUE if X is MEM which is a struct member reference
5024 and the member can safely be loaded and stored with a single
5025 instruction because it is padded. */
5026static int
5027mem_is_padded_component_ref (rtx x)
5028{
5029 tree t = MEM_EXPR (x);
5030 tree r;
5031 if (!t || TREE_CODE (t) != COMPONENT_REF)
5032 return 0;
5033 t = TREE_OPERAND (t, 1);
5034 if (!t || TREE_CODE (t) != FIELD_DECL
5035 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5036 return 0;
5037 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5038 r = DECL_FIELD_CONTEXT (t);
5039 if (!r || TREE_CODE (r) != RECORD_TYPE)
5040 return 0;
5041 /* Make sure they are the same mode */
5042 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5043 return 0;
 5044 /* If there are no following fields, then the field alignment assures
fa7637bd 5045 that the structure is padded to that alignment, which means this
 5046 field is padded too. */
644459d0 5047 if (TREE_CHAIN (t) == 0)
5048 return 1;
5049 /* If the following field is also aligned then this field will be
5050 padded. */
5051 t = TREE_CHAIN (t);
5052 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5053 return 1;
5054 return 0;
5055}
5056
c7b91b14 5057/* Parse the -mfixed-range= option string. */
5058static void
5059fix_range (const char *const_str)
5060{
5061 int i, first, last;
5062 char *str, *dash, *comma;
5063
 5064 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5065 REG2 are either register names or register numbers. The effect
5066 of this option is to mark the registers in the range from REG1 to
5067 REG2 as ``fixed'' so they won't be used by the compiler. */
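      For example (hypothetical option value), -mfixed-range=80-85,99-99
      marks registers 80 through 85 and register 99 as fixed and
      call-used.  */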
5068
5069 i = strlen (const_str);
5070 str = (char *) alloca (i + 1);
5071 memcpy (str, const_str, i + 1);
5072
5073 while (1)
5074 {
5075 dash = strchr (str, '-');
5076 if (!dash)
5077 {
5078 warning (0, "value of -mfixed-range must have form REG1-REG2");
5079 return;
5080 }
5081 *dash = '\0';
5082 comma = strchr (dash + 1, ',');
5083 if (comma)
5084 *comma = '\0';
5085
5086 first = decode_reg_name (str);
5087 if (first < 0)
5088 {
5089 warning (0, "unknown register name: %s", str);
5090 return;
5091 }
5092
5093 last = decode_reg_name (dash + 1);
5094 if (last < 0)
5095 {
5096 warning (0, "unknown register name: %s", dash + 1);
5097 return;
5098 }
5099
5100 *dash = '-';
5101
5102 if (first > last)
5103 {
5104 warning (0, "%s-%s is an empty range", str, dash + 1);
5105 return;
5106 }
5107
5108 for (i = first; i <= last; ++i)
5109 fixed_regs[i] = call_used_regs[i] = 1;
5110
5111 if (!comma)
5112 break;
5113
5114 *comma = ',';
5115 str = comma + 1;
5116 }
5117}
5118
644459d0 5119/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5120 can be generated using the fsmbi instruction. */
5121int
5122fsmbi_const_p (rtx x)
5123{
dea01258 5124 if (CONSTANT_P (x))
5125 {
5df189be 5126 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5127 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5128 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5129 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5130 }
5131 return 0;
5132}
5133
5134/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5135 can be generated using the cbd, chd, cwd or cdd instruction. */
5136int
5137cpat_const_p (rtx x, enum machine_mode mode)
5138{
5139 if (CONSTANT_P (x))
5140 {
5141 enum immediate_class c = classify_immediate (x, mode);
5142 return c == IC_CPAT;
5143 }
5144 return 0;
5145}
644459d0 5146
dea01258 5147rtx
5148gen_cpat_const (rtx * ops)
5149{
5150 unsigned char dst[16];
5151 int i, offset, shift, isize;
5152 if (GET_CODE (ops[3]) != CONST_INT
5153 || GET_CODE (ops[2]) != CONST_INT
5154 || (GET_CODE (ops[1]) != CONST_INT
5155 && GET_CODE (ops[1]) != REG))
5156 return 0;
5157 if (GET_CODE (ops[1]) == REG
5158 && (!REG_POINTER (ops[1])
5159 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5160 return 0;
644459d0 5161
5162 for (i = 0; i < 16; i++)
dea01258 5163 dst[i] = i + 16;
5164 isize = INTVAL (ops[3]);
5165 if (isize == 1)
5166 shift = 3;
5167 else if (isize == 2)
5168 shift = 2;
5169 else
5170 shift = 0;
5171 offset = (INTVAL (ops[2]) +
5172 (GET_CODE (ops[1]) ==
5173 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5174 for (i = 0; i < isize; i++)
5175 dst[offset + i] = i + shift;
5176 return array_to_constant (TImode, dst);
644459d0 5177}
5178
5179/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5180 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5181 than 16 bytes, the value is repeated across the rest of the array. */
5182void
5183constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5184{
5185 HOST_WIDE_INT val;
5186 int i, j, first;
5187
5188 memset (arr, 0, 16);
5189 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5190 if (GET_CODE (x) == CONST_INT
5191 || (GET_CODE (x) == CONST_DOUBLE
5192 && (mode == SFmode || mode == DFmode)))
5193 {
5194 gcc_assert (mode != VOIDmode && mode != BLKmode);
5195
5196 if (GET_CODE (x) == CONST_DOUBLE)
5197 val = const_double_to_hwint (x);
5198 else
5199 val = INTVAL (x);
5200 first = GET_MODE_SIZE (mode) - 1;
5201 for (i = first; i >= 0; i--)
5202 {
5203 arr[i] = val & 0xff;
5204 val >>= 8;
5205 }
5206 /* Splat the constant across the whole array. */
5207 for (j = 0, i = first + 1; i < 16; i++)
5208 {
5209 arr[i] = arr[j];
5210 j = (j == first) ? 0 : j + 1;
5211 }
5212 }
5213 else if (GET_CODE (x) == CONST_DOUBLE)
5214 {
5215 val = CONST_DOUBLE_LOW (x);
5216 for (i = 15; i >= 8; i--)
5217 {
5218 arr[i] = val & 0xff;
5219 val >>= 8;
5220 }
5221 val = CONST_DOUBLE_HIGH (x);
5222 for (i = 7; i >= 0; i--)
5223 {
5224 arr[i] = val & 0xff;
5225 val >>= 8;
5226 }
5227 }
5228 else if (GET_CODE (x) == CONST_VECTOR)
5229 {
5230 int units;
5231 rtx elt;
5232 mode = GET_MODE_INNER (mode);
5233 units = CONST_VECTOR_NUNITS (x);
5234 for (i = 0; i < units; i++)
5235 {
5236 elt = CONST_VECTOR_ELT (x, i);
5237 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5238 {
5239 if (GET_CODE (elt) == CONST_DOUBLE)
5240 val = const_double_to_hwint (elt);
5241 else
5242 val = INTVAL (elt);
5243 first = GET_MODE_SIZE (mode) - 1;
5244 if (first + i * GET_MODE_SIZE (mode) > 16)
5245 abort ();
5246 for (j = first; j >= 0; j--)
5247 {
5248 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5249 val >>= 8;
5250 }
5251 }
5252 }
5253 }
5254 else
5255 gcc_unreachable();
5256}
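/* Illustrative examples: a QImode CONST_INT of 0xab fills the array with
   sixteen copies of 0xab, while an SImode 0x01020304 produces the byte
   pattern 01 02 03 04 repeated four times, matching the big-endian
   register image.  */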
5257
5258/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5259 smaller than 16 bytes, use the bytes that would represent that value
5260 in a register, e.g., for QImode return the value of arr[3]. */
5261rtx
e96f2783 5262array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5263{
5264 enum machine_mode inner_mode;
5265 rtvec v;
5266 int units, size, i, j, k;
5267 HOST_WIDE_INT val;
5268
5269 if (GET_MODE_CLASS (mode) == MODE_INT
5270 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5271 {
5272 j = GET_MODE_SIZE (mode);
5273 i = j < 4 ? 4 - j : 0;
5274 for (val = 0; i < j; i++)
5275 val = (val << 8) | arr[i];
5276 val = trunc_int_for_mode (val, mode);
5277 return GEN_INT (val);
5278 }
5279
5280 if (mode == TImode)
5281 {
5282 HOST_WIDE_INT high;
5283 for (i = high = 0; i < 8; i++)
5284 high = (high << 8) | arr[i];
5285 for (i = 8, val = 0; i < 16; i++)
5286 val = (val << 8) | arr[i];
5287 return immed_double_const (val, high, TImode);
5288 }
5289 if (mode == SFmode)
5290 {
5291 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5292 val = trunc_int_for_mode (val, SImode);
171b6d22 5293 return hwint_to_const_double (SFmode, val);
644459d0 5294 }
5295 if (mode == DFmode)
5296 {
1f915911 5297 for (i = 0, val = 0; i < 8; i++)
5298 val = (val << 8) | arr[i];
171b6d22 5299 return hwint_to_const_double (DFmode, val);
644459d0 5300 }
5301
5302 if (!VECTOR_MODE_P (mode))
5303 abort ();
5304
5305 units = GET_MODE_NUNITS (mode);
5306 size = GET_MODE_UNIT_SIZE (mode);
5307 inner_mode = GET_MODE_INNER (mode);
5308 v = rtvec_alloc (units);
5309
5310 for (k = i = 0; i < units; ++i)
5311 {
5312 val = 0;
5313 for (j = 0; j < size; j++, k++)
5314 val = (val << 8) | arr[k];
5315
5316 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5317 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5318 else
5319 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5320 }
5321 if (k > 16)
5322 abort ();
5323
5324 return gen_rtx_CONST_VECTOR (mode, v);
5325}
5326
5327static void
5328reloc_diagnostic (rtx x)
5329{
712d2297 5330 tree decl = 0;
644459d0 5331 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5332 return;
5333
5334 if (GET_CODE (x) == SYMBOL_REF)
5335 decl = SYMBOL_REF_DECL (x);
5336 else if (GET_CODE (x) == CONST
5337 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5338 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5339
5340 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5341 if (decl && !DECL_P (decl))
5342 decl = 0;
5343
644459d0 5344 /* The decl could be a string constant. */
5345 if (decl && DECL_P (decl))
712d2297 5346 {
5347 location_t loc;
5348 /* We use last_assemble_variable_decl to get line information. It's
5349 not always going to be right and might not even be close, but will
5350 be right for the more common cases. */
5351 if (!last_assemble_variable_decl || in_section == ctors_section)
5352 loc = DECL_SOURCE_LOCATION (decl);
5353 else
5354 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5355
712d2297 5356 if (TARGET_WARN_RELOC)
5357 warning_at (loc, 0,
5358 "creating run-time relocation for %qD", decl);
5359 else
5360 error_at (loc,
5361 "creating run-time relocation for %qD", decl);
5362 }
5363 else
5364 {
5365 if (TARGET_WARN_RELOC)
5366 warning_at (input_location, 0, "creating run-time relocation");
5367 else
5368 error_at (input_location, "creating run-time relocation");
5369 }
644459d0 5370}
5371
5372/* Hook into assemble_integer so we can generate an error for run-time
5373 relocations. The SPU ABI disallows them. */
5374static bool
5375spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5376{
 5377 /* By default run-time relocations aren't supported, but we allow them
 5378 in case users support them in their own run-time loader, and we provide
 5379 a warning for those users that don't. */
5380 if ((GET_CODE (x) == SYMBOL_REF)
5381 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5382 reloc_diagnostic (x);
5383
5384 return default_assemble_integer (x, size, aligned_p);
5385}
5386
5387static void
5388spu_asm_globalize_label (FILE * file, const char *name)
5389{
5390 fputs ("\t.global\t", file);
5391 assemble_name (file, name);
5392 fputs ("\n", file);
5393}
5394
5395static bool
f529eb25 5396spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5397 bool speed ATTRIBUTE_UNUSED)
644459d0 5398{
5399 enum machine_mode mode = GET_MODE (x);
5400 int cost = COSTS_N_INSNS (2);
5401
5402 /* Folding to a CONST_VECTOR will use extra space but there might
5403 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5404 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5405 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5406 because this cost will only be compared against a single insn.
5407 if (code == CONST_VECTOR)
5408 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5409 */
5410
5411 /* Use defaults for float operations. Not accurate but good enough. */
5412 if (mode == DFmode)
5413 {
5414 *total = COSTS_N_INSNS (13);
5415 return true;
5416 }
5417 if (mode == SFmode)
5418 {
5419 *total = COSTS_N_INSNS (6);
5420 return true;
5421 }
5422 switch (code)
5423 {
5424 case CONST_INT:
5425 if (satisfies_constraint_K (x))
5426 *total = 0;
5427 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5428 *total = COSTS_N_INSNS (1);
5429 else
5430 *total = COSTS_N_INSNS (3);
5431 return true;
5432
5433 case CONST:
5434 *total = COSTS_N_INSNS (3);
5435 return true;
5436
5437 case LABEL_REF:
5438 case SYMBOL_REF:
5439 *total = COSTS_N_INSNS (0);
5440 return true;
5441
5442 case CONST_DOUBLE:
5443 *total = COSTS_N_INSNS (5);
5444 return true;
5445
5446 case FLOAT_EXTEND:
5447 case FLOAT_TRUNCATE:
5448 case FLOAT:
5449 case UNSIGNED_FLOAT:
5450 case FIX:
5451 case UNSIGNED_FIX:
5452 *total = COSTS_N_INSNS (7);
5453 return true;
5454
5455 case PLUS:
5456 if (mode == TImode)
5457 {
5458 *total = COSTS_N_INSNS (9);
5459 return true;
5460 }
5461 break;
5462
5463 case MULT:
5464 cost =
5465 GET_CODE (XEXP (x, 0)) ==
5466 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5467 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5468 {
5469 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5470 {
5471 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5472 cost = COSTS_N_INSNS (14);
5473 if ((val & 0xffff) == 0)
5474 cost = COSTS_N_INSNS (9);
5475 else if (val > 0 && val < 0x10000)
5476 cost = COSTS_N_INSNS (11);
5477 }
5478 }
5479 *total = cost;
5480 return true;
5481 case DIV:
5482 case UDIV:
5483 case MOD:
5484 case UMOD:
5485 *total = COSTS_N_INSNS (20);
5486 return true;
5487 case ROTATE:
5488 case ROTATERT:
5489 case ASHIFT:
5490 case ASHIFTRT:
5491 case LSHIFTRT:
5492 *total = COSTS_N_INSNS (4);
5493 return true;
5494 case UNSPEC:
5495 if (XINT (x, 1) == UNSPEC_CONVERT)
5496 *total = COSTS_N_INSNS (0);
5497 else
5498 *total = COSTS_N_INSNS (4);
5499 return true;
5500 }
5501 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5502 if (GET_MODE_CLASS (mode) == MODE_INT
5503 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5504 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5505 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5506 *total = cost;
5507 return true;
5508}
5509
1bd43494 5510static enum machine_mode
5511spu_unwind_word_mode (void)
644459d0 5512{
1bd43494 5513 return SImode;
644459d0 5514}
5515
5516/* Decide whether we can make a sibling call to a function. DECL is the
5517 declaration of the function being targeted by the call and EXP is the
5518 CALL_EXPR representing the call. */
5519static bool
5520spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5521{
5522 return decl && !TARGET_LARGE_MEM;
5523}
5524
5525/* We need to correctly update the back chain pointer and the Available
5526	   Stack Size (which is in the second slot of the sp register).  */
5527void
5528spu_allocate_stack (rtx op0, rtx op1)
5529{
5530 HOST_WIDE_INT v;
5531 rtx chain = gen_reg_rtx (V4SImode);
5532 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5533 rtx sp = gen_reg_rtx (V4SImode);
5534 rtx splatted = gen_reg_rtx (V4SImode);
5535 rtx pat = gen_reg_rtx (TImode);
5536
5537	  /* Copy the back chain so we can write it back after adjusting sp.  */
5538 emit_move_insn (chain, stack_bot);
5539
5540 op1 = force_reg (SImode, op1);
5541
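	  /* Splat op1 (the requested allocation size) into all four word slots so
	     a single V4SImode subtraction updates both the stack pointer (slot 0)
	     and the Available Stack Size (slot 1).  */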
5542 v = 0x1020300010203ll;
5543 emit_move_insn (pat, immed_double_const (v, v, TImode));
5544 emit_insn (gen_shufb (splatted, op1, op1, pat));
5545
5546 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5547 emit_insn (gen_subv4si3 (sp, sp, splatted));
5548
5549 if (flag_stack_check)
5550 {
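	      /* Extract the updated Available Stack Size (slot 1) and halt if it
	         went negative, i.e. the allocation overflowed the stack.  */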
5551	      rtx avail = gen_reg_rtx (SImode);
5552	      rtx result = gen_reg_rtx (SImode);
5553	      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5554	      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5555	      emit_insn (gen_spu_heq (result, GEN_INT (0)));
5556 }
5557
5558 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5559
5560 emit_move_insn (stack_bot, chain);
5561
5562 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5563}
5564
5565void
5566spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5567{
5568 static unsigned char arr[16] =
5569 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
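	  /* The pattern 0,1,2,3 repeated splats the first word of its source into
	     all four word slots of the shuffle result.  */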
5570 rtx temp = gen_reg_rtx (SImode);
5571 rtx temp2 = gen_reg_rtx (SImode);
5572 rtx temp3 = gen_reg_rtx (V4SImode);
5573 rtx temp4 = gen_reg_rtx (V4SImode);
5574 rtx pat = gen_reg_rtx (TImode);
5575 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5576
5577 /* Restore the backchain from the first word, sp from the second. */
5578 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5579 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5580
5581 emit_move_insn (pat, array_to_constant (TImode, arr));
5582
5583 /* Compute Available Stack Size for sp */
5584 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5585 emit_insn (gen_shufb (temp3, temp, temp, pat));
5586
5587 /* Compute Available Stack Size for back chain */
5588 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5589 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5590 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5591
5592 emit_insn (gen_addv4si3 (sp, sp, temp3));
5593 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5594}
5595
5596static void
5597spu_init_libfuncs (void)
5598{
5599 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5600 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5601 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5602 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5603 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5604 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5605 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5606 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5607 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5608 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5609 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5610
5611 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5612 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5613
5614 set_optab_libfunc (smul_optab, TImode, "__multi3");
5615 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5616 set_optab_libfunc (smod_optab, TImode, "__modti3");
5617 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5618 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5619 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5620}
5621
5622/* Make a subreg, stripping any existing subreg. We could possibly just
5623 call simplify_subreg, but in this case we know what we want. */
5624rtx
5625spu_gen_subreg (enum machine_mode mode, rtx x)
5626{
5627 if (GET_CODE (x) == SUBREG)
5628 x = SUBREG_REG (x);
5629 if (GET_MODE (x) == mode)
5630 return x;
5631 return gen_rtx_SUBREG (mode, x, 0);
5632}
5633
5634static bool
fb80456a 5635spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5636{
5637 return (TYPE_MODE (type) == BLKmode
5638 && ((type) == 0
5639 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5640 || int_size_in_bytes (type) >
5641 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5642}
5643\f
5644/* Create the built-in types and functions */
5645
c2233b46 5646enum spu_function_code
5647{
5648#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5649#include "spu-builtins.def"
5650#undef DEF_BUILTIN
5651 NUM_SPU_BUILTINS
5652};
5653
5654extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5655
644459d0 5656struct spu_builtin_description spu_builtins[] = {
5657#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5658 {fcode, icode, name, type, params},
644459d0 5659#include "spu-builtins.def"
5660#undef DEF_BUILTIN
5661};
5662
0c5c4d59 5663static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5664
5665/* Returns the spu builtin decl for CODE. */
e6925042 5666
5667static tree
5668spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5669{
5670 if (code >= NUM_SPU_BUILTINS)
5671 return error_mark_node;
5672
0c5c4d59 5673 return spu_builtin_decls[code];
e6925042 5674}
5675
5676
644459d0 5677static void
5678spu_init_builtins (void)
5679{
5680 struct spu_builtin_description *d;
5681 unsigned int i;
5682
5683 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5684 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5685 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5686 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5687 V4SF_type_node = build_vector_type (float_type_node, 4);
5688 V2DF_type_node = build_vector_type (double_type_node, 2);
5689
5690 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5691 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5692 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5693 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5694
c4ecce0c 5695 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5696
5697 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5698 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5699 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5700 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5701 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5702 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5703 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5704 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5705 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5706 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5707 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5708 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5709
5710 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5711 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5712 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5713 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5714 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5715 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5716 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5717 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5718
5719 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5720 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5721
5722 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5723
5724 spu_builtin_types[SPU_BTI_PTR] =
5725 build_pointer_type (build_qualified_type
5726 (void_type_node,
5727 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5728
5729 /* For each builtin we build a new prototype. The tree code will make
5730 sure nodes are shared. */
5731 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5732 {
5733 tree p;
5734	      char name[64];	/* add_builtin_function will make a copy.  */
5735 int parm;
5736
5737 if (d->name == 0)
5738 continue;
5739
5dfbd18f 5740 /* Find last parm. */
644459d0 5741 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5742 ;
644459d0 5743
5744 p = void_list_node;
5745 while (parm > 1)
5746 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5747
5748 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5749
5750 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5751 spu_builtin_decls[i] =
3726fe5e 5752 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5753 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5754 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5755
5756 /* These builtins don't throw. */
0c5c4d59 5757 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5758 }
5759}
5760
cf31d486 5761void
5762spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5763{
5764 static unsigned char arr[16] =
5765 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5766
5767 rtx temp = gen_reg_rtx (Pmode);
5768 rtx temp2 = gen_reg_rtx (V4SImode);
5769 rtx temp3 = gen_reg_rtx (V4SImode);
5770 rtx pat = gen_reg_rtx (TImode);
5771 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5772
5773 emit_move_insn (pat, array_to_constant (TImode, arr));
5774
5775 /* Restore the sp. */
5776 emit_move_insn (temp, op1);
5777 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5778
5779 /* Compute available stack size for sp. */
5780 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5781 emit_insn (gen_shufb (temp3, temp, temp, pat));
5782
5783 emit_insn (gen_addv4si3 (sp, sp, temp3));
5784 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5785}
5786
644459d0 5787int
5788spu_safe_dma (HOST_WIDE_INT channel)
5789{
006e4b96 5790 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5791}
5792
5793void
5794spu_builtin_splats (rtx ops[])
5795{
5796 enum machine_mode mode = GET_MODE (ops[0]);
5797 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5798 {
5799 unsigned char arr[16];
5800 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5801 emit_move_insn (ops[0], array_to_constant (mode, arr));
5802 }
644459d0 5803 else
5804 {
5805 rtx reg = gen_reg_rtx (TImode);
5806 rtx shuf;
5807 if (GET_CODE (ops[1]) != REG
5808 && GET_CODE (ops[1]) != SUBREG)
5809 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
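	      /* Each shuffle pattern below replicates the element sitting in the
	         preferred scalar slot of ops[1] into every element of the result.  */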
5810 switch (mode)
5811 {
5812 case V2DImode:
5813 case V2DFmode:
5814 shuf =
5815 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5816 TImode);
5817 break;
5818 case V4SImode:
5819 case V4SFmode:
5820 shuf =
5821 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5822 TImode);
5823 break;
5824 case V8HImode:
5825 shuf =
5826 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5827 TImode);
5828 break;
5829 case V16QImode:
5830 shuf =
5831 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5832 TImode);
5833 break;
5834 default:
5835 abort ();
5836 }
5837 emit_move_insn (reg, shuf);
5838 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5839 }
5840}
5841
5842void
5843spu_builtin_extract (rtx ops[])
5844{
5845 enum machine_mode mode;
5846 rtx rot, from, tmp;
5847
5848 mode = GET_MODE (ops[1]);
5849
5850 if (GET_CODE (ops[2]) == CONST_INT)
5851 {
5852 switch (mode)
5853 {
5854 case V16QImode:
5855 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5856 break;
5857 case V8HImode:
5858 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5859 break;
5860 case V4SFmode:
5861 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5862 break;
5863 case V4SImode:
5864 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5865 break;
5866 case V2DImode:
5867 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5868 break;
5869 case V2DFmode:
5870 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5871 break;
5872 default:
5873 abort ();
5874 }
5875 return;
5876 }
5877
5878 from = spu_gen_subreg (TImode, ops[1]);
5879 rot = gen_reg_rtx (TImode);
5880 tmp = gen_reg_rtx (SImode);
5881
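	  /* For a variable index, compute in TMP the byte rotate count that brings
	     the selected element into the preferred scalar slot, rotate the whole
	     quadword, and convert the result to the scalar mode.  */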
5882 switch (mode)
5883 {
5884 case V16QImode:
5885 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5886 break;
5887 case V8HImode:
5888 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5889 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5890 break;
5891 case V4SFmode:
5892 case V4SImode:
5893 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5894 break;
5895 case V2DImode:
5896 case V2DFmode:
5897 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5898 break;
5899 default:
5900 abort ();
5901 }
5902 emit_insn (gen_rotqby_ti (rot, from, tmp));
5903
5904 emit_insn (gen_spu_convert (ops[0], rot));
5905}
5906
5907void
5908spu_builtin_insert (rtx ops[])
5909{
5910 enum machine_mode mode = GET_MODE (ops[0]);
5911 enum machine_mode imode = GET_MODE_INNER (mode);
5912 rtx mask = gen_reg_rtx (TImode);
5913 rtx offset;
5914
5915 if (GET_CODE (ops[3]) == CONST_INT)
5916 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5917 else
5918 {
5919 offset = gen_reg_rtx (SImode);
5920 emit_insn (gen_mulsi3
5921 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5922 }
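	  /* cpat generates the shuffle control for inserting an element of
	     GET_MODE_SIZE (imode) bytes at OFFSET; the 16-byte-aligned stack
	     pointer serves only as a base, so the control depends just on the
	     offset.  shufb then applies that control to merge the new element
	     into the vector.  */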
5923 emit_insn (gen_cpat
5924 (mask, stack_pointer_rtx, offset,
5925 GEN_INT (GET_MODE_SIZE (imode))));
5926 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5927}
5928
5929void
5930spu_builtin_promote (rtx ops[])
5931{
5932 enum machine_mode mode, imode;
5933 rtx rot, from, offset;
5934 HOST_WIDE_INT pos;
5935
5936 mode = GET_MODE (ops[0]);
5937 imode = GET_MODE_INNER (mode);
5938
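	  /* ops[1] holds the scalar in its preferred slot; rotate it so that it
	     lands in element ops[2] of the result vector.  */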
5939 from = gen_reg_rtx (TImode);
5940 rot = spu_gen_subreg (TImode, ops[0]);
5941
5942 emit_insn (gen_spu_convert (from, ops[1]));
5943
5944 if (GET_CODE (ops[2]) == CONST_INT)
5945 {
5946 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5947 if (GET_MODE_SIZE (imode) < 4)
5948 pos += 4 - GET_MODE_SIZE (imode);
5949 offset = GEN_INT (pos & 15);
5950 }
5951 else
5952 {
5953 offset = gen_reg_rtx (SImode);
5954 switch (mode)
5955 {
5956 case V16QImode:
5957 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5958 break;
5959 case V8HImode:
5960 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5961 emit_insn (gen_addsi3 (offset, offset, offset));
5962 break;
5963 case V4SFmode:
5964 case V4SImode:
5965 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5966 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5967 break;
5968 case V2DImode:
5969 case V2DFmode:
5970 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5971 break;
5972 default:
5973 abort ();
5974 }
5975 }
5976 emit_insn (gen_rotqby_ti (rot, from, offset));
5977}
5978
e96f2783 5979static void
5980spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5981{
e96f2783 5982 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5983 rtx shuf = gen_reg_rtx (V4SImode);
5984 rtx insn = gen_reg_rtx (V4SImode);
5985 rtx shufc;
5986 rtx insnc;
5987 rtx mem;
5988
5989 fnaddr = force_reg (SImode, fnaddr);
5990 cxt = force_reg (SImode, cxt);
5991
5992 if (TARGET_LARGE_MEM)
5993 {
5994 rtx rotl = gen_reg_rtx (V4SImode);
5995 rtx mask = gen_reg_rtx (V4SImode);
5996 rtx bi = gen_reg_rtx (SImode);
e96f2783 5997 static unsigned char const shufa[16] = {
644459d0 5998 2, 3, 0, 1, 18, 19, 16, 17,
5999 0, 1, 2, 3, 16, 17, 18, 19
6000 };
e96f2783 6001 static unsigned char const insna[16] = {
644459d0 6002 0x41, 0, 0, 79,
6003 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6004 0x60, 0x80, 0, 79,
6005 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6006 };
6007
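	      /* insna holds the instruction templates for the trampoline.  The
	         shufb/rotate sequence below lines the function address and the
	         static chain value up with the instructions' 16-bit immediate
	         fields, and selb merges those fields into the templates.  */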
6008 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6009 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6010
6011 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6012 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6013 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6014 emit_insn (gen_selb (insn, insnc, rotl, mask));
6015
e96f2783 6016 mem = adjust_address (m_tramp, V4SImode, 0);
6017 emit_move_insn (mem, insn);
644459d0 6018
6019 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6020 mem = adjust_address (m_tramp, Pmode, 16);
6021 emit_move_insn (mem, bi);
644459d0 6022 }
6023 else
6024 {
6025 rtx scxt = gen_reg_rtx (SImode);
6026 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6027 static unsigned char const insna[16] = {
644459d0 6028 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6029 0x30, 0, 0, 0,
6030 0, 0, 0, 0,
6031 0, 0, 0, 0
6032 };
6033
6034 shufc = gen_reg_rtx (TImode);
6035 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6036
6037	      /* By or'ing all of cxt with the ila opcode we are assuming cxt
6038	         fits in 18 bits and the last 4 are zeros.  This will be true if
6039 the stack pointer is initialized to 0x3fff0 at program start,
6040 otherwise the ila instruction will be garbage. */
6041
6042 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6043 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6044 emit_insn (gen_cpat
6045 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6046 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6047 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6048
e96f2783 6049 mem = adjust_address (m_tramp, V4SImode, 0);
6050 emit_move_insn (mem, insn);
644459d0 6051 }
6052 emit_insn (gen_sync ());
6053}
6054
6055void
6056spu_expand_sign_extend (rtx ops[])
6057{
6058 unsigned char arr[16];
6059 rtx pat = gen_reg_rtx (TImode);
6060 rtx sign, c;
6061 int i, last;
6062 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
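	  /* In the shufb patterns built here, byte values 0x00-0x0f select bytes
	     of ops[1] and 0x10-0x1f select bytes of the SIGN register, so ARR
	     replicates the sign byte across the result and places the original
	     value's bytes at the least significant end.  */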
6063 if (GET_MODE (ops[1]) == QImode)
6064 {
6065 sign = gen_reg_rtx (HImode);
6066 emit_insn (gen_extendqihi2 (sign, ops[1]));
6067 for (i = 0; i < 16; i++)
6068 arr[i] = 0x12;
6069 arr[last] = 0x13;
6070 }
6071 else
6072 {
6073 for (i = 0; i < 16; i++)
6074 arr[i] = 0x10;
6075 switch (GET_MODE (ops[1]))
6076 {
6077 case HImode:
6078 sign = gen_reg_rtx (SImode);
6079 emit_insn (gen_extendhisi2 (sign, ops[1]));
6080 arr[last] = 0x03;
6081 arr[last - 1] = 0x02;
6082 break;
6083 case SImode:
6084 sign = gen_reg_rtx (SImode);
6085 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6086 for (i = 0; i < 4; i++)
6087 arr[last - i] = 3 - i;
6088 break;
6089 case DImode:
6090 sign = gen_reg_rtx (SImode);
6091 c = gen_reg_rtx (SImode);
6092 emit_insn (gen_spu_convert (c, ops[1]));
6093 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6094 for (i = 0; i < 8; i++)
6095 arr[last - i] = 7 - i;
6096 break;
6097 default:
6098 abort ();
6099 }
6100 }
6101 emit_move_insn (pat, array_to_constant (TImode, arr));
6102 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6103}
6104
6105/* Expand vector initialization.  If there are any constant parts,
6106   load the constant parts first, then load any non-constant parts.  */
6107void
6108spu_expand_vector_init (rtx target, rtx vals)
6109{
6110 enum machine_mode mode = GET_MODE (target);
6111 int n_elts = GET_MODE_NUNITS (mode);
6112 int n_var = 0;
6113 bool all_same = true;
790c536c 6114 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6115 int i;
6116
6117 first = XVECEXP (vals, 0, 0);
6118 for (i = 0; i < n_elts; ++i)
6119 {
6120 x = XVECEXP (vals, 0, i);
e442af0b 6121 if (!(CONST_INT_P (x)
6122 || GET_CODE (x) == CONST_DOUBLE
6123 || GET_CODE (x) == CONST_FIXED))
644459d0 6124 ++n_var;
6125 else
6126 {
6127 if (first_constant == NULL_RTX)
6128 first_constant = x;
6129 }
6130 if (i > 0 && !rtx_equal_p (x, first))
6131 all_same = false;
6132 }
6133
6134 /* if all elements are the same, use splats to repeat elements */
6135 if (all_same)
6136 {
6137 if (!CONSTANT_P (first)
6138 && !register_operand (first, GET_MODE (x)))
6139 first = force_reg (GET_MODE (first), first);
6140 emit_insn (gen_spu_splats (target, first));
6141 return;
6142 }
6143
6144 /* load constant parts */
6145 if (n_var != n_elts)
6146 {
6147 if (n_var == 0)
6148 {
6149 emit_move_insn (target,
6150 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6151 }
6152 else
6153 {
6154 rtx constant_parts_rtx = copy_rtx (vals);
6155
6156 gcc_assert (first_constant != NULL_RTX);
6157	      /* Fill empty slots with the first constant; this increases
6158	         our chance of using splats in the recursive call below.  */
6159 for (i = 0; i < n_elts; ++i)
e442af0b 6160 {
6161 x = XVECEXP (constant_parts_rtx, 0, i);
6162 if (!(CONST_INT_P (x)
6163 || GET_CODE (x) == CONST_DOUBLE
6164 || GET_CODE (x) == CONST_FIXED))
6165 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6166 }
644459d0 6167
6168 spu_expand_vector_init (target, constant_parts_rtx);
6169 }
6170 }
6171
6172 /* load variable parts */
6173 if (n_var != 0)
6174 {
6175 rtx insert_operands[4];
6176
6177 insert_operands[0] = target;
6178 insert_operands[2] = target;
6179 for (i = 0; i < n_elts; ++i)
6180 {
6181 x = XVECEXP (vals, 0, i);
e442af0b 6182 if (!(CONST_INT_P (x)
6183 || GET_CODE (x) == CONST_DOUBLE
6184 || GET_CODE (x) == CONST_FIXED))
644459d0 6185 {
6186 if (!register_operand (x, GET_MODE (x)))
6187 x = force_reg (GET_MODE (x), x);
6188 insert_operands[1] = x;
6189 insert_operands[3] = GEN_INT (i);
6190 spu_builtin_insert (insert_operands);
6191 }
6192 }
6193 }
6194}
6352eedf 6195
5474166e 6196/* Return the insn code of the vector compare instruction for the given
 6197   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
6198
6199static int
6200get_vec_cmp_insn (enum rtx_code code,
6201 enum machine_mode dest_mode,
6202 enum machine_mode op_mode)
6203
6204{
6205 switch (code)
6206 {
6207 case EQ:
6208 if (dest_mode == V16QImode && op_mode == V16QImode)
6209 return CODE_FOR_ceq_v16qi;
6210 if (dest_mode == V8HImode && op_mode == V8HImode)
6211 return CODE_FOR_ceq_v8hi;
6212 if (dest_mode == V4SImode && op_mode == V4SImode)
6213 return CODE_FOR_ceq_v4si;
6214 if (dest_mode == V4SImode && op_mode == V4SFmode)
6215 return CODE_FOR_ceq_v4sf;
6216 if (dest_mode == V2DImode && op_mode == V2DFmode)
6217 return CODE_FOR_ceq_v2df;
6218 break;
6219 case GT:
6220 if (dest_mode == V16QImode && op_mode == V16QImode)
6221 return CODE_FOR_cgt_v16qi;
6222 if (dest_mode == V8HImode && op_mode == V8HImode)
6223 return CODE_FOR_cgt_v8hi;
6224 if (dest_mode == V4SImode && op_mode == V4SImode)
6225 return CODE_FOR_cgt_v4si;
6226 if (dest_mode == V4SImode && op_mode == V4SFmode)
6227 return CODE_FOR_cgt_v4sf;
6228 if (dest_mode == V2DImode && op_mode == V2DFmode)
6229 return CODE_FOR_cgt_v2df;
6230 break;
6231 case GTU:
6232 if (dest_mode == V16QImode && op_mode == V16QImode)
6233 return CODE_FOR_clgt_v16qi;
6234 if (dest_mode == V8HImode && op_mode == V8HImode)
6235 return CODE_FOR_clgt_v8hi;
6236 if (dest_mode == V4SImode && op_mode == V4SImode)
6237 return CODE_FOR_clgt_v4si;
6238 break;
6239 default:
6240 break;
6241 }
6242 return -1;
6243}
6244
6245/* Emit vector compare for operands OP0 and OP1 using code RCODE.
 6246   DMODE is the expected destination mode.  This is a recursive function.  */
6247
6248static rtx
6249spu_emit_vector_compare (enum rtx_code rcode,
6250 rtx op0, rtx op1,
6251 enum machine_mode dmode)
6252{
6253 int vec_cmp_insn;
6254 rtx mask;
6255 enum machine_mode dest_mode;
6256 enum machine_mode op_mode = GET_MODE (op1);
6257
6258 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6259
 6260  /* Floating point vector compare instructions use destination V4SImode.
 6261     Double floating point vector compare instructions use destination V2DImode.
6262 Move destination to appropriate mode later. */
6263 if (dmode == V4SFmode)
6264 dest_mode = V4SImode;
6265 else if (dmode == V2DFmode)
6266 dest_mode = V2DImode;
6267 else
6268 dest_mode = dmode;
6269
6270 mask = gen_reg_rtx (dest_mode);
6271 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6272
6273 if (vec_cmp_insn == -1)
6274 {
6275 bool swap_operands = false;
6276 bool try_again = false;
6277 switch (rcode)
6278 {
6279 case LT:
6280 rcode = GT;
6281 swap_operands = true;
6282 try_again = true;
6283 break;
6284 case LTU:
6285 rcode = GTU;
6286 swap_operands = true;
6287 try_again = true;
6288 break;
6289 case NE:
6290 /* Treat A != B as ~(A==B). */
6291 {
6292 enum insn_code nor_code;
6293 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6294 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6295 gcc_assert (nor_code != CODE_FOR_nothing);
6296 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6297 if (dmode != dest_mode)
6298 {
6299 rtx temp = gen_reg_rtx (dest_mode);
6300 convert_move (temp, mask, 0);
6301 return temp;
6302 }
6303 return mask;
6304 }
6305 break;
6306 case GE:
6307 case GEU:
6308 case LE:
6309 case LEU:
6310 /* Try GT/GTU/LT/LTU OR EQ */
6311 {
6312 rtx c_rtx, eq_rtx;
6313 enum insn_code ior_code;
6314 enum rtx_code new_code;
6315
6316 switch (rcode)
6317 {
6318 case GE: new_code = GT; break;
6319 case GEU: new_code = GTU; break;
6320 case LE: new_code = LT; break;
6321 case LEU: new_code = LTU; break;
6322 default:
6323 gcc_unreachable ();
6324 }
6325
6326 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6327 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6328
d6bf3b14 6329 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6330 gcc_assert (ior_code != CODE_FOR_nothing);
6331 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6332 if (dmode != dest_mode)
6333 {
6334 rtx temp = gen_reg_rtx (dest_mode);
6335 convert_move (temp, mask, 0);
6336 return temp;
6337 }
6338 return mask;
6339 }
6340 break;
6341 default:
6342 gcc_unreachable ();
6343 }
6344
6345 /* You only get two chances. */
6346 if (try_again)
6347 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6348
6349 gcc_assert (vec_cmp_insn != -1);
6350
6351 if (swap_operands)
6352 {
6353 rtx tmp;
6354 tmp = op0;
6355 op0 = op1;
6356 op1 = tmp;
6357 }
6358 }
6359
6360 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6361 if (dmode != dest_mode)
6362 {
6363 rtx temp = gen_reg_rtx (dest_mode);
6364 convert_move (temp, mask, 0);
6365 return temp;
6366 }
6367 return mask;
6368}
6369
6370
6371/* Emit vector conditional expression.
6372 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6373 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6374
6375int
6376spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6377 rtx cond, rtx cc_op0, rtx cc_op1)
6378{
6379 enum machine_mode dest_mode = GET_MODE (dest);
6380 enum rtx_code rcode = GET_CODE (cond);
6381 rtx mask;
6382
6383 /* Get the vector mask for the given relational operations. */
6384 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6385
6386 emit_insn(gen_selb (dest, op2, op1, mask));
6387
6388 return 1;
6389}
6390
6352eedf 6391static rtx
6392spu_force_reg (enum machine_mode mode, rtx op)
6393{
6394 rtx x, r;
6395 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6396 {
6397 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6398 || GET_MODE (op) == BLKmode)
6399 return force_reg (mode, convert_to_mode (mode, op, 0));
6400 abort ();
6401 }
6402
6403 r = force_reg (GET_MODE (op), op);
6404 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6405 {
6406 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6407 if (x)
6408 return x;
6409 }
6410
6411 x = gen_reg_rtx (mode);
6412 emit_insn (gen_spu_convert (x, r));
6413 return x;
6414}
6415
6416static void
6417spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6418{
6419 HOST_WIDE_INT v = 0;
6420 int lsbits;
6421 /* Check the range of immediate operands. */
6422 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6423 {
6424 int range = p - SPU_BTI_7;
5df189be 6425
6426 if (!CONSTANT_P (op))
6352eedf 6427 error ("%s expects an integer literal in the range [%d, %d].",
6428 d->name,
6429 spu_builtin_range[range].low, spu_builtin_range[range].high);
6430
6431 if (GET_CODE (op) == CONST
6432 && (GET_CODE (XEXP (op, 0)) == PLUS
6433 || GET_CODE (XEXP (op, 0)) == MINUS))
6434 {
6435 v = INTVAL (XEXP (XEXP (op, 0), 1));
6436 op = XEXP (XEXP (op, 0), 0);
6437 }
6438 else if (GET_CODE (op) == CONST_INT)
6439 v = INTVAL (op);
5df189be 6440 else if (GET_CODE (op) == CONST_VECTOR
6441 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6442 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6443
6444 /* The default for v is 0 which is valid in every range. */
6445 if (v < spu_builtin_range[range].low
6446 || v > spu_builtin_range[range].high)
6447 error ("%s expects an integer literal in the range [%d, %d]. ("
6448 HOST_WIDE_INT_PRINT_DEC ")",
6449 d->name,
6450 spu_builtin_range[range].low, spu_builtin_range[range].high,
6451 v);
6352eedf 6452
6453 switch (p)
6454 {
6455 case SPU_BTI_S10_4:
6456 lsbits = 4;
6457 break;
6458 case SPU_BTI_U16_2:
 6459	  /* This is only used in lqa and stqa.  Even though the insns
 6460	     encode 16 bits of the address (all but the 2 least
 6461	     significant), only 14 bits are used because it is masked to
 6462	     be 16-byte aligned.  */
6463 lsbits = 4;
6464 break;
6465 case SPU_BTI_S16_2:
6466 /* This is used for lqr and stqr. */
6467 lsbits = 2;
6468 break;
6469 default:
6470 lsbits = 0;
6471 }
6472
6473 if (GET_CODE (op) == LABEL_REF
6474 || (GET_CODE (op) == SYMBOL_REF
6475 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6476 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6477 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6478 d->name);
6479 }
6480}
6481
6482
70ca06f8 6483static int
5df189be 6484expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6485 rtx target, rtx ops[])
6486{
bc620c5c 6487 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6488 int i = 0, a;
6352eedf 6489
6490 /* Expand the arguments into rtl. */
6491
6492 if (d->parm[0] != SPU_BTI_VOID)
6493 ops[i++] = target;
6494
70ca06f8 6495 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6496 {
5df189be 6497 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6498 if (arg == 0)
6499 abort ();
b9c74b4d 6500 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6501 }
70ca06f8 6502
6503 /* The insn pattern may have additional operands (SCRATCH).
6504 Return the number of actual non-SCRATCH operands. */
6505 gcc_assert (i <= insn_data[icode].n_operands);
6506 return i;
6352eedf 6507}
6508
6509static rtx
6510spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6511 tree exp, rtx target)
6352eedf 6512{
6513 rtx pat;
6514 rtx ops[8];
bc620c5c 6515 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6516 enum machine_mode mode, tmode;
6517 int i, p;
70ca06f8 6518 int n_operands;
6352eedf 6519 tree return_type;
6520
6521 /* Set up ops[] with values from arglist. */
70ca06f8 6522 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6523
6524 /* Handle the target operand which must be operand 0. */
6525 i = 0;
6526 if (d->parm[0] != SPU_BTI_VOID)
6527 {
6528
 6529      /* We prefer the mode specified for the match_operand; otherwise
 6530         use the mode from the builtin function prototype.  */
6531 tmode = insn_data[d->icode].operand[0].mode;
6532 if (tmode == VOIDmode)
6533 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6534
 6535      /* Try to use target, because not using it can lead to extra copies,
 6536         and when all of the registers are in use, extra copies lead
 6537         to extra spills.  */
6538 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6539 ops[0] = target;
6540 else
6541 target = ops[0] = gen_reg_rtx (tmode);
6542
6543 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6544 abort ();
6545
6546 i++;
6547 }
6548
a76866d3 6549 if (d->fcode == SPU_MASK_FOR_LOAD)
6550 {
6551 enum machine_mode mode = insn_data[icode].operand[1].mode;
6552 tree arg;
6553 rtx addr, op, pat;
6554
6555 /* get addr */
5df189be 6556 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6557 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6558 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6559 addr = memory_address (mode, op);
6560
6561 /* negate addr */
6562 op = gen_reg_rtx (GET_MODE (addr));
6563 emit_insn (gen_rtx_SET (VOIDmode, op,
6564 gen_rtx_NEG (GET_MODE (addr), addr)));
6565 op = gen_rtx_MEM (mode, op);
6566
6567 pat = GEN_FCN (icode) (target, op);
6568 if (!pat)
6569 return 0;
6570 emit_insn (pat);
6571 return target;
6572 }
6573
6352eedf 6574  /* Ignore align_hint, but still expand its args in case they have
6575 side effects. */
6576 if (icode == CODE_FOR_spu_align_hint)
6577 return 0;
6578
6579 /* Handle the rest of the operands. */
70ca06f8 6580 for (p = 1; i < n_operands; i++, p++)
6352eedf 6581 {
6582 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6583 mode = insn_data[d->icode].operand[i].mode;
6584 else
6585 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6586
6587 /* mode can be VOIDmode here for labels */
6588
6589 /* For specific intrinsics with an immediate operand, e.g.,
6590 si_ai(), we sometimes need to convert the scalar argument to a
6591 vector argument by splatting the scalar. */
6592 if (VECTOR_MODE_P (mode)
6593 && (GET_CODE (ops[i]) == CONST_INT
6594 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6595 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6596 {
6597 if (GET_CODE (ops[i]) == CONST_INT)
6598 ops[i] = spu_const (mode, INTVAL (ops[i]));
6599 else
6600 {
6601 rtx reg = gen_reg_rtx (mode);
6602 enum machine_mode imode = GET_MODE_INNER (mode);
6603 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6604 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6605 if (imode != GET_MODE (ops[i]))
6606 ops[i] = convert_to_mode (imode, ops[i],
6607 TYPE_UNSIGNED (spu_builtin_types
6608 [d->parm[i]]));
6609 emit_insn (gen_spu_splats (reg, ops[i]));
6610 ops[i] = reg;
6611 }
6612 }
6613
5df189be 6614 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6615
6352eedf 6616 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6617 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6618 }
6619
70ca06f8 6620 switch (n_operands)
6352eedf 6621 {
6622 case 0:
6623 pat = GEN_FCN (icode) (0);
6624 break;
6625 case 1:
6626 pat = GEN_FCN (icode) (ops[0]);
6627 break;
6628 case 2:
6629 pat = GEN_FCN (icode) (ops[0], ops[1]);
6630 break;
6631 case 3:
6632 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6633 break;
6634 case 4:
6635 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6636 break;
6637 case 5:
6638 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6639 break;
6640 case 6:
6641 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6642 break;
6643 default:
6644 abort ();
6645 }
6646
6647 if (!pat)
6648 abort ();
6649
6650 if (d->type == B_CALL || d->type == B_BISLED)
6651 emit_call_insn (pat);
6652 else if (d->type == B_JUMP)
6653 {
6654 emit_jump_insn (pat);
6655 emit_barrier ();
6656 }
6657 else
6658 emit_insn (pat);
6659
6660 return_type = spu_builtin_types[d->parm[0]];
6661 if (d->parm[0] != SPU_BTI_VOID
6662 && GET_MODE (target) != TYPE_MODE (return_type))
6663 {
 6664      /* target is the return value.  It should always have the mode of
6665 the builtin function prototype. */
6666 target = spu_force_reg (TYPE_MODE (return_type), target);
6667 }
6668
6669 return target;
6670}
6671
6672rtx
6673spu_expand_builtin (tree exp,
6674 rtx target,
6675 rtx subtarget ATTRIBUTE_UNUSED,
6676 enum machine_mode mode ATTRIBUTE_UNUSED,
6677 int ignore ATTRIBUTE_UNUSED)
6678{
5df189be 6679 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6680 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6681 struct spu_builtin_description *d;
6682
6683 if (fcode < NUM_SPU_BUILTINS)
6684 {
6685 d = &spu_builtins[fcode];
6686
5df189be 6687 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6688 }
6689 abort ();
6690}
6691
e99f512d 6692/* Implement targetm.vectorize.builtin_mul_widen_even. */
6693static tree
6694spu_builtin_mul_widen_even (tree type)
6695{
e99f512d 6696 switch (TYPE_MODE (type))
6697 {
6698 case V8HImode:
6699 if (TYPE_UNSIGNED (type))
0c5c4d59 6700 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6701 else
0c5c4d59 6702 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6703 break;
6704 default:
6705 return NULL_TREE;
6706 }
6707}
6708
6709/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6710static tree
6711spu_builtin_mul_widen_odd (tree type)
6712{
6713 switch (TYPE_MODE (type))
6714 {
6715 case V8HImode:
6716 if (TYPE_UNSIGNED (type))
0c5c4d59 6717 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6718 else
0c5c4d59 6719 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6720 break;
6721 default:
6722 return NULL_TREE;
6723 }
6724}
6725
a76866d3 6726/* Implement targetm.vectorize.builtin_mask_for_load. */
6727static tree
6728spu_builtin_mask_for_load (void)
6729{
0c5c4d59 6730 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6731}
5df189be 6732
a28df51d 6733/* Implement targetm.vectorize.builtin_vectorization_cost. */
6734static int
0822b158 6735spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6736 tree vectype ATTRIBUTE_UNUSED,
6737 int misalign ATTRIBUTE_UNUSED)
559093aa 6738{
6739 switch (type_of_cost)
6740 {
6741 case scalar_stmt:
6742 case vector_stmt:
6743 case vector_load:
6744 case vector_store:
6745 case vec_to_scalar:
6746 case scalar_to_vec:
6747 case cond_branch_not_taken:
6748 case vec_perm:
6749 return 1;
6750
6751 case scalar_store:
6752 return 10;
6753
6754 case scalar_load:
6755 /* Load + rotate. */
6756 return 2;
6757
6758 case unaligned_load:
6759 return 2;
6760
6761 case cond_branch_taken:
6762 return 6;
6763
6764 default:
6765 gcc_unreachable ();
6766 }
a28df51d 6767}
6768
0e87db76 6769/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6770   after applying N iterations.  This routine does not determine
 6771   how many iterations are required to reach the desired alignment.  */
6772
6773static bool
a9f1838b 6774spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6775{
6776 if (is_packed)
6777 return false;
6778
6779 /* All other types are naturally aligned. */
6780 return true;
6781}
6782
a0515226 6783/* Implement targetm.vectorize.builtin_vec_perm. */
6784tree
6785spu_builtin_vec_perm (tree type, tree *mask_element_type)
6786{
a0515226 6787 *mask_element_type = unsigned_char_type_node;
6788
6789 switch (TYPE_MODE (type))
6790 {
6791 case V16QImode:
6792 if (TYPE_UNSIGNED (type))
0c5c4d59 6793 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6794 else
0c5c4d59 6795 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6796
6797 case V8HImode:
6798 if (TYPE_UNSIGNED (type))
0c5c4d59 6799 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6800 else
0c5c4d59 6801 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6802
6803 case V4SImode:
6804 if (TYPE_UNSIGNED (type))
0c5c4d59 6805 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6806 else
0c5c4d59 6807 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6808
6809 case V2DImode:
6810 if (TYPE_UNSIGNED (type))
0c5c4d59 6811 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6812 else
0c5c4d59 6813 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6814
6815 case V4SFmode:
0c5c4d59 6816 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6817
6818 case V2DFmode:
0c5c4d59 6819 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6820
6821 default:
6822 return NULL_TREE;
6823 }
a0515226 6824}
6825
6cf5579e 6826/* Return the appropriate mode for a named address pointer. */
6827static enum machine_mode
6828spu_addr_space_pointer_mode (addr_space_t addrspace)
6829{
6830 switch (addrspace)
6831 {
6832 case ADDR_SPACE_GENERIC:
6833 return ptr_mode;
6834 case ADDR_SPACE_EA:
6835 return EAmode;
6836 default:
6837 gcc_unreachable ();
6838 }
6839}
6840
 6841/* Return the appropriate mode for an address in a named address space.  */
6842static enum machine_mode
6843spu_addr_space_address_mode (addr_space_t addrspace)
6844{
6845 switch (addrspace)
6846 {
6847 case ADDR_SPACE_GENERIC:
6848 return Pmode;
6849 case ADDR_SPACE_EA:
6850 return EAmode;
6851 default:
6852 gcc_unreachable ();
6853 }
6854}
6855
6856/* Determine if one named address space is a subset of another. */
6857
6858static bool
6859spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6860{
6861 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6862 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6863
6864 if (subset == superset)
6865 return true;
6866
6867 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6868 being subsets but instead as disjoint address spaces. */
6869 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6870 return false;
6871
6872 else
6873 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6874}
6875
6876/* Convert from one address space to another. */
6877static rtx
6878spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6879{
6880 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6881 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6882
6883 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6884 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6885
6886 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6887 {
6888 rtx result, ls;
6889
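	      /* __ea_local_store gives the effective address of the local store.
	         A nonzero __ea pointer is converted by subtracting that base; the
	         conditional move below keeps a NULL pointer NULL.  */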
6890 ls = gen_const_mem (DImode,
6891 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6892 set_mem_align (ls, 128);
6893
6894 result = gen_reg_rtx (Pmode);
6895 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6896 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6897 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6898 ls, const0_rtx, Pmode, 1);
6899
6900 emit_insn (gen_subsi3 (result, op, ls));
6901
6902 return result;
6903 }
6904
6905 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6906 {
6907 rtx result, ls;
6908
6909 ls = gen_const_mem (DImode,
6910 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6911 set_mem_align (ls, 128);
6912
6913 result = gen_reg_rtx (EAmode);
6914 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6915 op = force_reg (Pmode, op);
6916 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6917 ls, const0_rtx, EAmode, 1);
6918 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6919
6920 if (EAmode == SImode)
6921 emit_insn (gen_addsi3 (result, op, ls));
6922 else
6923 emit_insn (gen_adddi3 (result, op, ls));
6924
6925 return result;
6926 }
6927
6928 else
6929 gcc_unreachable ();
6930}
6931
6932
d52fd16a 6933/* Count the total number of instructions in each pipe and return the
6934 maximum, which is used as the Minimum Iteration Interval (MII)
 6935   in the modulo scheduler.  get_pipe() returns -2, -1, 0, or 1;
 6936   -2 marks instructions that can go in either pipe0 or pipe1.  */
6937static int
6938spu_sms_res_mii (struct ddg *g)
6939{
6940 int i;
6941 unsigned t[4] = {0, 0, 0, 0};
6942
6943 for (i = 0; i < g->num_nodes; i++)
6944 {
6945 rtx insn = g->nodes[i].insn;
6946 int p = get_pipe (insn) + 2;
6947
6948 assert (p >= 0);
6949 assert (p < 4);
6950
6951 t[p]++;
6952 if (dump_file && INSN_P (insn))
6953 fprintf (dump_file, "i%d %s %d %d\n",
6954 INSN_UID (insn),
6955 insn_data[INSN_CODE(insn)].name,
6956 p, t[p]);
6957 }
6958 if (dump_file)
6959 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6960
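	  /* t[2] and t[3] count pipe0-only and pipe1-only instructions, while t[0]
	     counts those that can issue in either pipe.  The MII is the larger of
	     the busier pipe's count and half of the combined load, rounded up.  */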
6961 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6962}
6963
6964
5df189be 6965void
6966spu_init_expanders (void)
9d98604b 6967{
5df189be 6968 if (cfun)
9d98604b 6969 {
6970 rtx r0, r1;
 6971      /* The hard frame pointer register is only 128-bit aligned when
6972 frame_pointer_needed is true. We don't know that until we're
6973 expanding the prologue. */
6974 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6975
6976 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6977 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6978 to be treated as aligned, so generate them here. */
6979 r0 = gen_reg_rtx (SImode);
6980 r1 = gen_reg_rtx (SImode);
6981 mark_reg_pointer (r0, 128);
6982 mark_reg_pointer (r1, 128);
6983 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6984 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6985 }
ea32e033 6986}
6987
6988static enum machine_mode
6989spu_libgcc_cmp_return_mode (void)
6990{
6991
 6992/* For SPU, word_mode is TImode, so it is better to use SImode
 6993   for compare returns.  */
6994 return SImode;
6995}
6996
6997static enum machine_mode
6998spu_libgcc_shift_count_mode (void)
6999{
 7000/* For SPU, word_mode is TImode, so it is better to use SImode
 7001   for shift counts.  */
7002 return SImode;
7003}
5a976006 7004
7005/* An early place to adjust some flags after GCC has finished processing
7006 * them. */
7007static void
7008asm_file_start (void)
7009{
7010 /* Variable tracking should be run after all optimizations which
7011 change order of insns. It also needs a valid CFG. */
7012 spu_flag_var_tracking = flag_var_tracking;
7013 flag_var_tracking = 0;
7014
7015 default_file_start ();
7016}
7017
a08dfd55 7018/* Implement targetm.section_type_flags. */
7019static unsigned int
7020spu_section_type_flags (tree decl, const char *name, int reloc)
7021{
7022 /* .toe needs to have type @nobits. */
7023 if (strcmp (name, ".toe") == 0)
7024 return SECTION_BSS;
6cf5579e 7025 /* Don't load _ea into the current address space. */
7026 if (strcmp (name, "._ea") == 0)
7027 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7028 return default_section_type_flags (decl, name, reloc);
7029}
c2233b46 7030
6cf5579e 7031/* Implement targetm.select_section. */
7032static section *
7033spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7034{
7035 /* Variables and constants defined in the __ea address space
7036 go into a special section named "._ea". */
7037 if (TREE_TYPE (decl) != error_mark_node
7038 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7039 {
7040 /* We might get called with string constants, but get_named_section
7041 doesn't like them as they are not DECLs. Also, we need to set
7042 flags in that case. */
7043 if (!DECL_P (decl))
7044 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7045
7046 return get_named_section (decl, "._ea", reloc);
7047 }
7048
7049 return default_elf_select_section (decl, reloc, align);
7050}
7051
7052/* Implement targetm.unique_section. */
7053static void
7054spu_unique_section (tree decl, int reloc)
7055{
7056 /* We don't support unique section names in the __ea address
7057 space for now. */
7058 if (TREE_TYPE (decl) != error_mark_node
7059 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7060 return;
7061
7062 default_unique_section (decl, reloc);
7063}
7064
56c7bfc2 7065/* Generate a constant or register which contains 2^SCALE. We assume
 7066   the result is valid for MODE.  Currently, MODE must be V4SFmode and
 7067   SCALE must be SImode or a CONST_INT.  */
7068rtx
7069spu_gen_exp2 (enum machine_mode mode, rtx scale)
7070{
7071 gcc_assert (mode == V4SFmode);
7072 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7073 if (GET_CODE (scale) != CONST_INT)
7074 {
7075 /* unsigned int exp = (127 + scale) << 23;
7076 __vector float m = (__vector float) spu_splats (exp); */
7077 rtx reg = force_reg (SImode, scale);
7078 rtx exp = gen_reg_rtx (SImode);
7079 rtx mul = gen_reg_rtx (mode);
7080 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7081 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7082 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7083 return mul;
7084 }
7085 else
7086 {
7087 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7088 unsigned char arr[16];
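	      /* Build the IEEE single-precision bit pattern of 2**scale in each
	         word: sign bit 0, biased exponent EXP straddling the first two
	         bytes, and a zero mantissa.  */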
7089 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7090 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7091 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7092 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7093 return array_to_constant (mode, arr);
7094 }
7095}
7096
9d98604b 7097/* After reload, just change the convert into a move instruction
7098 or a dead instruction. */
7099void
7100spu_split_convert (rtx ops[])
7101{
7102 if (REGNO (ops[0]) == REGNO (ops[1]))
7103 emit_note (NOTE_INSN_DELETED);
7104 else
7105 {
7106 /* Use TImode always as this might help hard reg copyprop. */
7107 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7108 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7109 emit_insn (gen_move_insn (op0, op1));
7110 }
7111}
7112
b3878a6c 7113void
7114spu_function_profiler (FILE * file, int labelno)
7115{
7116 fprintf (file, "# profile\n");
7117 fprintf (file, "brsl $75, _mcount\n");
7118}
7119
c2233b46 7120#include "gt-spu.h"