7cf0dbf3 1/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
644459d0 24#include "insn-config.h"
25#include "conditions.h"
26#include "insn-attr.h"
27#include "flags.h"
28#include "recog.h"
29#include "obstack.h"
30#include "tree.h"
31#include "expr.h"
32#include "optabs.h"
33#include "except.h"
34#include "function.h"
35#include "output.h"
36#include "basic-block.h"
37#include "integrate.h"
0b205f4c 38#include "diagnostic-core.h"
644459d0 39#include "ggc.h"
40#include "hashtab.h"
41#include "tm_p.h"
42#include "target.h"
43#include "target-def.h"
44#include "langhooks.h"
45#include "reload.h"
46#include "cfglayout.h"
47#include "sched-int.h"
48#include "params.h"
644459d0 49#include "machmode.h"
75a70cf9 50#include "gimple.h"
644459d0 51#include "tm-constrs.h"
d52fd16a 52#include "ddg.h"
5a976006 53#include "sbitmap.h"
54#include "timevar.h"
55#include "df.h"
6352eedf 56
57/* Builtin types, data and prototypes. */
c2233b46 58
59enum spu_builtin_type_index
60{
61 SPU_BTI_END_OF_PARAMS,
62
63 /* We create new type nodes for these. */
64 SPU_BTI_V16QI,
65 SPU_BTI_V8HI,
66 SPU_BTI_V4SI,
67 SPU_BTI_V2DI,
68 SPU_BTI_V4SF,
69 SPU_BTI_V2DF,
70 SPU_BTI_UV16QI,
71 SPU_BTI_UV8HI,
72 SPU_BTI_UV4SI,
73 SPU_BTI_UV2DI,
74
75 /* A 16-byte type. (Implemented with V16QI_type_node) */
76 SPU_BTI_QUADWORD,
77
78 /* These all correspond to intSI_type_node */
79 SPU_BTI_7,
80 SPU_BTI_S7,
81 SPU_BTI_U7,
82 SPU_BTI_S10,
83 SPU_BTI_S10_4,
84 SPU_BTI_U14,
85 SPU_BTI_16,
86 SPU_BTI_S16,
87 SPU_BTI_S16_2,
88 SPU_BTI_U16,
89 SPU_BTI_U16_2,
90 SPU_BTI_U18,
91
92 /* These correspond to the standard types */
93 SPU_BTI_INTQI,
94 SPU_BTI_INTHI,
95 SPU_BTI_INTSI,
96 SPU_BTI_INTDI,
97
98 SPU_BTI_UINTQI,
99 SPU_BTI_UINTHI,
100 SPU_BTI_UINTSI,
101 SPU_BTI_UINTDI,
102
103 SPU_BTI_FLOAT,
104 SPU_BTI_DOUBLE,
105
106 SPU_BTI_VOID,
107 SPU_BTI_PTR,
108
109 SPU_BTI_MAX
110};
111
112#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
113#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
114#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
115#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
116#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
117#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
118#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
119#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
120#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
121#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
122
123static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
124
6352eedf 125struct spu_builtin_range
126{
127 int low, high;
128};
129
130static struct spu_builtin_range spu_builtin_range[] = {
131 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
132 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
133 {0ll, 0x7fll}, /* SPU_BTI_U7 */
134 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
135 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
136 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
137 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
138 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
139 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
140 {0ll, 0xffffll}, /* SPU_BTI_U16 */
141 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
143};
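/* A sketch of how this table is read: each entry gives the inclusive
   [low, high] range of constants accepted for the corresponding
   SPU_BTI_* immediate operand kind, in the same order as the enum
   above starting at SPU_BTI_7.  For example, an operand classified as
   SPU_BTI_U7 only accepts values in [0, 0x7f], and SPU_BTI_S10 accepts
   the signed 10-bit range [-0x200, 0x1ff].  */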
144
644459d0 145\f
146/* Target specific attribute specifications. */
147char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
148
149/* Prototypes and external defs. */
4c834714 150static void spu_option_override (void);
cc07c468 151static void spu_option_init_struct (struct gcc_options *opts);
686e2769 152static void spu_option_default_params (void);
644459d0 153static void spu_init_builtins (void);
e6925042 154static tree spu_builtin_decl (unsigned, bool);
b62e30b8 155static bool spu_scalar_mode_supported_p (enum machine_mode mode);
156static bool spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 157static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
6cf5579e 158static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
159 bool, addr_space_t);
644459d0 160static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
161static rtx get_pic_reg (void);
162static int need_to_save_reg (int regno, int saving);
163static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
164static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
165static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
166 rtx scratch);
167static void emit_nop_for_insn (rtx insn);
168static bool insn_clobbers_hbr (rtx insn);
169static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 170 int distance, sbitmap blocks);
5474166e 171static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
172 enum machine_mode dmode);
644459d0 173static rtx get_branch_target (rtx branch);
644459d0 174static void spu_machine_dependent_reorg (void);
175static int spu_sched_issue_rate (void);
176static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
177 int can_issue_more);
178static int get_pipe (rtx insn);
644459d0 179static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 180static void spu_sched_init_global (FILE *, int, int);
181static void spu_sched_init (FILE *, int, int);
182static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 183static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
184 int flags,
b62e30b8 185 bool *no_add_attrs);
644459d0 186static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
187 int flags,
b62e30b8 188 bool *no_add_attrs);
644459d0 189static int spu_naked_function_p (tree func);
b62e30b8 190static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
191 const_tree type, bool named);
ee9034d4 192static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
644459d0 196static tree spu_build_builtin_va_list (void);
8a58ed0a 197static void spu_va_start (tree, rtx);
75a70cf9 198static tree spu_gimplify_va_arg_expr (tree valist, tree type,
199 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 200static int store_with_one_insn_p (rtx mem);
644459d0 201static int mem_is_padded_component_ref (rtx x);
9d98604b 202static int reg_aligned_for_addr (rtx x);
644459d0 203static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
204static void spu_asm_globalize_label (FILE * file, const char *name);
b62e30b8 205static bool spu_rtx_costs (rtx x, int code, int outer_code,
206 int *total, bool speed);
207static bool spu_function_ok_for_sibcall (tree decl, tree exp);
644459d0 208static void spu_init_libfuncs (void);
fb80456a 209static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 210static void fix_range (const char *);
69ced2d6 211static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 212static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
6cf5579e 213static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
214 addr_space_t);
e99f512d 215static tree spu_builtin_mul_widen_even (tree);
216static tree spu_builtin_mul_widen_odd (tree);
a76866d3 217static tree spu_builtin_mask_for_load (void);
0822b158 218static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
a9f1838b 219static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 220static tree spu_builtin_vec_perm (tree, tree *);
6cf5579e 221static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
222static enum machine_mode spu_addr_space_address_mode (addr_space_t);
223static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
224static rtx spu_addr_space_convert (rtx, tree, tree);
d52fd16a 225static int spu_sms_res_mii (struct ddg *g);
5a976006 226static void asm_file_start (void);
a08dfd55 227static unsigned int spu_section_type_flags (tree, const char *, int);
6cf5579e 228static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
229static void spu_unique_section (tree, int);
9d98604b 230static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 231static void spu_trampoline_init (rtx, tree, rtx);
b2d7ede1 232static void spu_conditional_register_usage (void);
329c1e4e 233static bool spu_ref_may_alias_errno (ao_ref *);
644459d0 234
5474166e 235/* Which instruction set architecture to use. */
236int spu_arch;
237/* Which cpu we are tuning for. */
238int spu_tune;
239
5a976006 240/* The hardware requires 8 insns between a hint and the branch it
241 affects. This variable describes how many rtl instructions the
242 compiler needs to see before inserting a hint, and then the compiler
243 will insert enough nops to make it at least 8 insns. The default is
244 for the compiler to allow up to 2 nops to be emitted. The nops are
245 inserted in pairs, so we round down. */
246int spu_hint_dist = (8*4) - (2*4);
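/* Worked example of the default above: SPU instructions are 4 bytes, so
   8 insns is 32 bytes; allowing 2 nops (8 bytes) to be inserted leaves a
   default hint-to-branch distance of 24 bytes.  */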
247
248/* Determines whether we run variable tracking in machine dependent
249 reorganization. */
250static int spu_flag_var_tracking;
251
644459d0 252enum spu_immediate {
253 SPU_NONE,
254 SPU_IL,
255 SPU_ILA,
256 SPU_ILH,
257 SPU_ILHU,
258 SPU_ORI,
259 SPU_ORHI,
260 SPU_ORBI,
99369027 261 SPU_IOHL
644459d0 262};
dea01258 263enum immediate_class
264{
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
5df189be 272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 273};
644459d0 274
275static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 277static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
644459d0 280
1bd43494 281static enum machine_mode spu_unwind_word_mode (void);
282
ea32e033 283static enum machine_mode
284spu_libgcc_cmp_return_mode (void);
285
286static enum machine_mode
287spu_libgcc_shift_count_mode (void);
6cf5579e 288
289/* Pointer mode for __ea references. */
290#define EAmode (spu_ea_model != 32 ? DImode : SImode)
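/* For example, when __ea pointers are 64 bits wide (spu_ea_model == 64,
   i.e. not 32) EAmode is DImode; with 32-bit __ea pointers it is
   SImode.  */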
291
ef51d1e3 292\f
293/* Table of machine attributes. */
294static const struct attribute_spec spu_attribute_table[] =
295{
296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
297 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
299 { NULL, 0, 0, false, false, false, NULL }
300};
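/* Usage sketch (not from this file): "naked" may only appear on function
   declarations (decl_req is true), e.g.

     void handler (void) __attribute__ ((naked));

   while "spu_vector" is a type attribute (type_req is true); the two
   handler functions enforce those constraints.  */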
644459d0 301\f
302/* TARGET overrides. */
303
6cf5579e 304#undef TARGET_ADDR_SPACE_POINTER_MODE
305#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
306
307#undef TARGET_ADDR_SPACE_ADDRESS_MODE
308#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
309
310#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
313
314#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
316
317#undef TARGET_ADDR_SPACE_SUBSET_P
318#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
319
320#undef TARGET_ADDR_SPACE_CONVERT
321#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
322
644459d0 323#undef TARGET_INIT_BUILTINS
324#define TARGET_INIT_BUILTINS spu_init_builtins
e6925042 325#undef TARGET_BUILTIN_DECL
326#define TARGET_BUILTIN_DECL spu_builtin_decl
644459d0 327
644459d0 328#undef TARGET_EXPAND_BUILTIN
329#define TARGET_EXPAND_BUILTIN spu_expand_builtin
330
1bd43494 331#undef TARGET_UNWIND_WORD_MODE
332#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 333
41e3a0c7 334#undef TARGET_LEGITIMIZE_ADDRESS
335#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
336
6cf5579e 337/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340#undef TARGET_ASM_UNALIGNED_SI_OP
341#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
342
343#undef TARGET_ASM_ALIGNED_DI_OP
344#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
345
644459d0 346/* The .8byte directive doesn't seem to work well for a 32 bit
347 architecture. */
348#undef TARGET_ASM_UNALIGNED_DI_OP
349#define TARGET_ASM_UNALIGNED_DI_OP NULL
350
351#undef TARGET_RTX_COSTS
352#define TARGET_RTX_COSTS spu_rtx_costs
353
354#undef TARGET_ADDRESS_COST
f529eb25 355#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 356
357#undef TARGET_SCHED_ISSUE_RATE
358#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
359
5a976006 360#undef TARGET_SCHED_INIT_GLOBAL
361#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
362
363#undef TARGET_SCHED_INIT
364#define TARGET_SCHED_INIT spu_sched_init
365
644459d0 366#undef TARGET_SCHED_VARIABLE_ISSUE
367#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
368
5a976006 369#undef TARGET_SCHED_REORDER
370#define TARGET_SCHED_REORDER spu_sched_reorder
371
372#undef TARGET_SCHED_REORDER2
373#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 374
375#undef TARGET_SCHED_ADJUST_COST
376#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
377
644459d0 378#undef TARGET_ATTRIBUTE_TABLE
379#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
380
381#undef TARGET_ASM_INTEGER
382#define TARGET_ASM_INTEGER spu_assemble_integer
383
384#undef TARGET_SCALAR_MODE_SUPPORTED_P
385#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
386
387#undef TARGET_VECTOR_MODE_SUPPORTED_P
388#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
389
390#undef TARGET_FUNCTION_OK_FOR_SIBCALL
391#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
392
393#undef TARGET_ASM_GLOBALIZE_LABEL
394#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
395
396#undef TARGET_PASS_BY_REFERENCE
397#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
398
ee9034d4 399#undef TARGET_FUNCTION_ARG
400#define TARGET_FUNCTION_ARG spu_function_arg
401
402#undef TARGET_FUNCTION_ARG_ADVANCE
403#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
404
644459d0 405#undef TARGET_MUST_PASS_IN_STACK
406#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
408#undef TARGET_BUILD_BUILTIN_VA_LIST
409#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
410
8a58ed0a 411#undef TARGET_EXPAND_BUILTIN_VA_START
412#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
413
644459d0 414#undef TARGET_SETUP_INCOMING_VARARGS
415#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
416
417#undef TARGET_MACHINE_DEPENDENT_REORG
418#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
419
420#undef TARGET_GIMPLIFY_VA_ARG_EXPR
421#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
422
423#undef TARGET_DEFAULT_TARGET_FLAGS
424#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
425
426#undef TARGET_INIT_LIBFUNCS
427#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
428
429#undef TARGET_RETURN_IN_MEMORY
430#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
431
69ced2d6 432#undef TARGET_ENCODE_SECTION_INFO
433#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
434
e99f512d 435#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
436#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
437
438#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
439#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
440
a76866d3 441#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
442#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
443
a28df51d 444#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
445#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
446
202d6e5f 447#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
448#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
0e87db76 449
a0515226 450#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
451#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
452
ea32e033 453#undef TARGET_LIBGCC_CMP_RETURN_MODE
454#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
455
456#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
457#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
458
d52fd16a 459#undef TARGET_SCHED_SMS_RES_MII
460#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
461
5a976006 462#undef TARGET_ASM_FILE_START
463#define TARGET_ASM_FILE_START asm_file_start
464
a08dfd55 465#undef TARGET_SECTION_TYPE_FLAGS
466#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
467
6cf5579e 468#undef TARGET_ASM_SELECT_SECTION
469#define TARGET_ASM_SELECT_SECTION spu_select_section
470
471#undef TARGET_ASM_UNIQUE_SECTION
472#define TARGET_ASM_UNIQUE_SECTION spu_unique_section
473
fd50b071 474#undef TARGET_LEGITIMATE_ADDRESS_P
475#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
476
e96f2783 477#undef TARGET_TRAMPOLINE_INIT
478#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
479
4c834714 480#undef TARGET_OPTION_OVERRIDE
481#define TARGET_OPTION_OVERRIDE spu_option_override
482
cc07c468 483#undef TARGET_OPTION_INIT_STRUCT
484#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
02e53c17 485
686e2769 486#undef TARGET_OPTION_DEFAULT_PARAMS
487#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
488
ed6befa5 489#undef TARGET_EXCEPT_UNWIND_INFO
490#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
491
b2d7ede1 492#undef TARGET_CONDITIONAL_REGISTER_USAGE
493#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
494
329c1e4e 495#undef TARGET_REF_MAY_ALIAS_ERRNO
496#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
497
644459d0 498struct gcc_target targetm = TARGET_INITIALIZER;
499
02e53c17 500static void
cc07c468 501spu_option_init_struct (struct gcc_options *opts)
5df189be 502{
5df189be 503 /* With so many registers it is better to have this on by default. */
cc07c468 504 opts->x_flag_rename_registers = 1;
5df189be 505}
506
686e2769 507/* Implement TARGET_OPTION_DEFAULT_PARAMS. */
508static void
509spu_option_default_params (void)
510{
511 /* Override some of the default param values. With so many registers
512 larger values are better for these params. */
513 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
514}
515
4c834714 516/* Implement TARGET_OPTION_OVERRIDE. */
517static void
518spu_option_override (void)
644459d0 519{
14d408d9 520 /* Small loops will be unpeeled at -O3. For SPU it is more important
521 to keep code small by default. */
686e2769 522 if (!flag_unroll_loops && !flag_peel_loops)
e0b840fc 523 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
56f280c4 524 global_options.x_param_values,
525 global_options_set.x_param_values);
14d408d9 526
644459d0 527 flag_omit_frame_pointer = 1;
528
5a976006 529 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 530 if (align_functions < 8)
531 align_functions = 8;
c7b91b14 532
5a976006 533 spu_hint_dist = 8*4 - spu_max_nops*4;
534 if (spu_hint_dist < 0)
535 spu_hint_dist = 0;
536
c7b91b14 537 if (spu_fixed_range_string)
538 fix_range (spu_fixed_range_string);
5474166e 539
540 /* Determine processor architectural level. */
541 if (spu_arch_string)
542 {
543 if (strcmp (&spu_arch_string[0], "cell") == 0)
544 spu_arch = PROCESSOR_CELL;
545 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
546 spu_arch = PROCESSOR_CELLEDP;
547 else
8e181c9d 548 error ("bad value (%s) for -march= switch", spu_arch_string);
5474166e 549 }
550
551 /* Determine processor to tune for. */
552 if (spu_tune_string)
553 {
554 if (strcmp (&spu_tune_string[0], "cell") == 0)
555 spu_tune = PROCESSOR_CELL;
556 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
557 spu_tune = PROCESSOR_CELLEDP;
558 else
8e181c9d 559 error ("bad value (%s) for -mtune= switch", spu_tune_string);
5474166e 560 }
98bbec1e 561
13684256 562 /* Change defaults according to the processor architecture. */
563 if (spu_arch == PROCESSOR_CELLEDP)
564 {
565 /* If no command line option has been otherwise specified, change
566 the default to -mno-safe-hints on celledp -- only the original
567 Cell/B.E. processors require this workaround. */
568 if (!(target_flags_explicit & MASK_SAFE_HINTS))
569 target_flags &= ~MASK_SAFE_HINTS;
570 }
571
98bbec1e 572 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 573}
574\f
575/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
576 struct attribute_spec.handler. */
577
644459d0 578/* True if MODE is valid for the target. By "valid", we mean able to
579 be manipulated in non-trivial ways. In particular, this means all
580 the arithmetic is supported. */
581static bool
582spu_scalar_mode_supported_p (enum machine_mode mode)
583{
584 switch (mode)
585 {
586 case QImode:
587 case HImode:
588 case SImode:
589 case SFmode:
590 case DImode:
591 case TImode:
592 case DFmode:
593 return true;
594
595 default:
596 return false;
597 }
598}
599
600/* Similarly for vector modes. "Supported" here is less strict. At
601 least some operations are supported; need to check optabs or builtins
602 for further details. */
603static bool
604spu_vector_mode_supported_p (enum machine_mode mode)
605{
606 switch (mode)
607 {
608 case V16QImode:
609 case V8HImode:
610 case V4SImode:
611 case V2DImode:
612 case V4SFmode:
613 case V2DFmode:
614 return true;
615
616 default:
617 return false;
618 }
619}
620
621/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
622 least significant bytes of the outer mode. This function returns
623 TRUE for the SUBREGs where this is correct. */
624int
625valid_subreg (rtx op)
626{
627 enum machine_mode om = GET_MODE (op);
628 enum machine_mode im = GET_MODE (SUBREG_REG (op));
629 return om != VOIDmode && im != VOIDmode
630 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 631 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
632 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 633}
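/* Examples: (subreg:QI (reg:SI)) and (subreg:HI (reg:SI)) are accepted
   because both modes fit in 4 bytes, and any 16-byte-to-16-byte subreg
   (e.g. TImode of V4SImode) is accepted; (subreg:SI (reg:TI)) is
   rejected, since on this target the SImode value is not held in the
   least significant bytes of the TImode register.  */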
634
635/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 636 and adjust the start offset. */
644459d0 637static rtx
638adjust_operand (rtx op, HOST_WIDE_INT * start)
639{
640 enum machine_mode mode;
641 int op_size;
38aca5eb 642 /* Strip any paradoxical SUBREG. */
643 if (GET_CODE (op) == SUBREG
644 && (GET_MODE_BITSIZE (GET_MODE (op))
645 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 646 {
647 if (start)
648 *start -=
649 GET_MODE_BITSIZE (GET_MODE (op)) -
650 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
651 op = SUBREG_REG (op);
652 }
653 /* If it is smaller than SI, adjust so an SImode SUBREG can be used. */
654 op_size = GET_MODE_BITSIZE (GET_MODE (op));
655 if (op_size < 32)
656 {
657 if (start)
658 *start += 32 - op_size;
659 op_size = 32;
660 }
661 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
662 mode = mode_for_size (op_size, MODE_INT, 0);
663 if (mode != GET_MODE (op))
664 op = gen_rtx_SUBREG (mode, op, 0);
665 return op;
666}
667
668void
669spu_expand_extv (rtx ops[], int unsignedp)
670{
9d98604b 671 rtx dst = ops[0], src = ops[1];
644459d0 672 HOST_WIDE_INT width = INTVAL (ops[2]);
673 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 674 HOST_WIDE_INT align_mask;
675 rtx s0, s1, mask, r0;
644459d0 676
9d98604b 677 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 678
9d98604b 679 if (MEM_P (src))
644459d0 680 {
9d98604b 681 /* First, determine if we need 1 TImode load or 2. We need only 1
682 if the bits being extracted do not cross the alignment boundary
683 as determined by the MEM and its address. */
684
685 align_mask = -MEM_ALIGN (src);
686 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 687 {
9d98604b 688 /* Alignment is sufficient for 1 load. */
689 s0 = gen_reg_rtx (TImode);
690 r0 = spu_expand_load (s0, 0, src, start / 8);
691 start &= 7;
692 if (r0)
693 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 694 }
9d98604b 695 else
696 {
697 /* Need 2 loads. */
698 s0 = gen_reg_rtx (TImode);
699 s1 = gen_reg_rtx (TImode);
700 r0 = spu_expand_load (s0, s1, src, start / 8);
701 start &= 7;
702
703 gcc_assert (start + width <= 128);
704 if (r0)
705 {
706 rtx r1 = gen_reg_rtx (SImode);
707 mask = gen_reg_rtx (TImode);
708 emit_move_insn (mask, GEN_INT (-1));
709 emit_insn (gen_rotqby_ti (s0, s0, r0));
710 emit_insn (gen_rotqby_ti (s1, s1, r0));
711 if (GET_CODE (r0) == CONST_INT)
712 r1 = GEN_INT (INTVAL (r0) & 15);
713 else
714 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
715 emit_insn (gen_shlqby_ti (mask, mask, r1));
716 emit_insn (gen_selb (s0, s1, s0, mask));
717 }
718 }
719
720 }
721 else if (GET_CODE (src) == SUBREG)
722 {
723 rtx r = SUBREG_REG (src);
724 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
725 s0 = gen_reg_rtx (TImode);
726 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
727 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
728 else
729 emit_move_insn (s0, src);
730 }
731 else
732 {
733 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
734 s0 = gen_reg_rtx (TImode);
735 emit_move_insn (s0, src);
644459d0 736 }
737
9d98604b 738 /* Now s0 is TImode and contains the bits to extract at start. */
739
740 if (start)
741 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
742
743 if (128 - width)
644459d0 744 {
9d98604b 745 tree c = build_int_cst (NULL_TREE, 128 - width);
746 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 747 }
748
9d98604b 749 emit_move_insn (dst, s0);
644459d0 750}
751
752void
753spu_expand_insv (rtx ops[])
754{
755 HOST_WIDE_INT width = INTVAL (ops[1]);
756 HOST_WIDE_INT start = INTVAL (ops[2]);
757 HOST_WIDE_INT maskbits;
4cbad5bb 758 enum machine_mode dst_mode;
644459d0 759 rtx dst = ops[0], src = ops[3];
4cbad5bb 760 int dst_size;
644459d0 761 rtx mask;
762 rtx shift_reg;
763 int shift;
764
765
766 if (GET_CODE (ops[0]) == MEM)
767 dst = gen_reg_rtx (TImode);
768 else
769 dst = adjust_operand (dst, &start);
770 dst_mode = GET_MODE (dst);
771 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
772
773 if (CONSTANT_P (src))
774 {
775 enum machine_mode m =
776 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
777 src = force_reg (m, convert_to_mode (m, src, 0));
778 }
779 src = adjust_operand (src, 0);
644459d0 780
781 mask = gen_reg_rtx (dst_mode);
782 shift_reg = gen_reg_rtx (dst_mode);
783 shift = dst_size - start - width;
784
785 /* It's not safe to use subreg here because the compiler assumes
786 that the SUBREG_REG is right justified in the SUBREG. */
787 convert_move (shift_reg, src, 1);
788
789 if (shift > 0)
790 {
791 switch (dst_mode)
792 {
793 case SImode:
794 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
795 break;
796 case DImode:
797 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
798 break;
799 case TImode:
800 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
801 break;
802 default:
803 abort ();
804 }
805 }
806 else if (shift < 0)
807 abort ();
808
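  /* Worked example of the mask computation below (a sketch): for
     dst_size == 32, start == 8, width == 8 we get
     (-1ll << 16) + (1ll << 24) == 0x00ff0000, i.e. ones exactly over
     the 8-bit field that starts 8 bits below the most significant
     bit.  */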
809 switch (dst_size)
810 {
811 case 32:
812 maskbits = (-1ll << (32 - width - start));
813 if (start)
814 maskbits += (1ll << (32 - start));
815 emit_move_insn (mask, GEN_INT (maskbits));
816 break;
817 case 64:
818 maskbits = (-1ll << (64 - width - start));
819 if (start)
820 maskbits += (1ll << (64 - start));
821 emit_move_insn (mask, GEN_INT (maskbits));
822 break;
823 case 128:
824 {
825 unsigned char arr[16];
826 int i = start / 8;
827 memset (arr, 0, sizeof (arr));
828 arr[i] = 0xff >> (start & 7);
829 for (i++; i <= (start + width - 1) / 8; i++)
830 arr[i] = 0xff;
831 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
832 emit_move_insn (mask, array_to_constant (TImode, arr));
833 }
834 break;
835 default:
836 abort ();
837 }
838 if (GET_CODE (ops[0]) == MEM)
839 {
644459d0 840 rtx low = gen_reg_rtx (SImode);
644459d0 841 rtx rotl = gen_reg_rtx (SImode);
842 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 843 rtx addr;
844 rtx addr0;
845 rtx addr1;
644459d0 846 rtx mem;
847
9d98604b 848 addr = force_reg (Pmode, XEXP (ops[0], 0));
849 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 850 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
851 emit_insn (gen_negsi2 (rotl, low));
852 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
853 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 854 mem = change_address (ops[0], TImode, addr0);
644459d0 855 set_mem_alias_set (mem, 0);
856 emit_move_insn (dst, mem);
857 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 858 if (start + width > MEM_ALIGN (ops[0]))
859 {
860 rtx shl = gen_reg_rtx (SImode);
861 rtx mask1 = gen_reg_rtx (TImode);
862 rtx dst1 = gen_reg_rtx (TImode);
863 rtx mem1;
9d98604b 864 addr1 = plus_constant (addr, 16);
865 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 866 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
867 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 868 mem1 = change_address (ops[0], TImode, addr1);
644459d0 869 set_mem_alias_set (mem1, 0);
870 emit_move_insn (dst1, mem1);
871 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
872 emit_move_insn (mem1, dst1);
873 }
9d98604b 874 emit_move_insn (mem, dst);
644459d0 875 }
876 else
71cd778d 877 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 878}
879
880
881int
882spu_expand_block_move (rtx ops[])
883{
884 HOST_WIDE_INT bytes, align, offset;
885 rtx src, dst, sreg, dreg, target;
886 int i;
887 if (GET_CODE (ops[2]) != CONST_INT
888 || GET_CODE (ops[3]) != CONST_INT
48eb4342 889 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 890 return 0;
891
892 bytes = INTVAL (ops[2]);
893 align = INTVAL (ops[3]);
894
895 if (bytes <= 0)
896 return 1;
897
898 dst = ops[0];
899 src = ops[1];
900
901 if (align == 16)
902 {
903 for (offset = 0; offset + 16 <= bytes; offset += 16)
904 {
905 dst = adjust_address (ops[0], V16QImode, offset);
906 src = adjust_address (ops[1], V16QImode, offset);
907 emit_move_insn (dst, src);
908 }
909 if (offset < bytes)
910 {
911 rtx mask;
912 unsigned char arr[16] = { 0 };
913 for (i = 0; i < bytes - offset; i++)
914 arr[i] = 0xff;
915 dst = adjust_address (ops[0], V16QImode, offset);
916 src = adjust_address (ops[1], V16QImode, offset);
917 mask = gen_reg_rtx (V16QImode);
918 sreg = gen_reg_rtx (V16QImode);
919 dreg = gen_reg_rtx (V16QImode);
920 target = gen_reg_rtx (V16QImode);
921 emit_move_insn (mask, array_to_constant (V16QImode, arr));
922 emit_move_insn (dreg, dst);
923 emit_move_insn (sreg, src);
924 emit_insn (gen_selb (target, dreg, sreg, mask));
925 emit_move_insn (dst, target);
926 }
927 return 1;
928 }
929 return 0;
930}
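/* Example: a 20-byte copy with 16-byte alignment becomes one V16QImode
   move for bytes 0..15, then a load of the source and destination
   quadwords, a selb with a mask whose first 4 bytes are 0xff, and a
   store that merges the last 4 bytes into the destination without
   touching the rest of it.  */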
931
932enum spu_comp_code
933{ SPU_EQ, SPU_GT, SPU_GTU };
934
5474166e 935int spu_comp_icode[12][3] = {
936 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
937 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
938 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
939 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
940 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
941 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
942 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
943 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
944 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
945 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
946 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
947 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 948};
949
950/* Generate a compare for CODE. Return a brand-new rtx that represents
951 the result of the compare. GCC can figure this out too if we don't
952 provide all variations of compares, but since GCC always wants to use
953 WORD_MODE, we can generate better code in most cases if we do it
954 ourselves. */
955void
74f4459c 956spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 957{
958 int reverse_compare = 0;
959 int reverse_test = 0;
5d70b918 960 rtx compare_result, eq_result;
961 rtx comp_rtx, eq_rtx;
644459d0 962 enum machine_mode comp_mode;
963 enum machine_mode op_mode;
b9c74b4d 964 enum spu_comp_code scode, eq_code;
965 enum insn_code ior_code;
74f4459c 966 enum rtx_code code = GET_CODE (cmp);
967 rtx op0 = XEXP (cmp, 0);
968 rtx op1 = XEXP (cmp, 1);
644459d0 969 int index;
5d70b918 970 int eq_test = 0;
644459d0 971
74f4459c 972 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 973 and so on, to keep the constant in operand 1. */
74f4459c 974 if (GET_CODE (op1) == CONST_INT)
644459d0 975 {
74f4459c 976 HOST_WIDE_INT val = INTVAL (op1) - 1;
977 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 978 switch (code)
979 {
980 case GE:
74f4459c 981 op1 = GEN_INT (val);
644459d0 982 code = GT;
983 break;
984 case LT:
74f4459c 985 op1 = GEN_INT (val);
644459d0 986 code = LE;
987 break;
988 case GEU:
74f4459c 989 op1 = GEN_INT (val);
644459d0 990 code = GTU;
991 break;
992 case LTU:
74f4459c 993 op1 = GEN_INT (val);
644459d0 994 code = LEU;
995 break;
996 default:
997 break;
998 }
999 }
1000
5d70b918 1001 comp_mode = SImode;
74f4459c 1002 op_mode = GET_MODE (op0);
5d70b918 1003
644459d0 1004 switch (code)
1005 {
1006 case GE:
644459d0 1007 scode = SPU_GT;
07027691 1008 if (HONOR_NANS (op_mode))
5d70b918 1009 {
1010 reverse_compare = 0;
1011 reverse_test = 0;
1012 eq_test = 1;
1013 eq_code = SPU_EQ;
1014 }
1015 else
1016 {
1017 reverse_compare = 1;
1018 reverse_test = 1;
1019 }
644459d0 1020 break;
1021 case LE:
644459d0 1022 scode = SPU_GT;
07027691 1023 if (HONOR_NANS (op_mode))
5d70b918 1024 {
1025 reverse_compare = 1;
1026 reverse_test = 0;
1027 eq_test = 1;
1028 eq_code = SPU_EQ;
1029 }
1030 else
1031 {
1032 reverse_compare = 0;
1033 reverse_test = 1;
1034 }
644459d0 1035 break;
1036 case LT:
1037 reverse_compare = 1;
1038 reverse_test = 0;
1039 scode = SPU_GT;
1040 break;
1041 case GEU:
1042 reverse_compare = 1;
1043 reverse_test = 1;
1044 scode = SPU_GTU;
1045 break;
1046 case LEU:
1047 reverse_compare = 0;
1048 reverse_test = 1;
1049 scode = SPU_GTU;
1050 break;
1051 case LTU:
1052 reverse_compare = 1;
1053 reverse_test = 0;
1054 scode = SPU_GTU;
1055 break;
1056 case NE:
1057 reverse_compare = 0;
1058 reverse_test = 1;
1059 scode = SPU_EQ;
1060 break;
1061
1062 case EQ:
1063 scode = SPU_EQ;
1064 break;
1065 case GT:
1066 scode = SPU_GT;
1067 break;
1068 case GTU:
1069 scode = SPU_GTU;
1070 break;
1071 default:
1072 scode = SPU_EQ;
1073 break;
1074 }
1075
644459d0 1076 switch (op_mode)
1077 {
1078 case QImode:
1079 index = 0;
1080 comp_mode = QImode;
1081 break;
1082 case HImode:
1083 index = 1;
1084 comp_mode = HImode;
1085 break;
1086 case SImode:
1087 index = 2;
1088 break;
1089 case DImode:
1090 index = 3;
1091 break;
1092 case TImode:
1093 index = 4;
1094 break;
1095 case SFmode:
1096 index = 5;
1097 break;
1098 case DFmode:
1099 index = 6;
1100 break;
1101 case V16QImode:
5474166e 1102 index = 7;
1103 comp_mode = op_mode;
1104 break;
644459d0 1105 case V8HImode:
5474166e 1106 index = 8;
1107 comp_mode = op_mode;
1108 break;
644459d0 1109 case V4SImode:
5474166e 1110 index = 9;
1111 comp_mode = op_mode;
1112 break;
644459d0 1113 case V4SFmode:
5474166e 1114 index = 10;
1115 comp_mode = V4SImode;
1116 break;
644459d0 1117 case V2DFmode:
5474166e 1118 index = 11;
1119 comp_mode = V2DImode;
644459d0 1120 break;
5474166e 1121 case V2DImode:
644459d0 1122 default:
1123 abort ();
1124 }
1125
74f4459c 1126 if (GET_MODE (op1) == DFmode
07027691 1127 && (scode != SPU_GT && scode != SPU_EQ))
1128 abort ();
644459d0 1129
74f4459c 1130 if (is_set == 0 && op1 == const0_rtx
1131 && (GET_MODE (op0) == SImode
1132 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1133 {
1134 /* Don't need to set a register with the result when we are
1135 comparing against zero and branching. */
1136 reverse_test = !reverse_test;
74f4459c 1137 compare_result = op0;
644459d0 1138 }
1139 else
1140 {
1141 compare_result = gen_reg_rtx (comp_mode);
1142
1143 if (reverse_compare)
1144 {
74f4459c 1145 rtx t = op1;
1146 op1 = op0;
1147 op0 = t;
644459d0 1148 }
1149
1150 if (spu_comp_icode[index][scode] == 0)
1151 abort ();
1152
1153 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1154 (op0, op_mode))
1155 op0 = force_reg (op_mode, op0);
644459d0 1156 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1157 (op1, op_mode))
1158 op1 = force_reg (op_mode, op1);
644459d0 1159 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1160 op0, op1);
644459d0 1161 if (comp_rtx == 0)
1162 abort ();
1163 emit_insn (comp_rtx);
1164
5d70b918 1165 if (eq_test)
1166 {
1167 eq_result = gen_reg_rtx (comp_mode);
1168 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1169 op0, op1);
5d70b918 1170 if (eq_rtx == 0)
1171 abort ();
1172 emit_insn (eq_rtx);
d6bf3b14 1173 ior_code = optab_handler (ior_optab, comp_mode);
5d70b918 1174 gcc_assert (ior_code != CODE_FOR_nothing);
1175 emit_insn (GEN_FCN (ior_code)
1176 (compare_result, compare_result, eq_result));
1177 }
644459d0 1178 }
1179
1180 if (is_set == 0)
1181 {
1182 rtx bcomp;
1183 rtx loc_ref;
1184
1185 /* We don't have branch on QI compare insns, so we convert the
1186 QI compare result to a HI result. */
1187 if (comp_mode == QImode)
1188 {
1189 rtx old_res = compare_result;
1190 compare_result = gen_reg_rtx (HImode);
1191 comp_mode = HImode;
1192 emit_insn (gen_extendqihi2 (compare_result, old_res));
1193 }
1194
1195 if (reverse_test)
1196 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1197 else
1198 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1199
74f4459c 1200 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1201 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1202 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1203 loc_ref, pc_rtx)));
1204 }
1205 else if (is_set == 2)
1206 {
74f4459c 1207 rtx target = operands[0];
644459d0 1208 int compare_size = GET_MODE_BITSIZE (comp_mode);
1209 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1210 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1211 rtx select_mask;
1212 rtx op_t = operands[2];
1213 rtx op_f = operands[3];
1214
1215 /* The result of the comparison can be SI, HI or QI mode. Create a
1216 mask based on that result. */
1217 if (target_size > compare_size)
1218 {
1219 select_mask = gen_reg_rtx (mode);
1220 emit_insn (gen_extend_compare (select_mask, compare_result));
1221 }
1222 else if (target_size < compare_size)
1223 select_mask =
1224 gen_rtx_SUBREG (mode, compare_result,
1225 (compare_size - target_size) / BITS_PER_UNIT);
1226 else if (comp_mode != mode)
1227 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1228 else
1229 select_mask = compare_result;
1230
1231 if (GET_MODE (target) != GET_MODE (op_t)
1232 || GET_MODE (target) != GET_MODE (op_f))
1233 abort ();
1234
1235 if (reverse_test)
1236 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1237 else
1238 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1239 }
1240 else
1241 {
74f4459c 1242 rtx target = operands[0];
644459d0 1243 if (reverse_test)
1244 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1245 gen_rtx_NOT (comp_mode, compare_result)));
1246 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1247 emit_insn (gen_extendhisi2 (target, compare_result));
1248 else if (GET_MODE (target) == SImode
1249 && GET_MODE (compare_result) == QImode)
1250 emit_insn (gen_extend_compare (target, compare_result));
1251 else
1252 emit_move_insn (target, compare_result);
1253 }
1254}
1255
1256HOST_WIDE_INT
1257const_double_to_hwint (rtx x)
1258{
1259 HOST_WIDE_INT val;
1260 REAL_VALUE_TYPE rv;
1261 if (GET_MODE (x) == SFmode)
1262 {
1263 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1264 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1265 }
1266 else if (GET_MODE (x) == DFmode)
1267 {
1268 long l[2];
1269 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1270 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1271 val = l[0];
1272 val = (val << 32) | (l[1] & 0xffffffff);
1273 }
1274 else
1275 abort ();
1276 return val;
1277}
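/* Example: for the SFmode constant 1.0 this returns 0x3f800000, the
   IEEE single-precision image in the low 32 bits; for DFmode the two
   32-bit target words are packed into one HOST_WIDE_INT, first word in
   the high half.  */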
1278
1279rtx
1280hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1281{
1282 long tv[2];
1283 REAL_VALUE_TYPE rv;
1284 gcc_assert (mode == SFmode || mode == DFmode);
1285
1286 if (mode == SFmode)
1287 tv[0] = (v << 32) >> 32;
1288 else if (mode == DFmode)
1289 {
1290 tv[1] = (v << 32) >> 32;
1291 tv[0] = v >> 32;
1292 }
1293 real_from_target (&rv, tv, mode);
1294 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1295}
1296
1297void
1298print_operand_address (FILE * file, register rtx addr)
1299{
1300 rtx reg;
1301 rtx offset;
1302
e04cf423 1303 if (GET_CODE (addr) == AND
1304 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1305 && INTVAL (XEXP (addr, 1)) == -16)
1306 addr = XEXP (addr, 0);
1307
644459d0 1308 switch (GET_CODE (addr))
1309 {
1310 case REG:
1311 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1312 break;
1313
1314 case PLUS:
1315 reg = XEXP (addr, 0);
1316 offset = XEXP (addr, 1);
1317 if (GET_CODE (offset) == REG)
1318 {
1319 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1320 reg_names[REGNO (offset)]);
1321 }
1322 else if (GET_CODE (offset) == CONST_INT)
1323 {
1324 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1325 INTVAL (offset), reg_names[REGNO (reg)]);
1326 }
1327 else
1328 abort ();
1329 break;
1330
1331 case CONST:
1332 case LABEL_REF:
1333 case SYMBOL_REF:
1334 case CONST_INT:
1335 output_addr_const (file, addr);
1336 break;
1337
1338 default:
1339 debug_rtx (addr);
1340 abort ();
1341 }
1342}
1343
1344void
1345print_operand (FILE * file, rtx x, int code)
1346{
1347 enum machine_mode mode = GET_MODE (x);
1348 HOST_WIDE_INT val;
1349 unsigned char arr[16];
1350 int xcode = GET_CODE (x);
dea01258 1351 int i, info;
644459d0 1352 if (GET_MODE (x) == VOIDmode)
1353 switch (code)
1354 {
644459d0 1355 case 'L': /* 128 bits, signed */
1356 case 'm': /* 128 bits, signed */
1357 case 'T': /* 128 bits, signed */
1358 case 't': /* 128 bits, signed */
1359 mode = TImode;
1360 break;
644459d0 1361 case 'K': /* 64 bits, signed */
1362 case 'k': /* 64 bits, signed */
1363 case 'D': /* 64 bits, signed */
1364 case 'd': /* 64 bits, signed */
1365 mode = DImode;
1366 break;
644459d0 1367 case 'J': /* 32 bits, signed */
1368 case 'j': /* 32 bits, signed */
1369 case 's': /* 32 bits, signed */
1370 case 'S': /* 32 bits, signed */
1371 mode = SImode;
1372 break;
1373 }
1374 switch (code)
1375 {
1376
1377 case 'j': /* 32 bits, signed */
1378 case 'k': /* 64 bits, signed */
1379 case 'm': /* 128 bits, signed */
1380 if (xcode == CONST_INT
1381 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1382 {
1383 gcc_assert (logical_immediate_p (x, mode));
1384 constant_to_array (mode, x, arr);
1385 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1386 val = trunc_int_for_mode (val, SImode);
1387 switch (which_logical_immediate (val))
1388 {
1389 case SPU_ORI:
1390 break;
1391 case SPU_ORHI:
1392 fprintf (file, "h");
1393 break;
1394 case SPU_ORBI:
1395 fprintf (file, "b");
1396 break;
1397 default:
1398 gcc_unreachable();
1399 }
1400 }
1401 else
1402 gcc_unreachable();
1403 return;
1404
1405 case 'J': /* 32 bits, signed */
1406 case 'K': /* 64 bits, signed */
1407 case 'L': /* 128 bits, signed */
1408 if (xcode == CONST_INT
1409 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1410 {
1411 gcc_assert (logical_immediate_p (x, mode)
1412 || iohl_immediate_p (x, mode));
1413 constant_to_array (mode, x, arr);
1414 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1415 val = trunc_int_for_mode (val, SImode);
1416 switch (which_logical_immediate (val))
1417 {
1418 case SPU_ORI:
1419 case SPU_IOHL:
1420 break;
1421 case SPU_ORHI:
1422 val = trunc_int_for_mode (val, HImode);
1423 break;
1424 case SPU_ORBI:
1425 val = trunc_int_for_mode (val, QImode);
1426 break;
1427 default:
1428 gcc_unreachable();
1429 }
1430 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1431 }
1432 else
1433 gcc_unreachable();
1434 return;
1435
1436 case 't': /* 128 bits, signed */
1437 case 'd': /* 64 bits, signed */
1438 case 's': /* 32 bits, signed */
dea01258 1439 if (CONSTANT_P (x))
644459d0 1440 {
dea01258 1441 enum immediate_class c = classify_immediate (x, mode);
1442 switch (c)
1443 {
1444 case IC_IL1:
1445 constant_to_array (mode, x, arr);
1446 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1447 val = trunc_int_for_mode (val, SImode);
1448 switch (which_immediate_load (val))
1449 {
1450 case SPU_IL:
1451 break;
1452 case SPU_ILA:
1453 fprintf (file, "a");
1454 break;
1455 case SPU_ILH:
1456 fprintf (file, "h");
1457 break;
1458 case SPU_ILHU:
1459 fprintf (file, "hu");
1460 break;
1461 default:
1462 gcc_unreachable ();
1463 }
1464 break;
1465 case IC_CPAT:
1466 constant_to_array (mode, x, arr);
1467 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1468 if (info == 1)
1469 fprintf (file, "b");
1470 else if (info == 2)
1471 fprintf (file, "h");
1472 else if (info == 4)
1473 fprintf (file, "w");
1474 else if (info == 8)
1475 fprintf (file, "d");
1476 break;
1477 case IC_IL1s:
1478 if (xcode == CONST_VECTOR)
1479 {
1480 x = CONST_VECTOR_ELT (x, 0);
1481 xcode = GET_CODE (x);
1482 }
1483 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1484 fprintf (file, "a");
1485 else if (xcode == HIGH)
1486 fprintf (file, "hu");
1487 break;
1488 case IC_FSMBI:
5df189be 1489 case IC_FSMBI2:
dea01258 1490 case IC_IL2:
1491 case IC_IL2s:
1492 case IC_POOL:
1493 abort ();
1494 }
644459d0 1495 }
644459d0 1496 else
1497 gcc_unreachable ();
1498 return;
1499
1500 case 'T': /* 128 bits, signed */
1501 case 'D': /* 64 bits, signed */
1502 case 'S': /* 32 bits, signed */
dea01258 1503 if (CONSTANT_P (x))
644459d0 1504 {
dea01258 1505 enum immediate_class c = classify_immediate (x, mode);
1506 switch (c)
644459d0 1507 {
dea01258 1508 case IC_IL1:
1509 constant_to_array (mode, x, arr);
1510 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1511 val = trunc_int_for_mode (val, SImode);
1512 switch (which_immediate_load (val))
1513 {
1514 case SPU_IL:
1515 case SPU_ILA:
1516 break;
1517 case SPU_ILH:
1518 case SPU_ILHU:
1519 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1520 break;
1521 default:
1522 gcc_unreachable ();
1523 }
1524 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1525 break;
1526 case IC_FSMBI:
1527 constant_to_array (mode, x, arr);
1528 val = 0;
1529 for (i = 0; i < 16; i++)
1530 {
1531 val <<= 1;
1532 val |= arr[i] & 1;
1533 }
1534 print_operand (file, GEN_INT (val), 0);
1535 break;
1536 case IC_CPAT:
1537 constant_to_array (mode, x, arr);
1538 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1539 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1540 break;
dea01258 1541 case IC_IL1s:
dea01258 1542 if (xcode == HIGH)
5df189be 1543 x = XEXP (x, 0);
1544 if (GET_CODE (x) == CONST_VECTOR)
1545 x = CONST_VECTOR_ELT (x, 0);
1546 output_addr_const (file, x);
1547 if (xcode == HIGH)
1548 fprintf (file, "@h");
644459d0 1549 break;
dea01258 1550 case IC_IL2:
1551 case IC_IL2s:
5df189be 1552 case IC_FSMBI2:
dea01258 1553 case IC_POOL:
1554 abort ();
644459d0 1555 }
c8befdb9 1556 }
644459d0 1557 else
1558 gcc_unreachable ();
1559 return;
1560
644459d0 1561 case 'C':
1562 if (xcode == CONST_INT)
1563 {
1564 /* Only the 4 least significant bits are relevant for generating
1565 control word instructions. */
1566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1567 return;
1568 }
1569 break;
1570
1571 case 'M': /* print code for c*d */
1572 if (GET_CODE (x) == CONST_INT)
1573 switch (INTVAL (x))
1574 {
1575 case 1:
1576 fprintf (file, "b");
1577 break;
1578 case 2:
1579 fprintf (file, "h");
1580 break;
1581 case 4:
1582 fprintf (file, "w");
1583 break;
1584 case 8:
1585 fprintf (file, "d");
1586 break;
1587 default:
1588 gcc_unreachable();
1589 }
1590 else
1591 gcc_unreachable();
1592 return;
1593
1594 case 'N': /* Negate the operand */
1595 if (xcode == CONST_INT)
1596 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1597 else if (xcode == CONST_VECTOR)
1598 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1599 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1600 return;
1601
1602 case 'I': /* enable/disable interrupts */
1603 if (xcode == CONST_INT)
1604 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1605 return;
1606
1607 case 'b': /* branch modifiers */
1608 if (xcode == REG)
1609 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1610 else if (COMPARISON_P (x))
1611 fprintf (file, "%s", xcode == NE ? "n" : "");
1612 return;
1613
1614 case 'i': /* indirect call */
1615 if (xcode == MEM)
1616 {
1617 if (GET_CODE (XEXP (x, 0)) == REG)
1618 /* Used in indirect function calls. */
1619 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1620 else
1621 output_address (XEXP (x, 0));
1622 }
1623 return;
1624
1625 case 'p': /* load/store */
1626 if (xcode == MEM)
1627 {
1628 x = XEXP (x, 0);
1629 xcode = GET_CODE (x);
1630 }
e04cf423 1631 if (xcode == AND)
1632 {
1633 x = XEXP (x, 0);
1634 xcode = GET_CODE (x);
1635 }
644459d0 1636 if (xcode == REG)
1637 fprintf (file, "d");
1638 else if (xcode == CONST_INT)
1639 fprintf (file, "a");
1640 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1641 fprintf (file, "r");
1642 else if (xcode == PLUS || xcode == LO_SUM)
1643 {
1644 if (GET_CODE (XEXP (x, 1)) == REG)
1645 fprintf (file, "x");
1646 else
1647 fprintf (file, "d");
1648 }
1649 return;
1650
5df189be 1651 case 'e':
1652 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1653 val &= 0x7;
1654 output_addr_const (file, GEN_INT (val));
1655 return;
1656
1657 case 'f':
1658 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1659 val &= 0x1f;
1660 output_addr_const (file, GEN_INT (val));
1661 return;
1662
1663 case 'g':
1664 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1665 val &= 0x3f;
1666 output_addr_const (file, GEN_INT (val));
1667 return;
1668
1669 case 'h':
1670 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1671 val = (val >> 3) & 0x1f;
1672 output_addr_const (file, GEN_INT (val));
1673 return;
1674
1675 case 'E':
1676 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1677 val = -val;
1678 val &= 0x7;
1679 output_addr_const (file, GEN_INT (val));
1680 return;
1681
1682 case 'F':
1683 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1684 val = -val;
1685 val &= 0x1f;
1686 output_addr_const (file, GEN_INT (val));
1687 return;
1688
1689 case 'G':
1690 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1691 val = -val;
1692 val &= 0x3f;
1693 output_addr_const (file, GEN_INT (val));
1694 return;
1695
1696 case 'H':
1697 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1698 val = -(val & -8ll);
1699 val = (val >> 3) & 0x1f;
1700 output_addr_const (file, GEN_INT (val));
1701 return;
1702
56c7bfc2 1703 case 'v':
1704 case 'w':
1705 constant_to_array (mode, x, arr);
1706 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1707 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1708 return;
1709
644459d0 1710 case 0:
1711 if (xcode == REG)
1712 fprintf (file, "%s", reg_names[REGNO (x)]);
1713 else if (xcode == MEM)
1714 output_address (XEXP (x, 0));
1715 else if (xcode == CONST_VECTOR)
dea01258 1716 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1717 else
1718 output_addr_const (file, x);
1719 return;
1720
f6a0d06f 1721 /* unused letters
56c7bfc2 1722 o qr u yz
5df189be 1723 AB OPQR UVWXYZ */
644459d0 1724 default:
1725 output_operand_lossage ("invalid %%xn code");
1726 }
1727 gcc_unreachable ();
1728}
1729
644459d0 1730/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1731 caller saved register. For leaf functions it is more efficient to
1732 use a volatile register because we won't need to save and restore the
1733 pic register. This routine is only valid after register allocation
1734 is completed, so we can pick an unused register. */
1735static rtx
1736get_pic_reg (void)
1737{
1738 rtx pic_reg = pic_offset_table_rtx;
1739 if (!reload_completed && !reload_in_progress)
1740 abort ();
87a95921 1741 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1742 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
644459d0 1743 return pic_reg;
1744}
1745
5df189be 1746/* Split constant addresses to handle cases that are too large.
1747 Add in the pic register when in PIC mode.
1748 Split immediates that require more than 1 instruction. */
dea01258 1749int
1750spu_split_immediate (rtx * ops)
c8befdb9 1751{
dea01258 1752 enum machine_mode mode = GET_MODE (ops[0]);
1753 enum immediate_class c = classify_immediate (ops[1], mode);
1754
1755 switch (c)
c8befdb9 1756 {
dea01258 1757 case IC_IL2:
1758 {
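	/* The constant needs an ilhu/iohl pair.  arrhi keeps the high
	   halfword of each 32-bit slot and arrlo the low halfword, so a
	   SImode constant such as 0x12345678 is expected to end up as
	   (a sketch of the resulting assembly, not emitted literally
	   here)

	       ilhu  rt,0x1234
	       iohl  rt,0x5678

	   via the move of HI below followed by the IOR with LO.  */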
1759 unsigned char arrhi[16];
1760 unsigned char arrlo[16];
98bbec1e 1761 rtx to, temp, hi, lo;
dea01258 1762 int i;
98bbec1e 1763 enum machine_mode imode = mode;
1764 /* We need to do reals as ints because the constant used in the
1765 IOR might not be a legitimate real constant. */
1766 imode = int_mode_for_mode (mode);
dea01258 1767 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1768 if (imode != mode)
1769 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1770 else
1771 to = ops[0];
1772 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1773 for (i = 0; i < 16; i += 4)
1774 {
1775 arrlo[i + 2] = arrhi[i + 2];
1776 arrlo[i + 3] = arrhi[i + 3];
1777 arrlo[i + 0] = arrlo[i + 1] = 0;
1778 arrhi[i + 2] = arrhi[i + 3] = 0;
1779 }
98bbec1e 1780 hi = array_to_constant (imode, arrhi);
1781 lo = array_to_constant (imode, arrlo);
1782 emit_move_insn (temp, hi);
dea01258 1783 emit_insn (gen_rtx_SET
98bbec1e 1784 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1785 return 1;
1786 }
5df189be 1787 case IC_FSMBI2:
1788 {
1789 unsigned char arr_fsmbi[16];
1790 unsigned char arr_andbi[16];
1791 rtx to, reg_fsmbi, reg_and;
1792 int i;
1793 enum machine_mode imode = mode;
1794 /* We need to do reals as ints because the constant used in the
1795 AND might not be a legitimate real constant. */
1796 imode = int_mode_for_mode (mode);
1797 constant_to_array (mode, ops[1], arr_fsmbi);
1798 if (imode != mode)
1799 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1800 else
1801 to = ops[0];
1802 for (i = 0; i < 16; i++)
1803 if (arr_fsmbi[i] != 0)
1804 {
1805 arr_andbi[0] = arr_fsmbi[i];
1806 arr_fsmbi[i] = 0xff;
1807 }
1808 for (i = 1; i < 16; i++)
1809 arr_andbi[i] = arr_andbi[0];
1810 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1811 reg_and = array_to_constant (imode, arr_andbi);
1812 emit_move_insn (to, reg_fsmbi);
1813 emit_insn (gen_rtx_SET
1814 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1815 return 1;
1816 }
dea01258 1817 case IC_POOL:
1818 if (reload_in_progress || reload_completed)
1819 {
1820 rtx mem = force_const_mem (mode, ops[1]);
1821 if (TARGET_LARGE_MEM)
1822 {
1823 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1824 emit_move_insn (addr, XEXP (mem, 0));
1825 mem = replace_equiv_address (mem, addr);
1826 }
1827 emit_move_insn (ops[0], mem);
1828 return 1;
1829 }
1830 break;
1831 case IC_IL1s:
1832 case IC_IL2s:
1833 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1834 {
1835 if (c == IC_IL2s)
1836 {
5df189be 1837 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1838 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1839 }
1840 else if (flag_pic)
1841 emit_insn (gen_pic (ops[0], ops[1]));
1842 if (flag_pic)
1843 {
1844 rtx pic_reg = get_pic_reg ();
1845 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1846 crtl->uses_pic_offset_table = 1;
dea01258 1847 }
1848 return flag_pic || c == IC_IL2s;
1849 }
1850 break;
1851 case IC_IL1:
1852 case IC_FSMBI:
1853 case IC_CPAT:
1854 break;
c8befdb9 1855 }
dea01258 1856 return 0;
c8befdb9 1857}
1858
644459d0 1859/* SAVING is TRUE when we are generating the actual load and store
1860 instructions for REGNO. When determining the size of the stack
1861 needed for saving registers we must allocate enough space for the
1862 worst case, because we don't always have the information early enough
1863 to avoid allocating it. But we can at least eliminate the actual loads
1864 and stores during the prologue/epilogue. */
1865static int
1866need_to_save_reg (int regno, int saving)
1867{
3072d30e 1868 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1869 return 1;
1870 if (flag_pic
1871 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1872 && (!saving || crtl->uses_pic_offset_table)
644459d0 1873 && (!saving
3072d30e 1874 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1875 return 1;
1876 return 0;
1877}
1878
1879/* This function is only correct starting with local register
1880 allocation */
1881int
1882spu_saved_regs_size (void)
1883{
1884 int reg_save_size = 0;
1885 int regno;
1886
1887 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1888 if (need_to_save_reg (regno, 0))
1889 reg_save_size += 0x10;
1890 return reg_save_size;
1891}
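
/* Worked example (illustrative, not part of the original source): a
   function that must save three call-saved registers reserves
   3 * 0x10 = 0x30 bytes here.  Each register takes a full 16-byte
   quadword because the saves and restores are emitted as V4SImode
   moves; see frame_emit_store and frame_emit_load below.  */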
1892
1893static rtx
1894frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1895{
1896 rtx reg = gen_rtx_REG (V4SImode, regno);
1897 rtx mem =
1898 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1899 return emit_insn (gen_movv4si (mem, reg));
1900}
1901
1902static rtx
1903frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1904{
1905 rtx reg = gen_rtx_REG (V4SImode, regno);
1906 rtx mem =
1907 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1908 return emit_insn (gen_movv4si (reg, mem));
1909}
1910
1911/* This happens after reload, so we need to expand it. */
1912static rtx
1913frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1914{
1915 rtx insn;
1916 if (satisfies_constraint_K (GEN_INT (imm)))
1917 {
1918 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1919 }
1920 else
1921 {
3072d30e 1922 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1923 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1924 if (REGNO (src) == REGNO (scratch))
1925 abort ();
1926 }
644459d0 1927 return insn;
1928}
1929
1930/* Return nonzero if this function is known to have a null epilogue. */
1931
1932int
1933direct_return (void)
1934{
1935 if (reload_completed)
1936 {
1937 if (cfun->static_chain_decl == 0
1938 && (spu_saved_regs_size ()
1939 + get_frame_size ()
abe32cce 1940 + crtl->outgoing_args_size
1941 + crtl->args.pretend_args_size == 0)
644459d0 1942 && current_function_is_leaf)
1943 return 1;
1944 }
1945 return 0;
1946}
1947
1948/*
1949 The stack frame looks like this:
1950 +-------------+
1951 | incoming |
a8e019fa 1952 | args |
1953 AP -> +-------------+
644459d0 1954 | $lr save |
1955 +-------------+
1956 prev SP | back chain |
1957 +-------------+
1958 | var args |
abe32cce 1959 | reg save | crtl->args.pretend_args_size bytes
644459d0 1960 +-------------+
1961 | ... |
1962 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1963 FP -> +-------------+
644459d0 1964 | ... |
a8e019fa 1965 | vars | get_frame_size() bytes
1966 HFP -> +-------------+
644459d0 1967 | ... |
1968 | outgoing |
abe32cce 1969 | args | crtl->outgoing_args_size bytes
644459d0 1970 +-------------+
1971 | $lr of next |
1972 | frame |
1973 +-------------+
a8e019fa 1974 | back chain |
1975 SP -> +-------------+
644459d0 1976
1977*/
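/* Illustrative example (assumed values, not from the original source):
   a non-leaf function with 32 bytes of locals, two registers to save
   (0x20 bytes), 48 bytes of outgoing args and no pretend args gets
   total_size = 32 + 0x20 + 48 + 0 in spu_expand_prologue, and because
   total_size is nonzero STACK_POINTER_OFFSET is added on top to cover
   the "$lr of next frame" and back chain slots at the bottom of the
   diagram above.  */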
1978void
1979spu_expand_prologue (void)
1980{
1981 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1982 HOST_WIDE_INT total_size;
1983 HOST_WIDE_INT saved_regs_size;
1984 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1985 rtx scratch_reg_0, scratch_reg_1;
1986 rtx insn, real;
1987
644459d0 1988 if (flag_pic && optimize == 0)
18d50ae6 1989 crtl->uses_pic_offset_table = 1;
644459d0 1990
1991 if (spu_naked_function_p (current_function_decl))
1992 return;
1993
1994 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1995 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1996
1997 saved_regs_size = spu_saved_regs_size ();
1998 total_size = size + saved_regs_size
abe32cce 1999 + crtl->outgoing_args_size
2000 + crtl->args.pretend_args_size;
644459d0 2001
2002 if (!current_function_is_leaf
18d50ae6 2003 || cfun->calls_alloca || total_size > 0)
644459d0 2004 total_size += STACK_POINTER_OFFSET;
2005
2006 /* Save this first because code after this might use the link
2007 register as a scratch register. */
2008 if (!current_function_is_leaf)
2009 {
2010 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2011 RTX_FRAME_RELATED_P (insn) = 1;
2012 }
2013
2014 if (total_size > 0)
2015 {
abe32cce 2016 offset = -crtl->args.pretend_args_size;
644459d0 2017 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2018 if (need_to_save_reg (regno, 1))
2019 {
2020 offset -= 16;
2021 insn = frame_emit_store (regno, sp_reg, offset);
2022 RTX_FRAME_RELATED_P (insn) = 1;
2023 }
2024 }
2025
18d50ae6 2026 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 2027 {
2028 rtx pic_reg = get_pic_reg ();
2029 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 2030 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 2031 }
2032
2033 if (total_size > 0)
2034 {
2035 if (flag_stack_check)
2036 {
d819917f 2037 /* We compare against total_size-1 because
644459d0 2038 ($sp >= total_size) <=> ($sp > total_size-1) */
2039 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2040 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2041 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2042 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2043 {
2044 emit_move_insn (scratch_v4si, size_v4si);
2045 size_v4si = scratch_v4si;
2046 }
2047 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2048 emit_insn (gen_vec_extractv4si
2049 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2050 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2051 }
2052
2053 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2054 the value of the previous $sp because we save it as the back
2055 chain. */
2056 if (total_size <= 2000)
2057 {
2058 /* In this case we save the back chain first. */
2059 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 2060 insn =
2061 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2062 }
644459d0 2063 else
2064 {
2065 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 2066 insn =
2067 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2068 }
2069 RTX_FRAME_RELATED_P (insn) = 1;
2070 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 2071 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 2072
2073 if (total_size > 2000)
2074 {
2075 /* Save the back chain ptr */
2076 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2077 }
2078
2079 if (frame_pointer_needed)
2080 {
2081 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2082 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2083 + crtl->outgoing_args_size;
644459d0 2084 /* Set the new frame_pointer */
d8dfeb55 2085 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2086 RTX_FRAME_RELATED_P (insn) = 1;
2087 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2088 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2089 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2090 }
2091 }
2092
a512540d 2093 if (flag_stack_usage)
2094 current_function_static_stack_size = total_size;
644459d0 2095}
2096
2097void
2098spu_expand_epilogue (bool sibcall_p)
2099{
2100 int size = get_frame_size (), offset, regno;
2101 HOST_WIDE_INT saved_regs_size, total_size;
2102 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2103 rtx jump, scratch_reg_0;
2104
644459d0 2105 if (spu_naked_function_p (current_function_decl))
2106 return;
2107
2108 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2109
2110 saved_regs_size = spu_saved_regs_size ();
2111 total_size = size + saved_regs_size
abe32cce 2112 + crtl->outgoing_args_size
2113 + crtl->args.pretend_args_size;
644459d0 2114
2115 if (!current_function_is_leaf
18d50ae6 2116 || cfun->calls_alloca || total_size > 0)
644459d0 2117 total_size += STACK_POINTER_OFFSET;
2118
2119 if (total_size > 0)
2120 {
18d50ae6 2121 if (cfun->calls_alloca)
644459d0 2122 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2123 else
2124 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2125
2126
2127 if (saved_regs_size > 0)
2128 {
abe32cce 2129 offset = -crtl->args.pretend_args_size;
644459d0 2130 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2131 if (need_to_save_reg (regno, 1))
2132 {
2133 offset -= 0x10;
2134 frame_emit_load (regno, sp_reg, offset);
2135 }
2136 }
2137 }
2138
2139 if (!current_function_is_leaf)
2140 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2141
2142 if (!sibcall_p)
2143 {
18b42941 2144 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2145 jump = emit_jump_insn (gen__return ());
2146 emit_barrier_after (jump);
2147 }
2148
644459d0 2149}
2150
2151rtx
2152spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2153{
2154 if (count != 0)
2155 return 0;
2156 /* This is inefficient because it ends up copying to a save-register
2157 which then gets saved even though $lr has already been saved. But
2158 it does generate better code for leaf functions and we don't need
2159 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2160 used for __builtin_return_address anyway, so maybe we don't care if
2161 it's inefficient. */
2162 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2163}
2164\f
2165
2166/* Given VAL, generate a constant appropriate for MODE.
2167 If MODE is a vector mode, every element will be VAL.
2168 For TImode, VAL will be zero extended to 128 bits. */
2169rtx
2170spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2171{
2172 rtx inner;
2173 rtvec v;
2174 int units, i;
2175
2176 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2177 || GET_MODE_CLASS (mode) == MODE_FLOAT
2178 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2179 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2180
2181 if (GET_MODE_CLASS (mode) == MODE_INT)
2182 return immed_double_const (val, 0, mode);
2183
2184 /* val is the bit representation of the float */
2185 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2186 return hwint_to_const_double (mode, val);
2187
2188 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2189 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2190 else
2191 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2192
2193 units = GET_MODE_NUNITS (mode);
2194
2195 v = rtvec_alloc (units);
2196
2197 for (i = 0; i < units; ++i)
2198 RTVEC_ELT (v, i) = inner;
2199
2200 return gen_rtx_CONST_VECTOR (mode, v);
2201}
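
/* Usage sketch: spu_const (V4SImode, 7) yields a CONST_VECTOR of four
   SImode 7s, while spu_const (SFmode, 0x3f800000) yields the
   CONST_DOUBLE for 1.0f, because VAL is taken as the bit pattern of
   the float.  */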
644459d0 2202
5474166e 2203/* Create a MODE vector constant from 4 ints. */
2204rtx
2205spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2206{
2207 unsigned char arr[16];
2208 arr[0] = (a >> 24) & 0xff;
2209 arr[1] = (a >> 16) & 0xff;
2210 arr[2] = (a >> 8) & 0xff;
2211 arr[3] = (a >> 0) & 0xff;
2212 arr[4] = (b >> 24) & 0xff;
2213 arr[5] = (b >> 16) & 0xff;
2214 arr[6] = (b >> 8) & 0xff;
2215 arr[7] = (b >> 0) & 0xff;
2216 arr[8] = (c >> 24) & 0xff;
2217 arr[9] = (c >> 16) & 0xff;
2218 arr[10] = (c >> 8) & 0xff;
2219 arr[11] = (c >> 0) & 0xff;
2220 arr[12] = (d >> 24) & 0xff;
2221 arr[13] = (d >> 16) & 0xff;
2222 arr[14] = (d >> 8) & 0xff;
2223 arr[15] = (d >> 0) & 0xff;
2224 return array_to_constant(mode, arr);
2225}
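
/* Usage sketch: spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) lays each 32-bit argument down in big-endian
   byte order, so the resulting constant holds the bytes 0x00 through
   0x0f in sequence.  */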
5a976006 2226\f
2227/* Branch hint support. */
5474166e 2228
644459d0 2229/* An array of these is used to propagate hints to predecessor blocks. */
2230struct spu_bb_info
2231{
5a976006 2232 rtx prop_jump; /* propagated from another block */
2233 int bb_index; /* the original block. */
644459d0 2234};
5a976006 2235static struct spu_bb_info *spu_bb_info;
644459d0 2236
5a976006 2237#define STOP_HINT_P(INSN) \
2238 (GET_CODE(INSN) == CALL_INSN \
2239 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2240 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2241
2242/* 1 when RTX is a hinted branch or its target. We keep track of
2243 what has been hinted so the safe-hint code can test it easily. */
2244#define HINTED_P(RTX) \
2245 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2246
2247/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2248#define SCHED_ON_EVEN_P(RTX) \
2249 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2250
2251/* Emit a nop for INSN such that the two will dual issue. This assumes
2252 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2253 We check for TImode to handle a MULTI1 insn which has dual issued its
2254 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2255 ADDR_VEC insns. */
2256static void
2257emit_nop_for_insn (rtx insn)
644459d0 2258{
5a976006 2259 int p;
2260 rtx new_insn;
2261 p = get_pipe (insn);
2262 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2263 new_insn = emit_insn_after (gen_lnop (), insn);
2264 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2265 {
5a976006 2266 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2267 PUT_MODE (new_insn, TImode);
2268 PUT_MODE (insn, VOIDmode);
2269 }
2270 else
2271 new_insn = emit_insn_after (gen_lnop (), insn);
2272 recog_memoized (new_insn);
2fbdf9ef 2273 INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
5a976006 2274}
2275
2276/* Insert nops in basic blocks to meet dual issue alignment
2277 requirements. Also make sure hbrp and hint instructions are at least
2278 one cycle apart, possibly inserting a nop. */
2279static void
2280pad_bb(void)
2281{
2282 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2283 int length;
2284 int addr;
2285
2286 /* This sets up INSN_ADDRESSES. */
2287 shorten_branches (get_insns ());
2288
2289 /* Keep track of length added by nops. */
2290 length = 0;
2291
2292 prev_insn = 0;
2293 insn = get_insns ();
2294 if (!active_insn_p (insn))
2295 insn = next_active_insn (insn);
2296 for (; insn; insn = next_insn)
2297 {
2298 next_insn = next_active_insn (insn);
2299 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2300 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2301 {
5a976006 2302 if (hbr_insn)
2303 {
2304 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2305 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2306 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2307 || (a1 - a0 == 4))
2308 {
2309 prev_insn = emit_insn_before (gen_lnop (), insn);
2310 PUT_MODE (prev_insn, GET_MODE (insn));
2311 PUT_MODE (insn, TImode);
2fbdf9ef 2312 INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
5a976006 2313 length += 4;
2314 }
2315 }
2316 hbr_insn = insn;
2317 }
2318 if (INSN_CODE (insn) == CODE_FOR_blockage)
2319 {
2320 if (GET_MODE (insn) == TImode)
2321 PUT_MODE (next_insn, TImode);
2322 insn = next_insn;
2323 next_insn = next_active_insn (insn);
2324 }
2325 addr = INSN_ADDRESSES (INSN_UID (insn));
2326 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2327 {
2328 if (((addr + length) & 7) != 0)
2329 {
2330 emit_nop_for_insn (prev_insn);
2331 length += 4;
2332 }
644459d0 2333 }
5a976006 2334 else if (GET_MODE (insn) == TImode
2335 && ((next_insn && GET_MODE (next_insn) != TImode)
2336 || get_attr_type (insn) == TYPE_MULTI0)
2337 && ((addr + length) & 7) != 0)
2338 {
2339 /* prev_insn will always be set because the first insn is
2340 always 8-byte aligned. */
2341 emit_nop_for_insn (prev_insn);
2342 length += 4;
2343 }
2344 prev_insn = insn;
644459d0 2345 }
644459d0 2346}
2347
5a976006 2348\f
2349/* Routines for branch hints. */
2350
644459d0 2351static void
5a976006 2352spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2353 int distance, sbitmap blocks)
644459d0 2354{
5a976006 2355 rtx branch_label = 0;
2356 rtx hint;
2357 rtx insn;
2358 rtx table;
644459d0 2359
2360 if (before == 0 || branch == 0 || target == 0)
2361 return;
2362
5a976006 2363 /* While scheduling we require hints to be no further than 600 bytes,
2364 so we need to enforce that here too. */
644459d0 2365 if (distance > 600)
2366 return;
2367
5a976006 2368 /* If BEFORE is a basic block note, emit the hint after the note. */
37534923 2369 if (NOTE_INSN_BASIC_BLOCK_P (before))
5a976006 2370 before = NEXT_INSN (before);
644459d0 2371
2372 branch_label = gen_label_rtx ();
2373 LABEL_NUSES (branch_label)++;
2374 LABEL_PRESERVE_P (branch_label) = 1;
2375 insn = emit_label_before (branch_label, branch);
2376 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2377 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2378
2379 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2380 recog_memoized (hint);
2fbdf9ef 2381 INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
5a976006 2382 HINTED_P (branch) = 1;
644459d0 2383
5a976006 2384 if (GET_CODE (target) == LABEL_REF)
2385 HINTED_P (XEXP (target, 0)) = 1;
2386 else if (tablejump_p (branch, 0, &table))
644459d0 2387 {
5a976006 2388 rtvec vec;
2389 int j;
2390 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2391 vec = XVEC (PATTERN (table), 0);
2392 else
2393 vec = XVEC (PATTERN (table), 1);
2394 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2395 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2396 }
5a976006 2397
2398 if (distance >= 588)
644459d0 2399 {
5a976006 2400 /* Make sure the hint isn't scheduled any earlier than this point,
2401 which could make it too far for the branch offset to fit. */
2fbdf9ef 2402 insn = emit_insn_before (gen_blockage (), hint);
2403 recog_memoized (insn);
2404 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2405 }
2406 else if (distance <= 8 * 4)
2407 {
2408 /* To guarantee at least 8 insns between the hint and branch we
2409 insert nops. */
2410 int d;
2411 for (d = distance; d < 8 * 4; d += 4)
2412 {
2413 insn =
2414 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2415 recog_memoized (insn);
2fbdf9ef 2416 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2417 }
2418
2419 /* Make sure any nops inserted aren't scheduled before the hint. */
2fbdf9ef 2420 insn = emit_insn_after (gen_blockage (), hint);
2421 recog_memoized (insn);
2422 INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
5a976006 2423
2424 /* Make sure any nops inserted aren't scheduled after the call. */
2425 if (CALL_P (branch) && distance < 8 * 4)
2fbdf9ef 2426 {
2427 insn = emit_insn_before (gen_blockage (), branch);
2428 recog_memoized (insn);
2429 INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
2430 }
644459d0 2431 }
644459d0 2432}
2433
2434/* Returns 0 if we don't want a hint for this branch. Otherwise return
2435 the rtx for the branch target. */
2436static rtx
2437get_branch_target (rtx branch)
2438{
2439 if (GET_CODE (branch) == JUMP_INSN)
2440 {
2441 rtx set, src;
2442
2443 /* Return statements */
2444 if (GET_CODE (PATTERN (branch)) == RETURN)
2445 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2446
2447 /* jump table */
2448 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2449 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2450 return 0;
2451
fcc31b99 2452 /* ASM GOTOs. */
604157f6 2453 if (extract_asm_operands (PATTERN (branch)) != NULL)
fcc31b99 2454 return NULL;
2455
644459d0 2456 set = single_set (branch);
2457 src = SET_SRC (set);
2458 if (GET_CODE (SET_DEST (set)) != PC)
2459 abort ();
2460
2461 if (GET_CODE (src) == IF_THEN_ELSE)
2462 {
2463 rtx lab = 0;
2464 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2465 if (note)
2466 {
2467 /* If the more probable case is not a fall through, then
2468 try a branch hint. */
2469 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2470 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2471 && GET_CODE (XEXP (src, 1)) != PC)
2472 lab = XEXP (src, 1);
2473 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2474 && GET_CODE (XEXP (src, 2)) != PC)
2475 lab = XEXP (src, 2);
2476 }
2477 if (lab)
2478 {
2479 if (GET_CODE (lab) == RETURN)
2480 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2481 return lab;
2482 }
2483 return 0;
2484 }
2485
2486 return src;
2487 }
2488 else if (GET_CODE (branch) == CALL_INSN)
2489 {
2490 rtx call;
2491 /* All of our call patterns are in a PARALLEL and the CALL is
2492 the first pattern in the PARALLEL. */
2493 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2494 abort ();
2495 call = XVECEXP (PATTERN (branch), 0, 0);
2496 if (GET_CODE (call) == SET)
2497 call = SET_SRC (call);
2498 if (GET_CODE (call) != CALL)
2499 abort ();
2500 return XEXP (XEXP (call, 0), 0);
2501 }
2502 return 0;
2503}
2504
5a976006 2505/* The special $hbr register is used to prevent the insn scheduler from
2506 moving hbr insns across instructions which invalidate them. It
2507 should only be used in a clobber, and this function searches for
2508 insns which clobber it. */
2509static bool
2510insn_clobbers_hbr (rtx insn)
2511{
2512 if (INSN_P (insn)
2513 && GET_CODE (PATTERN (insn)) == PARALLEL)
2514 {
2515 rtx parallel = PATTERN (insn);
2516 rtx clobber;
2517 int j;
2518 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2519 {
2520 clobber = XVECEXP (parallel, 0, j);
2521 if (GET_CODE (clobber) == CLOBBER
2522 && GET_CODE (XEXP (clobber, 0)) == REG
2523 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2524 return 1;
2525 }
2526 }
2527 return 0;
2528}
2529
2530/* Search up to 32 insns starting at FIRST:
2531 - at any kind of hinted branch, just return
2532 - at any unconditional branch in the first 15 insns, just return
2533 - at a call or indirect branch, after the first 15 insns, force it to
2534 an even address and return
2535 - at any unconditional branch, after the first 15 insns, force it to
2536 an even address.
2537 At the end of the search, insert an hbrp within 4 insns of FIRST,
2538 and an hbrp within 16 instructions of FIRST.
2539 */
644459d0 2540static void
5a976006 2541insert_hbrp_for_ilb_runout (rtx first)
644459d0 2542{
5a976006 2543 rtx insn, before_4 = 0, before_16 = 0;
2544 int addr = 0, length, first_addr = -1;
2545 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2546 int insert_lnop_after = 0;
2547 for (insn = first; insn; insn = NEXT_INSN (insn))
2548 if (INSN_P (insn))
2549 {
2550 if (first_addr == -1)
2551 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2552 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2553 length = get_attr_length (insn);
2554
2555 if (before_4 == 0 && addr + length >= 4 * 4)
2556 before_4 = insn;
2557 /* We test for 14 instructions because the first hbrp will add
2558 up to 2 instructions. */
2559 if (before_16 == 0 && addr + length >= 14 * 4)
2560 before_16 = insn;
2561
2562 if (INSN_CODE (insn) == CODE_FOR_hbr)
2563 {
2564 /* Make sure an hbrp is at least 2 cycles away from a hint.
2565 Insert an lnop after the hbrp when necessary. */
2566 if (before_4 == 0 && addr > 0)
2567 {
2568 before_4 = insn;
2569 insert_lnop_after |= 1;
2570 }
2571 else if (before_4 && addr <= 4 * 4)
2572 insert_lnop_after |= 1;
2573 if (before_16 == 0 && addr > 10 * 4)
2574 {
2575 before_16 = insn;
2576 insert_lnop_after |= 2;
2577 }
2578 else if (before_16 && addr <= 14 * 4)
2579 insert_lnop_after |= 2;
2580 }
644459d0 2581
5a976006 2582 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2583 {
2584 if (addr < hbrp_addr0)
2585 hbrp_addr0 = addr;
2586 else if (addr < hbrp_addr1)
2587 hbrp_addr1 = addr;
2588 }
644459d0 2589
5a976006 2590 if (CALL_P (insn) || JUMP_P (insn))
2591 {
2592 if (HINTED_P (insn))
2593 return;
2594
2595 /* Any branch after the first 15 insns should be on an even
2596 address to avoid a special case branch. There might be
2597 some nops and/or hbrps inserted, so we test after 10
2598 insns. */
2599 if (addr > 10 * 4)
2600 SCHED_ON_EVEN_P (insn) = 1;
2601 }
644459d0 2602
5a976006 2603 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2604 return;
2605
2606
2607 if (addr + length >= 32 * 4)
644459d0 2608 {
5a976006 2609 gcc_assert (before_4 && before_16);
2610 if (hbrp_addr0 > 4 * 4)
644459d0 2611 {
5a976006 2612 insn =
2613 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2614 recog_memoized (insn);
2fbdf9ef 2615 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2616 INSN_ADDRESSES_NEW (insn,
2617 INSN_ADDRESSES (INSN_UID (before_4)));
2618 PUT_MODE (insn, GET_MODE (before_4));
2619 PUT_MODE (before_4, TImode);
2620 if (insert_lnop_after & 1)
644459d0 2621 {
5a976006 2622 insn = emit_insn_before (gen_lnop (), before_4);
2623 recog_memoized (insn);
2fbdf9ef 2624 INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
5a976006 2625 INSN_ADDRESSES_NEW (insn,
2626 INSN_ADDRESSES (INSN_UID (before_4)));
2627 PUT_MODE (insn, TImode);
644459d0 2628 }
644459d0 2629 }
5a976006 2630 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2631 && hbrp_addr1 > 16 * 4)
644459d0 2632 {
5a976006 2633 insn =
2634 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2635 recog_memoized (insn);
2fbdf9ef 2636 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2637 INSN_ADDRESSES_NEW (insn,
2638 INSN_ADDRESSES (INSN_UID (before_16)));
2639 PUT_MODE (insn, GET_MODE (before_16));
2640 PUT_MODE (before_16, TImode);
2641 if (insert_lnop_after & 2)
644459d0 2642 {
5a976006 2643 insn = emit_insn_before (gen_lnop (), before_16);
2644 recog_memoized (insn);
2fbdf9ef 2645 INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
5a976006 2646 INSN_ADDRESSES_NEW (insn,
2647 INSN_ADDRESSES (INSN_UID
2648 (before_16)));
2649 PUT_MODE (insn, TImode);
644459d0 2650 }
2651 }
5a976006 2652 return;
644459d0 2653 }
644459d0 2654 }
5a976006 2655 else if (BARRIER_P (insn))
2656 return;
644459d0 2657
644459d0 2658}
5a976006 2659
2660/* The SPU might hang when it executes 48 inline instructions after a
2661 hinted branch jumps to its hinted target. The beginning of a
2662 function and the return from a call might have been hinted, and must
2663 be handled as well. To prevent a hang we insert 2 hbrps. The first
2664 should be within 6 insns of the branch target. The second should be
2665 within 22 insns of the branch target. When determining if hbrps are
2666 necessary, we look for only 32 inline instructions, because up to
2667 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2668 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2669static void
5a976006 2670insert_hbrp (void)
644459d0 2671{
5a976006 2672 rtx insn;
2673 if (TARGET_SAFE_HINTS)
644459d0 2674 {
5a976006 2675 shorten_branches (get_insns ());
2676 /* Insert hbrp at beginning of function */
2677 insn = next_active_insn (get_insns ());
2678 if (insn)
2679 insert_hbrp_for_ilb_runout (insn);
2680 /* Insert hbrp after hinted targets. */
2681 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2682 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2683 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2684 }
644459d0 2685}
2686
5a976006 2687static int in_spu_reorg;
2688
2689/* Insert branch hints. There are no branch optimizations after this
2690 pass, so it's safe to set our branch hints now. */
644459d0 2691static void
5a976006 2692spu_machine_dependent_reorg (void)
644459d0 2693{
5a976006 2694 sbitmap blocks;
2695 basic_block bb;
2696 rtx branch, insn;
2697 rtx branch_target = 0;
2698 int branch_addr = 0, insn_addr, required_dist = 0;
2699 int i;
2700 unsigned int j;
644459d0 2701
5a976006 2702 if (!TARGET_BRANCH_HINTS || optimize == 0)
2703 {
2704 /* We still do it for unoptimized code because an external
2705 function might have hinted a call or return. */
2706 insert_hbrp ();
2707 pad_bb ();
2708 return;
2709 }
644459d0 2710
5a976006 2711 blocks = sbitmap_alloc (last_basic_block);
2712 sbitmap_zero (blocks);
644459d0 2713
5a976006 2714 in_spu_reorg = 1;
2715 compute_bb_for_insn ();
2716
2717 compact_blocks ();
2718
2719 spu_bb_info =
2720 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2721 sizeof (struct spu_bb_info));
2722
2723 /* We need exact insn addresses and lengths. */
2724 shorten_branches (get_insns ());
2725
2726 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2727 {
5a976006 2728 bb = BASIC_BLOCK (i);
2729 branch = 0;
2730 if (spu_bb_info[i].prop_jump)
644459d0 2731 {
5a976006 2732 branch = spu_bb_info[i].prop_jump;
2733 branch_target = get_branch_target (branch);
2734 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2735 required_dist = spu_hint_dist;
2736 }
2737 /* Search from end of a block to beginning. In this loop, find
2738 jumps which need a branch hint, and emit the hint only when:
2739 - it's an indirect branch and we're at the insn which sets
2740 the register
2741 - we're at an insn that will invalidate the hint. e.g., a
2742 call, another hint insn, inline asm that clobbers $hbr, and
2743 some inlined operations (divmodsi4). Don't consider jumps
2744 because they are only at the end of a block and are
2745 considered when we are deciding whether to propagate
2746 - we're getting too far away from the branch. The hbr insns
2747 only have a signed 10 bit offset
2748 We go back as far as possible so the branch will be considered
2749 for propagation when we get to the beginning of the block. */
2750 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2751 {
2752 if (INSN_P (insn))
2753 {
2754 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2755 if (branch
2756 && ((GET_CODE (branch_target) == REG
2757 && set_of (branch_target, insn) != NULL_RTX)
2758 || insn_clobbers_hbr (insn)
2759 || branch_addr - insn_addr > 600))
2760 {
2761 rtx next = NEXT_INSN (insn);
2762 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2763 if (insn != BB_END (bb)
2764 && branch_addr - next_addr >= required_dist)
2765 {
2766 if (dump_file)
2767 fprintf (dump_file,
2768 "hint for %i in block %i before %i\n",
2769 INSN_UID (branch), bb->index,
2770 INSN_UID (next));
2771 spu_emit_branch_hint (next, branch, branch_target,
2772 branch_addr - next_addr, blocks);
2773 }
2774 branch = 0;
2775 }
2776
2777 /* JUMP_P will only be true at the end of a block. When
2778 branch is already set it means we've previously decided
2779 to propagate a hint for that branch into this block. */
2780 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2781 {
2782 branch = 0;
2783 if ((branch_target = get_branch_target (insn)))
2784 {
2785 branch = insn;
2786 branch_addr = insn_addr;
2787 required_dist = spu_hint_dist;
2788 }
2789 }
2790 }
2791 if (insn == BB_HEAD (bb))
2792 break;
2793 }
2794
2795 if (branch)
2796 {
2797 /* If we haven't emitted a hint for this branch yet, it might
2798 be profitable to emit it in one of the predecessor blocks,
2799 especially for loops. */
2800 rtx bbend;
2801 basic_block prev = 0, prop = 0, prev2 = 0;
2802 int loop_exit = 0, simple_loop = 0;
2803 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2804
2805 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2806 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2807 prev = EDGE_PRED (bb, j)->src;
2808 else
2809 prev2 = EDGE_PRED (bb, j)->src;
2810
2811 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2812 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2813 loop_exit = 1;
2814 else if (EDGE_SUCC (bb, j)->dest == bb)
2815 simple_loop = 1;
2816
2817 /* If this branch is a loop exit then propagate to previous
2818 fallthru block. This catches the cases when it is a simple
2819 loop or when there is an initial branch into the loop. */
2820 if (prev && (loop_exit || simple_loop)
2821 && prev->loop_depth <= bb->loop_depth)
2822 prop = prev;
2823
2824 /* If there is only one adjacent predecessor, don't propagate
2825 outside this loop. This loop_depth test isn't perfect, but
2826 I'm not sure the loop_father member is valid at this point. */
2827 else if (prev && single_pred_p (bb)
2828 && prev->loop_depth == bb->loop_depth)
2829 prop = prev;
2830
2831 /* If this is the JOIN block of a simple IF-THEN then
2832 propagate the hint to the HEADER block. */
2833 else if (prev && prev2
2834 && EDGE_COUNT (bb->preds) == 2
2835 && EDGE_COUNT (prev->preds) == 1
2836 && EDGE_PRED (prev, 0)->src == prev2
2837 && prev2->loop_depth == bb->loop_depth
2838 && GET_CODE (branch_target) != REG)
2839 prop = prev;
2840
2841 /* Don't propagate when:
2842 - this is a simple loop and the hint would be too far
2843 - this is not a simple loop and there are 16 insns in
2844 this block already
2845 - the predecessor block ends in a branch that will be
2846 hinted
2847 - the predecessor block ends in an insn that invalidates
2848 the hint */
2849 if (prop
2850 && prop->index >= 0
2851 && (bbend = BB_END (prop))
2852 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2853 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2854 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2855 {
2856 if (dump_file)
2857 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2858 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2859 bb->index, prop->index, bb->loop_depth,
2860 INSN_UID (branch), loop_exit, simple_loop,
2861 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2862
2863 spu_bb_info[prop->index].prop_jump = branch;
2864 spu_bb_info[prop->index].bb_index = i;
2865 }
2866 else if (branch_addr - next_addr >= required_dist)
2867 {
2868 if (dump_file)
2869 fprintf (dump_file, "hint for %i in block %i before %i\n",
2870 INSN_UID (branch), bb->index,
2871 INSN_UID (NEXT_INSN (insn)));
2872 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2873 branch_addr - next_addr, blocks);
2874 }
2875 branch = 0;
644459d0 2876 }
644459d0 2877 }
5a976006 2878 free (spu_bb_info);
644459d0 2879
5a976006 2880 if (!sbitmap_empty_p (blocks))
2881 find_many_sub_basic_blocks (blocks);
2882
2883 /* We have to schedule to make sure alignment is ok. */
2884 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2885
2886 /* The hints need to be scheduled, so call the scheduler again. */
2887 schedule_insns ();
2fbdf9ef 2888 df_finish_pass (true);
5a976006 2889
2890 insert_hbrp ();
2891
2892 pad_bb ();
2893
8f1d58ad 2894 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2895 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2896 {
2897 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2898 between its branch label and the branch. We don't move the
2899 label because GCC expects it at the beginning of the block. */
2900 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2901 rtx label_ref = XVECEXP (unspec, 0, 0);
2902 rtx label = XEXP (label_ref, 0);
2903 rtx branch;
2904 int offset = 0;
2905 for (branch = NEXT_INSN (label);
2906 !JUMP_P (branch) && !CALL_P (branch);
2907 branch = NEXT_INSN (branch))
2908 if (NONJUMP_INSN_P (branch))
2909 offset += get_attr_length (branch);
2910 if (offset > 0)
2911 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2912 }
5a976006 2913
2914 if (spu_flag_var_tracking)
644459d0 2915 {
5a976006 2916 df_analyze ();
2917 timevar_push (TV_VAR_TRACKING);
2918 variable_tracking_main ();
2919 timevar_pop (TV_VAR_TRACKING);
2920 df_finish_pass (false);
644459d0 2921 }
5a976006 2922
2923 free_bb_for_insn ();
2924
2925 in_spu_reorg = 0;
644459d0 2926}
2927\f
2928
2929/* Insn scheduling routines, primarily for dual issue. */
2930static int
2931spu_sched_issue_rate (void)
2932{
2933 return 2;
2934}
2935
2936static int
5a976006 2937uses_ls_unit(rtx insn)
644459d0 2938{
5a976006 2939 rtx set = single_set (insn);
2940 if (set != 0
2941 && (GET_CODE (SET_DEST (set)) == MEM
2942 || GET_CODE (SET_SRC (set)) == MEM))
2943 return 1;
2944 return 0;
644459d0 2945}
2946
2947static int
2948get_pipe (rtx insn)
2949{
2950 enum attr_type t;
2951 /* Handle inline asm */
2952 if (INSN_CODE (insn) == -1)
2953 return -1;
2954 t = get_attr_type (insn);
2955 switch (t)
2956 {
2957 case TYPE_CONVERT:
2958 return -2;
2959 case TYPE_MULTI0:
2960 return -1;
2961
2962 case TYPE_FX2:
2963 case TYPE_FX3:
2964 case TYPE_SPR:
2965 case TYPE_NOP:
2966 case TYPE_FXB:
2967 case TYPE_FPD:
2968 case TYPE_FP6:
2969 case TYPE_FP7:
644459d0 2970 return 0;
2971
2972 case TYPE_LNOP:
2973 case TYPE_SHUF:
2974 case TYPE_LOAD:
2975 case TYPE_STORE:
2976 case TYPE_BR:
2977 case TYPE_MULTI1:
2978 case TYPE_HBR:
5a976006 2979 case TYPE_IPREFETCH:
644459d0 2980 return 1;
2981 default:
2982 abort ();
2983 }
2984}
2985
5a976006 2986
2987/* haifa-sched.c has a static variable that keeps track of the current
2988 cycle. It is passed to spu_sched_reorder, and we record it here for
2989 use by spu_sched_variable_issue. It won't be accurate if the
2990 scheduler updates its clock_var between the two calls. */
2991static int clock_var;
2992
2993/* This is used to keep track of insn alignment. Set to 0 at the
2994 beginning of each block and increased by the "length" attr of each
2995 insn scheduled. */
2996static int spu_sched_length;
2997
2998/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2999 ready list appropriately in spu_sched_reorder(). */
3000static int pipe0_clock;
3001static int pipe1_clock;
3002
3003static int prev_clock_var;
3004
3005static int prev_priority;
3006
3007/* The SPU needs to load the next ilb sometime during the execution of
3008 the previous ilb. There is a potential conflict if every cycle has a
3009 load or store. To avoid the conflict we make sure the load/store
3010 unit is free for at least one cycle during the execution of insns in
3011 the previous ilb. */
3012static int spu_ls_first;
3013static int prev_ls_clock;
3014
3015static void
3016spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3017 int max_ready ATTRIBUTE_UNUSED)
3018{
3019 spu_sched_length = 0;
3020}
3021
3022static void
3023spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3024 int max_ready ATTRIBUTE_UNUSED)
3025{
3026 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3027 {
3028 /* When any block might be at least 8-byte aligned, assume they
3029 will all be at least 8-byte aligned to make sure dual issue
3030 works out correctly. */
3031 spu_sched_length = 0;
3032 }
3033 spu_ls_first = INT_MAX;
3034 clock_var = -1;
3035 prev_ls_clock = -1;
3036 pipe0_clock = -1;
3037 pipe1_clock = -1;
3038 prev_clock_var = -1;
3039 prev_priority = -1;
3040}
3041
644459d0 3042static int
5a976006 3043spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3044 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 3045{
5a976006 3046 int len;
3047 int p;
644459d0 3048 if (GET_CODE (PATTERN (insn)) == USE
3049 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 3050 || (len = get_attr_length (insn)) == 0)
3051 return more;
3052
3053 spu_sched_length += len;
3054
3055 /* Reset on inline asm */
3056 if (INSN_CODE (insn) == -1)
3057 {
3058 spu_ls_first = INT_MAX;
3059 pipe0_clock = -1;
3060 pipe1_clock = -1;
3061 return 0;
3062 }
3063 p = get_pipe (insn);
3064 if (p == 0)
3065 pipe0_clock = clock_var;
3066 else
3067 pipe1_clock = clock_var;
3068
3069 if (in_spu_reorg)
3070 {
3071 if (clock_var - prev_ls_clock > 1
3072 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3073 spu_ls_first = INT_MAX;
3074 if (uses_ls_unit (insn))
3075 {
3076 if (spu_ls_first == INT_MAX)
3077 spu_ls_first = spu_sched_length;
3078 prev_ls_clock = clock_var;
3079 }
3080
3081 /* The scheduler hasn't inserted the nop, but we will later on.
3082 Include those nops in spu_sched_length. */
3083 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3084 spu_sched_length += 4;
3085 prev_clock_var = clock_var;
3086
3087 /* more is -1 when called from spu_sched_reorder for new insns
3088 that don't have INSN_PRIORITY */
3089 if (more >= 0)
3090 prev_priority = INSN_PRIORITY (insn);
3091 }
3092
3093 /* Always try issuing more insns. spu_sched_reorder will decide
3094 when the cycle should be advanced. */
3095 return 1;
3096}
3097
3098/* This function is called for both TARGET_SCHED_REORDER and
3099 TARGET_SCHED_REORDER2. */
3100static int
3101spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3102 rtx *ready, int *nreadyp, int clock)
3103{
3104 int i, nready = *nreadyp;
3105 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3106 rtx insn;
3107
3108 clock_var = clock;
3109
3110 if (nready <= 0 || pipe1_clock >= clock)
3111 return 0;
3112
3113 /* Find any rtl insns that don't generate assembly insns and schedule
3114 them first. */
3115 for (i = nready - 1; i >= 0; i--)
3116 {
3117 insn = ready[i];
3118 if (INSN_CODE (insn) == -1
3119 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3120 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3121 {
3122 ready[i] = ready[nready - 1];
3123 ready[nready - 1] = insn;
3124 return 1;
3125 }
3126 }
3127
3128 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3129 for (i = 0; i < nready; i++)
3130 if (INSN_CODE (ready[i]) != -1)
3131 {
3132 insn = ready[i];
3133 switch (get_attr_type (insn))
3134 {
3135 default:
3136 case TYPE_MULTI0:
3137 case TYPE_CONVERT:
3138 case TYPE_FX2:
3139 case TYPE_FX3:
3140 case TYPE_SPR:
3141 case TYPE_NOP:
3142 case TYPE_FXB:
3143 case TYPE_FPD:
3144 case TYPE_FP6:
3145 case TYPE_FP7:
3146 pipe_0 = i;
3147 break;
3148 case TYPE_LOAD:
3149 case TYPE_STORE:
3150 pipe_ls = i;
3151 case TYPE_LNOP:
3152 case TYPE_SHUF:
3153 case TYPE_BR:
3154 case TYPE_MULTI1:
3155 case TYPE_HBR:
3156 pipe_1 = i;
3157 break;
3158 case TYPE_IPREFETCH:
3159 pipe_hbrp = i;
3160 break;
3161 }
3162 }
3163
3164 /* In the first scheduling phase, schedule loads and stores together
3165 to increase the chance they will get merged during postreload CSE. */
3166 if (!reload_completed && pipe_ls >= 0)
3167 {
3168 insn = ready[pipe_ls];
3169 ready[pipe_ls] = ready[nready - 1];
3170 ready[nready - 1] = insn;
3171 return 1;
3172 }
3173
3174 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3175 if (pipe_hbrp >= 0)
3176 pipe_1 = pipe_hbrp;
3177
3178 /* When we have loads/stores in every cycle of the last 15 insns and
3179 we are about to schedule another load/store, emit an hbrp insn
3180 instead. */
3181 if (in_spu_reorg
3182 && spu_sched_length - spu_ls_first >= 4 * 15
3183 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3184 {
3185 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3186 recog_memoized (insn);
3187 if (pipe0_clock < clock)
3188 PUT_MODE (insn, TImode);
3189 spu_sched_variable_issue (file, verbose, insn, -1);
3190 return 0;
3191 }
3192
3193 /* In general, we want to emit nops to increase dual issue, but dual
3194 issue isn't faster when one of the insns could be scheduled later
3195 without affecting the critical path. We look at INSN_PRIORITY to
3196 make a good guess, but it isn't perfect so -mdual-nops=n can be
3197 used to tune it. */
3198 if (in_spu_reorg && spu_dual_nops < 10)
3199 {
3200 /* When we are at an even address and we are not issuing nops to
3201 improve scheduling then we need to advance the cycle. */
3202 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3203 && (spu_dual_nops == 0
3204 || (pipe_1 != -1
3205 && prev_priority >
3206 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3207 return 0;
3208
3209 /* When at an odd address, schedule the highest priority insn
3210 without considering the pipeline. */
3211 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3212 && (spu_dual_nops == 0
3213 || (prev_priority >
3214 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3215 return 1;
3216 }
3217
3218
3219 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3220 pipe0 insn in the ready list, schedule it. */
3221 if (pipe0_clock < clock && pipe_0 >= 0)
3222 schedule_i = pipe_0;
3223
3224 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3225 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3226 else
3227 schedule_i = pipe_1;
3228
3229 if (schedule_i > -1)
3230 {
3231 insn = ready[schedule_i];
3232 ready[schedule_i] = ready[nready - 1];
3233 ready[nready - 1] = insn;
3234 return 1;
3235 }
3236 return 0;
644459d0 3237}
3238
3239/* INSN is dependent on DEP_INSN. */
3240static int
5a976006 3241spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3242{
5a976006 3243 rtx set;
3244
3245 /* The blockage pattern is used to prevent instructions from being
3246 moved across it and has no cost. */
3247 if (INSN_CODE (insn) == CODE_FOR_blockage
3248 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3249 return 0;
3250
9d98604b 3251 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3252 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3253 return 0;
3254
3255 /* Make sure hbrps are spread out. */
3256 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3257 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3258 return 8;
3259
3260 /* Make sure hints and hbrps are 2 cycles apart. */
3261 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3262 || INSN_CODE (insn) == CODE_FOR_hbr)
3263 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3264 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3265 return 2;
3266
3267 /* An hbrp has no real dependency on other insns. */
3268 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3269 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3270 return 0;
3271
3272 /* Assuming that it is unlikely an argument register will be used in
3273 the first cycle of the called function, we reduce the cost for
3274 slightly better scheduling of dep_insn. When not hinted, the
3275 mispredicted branch would hide the cost as well. */
3276 if (CALL_P (insn))
3277 {
3278 rtx target = get_branch_target (insn);
3279 if (GET_CODE (target) != REG || !set_of (target, insn))
3280 return cost - 2;
3281 return cost;
3282 }
3283
3284 /* And when returning from a function, let's assume the return values
3285 are completed sooner too. */
3286 if (CALL_P (dep_insn))
644459d0 3287 return cost - 2;
5a976006 3288
3289 /* Make sure an instruction that loads from the back chain is scheduled
3290 away from the return instruction so a hint is more likely to get
3291 issued. */
3292 if (INSN_CODE (insn) == CODE_FOR__return
3293 && (set = single_set (dep_insn))
3294 && GET_CODE (SET_DEST (set)) == REG
3295 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3296 return 20;
3297
644459d0 3298 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3299 scheduler makes every insn in a block anti-dependent on the final
3300 jump_insn. We adjust here so higher cost insns will get scheduled
3301 earlier. */
5a976006 3302 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3303 return insn_cost (dep_insn) - 3;
5a976006 3304
644459d0 3305 return cost;
3306}
3307\f
3308/* Create a CONST_DOUBLE from a string. */
3309struct rtx_def *
3310spu_float_const (const char *string, enum machine_mode mode)
3311{
3312 REAL_VALUE_TYPE value;
3313 value = REAL_VALUE_ATOF (string, mode);
3314 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3315}
3316
644459d0 3317int
3318spu_constant_address_p (rtx x)
3319{
3320 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3321 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3322 || GET_CODE (x) == HIGH);
3323}
3324
3325static enum spu_immediate
3326which_immediate_load (HOST_WIDE_INT val)
3327{
3328 gcc_assert (val == trunc_int_for_mode (val, SImode));
3329
3330 if (val >= -0x8000 && val <= 0x7fff)
3331 return SPU_IL;
3332 if (val >= 0 && val <= 0x3ffff)
3333 return SPU_ILA;
3334 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3335 return SPU_ILH;
3336 if ((val & 0xffff) == 0)
3337 return SPU_ILHU;
3338
3339 return SPU_NONE;
3340}
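
/* Worked examples: 0x00001234 fits the signed 16-bit range, so SPU_IL;
   0x00012345 only fits the 18-bit unsigned range, so SPU_ILA;
   0xabcdabcd (sign-extended) has two identical halfwords, so SPU_ILH;
   and 0x12340000 has a zero low halfword, so SPU_ILHU.  */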
3341
dea01258 3342/* Return true when OP can be loaded by one of the il instructions, or
3343 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3344int
3345immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3346{
3347 if (CONSTANT_P (op))
3348 {
3349 enum immediate_class c = classify_immediate (op, mode);
5df189be 3350 return c == IC_IL1 || c == IC_IL1s
3072d30e 3351 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3352 }
3353 return 0;
3354}
3355
3356/* Return true if the first SIZE bytes of ARR form a constant that can be
3357 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3358 are set to the size and offset to use for the instruction. */
3359static int
3360cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3361{
3362 int cpat, run, i, start;
3363 cpat = 1;
3364 run = 0;
3365 start = -1;
3366 for (i = 0; i < size && cpat; i++)
3367 if (arr[i] != i+16)
3368 {
3369 if (!run)
3370 {
3371 start = i;
3372 if (arr[i] == 3)
3373 run = 1;
3374 else if (arr[i] == 2 && arr[i+1] == 3)
3375 run = 2;
3376 else if (arr[i] == 0)
3377 {
3378 while (arr[i+run] == run && i+run < 16)
3379 run++;
3380 if (run != 4 && run != 8)
3381 cpat = 0;
3382 }
3383 else
3384 cpat = 0;
3385 if ((i & (run-1)) != 0)
3386 cpat = 0;
3387 i += run;
3388 }
3389 else
3390 cpat = 0;
3391 }
b01a6dc3 3392 if (cpat && (run || size < 16))
dea01258 3393 {
3394 if (run == 0)
3395 run = 1;
3396 if (prun)
3397 *prun = run;
3398 if (pstart)
3399 *pstart = start == -1 ? 16-run : start;
3400 return 1;
3401 }
3402 return 0;
3403}
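
/* Illustrative note: ARR is compared against the identity pattern
   arr[i] == i + 16, and a single aligned run of generate-controls
   bytes is allowed.  For example {16,17,18,19, 0,1,2,3, 24,...,31}
   is accepted with *prun = 4 and *pstart = 4, matching the pattern
   cwd generates for inserting a word at byte offset 4.  */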
3404
3405/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3406 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3407static enum immediate_class
3408classify_immediate (rtx op, enum machine_mode mode)
644459d0 3409{
3410 HOST_WIDE_INT val;
3411 unsigned char arr[16];
5df189be 3412 int i, j, repeated, fsmbi, repeat;
dea01258 3413
3414 gcc_assert (CONSTANT_P (op));
3415
644459d0 3416 if (GET_MODE (op) != VOIDmode)
3417 mode = GET_MODE (op);
3418
dea01258 3419 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3420 if (!flag_pic
3421 && mode == V4SImode
dea01258 3422 && GET_CODE (op) == CONST_VECTOR
3423 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3424 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3425 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3426 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3427 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3428 op = CONST_VECTOR_ELT (op, 0);
644459d0 3429
dea01258 3430 switch (GET_CODE (op))
3431 {
3432 case SYMBOL_REF:
3433 case LABEL_REF:
3434 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3435
dea01258 3436 case CONST:
0cfc65d4 3437 /* We can never know if the resulting address fits in 18 bits and can be
3438 loaded with ila. For now, assume the address will not overflow if
3439 the displacement is "small" (fits 'K' constraint). */
3440 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3441 {
3442 rtx sym = XEXP (XEXP (op, 0), 0);
3443 rtx cst = XEXP (XEXP (op, 0), 1);
3444
3445 if (GET_CODE (sym) == SYMBOL_REF
3446 && GET_CODE (cst) == CONST_INT
3447 && satisfies_constraint_K (cst))
3448 return IC_IL1s;
3449 }
3450 return IC_IL2s;
644459d0 3451
dea01258 3452 case HIGH:
3453 return IC_IL1s;
3454
3455 case CONST_VECTOR:
3456 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3457 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3458 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3459 return IC_POOL;
3460 /* Fall through. */
3461
3462 case CONST_INT:
3463 case CONST_DOUBLE:
3464 constant_to_array (mode, op, arr);
644459d0 3465
dea01258 3466 /* Check that each 4-byte slot is identical. */
3467 repeated = 1;
3468 for (i = 4; i < 16; i += 4)
3469 for (j = 0; j < 4; j++)
3470 if (arr[j] != arr[i + j])
3471 repeated = 0;
3472
3473 if (repeated)
3474 {
3475 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3476 val = trunc_int_for_mode (val, SImode);
3477
3478 if (which_immediate_load (val) != SPU_NONE)
3479 return IC_IL1;
3480 }
3481
3482 /* Any mode of 2 bytes or smaller can be loaded with an il
3483 instruction. */
3484 gcc_assert (GET_MODE_SIZE (mode) > 2);
3485
3486 fsmbi = 1;
5df189be 3487 repeat = 0;
dea01258 3488 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3489 if (arr[i] != 0 && repeat == 0)
3490 repeat = arr[i];
3491 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3492 fsmbi = 0;
3493 if (fsmbi)
5df189be 3494 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3495
3496 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3497 return IC_CPAT;
3498
3499 if (repeated)
3500 return IC_IL2;
3501
3502 return IC_POOL;
3503 default:
3504 break;
3505 }
3506 gcc_unreachable ();
644459d0 3507}
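
/* Worked examples: a V4SImode vector with all four elements equal to
   0x12341234 is repeated in every 4-byte slot and the value loads with
   a single ilh, so it is IC_IL1; a SYMBOL_REF is IC_IL1s (IC_IL2s with
   TARGET_LARGE_MEM); a constant that is not IC_IL1 but whose nonzero
   bytes are all 0xff is IC_FSMBI; anything left over falls back to the
   constant pool as IC_POOL.  */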
3508
3509static enum spu_immediate
3510which_logical_immediate (HOST_WIDE_INT val)
3511{
3512 gcc_assert (val == trunc_int_for_mode (val, SImode));
3513
3514 if (val >= -0x200 && val <= 0x1ff)
3515 return SPU_ORI;
3516 if (val >= 0 && val <= 0xffff)
3517 return SPU_IOHL;
3518 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3519 {
3520 val = trunc_int_for_mode (val, HImode);
3521 if (val >= -0x200 && val <= 0x1ff)
3522 return SPU_ORHI;
3523 if ((val & 0xff) == ((val >> 8) & 0xff))
3524 {
3525 val = trunc_int_for_mode (val, QImode);
3526 if (val >= -0x200 && val <= 0x1ff)
3527 return SPU_ORBI;
3528 }
3529 }
3530 return SPU_NONE;
3531}
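
/* Worked examples: 0x000001ff is within the signed 10-bit range, so
   SPU_ORI; 0x0000ffff is not, but fits in 16 unsigned bits, so
   SPU_IOHL; 0xfffefffe repeats per halfword and truncates to -2, so
   SPU_ORHI; 0x12345678 matches none of these and yields SPU_NONE.  */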
3532
5df189be 3533/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3534 CONST_DOUBLEs. */
3535static int
3536const_vector_immediate_p (rtx x)
3537{
3538 int i;
3539 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3540 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3541 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3542 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3543 return 0;
3544 return 1;
3545}
3546
644459d0 3547int
3548logical_immediate_p (rtx op, enum machine_mode mode)
3549{
3550 HOST_WIDE_INT val;
3551 unsigned char arr[16];
3552 int i, j;
3553
3554 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3555 || GET_CODE (op) == CONST_VECTOR);
3556
5df189be 3557 if (GET_CODE (op) == CONST_VECTOR
3558 && !const_vector_immediate_p (op))
3559 return 0;
3560
644459d0 3561 if (GET_MODE (op) != VOIDmode)
3562 mode = GET_MODE (op);
3563
3564 constant_to_array (mode, op, arr);
3565
3566 /* Check that bytes are repeated. */
3567 for (i = 4; i < 16; i += 4)
3568 for (j = 0; j < 4; j++)
3569 if (arr[j] != arr[i + j])
3570 return 0;
3571
3572 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3573 val = trunc_int_for_mode (val, SImode);
3574
3575 i = which_logical_immediate (val);
3576 return i != SPU_NONE && i != SPU_IOHL;
3577}
3578
3579int
3580iohl_immediate_p (rtx op, enum machine_mode mode)
3581{
3582 HOST_WIDE_INT val;
3583 unsigned char arr[16];
3584 int i, j;
3585
3586 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3587 || GET_CODE (op) == CONST_VECTOR);
3588
5df189be 3589 if (GET_CODE (op) == CONST_VECTOR
3590 && !const_vector_immediate_p (op))
3591 return 0;
3592
644459d0 3593 if (GET_MODE (op) != VOIDmode)
3594 mode = GET_MODE (op);
3595
3596 constant_to_array (mode, op, arr);
3597
3598 /* Check that bytes are repeated. */
3599 for (i = 4; i < 16; i += 4)
3600 for (j = 0; j < 4; j++)
3601 if (arr[j] != arr[i + j])
3602 return 0;
3603
3604 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3605 val = trunc_int_for_mode (val, SImode);
3606
3607 return val >= 0 && val <= 0xffff;
3608}
3609
3610int
3611arith_immediate_p (rtx op, enum machine_mode mode,
3612 HOST_WIDE_INT low, HOST_WIDE_INT high)
3613{
3614 HOST_WIDE_INT val;
3615 unsigned char arr[16];
3616 int bytes, i, j;
3617
3618 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3619 || GET_CODE (op) == CONST_VECTOR);
3620
5df189be 3621 if (GET_CODE (op) == CONST_VECTOR
3622 && !const_vector_immediate_p (op))
3623 return 0;
3624
644459d0 3625 if (GET_MODE (op) != VOIDmode)
3626 mode = GET_MODE (op);
3627
3628 constant_to_array (mode, op, arr);
3629
3630 if (VECTOR_MODE_P (mode))
3631 mode = GET_MODE_INNER (mode);
3632
3633 bytes = GET_MODE_SIZE (mode);
3634 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3635
3636 /* Check that bytes are repeated. */
3637 for (i = bytes; i < 16; i += bytes)
3638 for (j = 0; j < bytes; j++)
3639 if (arr[j] != arr[i + j])
3640 return 0;
3641
3642 val = arr[0];
3643 for (j = 1; j < bytes; j++)
3644 val = (val << 8) | arr[j];
3645
3646 val = trunc_int_for_mode (val, mode);
3647
3648 return val >= low && val <= high;
3649}
3650
56c7bfc2 3651/* Return TRUE when OP is an immediate that is an exact power of 2, i.e.
3652 OP == 2^scale with LOW <= scale <= HIGH. When OP is a vector,
3653 all entries must be the same. */
3654bool
3655exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3656{
3657 enum machine_mode int_mode;
3658 HOST_WIDE_INT val;
3659 unsigned char arr[16];
3660 int bytes, i, j;
3661
3662 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3663 || GET_CODE (op) == CONST_VECTOR);
3664
3665 if (GET_CODE (op) == CONST_VECTOR
3666 && !const_vector_immediate_p (op))
3667 return 0;
3668
3669 if (GET_MODE (op) != VOIDmode)
3670 mode = GET_MODE (op);
3671
3672 constant_to_array (mode, op, arr);
3673
3674 if (VECTOR_MODE_P (mode))
3675 mode = GET_MODE_INNER (mode);
3676
3677 bytes = GET_MODE_SIZE (mode);
3678 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3679
3680 /* Check that bytes are repeated. */
3681 for (i = bytes; i < 16; i += bytes)
3682 for (j = 0; j < bytes; j++)
3683 if (arr[j] != arr[i + j])
3684 return 0;
3685
3686 val = arr[0];
3687 for (j = 1; j < bytes; j++)
3688 val = (val << 8) | arr[j];
3689
3690 val = trunc_int_for_mode (val, int_mode);
3691
3692 /* Currently, we only handle SFmode */
3693 gcc_assert (mode == SFmode);
3694 if (mode == SFmode)
3695 {
3696 int exp = (val >> 23) - 127;
3697 return val > 0 && (val & 0x007fffff) == 0
3698 && exp >= low && exp <= high;
3699 }
3700 return FALSE;
3701}
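
/* For illustration, a worked example of the SFmode test above, assuming
   the usual IEEE single-precision layout: the constant 2.0f has the bit
   pattern 0x40000000, so

       val & 0x007fffff == 0                (mantissa clear, exact power of 2)
       exp = (val >> 23) - 127 = 128 - 127 = 1

   and exp2_immediate_p accepts it whenever LOW <= 1 <= HIGH.  */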
3702
6cf5579e 3703/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3704
3705static int
3706ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3707{
3708 rtx x = *px;
3709 tree decl;
3710
3711 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3712 {
3713 rtx plus = XEXP (x, 0);
3714 rtx op0 = XEXP (plus, 0);
3715 rtx op1 = XEXP (plus, 1);
3716 if (GET_CODE (op1) == CONST_INT)
3717 x = op0;
3718 }
3719
3720 return (GET_CODE (x) == SYMBOL_REF
3721 && (decl = SYMBOL_REF_DECL (x)) != 0
3722 && TREE_CODE (decl) == VAR_DECL
3723 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3724}
3725
644459d0 3726/* We accept:
5b865faf 3727 - any 32-bit constant (SImode, SFmode)
644459d0 3728 - any constant that can be generated with fsmbi (any mode)
5b865faf 3729 - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3730 (DImode, DFmode)
5b865faf 3731 - a 128-bit constant where the four 32-bit words match. */
644459d0 3732int
3733spu_legitimate_constant_p (rtx x)
3734{
5df189be 3735 if (GET_CODE (x) == HIGH)
3736 x = XEXP (x, 0);
6cf5579e 3737
3738 /* Reject any __ea qualified reference. These can't appear in
3739 instructions but must be forced to the constant pool. */
3740 if (for_each_rtx (&x, ea_symbol_ref, 0))
3741 return 0;
3742
644459d0 3743 /* V4SI with all identical symbols is valid. */
5df189be 3744 if (!flag_pic
3745 && GET_MODE (x) == V4SImode
644459d0 3746 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3747 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3748 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3749 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3750 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3751 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3752
5df189be 3753 if (GET_CODE (x) == CONST_VECTOR
3754 && !const_vector_immediate_p (x))
3755 return 0;
644459d0 3756 return 1;
3757}
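
/* For illustration: the V4SImode case above accepts, e.g., a vector whose
   four elements are all the same SYMBOL_REF (roughly { &x, &x, &x, &x })
   when not compiling PIC; a vector mixing different symbols fails the
   element-equality test, so it is rejected and ends up in the constant
   pool instead.  */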
3758
3759/* Valid addresses are:
3760 - symbol_ref, label_ref, const
3761 - reg
9d98604b 3762 - reg + const_int, where const_int is 16 byte aligned
644459d0 3763 - reg + reg, alignment doesn't matter
3764 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3765 ignore the 4 least significant bits of the const. We only care about
3766 16 byte modes because the expand phase will change all smaller MEM
3767 references to TImode. */
3768static bool
3769spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3770 rtx x, bool reg_ok_strict)
644459d0 3771{
9d98604b 3772 int aligned = GET_MODE_SIZE (mode) >= 16;
3773 if (aligned
3774 && GET_CODE (x) == AND
644459d0 3775 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3776 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3777 x = XEXP (x, 0);
3778 switch (GET_CODE (x))
3779 {
644459d0 3780 case LABEL_REF:
6cf5579e 3781 return !TARGET_LARGE_MEM;
3782
9d98604b 3783 case SYMBOL_REF:
644459d0 3784 case CONST:
6cf5579e 3785 /* Keep __ea references until reload so that spu_expand_mov can see them
3786 in MEMs. */
3787 if (ea_symbol_ref (&x, 0))
3788 return !reload_in_progress && !reload_completed;
9d98604b 3789 return !TARGET_LARGE_MEM;
644459d0 3790
3791 case CONST_INT:
3792 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3793
3794 case SUBREG:
3795 x = XEXP (x, 0);
9d98604b 3796 if (REG_P (x))
3797 return 0;
644459d0 3798
3799 case REG:
3800 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3801
3802 case PLUS:
3803 case LO_SUM:
3804 {
3805 rtx op0 = XEXP (x, 0);
3806 rtx op1 = XEXP (x, 1);
3807 if (GET_CODE (op0) == SUBREG)
3808 op0 = XEXP (op0, 0);
3809 if (GET_CODE (op1) == SUBREG)
3810 op1 = XEXP (op1, 0);
644459d0 3811 if (GET_CODE (op0) == REG
3812 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3813 && GET_CODE (op1) == CONST_INT
3814 && INTVAL (op1) >= -0x2000
3815 && INTVAL (op1) <= 0x1fff
9d98604b 3816 && (!aligned || (INTVAL (op1) & 15) == 0))
3817 return TRUE;
644459d0 3818 if (GET_CODE (op0) == REG
3819 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3820 && GET_CODE (op1) == REG
3821 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3822 return TRUE;
644459d0 3823 }
3824 break;
3825
3826 default:
3827 break;
3828 }
9d98604b 3829 return FALSE;
644459d0 3830}
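
/* Illustrative address forms under the rules above, for a 16-byte mode
   such as TImode:

     (reg)                          valid
     (plus (reg) (const_int 32))    valid, the offset is 16-byte aligned
     (plus (reg) (const_int 20))    rejected, since lqd/stqd would silently
                                    drop the low 4 bits of the offset
     (plus (reg) (reg))             valid, alignment is irrelevant

   Smaller modes are not restricted this way because they are rewritten
   to TImode accesses during expand, as noted above.  */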
3831
6cf5579e 3832/* Like spu_legitimate_address_p, except with named addresses. */
3833static bool
3834spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3835 bool reg_ok_strict, addr_space_t as)
3836{
3837 if (as == ADDR_SPACE_EA)
3838 return (REG_P (x) && (GET_MODE (x) == EAmode));
3839
3840 else if (as != ADDR_SPACE_GENERIC)
3841 gcc_unreachable ();
3842
3843 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3844}
3845
644459d0 3846/* When the address is reg + const_int, force the const_int into a
fa7637bd 3847 register. */
644459d0 3848rtx
3849spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3850 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3851{
3852 rtx op0, op1;
3853 /* Make sure both operands are registers. */
3854 if (GET_CODE (x) == PLUS)
3855 {
3856 op0 = XEXP (x, 0);
3857 op1 = XEXP (x, 1);
3858 if (ALIGNED_SYMBOL_REF_P (op0))
3859 {
3860 op0 = force_reg (Pmode, op0);
3861 mark_reg_pointer (op0, 128);
3862 }
3863 else if (GET_CODE (op0) != REG)
3864 op0 = force_reg (Pmode, op0);
3865 if (ALIGNED_SYMBOL_REF_P (op1))
3866 {
3867 op1 = force_reg (Pmode, op1);
3868 mark_reg_pointer (op1, 128);
3869 }
3870 else if (GET_CODE (op1) != REG)
3871 op1 = force_reg (Pmode, op1);
3872 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3873 }
41e3a0c7 3874 return x;
644459d0 3875}
3876
6cf5579e 3877/* Like spu_legitimate_address, except with named address support. */
3878static rtx
3879spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3880 addr_space_t as)
3881{
3882 if (as != ADDR_SPACE_GENERIC)
3883 return x;
3884
3885 return spu_legitimize_address (x, oldx, mode);
3886}
3887
644459d0 3888/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3889 struct attribute_spec.handler. */
3890static tree
3891spu_handle_fndecl_attribute (tree * node,
3892 tree name,
3893 tree args ATTRIBUTE_UNUSED,
3894 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3895{
3896 if (TREE_CODE (*node) != FUNCTION_DECL)
3897 {
67a779df 3898 warning (0, "%qE attribute only applies to functions",
3899 name);
644459d0 3900 *no_add_attrs = true;
3901 }
3902
3903 return NULL_TREE;
3904}
3905
3906/* Handle the "vector" attribute. */
3907static tree
3908spu_handle_vector_attribute (tree * node, tree name,
3909 tree args ATTRIBUTE_UNUSED,
3910 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3911{
3912 tree type = *node, result = NULL_TREE;
3913 enum machine_mode mode;
3914 int unsigned_p;
3915
3916 while (POINTER_TYPE_P (type)
3917 || TREE_CODE (type) == FUNCTION_TYPE
3918 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3919 type = TREE_TYPE (type);
3920
3921 mode = TYPE_MODE (type);
3922
3923 unsigned_p = TYPE_UNSIGNED (type);
3924 switch (mode)
3925 {
3926 case DImode:
3927 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3928 break;
3929 case SImode:
3930 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3931 break;
3932 case HImode:
3933 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3934 break;
3935 case QImode:
3936 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3937 break;
3938 case SFmode:
3939 result = V4SF_type_node;
3940 break;
3941 case DFmode:
3942 result = V2DF_type_node;
3943 break;
3944 default:
3945 break;
3946 }
3947
3948 /* Propagate qualifiers attached to the element type
3949 onto the vector type. */
3950 if (result && result != type && TYPE_QUALS (type))
3951 result = build_qualified_type (result, TYPE_QUALS (type));
3952
3953 *no_add_attrs = true; /* No need to hang on to the attribute. */
3954
3955 if (!result)
67a779df 3956 warning (0, "%qE attribute ignored", name);
644459d0 3957 else
d991e6e8 3958 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3959
3960 return NULL_TREE;
3961}
3962
f2b32076 3963/* Return nonzero if FUNC is a naked function. */
644459d0 3964static int
3965spu_naked_function_p (tree func)
3966{
3967 tree a;
3968
3969 if (TREE_CODE (func) != FUNCTION_DECL)
3970 abort ();
3971
3972 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3973 return a != NULL_TREE;
3974}
3975
3976int
3977spu_initial_elimination_offset (int from, int to)
3978{
3979 int saved_regs_size = spu_saved_regs_size ();
3980 int sp_offset = 0;
abe32cce 3981 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3982 || get_frame_size () || saved_regs_size)
3983 sp_offset = STACK_POINTER_OFFSET;
3984 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3985 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3986 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3987 return get_frame_size ();
644459d0 3988 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3989 return sp_offset + crtl->outgoing_args_size
644459d0 3990 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3991 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3992 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3993 else
3994 gcc_unreachable ();
644459d0 3995}
3996
3997rtx
fb80456a 3998spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3999{
4000 enum machine_mode mode = TYPE_MODE (type);
4001 int byte_size = ((mode == BLKmode)
4002 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4003
4004 /* Make sure small structs are left justified in a register. */
4005 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4006 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
4007 {
4008 enum machine_mode smode;
4009 rtvec v;
4010 int i;
4011 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4012 int n = byte_size / UNITS_PER_WORD;
4013 v = rtvec_alloc (nregs);
4014 for (i = 0; i < n; i++)
4015 {
4016 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
4017 gen_rtx_REG (TImode,
4018 FIRST_RETURN_REGNUM
4019 + i),
4020 GEN_INT (UNITS_PER_WORD * i));
4021 byte_size -= UNITS_PER_WORD;
4022 }
4023
4024 if (n < nregs)
4025 {
4026 if (byte_size < 4)
4027 byte_size = 4;
4028 smode =
4029 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4030 RTVEC_ELT (v, n) =
4031 gen_rtx_EXPR_LIST (VOIDmode,
4032 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4033 GEN_INT (UNITS_PER_WORD * n));
4034 }
4035 return gen_rtx_PARALLEL (mode, v);
4036 }
4037 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4038}
4039
ee9034d4 4040static rtx
4041spu_function_arg (CUMULATIVE_ARGS *cum,
644459d0 4042 enum machine_mode mode,
ee9034d4 4043 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4044{
4045 int byte_size;
4046
a08c5dd0 4047 if (*cum >= MAX_REGISTER_ARGS)
644459d0 4048 return 0;
4049
4050 byte_size = ((mode == BLKmode)
4051 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4052
4053 /* The ABI does not allow parameters to be passed partially in
 4054 registers and partially on the stack. */
a08c5dd0 4055 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
644459d0 4056 return 0;
4057
4058 /* Make sure small structs are left justified in a register. */
4059 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4060 && byte_size < UNITS_PER_WORD && byte_size > 0)
4061 {
4062 enum machine_mode smode;
4063 rtx gr_reg;
4064 if (byte_size < 4)
4065 byte_size = 4;
4066 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4067 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
a08c5dd0 4068 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
644459d0 4069 const0_rtx);
4070 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4071 }
4072 else
a08c5dd0 4073 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
644459d0 4074}
4075
ee9034d4 4076static void
4077spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4078 const_tree type, bool named ATTRIBUTE_UNUSED)
4079{
4080 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4081 ? 1
4082 : mode == BLKmode
4083 ? ((int_size_in_bytes (type) + 15) / 16)
4084 : mode == VOIDmode
4085 ? 1
4086 : HARD_REGNO_NREGS (cum, mode));
4087}
4088
644459d0 4089/* Variable sized types are passed by reference. */
4090static bool
4091spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4092 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 4093 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 4094{
4095 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4096}
4097\f
4098
4099/* Var args. */
4100
4101/* Create and return the va_list datatype.
4102
4103 On SPU, va_list is an array type equivalent to
4104
4105 typedef struct __va_list_tag
4106 {
4107 void *__args __attribute__((__aligned(16)));
4108 void *__skip __attribute__((__aligned(16)));
4109
4110 } va_list[1];
4111
fa7637bd 4112 where __args points to the arg that will be returned by the next
644459d0 4113 va_arg(), and __skip points to the previous stack frame such that
4114 when __args == __skip we should advance __args by 32 bytes. */
4115static tree
4116spu_build_builtin_va_list (void)
4117{
4118 tree f_args, f_skip, record, type_decl;
4119 bool owp;
4120
4121 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4122
4123 type_decl =
54e46243 4124 build_decl (BUILTINS_LOCATION,
4125 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 4126
54e46243 4127 f_args = build_decl (BUILTINS_LOCATION,
4128 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4129 f_skip = build_decl (BUILTINS_LOCATION,
4130 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 4131
4132 DECL_FIELD_CONTEXT (f_args) = record;
4133 DECL_ALIGN (f_args) = 128;
4134 DECL_USER_ALIGN (f_args) = 1;
4135
4136 DECL_FIELD_CONTEXT (f_skip) = record;
4137 DECL_ALIGN (f_skip) = 128;
4138 DECL_USER_ALIGN (f_skip) = 1;
4139
bc907808 4140 TYPE_STUB_DECL (record) = type_decl;
644459d0 4141 TYPE_NAME (record) = type_decl;
4142 TYPE_FIELDS (record) = f_args;
1767a056 4143 DECL_CHAIN (f_args) = f_skip;
644459d0 4144
 4145 /* We know this is being padded and we want it that way. It is an internal
4146 type so hide the warnings from the user. */
4147 owp = warn_padded;
4148 warn_padded = false;
4149
4150 layout_type (record);
4151
4152 warn_padded = owp;
4153
4154 /* The correct type is an array type of one element. */
4155 return build_array_type (record, build_index_type (size_zero_node));
4156}
4157
4158/* Implement va_start by filling the va_list structure VALIST.
4159 NEXTARG points to the first anonymous stack argument.
4160
4161 The following global variables are used to initialize
4162 the va_list structure:
4163
abe32cce 4164 crtl->args.info;
644459d0 4165 the CUMULATIVE_ARGS for this function
4166
abe32cce 4167 crtl->args.arg_offset_rtx:
644459d0 4168 holds the offset of the first anonymous stack argument
4169 (relative to the virtual arg pointer). */
4170
8a58ed0a 4171static void
644459d0 4172spu_va_start (tree valist, rtx nextarg)
4173{
4174 tree f_args, f_skip;
4175 tree args, skip, t;
4176
4177 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4178 f_skip = DECL_CHAIN (f_args);
644459d0 4179
170efcd4 4180 valist = build_simple_mem_ref (valist);
644459d0 4181 args =
4182 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4183 skip =
4184 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4185
4186 /* Find the __args area. */
4187 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4188 if (crtl->args.pretend_args_size > 0)
0de36bdb 4189 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4190 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4191 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4192 TREE_SIDE_EFFECTS (t) = 1;
4193 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4194
4195 /* Find the __skip area. */
4196 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4197 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4198 size_int (crtl->args.pretend_args_size
0de36bdb 4199 - STACK_POINTER_OFFSET));
75a70cf9 4200 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4201 TREE_SIDE_EFFECTS (t) = 1;
4202 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4203}
4204
4205/* Gimplify va_arg by updating the va_list structure
4206 VALIST as required to retrieve an argument of type
4207 TYPE, and returning that argument.
4208
4209 ret = va_arg(VALIST, TYPE);
4210
4211 generates code equivalent to:
4212
4213 paddedsize = (sizeof(TYPE) + 15) & -16;
4214 if (VALIST.__args + paddedsize > VALIST.__skip
4215 && VALIST.__args <= VALIST.__skip)
4216 addr = VALIST.__skip + 32;
4217 else
4218 addr = VALIST.__args;
4219 VALIST.__args = addr + paddedsize;
4220 ret = *(TYPE *)addr;
4221 */
4222static tree
75a70cf9 4223spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4224 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4225{
4226 tree f_args, f_skip;
4227 tree args, skip;
4228 HOST_WIDE_INT size, rsize;
4229 tree paddedsize, addr, tmp;
4230 bool pass_by_reference_p;
4231
4232 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
1767a056 4233 f_skip = DECL_CHAIN (f_args);
644459d0 4234
182cf5a9 4235 valist = build_simple_mem_ref (valist);
644459d0 4236 args =
4237 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4238 skip =
4239 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4240
4241 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4242
4243 /* if an object is dynamically sized, a pointer to it is passed
4244 instead of the object itself. */
4245 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4246 false);
4247 if (pass_by_reference_p)
4248 type = build_pointer_type (type);
4249 size = int_size_in_bytes (type);
4250 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4251
4252 /* build conditional expression to calculate addr. The expression
4253 will be gimplified later. */
0de36bdb 4254 paddedsize = size_int (rsize);
75a70cf9 4255 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4256 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4257 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4258 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4259 unshare_expr (skip)));
644459d0 4260
4261 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4262 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4263 size_int (32)), unshare_expr (args));
644459d0 4264
75a70cf9 4265 gimplify_assign (addr, tmp, pre_p);
644459d0 4266
4267 /* update VALIST.__args */
0de36bdb 4268 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4269 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4270
8115f0af 4271 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4272 addr);
644459d0 4273
4274 if (pass_by_reference_p)
4275 addr = build_va_arg_indirect_ref (addr);
4276
4277 return build_va_arg_indirect_ref (addr);
4278}
4279
4280/* Save parameter registers starting with the register that corresponds
 4281 to the first unnamed parameter. If the first unnamed parameter is
4282 in the stack then save no registers. Set pretend_args_size to the
4283 amount of space needed to save the registers. */
4284void
4285spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4286 tree type, int *pretend_size, int no_rtl)
4287{
4288 if (!no_rtl)
4289 {
4290 rtx tmp;
4291 int regno;
4292 int offset;
4293 int ncum = *cum;
4294
4295 /* cum currently points to the last named argument, we want to
4296 start at the next argument. */
ee9034d4 4297 spu_function_arg_advance (&ncum, mode, type, true);
644459d0 4298
4299 offset = -STACK_POINTER_OFFSET;
4300 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4301 {
4302 tmp = gen_frame_mem (V4SImode,
4303 plus_constant (virtual_incoming_args_rtx,
4304 offset));
4305 emit_move_insn (tmp,
4306 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4307 offset += 16;
4308 }
4309 *pretend_size = offset + STACK_POINTER_OFFSET;
4310 }
4311}
4312\f
b2d7ede1 4313static void
644459d0 4314spu_conditional_register_usage (void)
4315{
4316 if (flag_pic)
4317 {
4318 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4319 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4320 }
644459d0 4321}
4322
9d98604b 4323/* This is called any time we inspect the alignment of a register for
4324 addresses. */
644459d0 4325static int
9d98604b 4326reg_aligned_for_addr (rtx x)
644459d0 4327{
9d98604b 4328 int regno =
4329 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4330 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4331}
4332
69ced2d6 4333/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4334 into its SYMBOL_REF_FLAGS. */
4335static void
4336spu_encode_section_info (tree decl, rtx rtl, int first)
4337{
4338 default_encode_section_info (decl, rtl, first);
4339
4340 /* If a variable has a forced alignment to < 16 bytes, mark it with
4341 SYMBOL_FLAG_ALIGN1. */
4342 if (TREE_CODE (decl) == VAR_DECL
4343 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4344 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4345}
4346
644459d0 4347/* Return TRUE if we are certain the mem refers to a complete object
4348 which is both 16-byte aligned and padded to a 16-byte boundary. This
4349 would make it safe to store with a single instruction.
4350 We guarantee the alignment and padding for static objects by aligning
4351 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4352 FIXME: We currently cannot guarantee this for objects on the stack
4353 because assign_parm_setup_stack calls assign_stack_local with the
4354 alignment of the parameter mode and in that case the alignment never
4355 gets adjusted by LOCAL_ALIGNMENT. */
4356static int
4357store_with_one_insn_p (rtx mem)
4358{
9d98604b 4359 enum machine_mode mode = GET_MODE (mem);
644459d0 4360 rtx addr = XEXP (mem, 0);
9d98604b 4361 if (mode == BLKmode)
644459d0 4362 return 0;
9d98604b 4363 if (GET_MODE_SIZE (mode) >= 16)
4364 return 1;
644459d0 4365 /* Only static objects. */
4366 if (GET_CODE (addr) == SYMBOL_REF)
4367 {
4368 /* We use the associated declaration to make sure the access is
fa7637bd 4369 referring to the whole object.
644459d0 4370 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4371 if it is necessary. Will there be cases where one exists, and
4372 the other does not? Will there be cases where both exist, but
4373 have different types? */
4374 tree decl = MEM_EXPR (mem);
4375 if (decl
4376 && TREE_CODE (decl) == VAR_DECL
4377 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4378 return 1;
4379 decl = SYMBOL_REF_DECL (addr);
4380 if (decl
4381 && TREE_CODE (decl) == VAR_DECL
4382 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4383 return 1;
4384 }
4385 return 0;
4386}
4387
9d98604b 4388/* Return 1 when the address is not valid for a simple load and store as
4389 required by the '_mov*' patterns. We could make this less strict
 4390 for loads, but we prefer MEMs to look the same so they are more
4391 likely to be merged. */
4392static int
4393address_needs_split (rtx mem)
4394{
4395 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4396 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4397 || !(store_with_one_insn_p (mem)
4398 || mem_is_padded_component_ref (mem))))
4399 return 1;
4400
4401 return 0;
4402}
4403
6cf5579e 4404static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4405static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4406static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4407
4408/* MEM is known to be an __ea qualified memory access. Emit a call to
4409 fetch the ppu memory to local store, and return its address in local
4410 store. */
4411
4412static void
4413ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4414{
4415 if (is_store)
4416 {
4417 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4418 if (!cache_fetch_dirty)
4419 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4420 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4421 2, ea_addr, EAmode, ndirty, SImode);
4422 }
4423 else
4424 {
4425 if (!cache_fetch)
4426 cache_fetch = init_one_libfunc ("__cache_fetch");
4427 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4428 1, ea_addr, EAmode);
4429 }
4430}
4431
4432/* Like ea_load_store, but do the cache tag comparison and, for stores,
4433 dirty bit marking, inline.
4434
4435 The cache control data structure is an array of
4436
4437 struct __cache_tag_array
4438 {
4439 unsigned int tag_lo[4];
4440 unsigned int tag_hi[4];
4441 void *data_pointer[4];
4442 int reserved[4];
4443 vector unsigned short dirty_bits[4];
4444 } */
4445
4446static void
4447ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4448{
4449 rtx ea_addr_si;
4450 HOST_WIDE_INT v;
4451 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4452 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4453 rtx index_mask = gen_reg_rtx (SImode);
4454 rtx tag_arr = gen_reg_rtx (Pmode);
4455 rtx splat_mask = gen_reg_rtx (TImode);
4456 rtx splat = gen_reg_rtx (V4SImode);
4457 rtx splat_hi = NULL_RTX;
4458 rtx tag_index = gen_reg_rtx (Pmode);
4459 rtx block_off = gen_reg_rtx (SImode);
4460 rtx tag_addr = gen_reg_rtx (Pmode);
4461 rtx tag = gen_reg_rtx (V4SImode);
4462 rtx cache_tag = gen_reg_rtx (V4SImode);
4463 rtx cache_tag_hi = NULL_RTX;
4464 rtx cache_ptrs = gen_reg_rtx (TImode);
4465 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4466 rtx tag_equal = gen_reg_rtx (V4SImode);
4467 rtx tag_equal_hi = NULL_RTX;
4468 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4469 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4470 rtx eq_index = gen_reg_rtx (SImode);
4471 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4472
4473 if (spu_ea_model != 32)
4474 {
4475 splat_hi = gen_reg_rtx (V4SImode);
4476 cache_tag_hi = gen_reg_rtx (V4SImode);
4477 tag_equal_hi = gen_reg_rtx (V4SImode);
4478 }
4479
4480 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4481 emit_move_insn (tag_arr, tag_arr_sym);
4482 v = 0x0001020300010203LL;
4483 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4484 ea_addr_si = ea_addr;
4485 if (spu_ea_model != 32)
4486 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4487
4488 /* tag_index = ea_addr & (tag_array_size - 128) */
4489 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4490
4491 /* splat ea_addr to all 4 slots. */
4492 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4493 /* Similarly for high 32 bits of ea_addr. */
4494 if (spu_ea_model != 32)
4495 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4496
4497 /* block_off = ea_addr & 127 */
4498 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4499
4500 /* tag_addr = tag_arr + tag_index */
4501 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4502
4503 /* Read cache tags. */
4504 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4505 if (spu_ea_model != 32)
4506 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4507 plus_constant (tag_addr, 16)));
4508
4509 /* tag = ea_addr & -128 */
4510 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4511
4512 /* Read all four cache data pointers. */
4513 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4514 plus_constant (tag_addr, 32)));
4515
4516 /* Compare tags. */
4517 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4518 if (spu_ea_model != 32)
4519 {
4520 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4521 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4522 }
4523
4524 /* At most one of the tags compare equal, so tag_equal has one
4525 32-bit slot set to all 1's, with the other slots all zero.
4526 gbb picks off low bit from each byte in the 128-bit registers,
4527 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4528 we have a hit. */
4529 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4530 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4531
4532 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4533 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4534
 4535 /* This allows us to rotate the corresponding cache data pointer to
 4536 slot 0 (rotating by eq_index mod 16 bytes). */
4537 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4538 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4539
4540 /* Add block offset to form final data address. */
4541 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4542
4543 /* Check that we did hit. */
4544 hit_label = gen_label_rtx ();
4545 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4546 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4547 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4548 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4549 hit_ref, pc_rtx)));
4550 /* Say that this branch is very likely to happen. */
4551 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
02501f7f 4552 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
6cf5579e 4553
4554 ea_load_store (mem, is_store, ea_addr, data_addr);
4555 cont_label = gen_label_rtx ();
4556 emit_jump_insn (gen_jump (cont_label));
4557 emit_barrier ();
4558
4559 emit_label (hit_label);
4560
4561 if (is_store)
4562 {
4563 HOST_WIDE_INT v_hi;
4564 rtx dirty_bits = gen_reg_rtx (TImode);
4565 rtx dirty_off = gen_reg_rtx (SImode);
4566 rtx dirty_128 = gen_reg_rtx (TImode);
4567 rtx neg_block_off = gen_reg_rtx (SImode);
4568
4569 /* Set up mask with one dirty bit per byte of the mem we are
4570 writing, starting from top bit. */
4571 v_hi = v = -1;
4572 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4573 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4574 {
4575 v_hi = v;
4576 v = 0;
4577 }
4578 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4579
4580 /* Form index into cache dirty_bits. eq_index is one of
4581 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4582 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4583 offset to each of the four dirty_bits elements. */
4584 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4585
4586 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4587
4588 /* Rotate bit mask to proper bit. */
4589 emit_insn (gen_negsi2 (neg_block_off, block_off));
4590 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4591 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4592
4593 /* Or in the new dirty bits. */
4594 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4595
4596 /* Store. */
4597 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4598 }
4599
4600 emit_label (cont_label);
4601}
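
/* For illustration, a sketch of the hit path above when the tag in slot 1
   matches:

     tag_equal      = { 0, -1, 0, 0 }
     tag_eq_pack_si = 0x0f00            (gbb gathers the low bit of each byte)
     eq_index       = clz (0x0f00) = 20 = 0x14
     dirty_off      = 0x14 << 2  = 0x50 (the offset of dirty_bits[1])

   and rotating cache_ptrs by 20 mod 16 = 4 bytes brings data_pointer[1]
   into the preferred slot.  */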
4602
4603static rtx
4604expand_ea_mem (rtx mem, bool is_store)
4605{
4606 rtx ea_addr;
4607 rtx data_addr = gen_reg_rtx (Pmode);
4608 rtx new_mem;
4609
4610 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4611 if (optimize_size || optimize == 0)
4612 ea_load_store (mem, is_store, ea_addr, data_addr);
4613 else
4614 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4615
4616 if (ea_alias_set == -1)
4617 ea_alias_set = new_alias_set ();
4618
4619 /* We generate a new MEM RTX to refer to the copy of the data
4620 in the cache. We do not copy memory attributes (except the
4621 alignment) from the original MEM, as they may no longer apply
4622 to the cache copy. */
4623 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4624 set_mem_alias_set (new_mem, ea_alias_set);
4625 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4626
4627 return new_mem;
4628}
4629
644459d0 4630int
4631spu_expand_mov (rtx * ops, enum machine_mode mode)
4632{
4633 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
abe960bb 4634 {
4635 /* Perform the move in the destination SUBREG's inner mode. */
4636 ops[0] = SUBREG_REG (ops[0]);
4637 mode = GET_MODE (ops[0]);
4638 ops[1] = gen_lowpart_common (mode, ops[1]);
4639 gcc_assert (ops[1]);
4640 }
644459d0 4641
4642 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4643 {
4644 rtx from = SUBREG_REG (ops[1]);
8d72495d 4645 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4646
4647 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4648 && GET_MODE_CLASS (imode) == MODE_INT
4649 && subreg_lowpart_p (ops[1]));
4650
4651 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4652 imode = SImode;
4653 if (imode != GET_MODE (from))
4654 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4655
4656 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4657 {
d6bf3b14 4658 enum insn_code icode = convert_optab_handler (trunc_optab,
4659 mode, imode);
644459d0 4660 emit_insn (GEN_FCN (icode) (ops[0], from));
4661 }
4662 else
4663 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4664 return 1;
4665 }
4666
4667 /* At least one of the operands needs to be a register. */
4668 if ((reload_in_progress | reload_completed) == 0
4669 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4670 {
4671 rtx temp = force_reg (mode, ops[1]);
4672 emit_move_insn (ops[0], temp);
4673 return 1;
4674 }
4675 if (reload_in_progress || reload_completed)
4676 {
dea01258 4677 if (CONSTANT_P (ops[1]))
4678 return spu_split_immediate (ops);
644459d0 4679 return 0;
4680 }
9d98604b 4681
4682 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4683 extend them. */
4684 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4685 {
9d98604b 4686 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4687 if (val != INTVAL (ops[1]))
644459d0 4688 {
9d98604b 4689 emit_move_insn (ops[0], GEN_INT (val));
4690 return 1;
644459d0 4691 }
4692 }
9d98604b 4693 if (MEM_P (ops[0]))
6cf5579e 4694 {
4695 if (MEM_ADDR_SPACE (ops[0]))
4696 ops[0] = expand_ea_mem (ops[0], true);
4697 return spu_split_store (ops);
4698 }
9d98604b 4699 if (MEM_P (ops[1]))
6cf5579e 4700 {
4701 if (MEM_ADDR_SPACE (ops[1]))
4702 ops[1] = expand_ea_mem (ops[1], false);
4703 return spu_split_load (ops);
4704 }
9d98604b 4705
644459d0 4706 return 0;
4707}
4708
9d98604b 4709static void
4710spu_convert_move (rtx dst, rtx src)
644459d0 4711{
9d98604b 4712 enum machine_mode mode = GET_MODE (dst);
4713 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4714 rtx reg;
4715 gcc_assert (GET_MODE (src) == TImode);
4716 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4717 emit_insn (gen_rtx_SET (VOIDmode, reg,
4718 gen_rtx_TRUNCATE (int_mode,
4719 gen_rtx_LSHIFTRT (TImode, src,
4720 GEN_INT (int_mode == DImode ? 64 : 96)))));
4721 if (int_mode != mode)
4722 {
4723 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4724 emit_move_insn (dst, reg);
4725 }
4726}
644459d0 4727
9d98604b 4728/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4729 the address from SRC and SRC+16. Return a REG or CONST_INT that
4730 specifies how many bytes to rotate the loaded registers, plus any
4731 extra from EXTRA_ROTQBY. The address and rotate amounts are
4732 normalized to improve merging of loads and rotate computations. */
4733static rtx
4734spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4735{
4736 rtx addr = XEXP (src, 0);
4737 rtx p0, p1, rot, addr0, addr1;
4738 int rot_amt;
644459d0 4739
4740 rot = 0;
4741 rot_amt = 0;
9d98604b 4742
4743 if (MEM_ALIGN (src) >= 128)
4744 /* Address is already aligned; simply perform a TImode load. */ ;
4745 else if (GET_CODE (addr) == PLUS)
644459d0 4746 {
4747 /* 8 cases:
4748 aligned reg + aligned reg => lqx
4749 aligned reg + unaligned reg => lqx, rotqby
4750 aligned reg + aligned const => lqd
4751 aligned reg + unaligned const => lqd, rotqbyi
4752 unaligned reg + aligned reg => lqx, rotqby
4753 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4754 unaligned reg + aligned const => lqd, rotqby
4755 unaligned reg + unaligned const -> not allowed by legitimate address
4756 */
4757 p0 = XEXP (addr, 0);
4758 p1 = XEXP (addr, 1);
9d98604b 4759 if (!reg_aligned_for_addr (p0))
644459d0 4760 {
9d98604b 4761 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4762 {
9d98604b 4763 rot = gen_reg_rtx (SImode);
4764 emit_insn (gen_addsi3 (rot, p0, p1));
4765 }
4766 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4767 {
4768 if (INTVAL (p1) > 0
4769 && REG_POINTER (p0)
4770 && INTVAL (p1) * BITS_PER_UNIT
4771 < REGNO_POINTER_ALIGN (REGNO (p0)))
4772 {
4773 rot = gen_reg_rtx (SImode);
4774 emit_insn (gen_addsi3 (rot, p0, p1));
4775 addr = p0;
4776 }
4777 else
4778 {
4779 rtx x = gen_reg_rtx (SImode);
4780 emit_move_insn (x, p1);
4781 if (!spu_arith_operand (p1, SImode))
4782 p1 = x;
4783 rot = gen_reg_rtx (SImode);
4784 emit_insn (gen_addsi3 (rot, p0, p1));
4785 addr = gen_rtx_PLUS (Pmode, p0, x);
4786 }
644459d0 4787 }
4788 else
4789 rot = p0;
4790 }
4791 else
4792 {
4793 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4794 {
4795 rot_amt = INTVAL (p1) & 15;
9d98604b 4796 if (INTVAL (p1) & -16)
4797 {
4798 p1 = GEN_INT (INTVAL (p1) & -16);
4799 addr = gen_rtx_PLUS (SImode, p0, p1);
4800 }
4801 else
4802 addr = p0;
644459d0 4803 }
9d98604b 4804 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4805 rot = p1;
4806 }
4807 }
9d98604b 4808 else if (REG_P (addr))
644459d0 4809 {
9d98604b 4810 if (!reg_aligned_for_addr (addr))
644459d0 4811 rot = addr;
4812 }
4813 else if (GET_CODE (addr) == CONST)
4814 {
4815 if (GET_CODE (XEXP (addr, 0)) == PLUS
4816 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4817 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4818 {
4819 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4820 if (rot_amt & -16)
4821 addr = gen_rtx_CONST (Pmode,
4822 gen_rtx_PLUS (Pmode,
4823 XEXP (XEXP (addr, 0), 0),
4824 GEN_INT (rot_amt & -16)));
4825 else
4826 addr = XEXP (XEXP (addr, 0), 0);
4827 }
4828 else
9d98604b 4829 {
4830 rot = gen_reg_rtx (Pmode);
4831 emit_move_insn (rot, addr);
4832 }
644459d0 4833 }
4834 else if (GET_CODE (addr) == CONST_INT)
4835 {
4836 rot_amt = INTVAL (addr);
4837 addr = GEN_INT (rot_amt & -16);
4838 }
4839 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4840 {
4841 rot = gen_reg_rtx (Pmode);
4842 emit_move_insn (rot, addr);
4843 }
644459d0 4844
9d98604b 4845 rot_amt += extra_rotby;
644459d0 4846
4847 rot_amt &= 15;
4848
4849 if (rot && rot_amt)
4850 {
9d98604b 4851 rtx x = gen_reg_rtx (SImode);
4852 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4853 rot = x;
644459d0 4854 rot_amt = 0;
4855 }
9d98604b 4856 if (!rot && rot_amt)
4857 rot = GEN_INT (rot_amt);
4858
4859 addr0 = copy_rtx (addr);
4860 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4861 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4862
4863 if (dst1)
4864 {
4865 addr1 = plus_constant (copy_rtx (addr), 16);
4866 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4867 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4868 }
644459d0 4869
9d98604b 4870 return rot;
4871}
4872
4873int
4874spu_split_load (rtx * ops)
4875{
4876 enum machine_mode mode = GET_MODE (ops[0]);
4877 rtx addr, load, rot;
4878 int rot_amt;
644459d0 4879
9d98604b 4880 if (GET_MODE_SIZE (mode) >= 16)
4881 return 0;
644459d0 4882
9d98604b 4883 addr = XEXP (ops[1], 0);
4884 gcc_assert (GET_CODE (addr) != AND);
4885
4886 if (!address_needs_split (ops[1]))
4887 {
4888 ops[1] = change_address (ops[1], TImode, addr);
4889 load = gen_reg_rtx (TImode);
4890 emit_insn (gen__movti (load, ops[1]));
4891 spu_convert_move (ops[0], load);
4892 return 1;
4893 }
4894
4895 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4896
4897 load = gen_reg_rtx (TImode);
4898 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4899
4900 if (rot)
4901 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4902
9d98604b 4903 spu_convert_move (ops[0], load);
4904 return 1;
644459d0 4905}
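
/* For illustration, the split-load path above for an SImode load from
   (plus (reg) (const_int 6)), with the base register 16-byte aligned:

     rot_amt = 6                 (the low 4 bits of the offset)
     lqd     from the aligned base address
     rotqbyi by 6, moving bytes 6..9 of the quadword into bytes 0..3,
             the preferred slot for SImode

   For modes smaller than 4 bytes an extra rotate of size-4 bytes is
   folded into rot_amt so the value still lands in its preferred slot.  */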
4906
9d98604b 4907int
644459d0 4908spu_split_store (rtx * ops)
4909{
4910 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4911 rtx reg;
644459d0 4912 rtx addr, p0, p1, p1_lo, smem;
4913 int aform;
4914 int scalar;
4915
9d98604b 4916 if (GET_MODE_SIZE (mode) >= 16)
4917 return 0;
4918
644459d0 4919 addr = XEXP (ops[0], 0);
9d98604b 4920 gcc_assert (GET_CODE (addr) != AND);
4921
4922 if (!address_needs_split (ops[0]))
4923 {
4924 reg = gen_reg_rtx (TImode);
4925 emit_insn (gen_spu_convert (reg, ops[1]));
4926 ops[0] = change_address (ops[0], TImode, addr);
4927 emit_move_insn (ops[0], reg);
4928 return 1;
4929 }
644459d0 4930
4931 if (GET_CODE (addr) == PLUS)
4932 {
4933 /* 8 cases:
4934 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4935 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4936 aligned reg + aligned const => lqd, c?d, shuf, stqx
4937 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4938 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4939 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4940 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4941 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4942 */
4943 aform = 0;
4944 p0 = XEXP (addr, 0);
4945 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4946 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4947 {
4948 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4949 if (reg_aligned_for_addr (p0))
4950 {
4951 p1 = GEN_INT (INTVAL (p1) & -16);
4952 if (p1 == const0_rtx)
4953 addr = p0;
4954 else
4955 addr = gen_rtx_PLUS (SImode, p0, p1);
4956 }
4957 else
4958 {
4959 rtx x = gen_reg_rtx (SImode);
4960 emit_move_insn (x, p1);
4961 addr = gen_rtx_PLUS (SImode, p0, x);
4962 }
644459d0 4963 }
4964 }
9d98604b 4965 else if (REG_P (addr))
644459d0 4966 {
4967 aform = 0;
4968 p0 = addr;
4969 p1 = p1_lo = const0_rtx;
4970 }
4971 else
4972 {
4973 aform = 1;
4974 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4975 p1 = 0; /* aform doesn't use p1 */
4976 p1_lo = addr;
4977 if (ALIGNED_SYMBOL_REF_P (addr))
4978 p1_lo = const0_rtx;
9d98604b 4979 else if (GET_CODE (addr) == CONST
4980 && GET_CODE (XEXP (addr, 0)) == PLUS
4981 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4982 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4983 {
9d98604b 4984 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4985 if ((v & -16) != 0)
4986 addr = gen_rtx_CONST (Pmode,
4987 gen_rtx_PLUS (Pmode,
4988 XEXP (XEXP (addr, 0), 0),
4989 GEN_INT (v & -16)));
4990 else
4991 addr = XEXP (XEXP (addr, 0), 0);
4992 p1_lo = GEN_INT (v & 15);
644459d0 4993 }
4994 else if (GET_CODE (addr) == CONST_INT)
4995 {
4996 p1_lo = GEN_INT (INTVAL (addr) & 15);
4997 addr = GEN_INT (INTVAL (addr) & -16);
4998 }
9d98604b 4999 else
5000 {
5001 p1_lo = gen_reg_rtx (SImode);
5002 emit_move_insn (p1_lo, addr);
5003 }
644459d0 5004 }
5005
4cbad5bb 5006 gcc_assert (aform == 0 || aform == 1);
9d98604b 5007 reg = gen_reg_rtx (TImode);
e04cf423 5008
644459d0 5009 scalar = store_with_one_insn_p (ops[0]);
5010 if (!scalar)
5011 {
 5012 /* We could copy the flags from the ops[0] MEM to lmem here.
5013 We don't because we want this load to be optimized away if
5014 possible, and copying the flags will prevent that in certain
5015 cases, e.g. consider the volatile flag. */
5016
9d98604b 5017 rtx pat = gen_reg_rtx (TImode);
e04cf423 5018 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
5019 set_mem_alias_set (lmem, 0);
5020 emit_insn (gen_movti (reg, lmem));
644459d0 5021
9d98604b 5022 if (!p0 || reg_aligned_for_addr (p0))
644459d0 5023 p0 = stack_pointer_rtx;
5024 if (!p1_lo)
5025 p1_lo = const0_rtx;
5026
5027 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
5028 emit_insn (gen_shufb (reg, ops[1], reg, pat));
5029 }
644459d0 5030 else
5031 {
5032 if (GET_CODE (ops[1]) == REG)
5033 emit_insn (gen_spu_convert (reg, ops[1]));
5034 else if (GET_CODE (ops[1]) == SUBREG)
5035 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5036 else
5037 abort ();
5038 }
5039
5040 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 5041 emit_insn (gen_ashlti3
5042 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 5043
9d98604b 5044 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 5045 /* We can't use the previous alias set because the memory has changed
5046 size and can potentially overlap objects of other types. */
5047 set_mem_alias_set (smem, 0);
5048
e04cf423 5049 emit_insn (gen_movti (smem, reg));
9d98604b 5050 return 1;
644459d0 5051}
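
/* For illustration, the read-modify-write store path above amounts to:

     load   the enclosing 16 bytes                  (lqd/lqx)
     cpat   build a shuffle control targeting the bytes being stored
            (cbd/chd/cwd/cdd, depending on the access size)
     shufb  merge the new value into the old quadword
     store  the whole quadword back                 (stqd/stqx)

   which is the lqx, c?x, shuf, stqx sequence enumerated in the comment
   at the top of the function.  */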
5052
5053/* Return TRUE if X is MEM which is a struct member reference
5054 and the member can safely be loaded and stored with a single
5055 instruction because it is padded. */
5056static int
5057mem_is_padded_component_ref (rtx x)
5058{
5059 tree t = MEM_EXPR (x);
5060 tree r;
5061 if (!t || TREE_CODE (t) != COMPONENT_REF)
5062 return 0;
5063 t = TREE_OPERAND (t, 1);
5064 if (!t || TREE_CODE (t) != FIELD_DECL
5065 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5066 return 0;
5067 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5068 r = DECL_FIELD_CONTEXT (t);
5069 if (!r || TREE_CODE (r) != RECORD_TYPE)
5070 return 0;
5071 /* Make sure they are the same mode */
5072 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5073 return 0;
 5074 /* If there are no following fields, then the field's alignment ensures
fa7637bd 5075 the structure is padded to that alignment, which means this field is
5076 padded too. */
644459d0 5077 if (TREE_CHAIN (t) == 0)
5078 return 1;
5079 /* If the following field is also aligned then this field will be
5080 padded. */
5081 t = TREE_CHAIN (t);
5082 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5083 return 1;
5084 return 0;
5085}
5086
c7b91b14 5087/* Parse the -mfixed-range= option string. */
5088static void
5089fix_range (const char *const_str)
5090{
5091 int i, first, last;
5092 char *str, *dash, *comma;
5093
 5094 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5095 REG2 are either register names or register numbers. The effect
5096 of this option is to mark the registers in the range from REG1 to
5097 REG2 as ``fixed'' so they won't be used by the compiler. */
5098
5099 i = strlen (const_str);
5100 str = (char *) alloca (i + 1);
5101 memcpy (str, const_str, i + 1);
5102
5103 while (1)
5104 {
5105 dash = strchr (str, '-');
5106 if (!dash)
5107 {
5108 warning (0, "value of -mfixed-range must have form REG1-REG2");
5109 return;
5110 }
5111 *dash = '\0';
5112 comma = strchr (dash + 1, ',');
5113 if (comma)
5114 *comma = '\0';
5115
5116 first = decode_reg_name (str);
5117 if (first < 0)
5118 {
5119 warning (0, "unknown register name: %s", str);
5120 return;
5121 }
5122
5123 last = decode_reg_name (dash + 1);
5124 if (last < 0)
5125 {
5126 warning (0, "unknown register name: %s", dash + 1);
5127 return;
5128 }
5129
5130 *dash = '-';
5131
5132 if (first > last)
5133 {
5134 warning (0, "%s-%s is an empty range", str, dash + 1);
5135 return;
5136 }
5137
5138 for (i = first; i <= last; ++i)
5139 fixed_regs[i] = call_used_regs[i] = 1;
5140
5141 if (!comma)
5142 break;
5143
5144 *comma = ',';
5145 str = comma + 1;
5146 }
5147}
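
/* Usage example, for illustration:

     -mfixed-range=80-127

   marks registers 80 through 127 as fixed and call-used so the register
   allocator will not touch them; multiple ranges may be given separated
   by commas, e.g. -mfixed-range=20-21,80-127.  */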
5148
644459d0 5149/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5150 can be generated using the fsmbi instruction. */
5151int
5152fsmbi_const_p (rtx x)
5153{
dea01258 5154 if (CONSTANT_P (x))
5155 {
5df189be 5156 /* We can always choose TImode for CONST_INT because the high bits
dea01258 5157 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 5158 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 5159 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 5160 }
5161 return 0;
5162}
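
/* For illustration: fsmbi expands a 16-bit immediate into a 16-byte mask,
   one bit per byte, so any constant whose bytes are each 0x00 or 0xff
   qualifies.  The V4SImode constant { -1, 0, -1, 0 }, for example,
   corresponds to fsmbi with the immediate 0xf0f0.  */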
5163
5164/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5165 can be generated using the cbd, chd, cwd or cdd instruction. */
5166int
5167cpat_const_p (rtx x, enum machine_mode mode)
5168{
5169 if (CONSTANT_P (x))
5170 {
5171 enum immediate_class c = classify_immediate (x, mode);
5172 return c == IC_CPAT;
5173 }
5174 return 0;
5175}
644459d0 5176
dea01258 5177rtx
5178gen_cpat_const (rtx * ops)
5179{
5180 unsigned char dst[16];
5181 int i, offset, shift, isize;
5182 if (GET_CODE (ops[3]) != CONST_INT
5183 || GET_CODE (ops[2]) != CONST_INT
5184 || (GET_CODE (ops[1]) != CONST_INT
5185 && GET_CODE (ops[1]) != REG))
5186 return 0;
5187 if (GET_CODE (ops[1]) == REG
5188 && (!REG_POINTER (ops[1])
5189 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5190 return 0;
644459d0 5191
5192 for (i = 0; i < 16; i++)
dea01258 5193 dst[i] = i + 16;
5194 isize = INTVAL (ops[3]);
5195 if (isize == 1)
5196 shift = 3;
5197 else if (isize == 2)
5198 shift = 2;
5199 else
5200 shift = 0;
5201 offset = (INTVAL (ops[2]) +
5202 (GET_CODE (ops[1]) ==
5203 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5204 for (i = 0; i < isize; i++)
5205 dst[offset + i] = i + shift;
5206 return array_to_constant (TImode, dst);
644459d0 5207}
5208
5209/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5210 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5211 than 16 bytes, the value is repeated across the rest of the array. */
5212void
5213constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5214{
5215 HOST_WIDE_INT val;
5216 int i, j, first;
5217
5218 memset (arr, 0, 16);
5219 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5220 if (GET_CODE (x) == CONST_INT
5221 || (GET_CODE (x) == CONST_DOUBLE
5222 && (mode == SFmode || mode == DFmode)))
5223 {
5224 gcc_assert (mode != VOIDmode && mode != BLKmode);
5225
5226 if (GET_CODE (x) == CONST_DOUBLE)
5227 val = const_double_to_hwint (x);
5228 else
5229 val = INTVAL (x);
5230 first = GET_MODE_SIZE (mode) - 1;
5231 for (i = first; i >= 0; i--)
5232 {
5233 arr[i] = val & 0xff;
5234 val >>= 8;
5235 }
5236 /* Splat the constant across the whole array. */
5237 for (j = 0, i = first + 1; i < 16; i++)
5238 {
5239 arr[i] = arr[j];
5240 j = (j == first) ? 0 : j + 1;
5241 }
5242 }
5243 else if (GET_CODE (x) == CONST_DOUBLE)
5244 {
5245 val = CONST_DOUBLE_LOW (x);
5246 for (i = 15; i >= 8; i--)
5247 {
5248 arr[i] = val & 0xff;
5249 val >>= 8;
5250 }
5251 val = CONST_DOUBLE_HIGH (x);
5252 for (i = 7; i >= 0; i--)
5253 {
5254 arr[i] = val & 0xff;
5255 val >>= 8;
5256 }
5257 }
5258 else if (GET_CODE (x) == CONST_VECTOR)
5259 {
5260 int units;
5261 rtx elt;
5262 mode = GET_MODE_INNER (mode);
5263 units = CONST_VECTOR_NUNITS (x);
5264 for (i = 0; i < units; i++)
5265 {
5266 elt = CONST_VECTOR_ELT (x, i);
5267 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5268 {
5269 if (GET_CODE (elt) == CONST_DOUBLE)
5270 val = const_double_to_hwint (elt);
5271 else
5272 val = INTVAL (elt);
5273 first = GET_MODE_SIZE (mode) - 1;
5274 if (first + i * GET_MODE_SIZE (mode) > 16)
5275 abort ();
5276 for (j = first; j >= 0; j--)
5277 {
5278 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5279 val >>= 8;
5280 }
5281 }
5282 }
5283 }
5284 else
5285 gcc_unreachable();
5286}
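
/* For illustration: the SImode constant 0x01020304 produces

     arr = { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }

   i.e. the 4-byte value in most-significant-byte-first order, repeated
   across all 16 bytes.  */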
5287
5288/* Convert a 16 byte array to a constant of mode MODE. When MODE is
5289 smaller than 16 bytes, use the bytes that would represent that value
5290 in a register, e.g., for QImode return the value of arr[3]. */
5291rtx
e96f2783 5292array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 5293{
5294 enum machine_mode inner_mode;
5295 rtvec v;
5296 int units, size, i, j, k;
5297 HOST_WIDE_INT val;
5298
5299 if (GET_MODE_CLASS (mode) == MODE_INT
5300 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5301 {
5302 j = GET_MODE_SIZE (mode);
5303 i = j < 4 ? 4 - j : 0;
5304 for (val = 0; i < j; i++)
5305 val = (val << 8) | arr[i];
5306 val = trunc_int_for_mode (val, mode);
5307 return GEN_INT (val);
5308 }
5309
5310 if (mode == TImode)
5311 {
5312 HOST_WIDE_INT high;
5313 for (i = high = 0; i < 8; i++)
5314 high = (high << 8) | arr[i];
5315 for (i = 8, val = 0; i < 16; i++)
5316 val = (val << 8) | arr[i];
5317 return immed_double_const (val, high, TImode);
5318 }
5319 if (mode == SFmode)
5320 {
5321 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5322 val = trunc_int_for_mode (val, SImode);
171b6d22 5323 return hwint_to_const_double (SFmode, val);
644459d0 5324 }
5325 if (mode == DFmode)
5326 {
1f915911 5327 for (i = 0, val = 0; i < 8; i++)
5328 val = (val << 8) | arr[i];
171b6d22 5329 return hwint_to_const_double (DFmode, val);
644459d0 5330 }
5331
5332 if (!VECTOR_MODE_P (mode))
5333 abort ();
5334
5335 units = GET_MODE_NUNITS (mode);
5336 size = GET_MODE_UNIT_SIZE (mode);
5337 inner_mode = GET_MODE_INNER (mode);
5338 v = rtvec_alloc (units);
5339
5340 for (k = i = 0; i < units; ++i)
5341 {
5342 val = 0;
5343 for (j = 0; j < size; j++, k++)
5344 val = (val << 8) | arr[k];
5345
5346 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5347 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5348 else
5349 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5350 }
5351 if (k > 16)
5352 abort ();
5353
5354 return gen_rtx_CONST_VECTOR (mode, v);
5355}
5356
5357static void
5358reloc_diagnostic (rtx x)
5359{
712d2297 5360 tree decl = 0;
644459d0 5361 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5362 return;
5363
5364 if (GET_CODE (x) == SYMBOL_REF)
5365 decl = SYMBOL_REF_DECL (x);
5366 else if (GET_CODE (x) == CONST
5367 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5368 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5369
5370 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5371 if (decl && !DECL_P (decl))
5372 decl = 0;
5373
644459d0 5374 /* The decl could be a string constant. */
5375 if (decl && DECL_P (decl))
712d2297 5376 {
5377 location_t loc;
5378 /* We use last_assemble_variable_decl to get line information. It's
5379 not always going to be right and might not even be close, but will
5380 be right for the more common cases. */
5381 if (!last_assemble_variable_decl || in_section == ctors_section)
5382 loc = DECL_SOURCE_LOCATION (decl);
5383 else
5384 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 5385
712d2297 5386 if (TARGET_WARN_RELOC)
5387 warning_at (loc, 0,
5388 "creating run-time relocation for %qD", decl);
5389 else
5390 error_at (loc,
5391 "creating run-time relocation for %qD", decl);
5392 }
5393 else
5394 {
5395 if (TARGET_WARN_RELOC)
5396 warning_at (input_location, 0, "creating run-time relocation");
5397 else
5398 error_at (input_location, "creating run-time relocation");
5399 }
644459d0 5400}
5401
5402/* Hook into assemble_integer so we can generate an error for run-time
5403 relocations. The SPU ABI disallows them. */
5404static bool
5405spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5406{
5407 /* By default run-time relocations aren't supported, but we allow them
 5408 in case users support them in their own run-time loader. And we provide
 5409 a warning for those users who don't. */
5410 if ((GET_CODE (x) == SYMBOL_REF)
5411 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5412 reloc_diagnostic (x);
5413
5414 return default_assemble_integer (x, size, aligned_p);
5415}
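
/* For illustration (an assumed, typical trigger): with -fpic, assembling a
   statically initialized pointer such as

     extern int x;
     int *p = &x;

   emits the address of x as data, which would need a load-time fixup;
   that is the run-time relocation the diagnostic above reports.  */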
5416
5417static void
5418spu_asm_globalize_label (FILE * file, const char *name)
5419{
5420 fputs ("\t.global\t", file);
5421 assemble_name (file, name);
5422 fputs ("\n", file);
5423}
5424
5425static bool
f529eb25 5426spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5427 bool speed ATTRIBUTE_UNUSED)
644459d0 5428{
5429 enum machine_mode mode = GET_MODE (x);
5430 int cost = COSTS_N_INSNS (2);
5431
5432 /* Folding to a CONST_VECTOR will use extra space but there might
5433 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5434 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5435 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5436 because this cost will only be compared against a single insn.
5437 if (code == CONST_VECTOR)
5438 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5439 */
5440
5441 /* Use defaults for float operations. Not accurate but good enough. */
5442 if (mode == DFmode)
5443 {
5444 *total = COSTS_N_INSNS (13);
5445 return true;
5446 }
5447 if (mode == SFmode)
5448 {
5449 *total = COSTS_N_INSNS (6);
5450 return true;
5451 }
5452 switch (code)
5453 {
5454 case CONST_INT:
5455 if (satisfies_constraint_K (x))
5456 *total = 0;
5457 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5458 *total = COSTS_N_INSNS (1);
5459 else
5460 *total = COSTS_N_INSNS (3);
5461 return true;
5462
5463 case CONST:
5464 *total = COSTS_N_INSNS (3);
5465 return true;
5466
5467 case LABEL_REF:
5468 case SYMBOL_REF:
5469 *total = COSTS_N_INSNS (0);
5470 return true;
5471
5472 case CONST_DOUBLE:
5473 *total = COSTS_N_INSNS (5);
5474 return true;
5475
5476 case FLOAT_EXTEND:
5477 case FLOAT_TRUNCATE:
5478 case FLOAT:
5479 case UNSIGNED_FLOAT:
5480 case FIX:
5481 case UNSIGNED_FIX:
5482 *total = COSTS_N_INSNS (7);
5483 return true;
5484
5485 case PLUS:
5486 if (mode == TImode)
5487 {
5488 *total = COSTS_N_INSNS (9);
5489 return true;
5490 }
5491 break;
5492
5493 case MULT:
5494 cost =
5495 GET_CODE (XEXP (x, 0)) ==
5496 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5497 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5498 {
5499 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5500 {
5501 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5502 cost = COSTS_N_INSNS (14);
5503 if ((val & 0xffff) == 0)
5504 cost = COSTS_N_INSNS (9);
5505 else if (val > 0 && val < 0x10000)
5506 cost = COSTS_N_INSNS (11);
5507 }
5508 }
5509 *total = cost;
5510 return true;
5511 case DIV:
5512 case UDIV:
5513 case MOD:
5514 case UMOD:
5515 *total = COSTS_N_INSNS (20);
5516 return true;
5517 case ROTATE:
5518 case ROTATERT:
5519 case ASHIFT:
5520 case ASHIFTRT:
5521 case LSHIFTRT:
5522 *total = COSTS_N_INSNS (4);
5523 return true;
5524 case UNSPEC:
5525 if (XINT (x, 1) == UNSPEC_CONVERT)
5526 *total = COSTS_N_INSNS (0);
5527 else
5528 *total = COSTS_N_INSNS (4);
5529 return true;
5530 }
5531 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5532 if (GET_MODE_CLASS (mode) == MODE_INT
5533 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5534 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5535 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5536 *total = cost;
5537 return true;
5538}
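
/* Illustrative sketch only (not used by the port): the quadratic mode-size
   scaling applied at the end of spu_rtx_costs above.  An integer operation
   wider than SImode is assumed to cost roughly the square of the width
   ratio, e.g. 4x for DImode and 16x for TImode.  */
static int ATTRIBUTE_UNUSED
spu_scaled_cost_sketch (int si_cost, int mode_bytes)
{
  int factor = mode_bytes / 4;		/* SImode is 4 bytes wide.  */
  return si_cost * factor * factor;	/* Quadratic scaling.  */
}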
5539
1bd43494 5540static enum machine_mode
5541spu_unwind_word_mode (void)
644459d0 5542{
1bd43494 5543 return SImode;
644459d0 5544}
5545
5546/* Decide whether we can make a sibling call to a function. DECL is the
5547 declaration of the function being targeted by the call and EXP is the
5548 CALL_EXPR representing the call. */
5549static bool
5550spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5551{
5552 return decl && !TARGET_LARGE_MEM;
5553}
5554
5555/* We need to correctly update the back chain pointer and the Available
5556 Stack Size (which is in the second slot of the sp register). */
5557void
5558spu_allocate_stack (rtx op0, rtx op1)
5559{
5560 HOST_WIDE_INT v;
5561 rtx chain = gen_reg_rtx (V4SImode);
5562 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5563 rtx sp = gen_reg_rtx (V4SImode);
5564 rtx splatted = gen_reg_rtx (V4SImode);
5565 rtx pat = gen_reg_rtx (TImode);
5566
5567 /* copy the back chain so we can save it back again. */
5568 emit_move_insn (chain, stack_bot);
5569
5570 op1 = force_reg (SImode, op1);
5571
5572 v = 0x1020300010203ll;
5573 emit_move_insn (pat, immed_double_const (v, v, TImode));
5574 emit_insn (gen_shufb (splatted, op1, op1, pat));
5575
5576 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5577 emit_insn (gen_subv4si3 (sp, sp, splatted));
5578
5579 if (flag_stack_check)
5580 {
5581 rtx avail = gen_reg_rtx(SImode);
5582 rtx result = gen_reg_rtx(SImode);
5583 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5584 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5585 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5586 }
5587
5588 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5589
5590 emit_move_insn (stack_bot, chain);
5591
5592 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5593}
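
/* Illustrative sketch only (not used by the port): the effect of the
   splat-and-subtract sequence above, assuming word 0 of the sp register
   holds the stack pointer and word 1 the Available Stack Size (as the
   comment before spu_allocate_stack states).  A single vector subtract
   updates both words by the same amount.  */
struct spu_sp_sketch { int sp; int avail; int pad2; int pad3; };

static void ATTRIBUTE_UNUSED
spu_allocate_stack_sketch (struct spu_sp_sketch *sp, int nbytes)
{
  sp->sp -= nbytes;		/* New top of stack.  */
  sp->avail -= nbytes;		/* Space left for -fstack-check.  */
}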
5594
5595void
5596spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5597{
5598 static unsigned char arr[16] =
5599 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5600 rtx temp = gen_reg_rtx (SImode);
5601 rtx temp2 = gen_reg_rtx (SImode);
5602 rtx temp3 = gen_reg_rtx (V4SImode);
5603 rtx temp4 = gen_reg_rtx (V4SImode);
5604 rtx pat = gen_reg_rtx (TImode);
5605 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5606
5607 /* Restore the backchain from the first word, sp from the second. */
5608 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5609 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5610
5611 emit_move_insn (pat, array_to_constant (TImode, arr));
5612
5613 /* Compute Available Stack Size for sp */
5614 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5615 emit_insn (gen_shufb (temp3, temp, temp, pat));
5616
5617 /* Compute Available Stack Size for back chain */
5618 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5619 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5620 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5621
5622 emit_insn (gen_addv4si3 (sp, sp, temp3));
5623 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5624}
5625
5626static void
5627spu_init_libfuncs (void)
5628{
5629 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5630 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5631 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5632 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5633 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5634 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5635 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5636 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5637 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5638 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5639 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5640
5641 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5642 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5643
5644 set_optab_libfunc (smul_optab, TImode, "__multi3");
5645 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5646 set_optab_libfunc (smod_optab, TImode, "__modti3");
5647 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5648 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5649 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5650}
5651
5652/* Make a subreg, stripping any existing subreg. We could possibly just
5653 call simplify_subreg, but in this case we know what we want. */
5654rtx
5655spu_gen_subreg (enum machine_mode mode, rtx x)
5656{
5657 if (GET_CODE (x) == SUBREG)
5658 x = SUBREG_REG (x);
5659 if (GET_MODE (x) == mode)
5660 return x;
5661 return gen_rtx_SUBREG (mode, x, 0);
5662}
5663
5664static bool
fb80456a 5665spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5666{
5667 return (TYPE_MODE (type) == BLKmode
5668 && ((type) == 0
5669 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5670 || int_size_in_bytes (type) >
5671 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5672}
5673\f
5674/* Create the built-in types and functions */
5675
c2233b46 5676enum spu_function_code
5677{
5678#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5679#include "spu-builtins.def"
5680#undef DEF_BUILTIN
5681 NUM_SPU_BUILTINS
5682};
5683
5684extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5685
644459d0 5686struct spu_builtin_description spu_builtins[] = {
5687#define DEF_BUILTIN(fcode, icode, name, type, params) \
0c5c4d59 5688 {fcode, icode, name, type, params},
644459d0 5689#include "spu-builtins.def"
5690#undef DEF_BUILTIN
5691};
5692
0c5c4d59 5693static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5694
5695/* Returns the spu builtin decl for CODE. */
e6925042 5696
5697static tree
5698spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5699{
5700 if (code >= NUM_SPU_BUILTINS)
5701 return error_mark_node;
5702
0c5c4d59 5703 return spu_builtin_decls[code];
e6925042 5704}
5705
5706
644459d0 5707static void
5708spu_init_builtins (void)
5709{
5710 struct spu_builtin_description *d;
5711 unsigned int i;
5712
5713 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5714 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5715 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5716 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5717 V4SF_type_node = build_vector_type (float_type_node, 4);
5718 V2DF_type_node = build_vector_type (double_type_node, 2);
5719
5720 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5721 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5722 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5723 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5724
c4ecce0c 5725 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5726
5727 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5728 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5729 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5730 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5731 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5732 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5733 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5734 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5735 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5736 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5737 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5738 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5739
5740 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5741 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5742 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5743 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5744 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5745 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5746 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5747 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5748
5749 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5750 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5751
5752 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5753
5754 spu_builtin_types[SPU_BTI_PTR] =
5755 build_pointer_type (build_qualified_type
5756 (void_type_node,
5757 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5758
5759 /* For each builtin we build a new prototype. The tree code will make
5760 sure nodes are shared. */
5761 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5762 {
5763 tree p;
5764 char name[64]; /* build_function will make a copy. */
5765 int parm;
5766
5767 if (d->name == 0)
5768 continue;
5769
5dfbd18f 5770 /* Find last parm. */
644459d0 5771 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5772 ;
644459d0 5773
5774 p = void_list_node;
5775 while (parm > 1)
5776 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5777
5778 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5779
5780 sprintf (name, "__builtin_%s", d->name);
0c5c4d59 5781 spu_builtin_decls[i] =
3726fe5e 5782 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
a76866d3 5783 if (d->fcode == SPU_MASK_FOR_LOAD)
0c5c4d59 5784 TREE_READONLY (spu_builtin_decls[i]) = 1;
5dfbd18f 5785
5786 /* These builtins don't throw. */
0c5c4d59 5787 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
644459d0 5788 }
5789}
5790
cf31d486 5791void
5792spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5793{
5794 static unsigned char arr[16] =
5795 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5796
5797 rtx temp = gen_reg_rtx (Pmode);
5798 rtx temp2 = gen_reg_rtx (V4SImode);
5799 rtx temp3 = gen_reg_rtx (V4SImode);
5800 rtx pat = gen_reg_rtx (TImode);
5801 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5802
5803 emit_move_insn (pat, array_to_constant (TImode, arr));
5804
5805 /* Restore the sp. */
5806 emit_move_insn (temp, op1);
5807 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5808
5809 /* Compute available stack size for sp. */
5810 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5811 emit_insn (gen_shufb (temp3, temp, temp, pat));
5812
5813 emit_insn (gen_addv4si3 (sp, sp, temp3));
5814 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5815}
5816
644459d0 5817int
5818spu_safe_dma (HOST_WIDE_INT channel)
5819{
006e4b96 5820 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5821}
5822
5823void
5824spu_builtin_splats (rtx ops[])
5825{
5826 enum machine_mode mode = GET_MODE (ops[0]);
5827 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5828 {
5829 unsigned char arr[16];
5830 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5831 emit_move_insn (ops[0], array_to_constant (mode, arr));
5832 }
644459d0 5833 else
5834 {
5835 rtx reg = gen_reg_rtx (TImode);
5836 rtx shuf;
5837 if (GET_CODE (ops[1]) != REG
5838 && GET_CODE (ops[1]) != SUBREG)
5839 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5840 switch (mode)
5841 {
5842 case V2DImode:
5843 case V2DFmode:
5844 shuf =
5845 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5846 TImode);
5847 break;
5848 case V4SImode:
5849 case V4SFmode:
5850 shuf =
5851 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5852 TImode);
5853 break;
5854 case V8HImode:
5855 shuf =
5856 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5857 TImode);
5858 break;
5859 case V16QImode:
5860 shuf =
5861 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5862 TImode);
5863 break;
5864 default:
5865 abort ();
5866 }
5867 emit_move_insn (reg, shuf);
5868 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5869 }
5870}
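
/* Illustrative sketch only (not used by the port): what the shuffle
   patterns above do, assuming shufb-style semantics where each control
   byte selects one byte of the concatenated sources.  Both sources are
   the same register here, so only the low four bits of each control byte
   matter; e.g. the V4SI pattern { 0,1,2,3, 0,1,2,3, ... } copies the
   element in the preferred slot into all four words.  */
static void ATTRIBUTE_UNUSED
spu_splat_sketch (unsigned char dst[16], const unsigned char src[16],
		  const unsigned char ctl[16])
{
  int i;
  for (i = 0; i < 16; i++)
    dst[i] = src[ctl[i] & 0x0f];	/* Byte select from SRC.  */
}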
5871
5872void
5873spu_builtin_extract (rtx ops[])
5874{
5875 enum machine_mode mode;
5876 rtx rot, from, tmp;
5877
5878 mode = GET_MODE (ops[1]);
5879
5880 if (GET_CODE (ops[2]) == CONST_INT)
5881 {
5882 switch (mode)
5883 {
5884 case V16QImode:
5885 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5886 break;
5887 case V8HImode:
5888 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5889 break;
5890 case V4SFmode:
5891 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5892 break;
5893 case V4SImode:
5894 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5895 break;
5896 case V2DImode:
5897 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5898 break;
5899 case V2DFmode:
5900 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5901 break;
5902 default:
5903 abort ();
5904 }
5905 return;
5906 }
5907
5908 from = spu_gen_subreg (TImode, ops[1]);
5909 rot = gen_reg_rtx (TImode);
5910 tmp = gen_reg_rtx (SImode);
5911
5912 switch (mode)
5913 {
5914 case V16QImode:
5915 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5916 break;
5917 case V8HImode:
5918 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5919 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5920 break;
5921 case V4SFmode:
5922 case V4SImode:
5923 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5924 break;
5925 case V2DImode:
5926 case V2DFmode:
5927 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5928 break;
5929 default:
5930 abort ();
5931 }
5932 emit_insn (gen_rotqby_ti (rot, from, tmp));
5933
5934 emit_insn (gen_spu_convert (ops[0], rot));
5935}
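
/* Illustrative sketch only (not used by the port): the variable-index
   path above, shown for a V4SI source.  The element's byte offset is
   computed from the index, the quadword is rotated left by that many
   bytes (rotqby semantics assumed), and the result is read from the
   preferred slot, i.e. the first four bytes (the SPU is big-endian).  */
static unsigned int ATTRIBUTE_UNUSED
spu_extract_v4si_sketch (const unsigned char v[16], int idx)
{
  unsigned char r[16];
  int rot = idx << 2;			/* Byte offset of element IDX.  */
  int i;
  for (i = 0; i < 16; i++)
    r[i] = v[(i + rot) & 15];		/* Rotate left by ROT bytes.  */
  return ((unsigned int) r[0] << 24) | (r[1] << 16) | (r[2] << 8) | r[3];
}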
5936
5937void
5938spu_builtin_insert (rtx ops[])
5939{
5940 enum machine_mode mode = GET_MODE (ops[0]);
5941 enum machine_mode imode = GET_MODE_INNER (mode);
5942 rtx mask = gen_reg_rtx (TImode);
5943 rtx offset;
5944
5945 if (GET_CODE (ops[3]) == CONST_INT)
5946 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5947 else
5948 {
5949 offset = gen_reg_rtx (SImode);
5950 emit_insn (gen_mulsi3
5951 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5952 }
5953 emit_insn (gen_cpat
5954 (mask, stack_pointer_rtx, offset,
5955 GEN_INT (GET_MODE_SIZE (imode))));
5956 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5957}
5958
5959void
5960spu_builtin_promote (rtx ops[])
5961{
5962 enum machine_mode mode, imode;
5963 rtx rot, from, offset;
5964 HOST_WIDE_INT pos;
5965
5966 mode = GET_MODE (ops[0]);
5967 imode = GET_MODE_INNER (mode);
5968
5969 from = gen_reg_rtx (TImode);
5970 rot = spu_gen_subreg (TImode, ops[0]);
5971
5972 emit_insn (gen_spu_convert (from, ops[1]));
5973
5974 if (GET_CODE (ops[2]) == CONST_INT)
5975 {
5976 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5977 if (GET_MODE_SIZE (imode) < 4)
5978 pos += 4 - GET_MODE_SIZE (imode);
5979 offset = GEN_INT (pos & 15);
5980 }
5981 else
5982 {
5983 offset = gen_reg_rtx (SImode);
5984 switch (mode)
5985 {
5986 case V16QImode:
5987 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5988 break;
5989 case V8HImode:
5990 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5991 emit_insn (gen_addsi3 (offset, offset, offset));
5992 break;
5993 case V4SFmode:
5994 case V4SImode:
5995 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5996 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5997 break;
5998 case V2DImode:
5999 case V2DFmode:
6000 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
6001 break;
6002 default:
6003 abort ();
6004 }
6005 }
6006 emit_insn (gen_rotqby_ti (rot, from, offset));
6007}
6008
e96f2783 6009static void
6010spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 6011{
e96f2783 6012 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 6013 rtx shuf = gen_reg_rtx (V4SImode);
6014 rtx insn = gen_reg_rtx (V4SImode);
6015 rtx shufc;
6016 rtx insnc;
6017 rtx mem;
6018
6019 fnaddr = force_reg (SImode, fnaddr);
6020 cxt = force_reg (SImode, cxt);
6021
6022 if (TARGET_LARGE_MEM)
6023 {
6024 rtx rotl = gen_reg_rtx (V4SImode);
6025 rtx mask = gen_reg_rtx (V4SImode);
6026 rtx bi = gen_reg_rtx (SImode);
e96f2783 6027 static unsigned char const shufa[16] = {
644459d0 6028 2, 3, 0, 1, 18, 19, 16, 17,
6029 0, 1, 2, 3, 16, 17, 18, 19
6030 };
e96f2783 6031 static unsigned char const insna[16] = {
644459d0 6032 0x41, 0, 0, 79,
6033 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6034 0x60, 0x80, 0, 79,
6035 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6036 };
6037
6038 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6039 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6040
6041 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 6042 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 6043 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6044 emit_insn (gen_selb (insn, insnc, rotl, mask));
6045
e96f2783 6046 mem = adjust_address (m_tramp, V4SImode, 0);
6047 emit_move_insn (mem, insn);
644459d0 6048
6049 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 6050 mem = adjust_address (m_tramp, Pmode, 16);
6051 emit_move_insn (mem, bi);
644459d0 6052 }
6053 else
6054 {
6055 rtx scxt = gen_reg_rtx (SImode);
6056 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 6057 static unsigned char const insna[16] = {
644459d0 6058 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6059 0x30, 0, 0, 0,
6060 0, 0, 0, 0,
6061 0, 0, 0, 0
6062 };
6063
6064 shufc = gen_reg_rtx (TImode);
6065 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6066
6067 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6068 fits 18 bits and the last 4 are zeros. This will be true if
6069 the stack pointer is initialized to 0x3fff0 at program start;
6070 otherwise the ila instruction will be garbage. */
6071
6072 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6073 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6074 emit_insn (gen_cpat
6075 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6076 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6077 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6078
e96f2783 6079 mem = adjust_address (m_tramp, V4SImode, 0);
6080 emit_move_insn (mem, insn);
644459d0 6081 }
6082 emit_insn (gen_sync ());
6083}
6084
6085void
6086spu_expand_sign_extend (rtx ops[])
6087{
6088 unsigned char arr[16];
6089 rtx pat = gen_reg_rtx (TImode);
6090 rtx sign, c;
6091 int i, last;
6092 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
6093 if (GET_MODE (ops[1]) == QImode)
6094 {
6095 sign = gen_reg_rtx (HImode);
6096 emit_insn (gen_extendqihi2 (sign, ops[1]));
6097 for (i = 0; i < 16; i++)
6098 arr[i] = 0x12;
6099 arr[last] = 0x13;
6100 }
6101 else
6102 {
6103 for (i = 0; i < 16; i++)
6104 arr[i] = 0x10;
6105 switch (GET_MODE (ops[1]))
6106 {
6107 case HImode:
6108 sign = gen_reg_rtx (SImode);
6109 emit_insn (gen_extendhisi2 (sign, ops[1]));
6110 arr[last] = 0x03;
6111 arr[last - 1] = 0x02;
6112 break;
6113 case SImode:
6114 sign = gen_reg_rtx (SImode);
6115 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6116 for (i = 0; i < 4; i++)
6117 arr[last - i] = 3 - i;
6118 break;
6119 case DImode:
6120 sign = gen_reg_rtx (SImode);
6121 c = gen_reg_rtx (SImode);
6122 emit_insn (gen_spu_convert (c, ops[1]));
6123 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6124 for (i = 0; i < 8; i++)
6125 arr[last - i] = 7 - i;
6126 break;
6127 default:
6128 abort ();
6129 }
6130 }
6131 emit_move_insn (pat, array_to_constant (TImode, arr));
6132 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6133}
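
/* Illustrative sketch only (not used by the port): the effect of the
   sign-extension shuffle above, shown for a 32-bit to 64-bit extension.
   The value's own bytes land in the least significant bytes of the
   result and every other byte is filled with a copy of the sign byte.  */
static long long ATTRIBUTE_UNUSED
spu_sext32_sketch (int x)
{
  unsigned char sign = x < 0 ? 0xff : 0x00;
  unsigned long long hi = (unsigned long long) sign * 0x01010101ull;
  return (long long) ((hi << 32) | (unsigned int) x);
}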
6134
6135/* expand vector initialization. If there are any constant parts,
6136 load constant parts first. Then load any non-constant parts. */
6137void
6138spu_expand_vector_init (rtx target, rtx vals)
6139{
6140 enum machine_mode mode = GET_MODE (target);
6141 int n_elts = GET_MODE_NUNITS (mode);
6142 int n_var = 0;
6143 bool all_same = true;
790c536c 6144 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 6145 int i;
6146
6147 first = XVECEXP (vals, 0, 0);
6148 for (i = 0; i < n_elts; ++i)
6149 {
6150 x = XVECEXP (vals, 0, i);
e442af0b 6151 if (!(CONST_INT_P (x)
6152 || GET_CODE (x) == CONST_DOUBLE
6153 || GET_CODE (x) == CONST_FIXED))
644459d0 6154 ++n_var;
6155 else
6156 {
6157 if (first_constant == NULL_RTX)
6158 first_constant = x;
6159 }
6160 if (i > 0 && !rtx_equal_p (x, first))
6161 all_same = false;
6162 }
6163
6164 /* if all elements are the same, use splats to repeat elements */
6165 if (all_same)
6166 {
6167 if (!CONSTANT_P (first)
6168 && !register_operand (first, GET_MODE (x)))
6169 first = force_reg (GET_MODE (first), first);
6170 emit_insn (gen_spu_splats (target, first));
6171 return;
6172 }
6173
6174 /* load constant parts */
6175 if (n_var != n_elts)
6176 {
6177 if (n_var == 0)
6178 {
6179 emit_move_insn (target,
6180 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6181 }
6182 else
6183 {
6184 rtx constant_parts_rtx = copy_rtx (vals);
6185
6186 gcc_assert (first_constant != NULL_RTX);
6187 /* Fill empty slots with the first constant; this increases
6188 our chance of using splats in the recursive call below. */
6189 for (i = 0; i < n_elts; ++i)
e442af0b 6190 {
6191 x = XVECEXP (constant_parts_rtx, 0, i);
6192 if (!(CONST_INT_P (x)
6193 || GET_CODE (x) == CONST_DOUBLE
6194 || GET_CODE (x) == CONST_FIXED))
6195 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6196 }
644459d0 6197
6198 spu_expand_vector_init (target, constant_parts_rtx);
6199 }
6200 }
6201
6202 /* load variable parts */
6203 if (n_var != 0)
6204 {
6205 rtx insert_operands[4];
6206
6207 insert_operands[0] = target;
6208 insert_operands[2] = target;
6209 for (i = 0; i < n_elts; ++i)
6210 {
6211 x = XVECEXP (vals, 0, i);
e442af0b 6212 if (!(CONST_INT_P (x)
6213 || GET_CODE (x) == CONST_DOUBLE
6214 || GET_CODE (x) == CONST_FIXED))
644459d0 6215 {
6216 if (!register_operand (x, GET_MODE (x)))
6217 x = force_reg (GET_MODE (x), x);
6218 insert_operands[1] = x;
6219 insert_operands[3] = GEN_INT (i);
6220 spu_builtin_insert (insert_operands);
6221 }
6222 }
6223 }
6224}
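
/* Illustrative only, using the GCC vector extension (names hypothetical):
   with one non-constant element the expansion above emits a constant
   vector load followed by a single insert.  Empty slots in the constant
   part are filled with the first constant so that the recursive call can
   still use a splat when the remaining elements happen to match.  */
typedef int spu_v4si_sketch __attribute__ ((vector_size (16)));

static spu_v4si_sketch ATTRIBUTE_UNUSED
spu_vector_init_sketch (int a)
{
  /* Expands as: load { 1, 1, 2, 3 }, then insert A into element 0.  */
  spu_v4si_sketch v = { a, 1, 2, 3 };
  return v;
}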
6352eedf 6225
5474166e 6226/* Return the insn index of the vector compare instruction for the given
6227 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6228
6229static int
6230get_vec_cmp_insn (enum rtx_code code,
6231 enum machine_mode dest_mode,
6232 enum machine_mode op_mode)
6233
6234{
6235 switch (code)
6236 {
6237 case EQ:
6238 if (dest_mode == V16QImode && op_mode == V16QImode)
6239 return CODE_FOR_ceq_v16qi;
6240 if (dest_mode == V8HImode && op_mode == V8HImode)
6241 return CODE_FOR_ceq_v8hi;
6242 if (dest_mode == V4SImode && op_mode == V4SImode)
6243 return CODE_FOR_ceq_v4si;
6244 if (dest_mode == V4SImode && op_mode == V4SFmode)
6245 return CODE_FOR_ceq_v4sf;
6246 if (dest_mode == V2DImode && op_mode == V2DFmode)
6247 return CODE_FOR_ceq_v2df;
6248 break;
6249 case GT:
6250 if (dest_mode == V16QImode && op_mode == V16QImode)
6251 return CODE_FOR_cgt_v16qi;
6252 if (dest_mode == V8HImode && op_mode == V8HImode)
6253 return CODE_FOR_cgt_v8hi;
6254 if (dest_mode == V4SImode && op_mode == V4SImode)
6255 return CODE_FOR_cgt_v4si;
6256 if (dest_mode == V4SImode && op_mode == V4SFmode)
6257 return CODE_FOR_cgt_v4sf;
6258 if (dest_mode == V2DImode && op_mode == V2DFmode)
6259 return CODE_FOR_cgt_v2df;
6260 break;
6261 case GTU:
6262 if (dest_mode == V16QImode && op_mode == V16QImode)
6263 return CODE_FOR_clgt_v16qi;
6264 if (dest_mode == V8HImode && op_mode == V8HImode)
6265 return CODE_FOR_clgt_v8hi;
6266 if (dest_mode == V4SImode && op_mode == V4SImode)
6267 return CODE_FOR_clgt_v4si;
6268 break;
6269 default:
6270 break;
6271 }
6272 return -1;
6273}
6274
6275/* Emit vector compare for operands OP0 and OP1 using code RCODE.
6276 DMODE is expected destination mode. This is a recursive function. */
6277
6278static rtx
6279spu_emit_vector_compare (enum rtx_code rcode,
6280 rtx op0, rtx op1,
6281 enum machine_mode dmode)
6282{
6283 int vec_cmp_insn;
6284 rtx mask;
6285 enum machine_mode dest_mode;
6286 enum machine_mode op_mode = GET_MODE (op1);
6287
6288 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6289
6290 /* Single-precision floating-point vector compares use a V4SImode destination.
6291 Double-precision floating-point vector compares use a V2DImode destination.
6292 Move the destination to the appropriate mode later. */
6293 if (dmode == V4SFmode)
6294 dest_mode = V4SImode;
6295 else if (dmode == V2DFmode)
6296 dest_mode = V2DImode;
6297 else
6298 dest_mode = dmode;
6299
6300 mask = gen_reg_rtx (dest_mode);
6301 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6302
6303 if (vec_cmp_insn == -1)
6304 {
6305 bool swap_operands = false;
6306 bool try_again = false;
6307 switch (rcode)
6308 {
6309 case LT:
6310 rcode = GT;
6311 swap_operands = true;
6312 try_again = true;
6313 break;
6314 case LTU:
6315 rcode = GTU;
6316 swap_operands = true;
6317 try_again = true;
6318 break;
6319 case NE:
6320 /* Treat A != B as ~(A==B). */
6321 {
6322 enum insn_code nor_code;
6323 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
d6bf3b14 6324 nor_code = optab_handler (one_cmpl_optab, dest_mode);
5474166e 6325 gcc_assert (nor_code != CODE_FOR_nothing);
6326 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6327 if (dmode != dest_mode)
6328 {
6329 rtx temp = gen_reg_rtx (dest_mode);
6330 convert_move (temp, mask, 0);
6331 return temp;
6332 }
6333 return mask;
6334 }
6335 break;
6336 case GE:
6337 case GEU:
6338 case LE:
6339 case LEU:
6340 /* Try GT/GTU/LT/LTU OR EQ */
6341 {
6342 rtx c_rtx, eq_rtx;
6343 enum insn_code ior_code;
6344 enum rtx_code new_code;
6345
6346 switch (rcode)
6347 {
6348 case GE: new_code = GT; break;
6349 case GEU: new_code = GTU; break;
6350 case LE: new_code = LT; break;
6351 case LEU: new_code = LTU; break;
6352 default:
6353 gcc_unreachable ();
6354 }
6355
6356 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6357 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6358
d6bf3b14 6359 ior_code = optab_handler (ior_optab, dest_mode);
5474166e 6360 gcc_assert (ior_code != CODE_FOR_nothing);
6361 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6362 if (dmode != dest_mode)
6363 {
6364 rtx temp = gen_reg_rtx (dest_mode);
6365 convert_move (temp, mask, 0);
6366 return temp;
6367 }
6368 return mask;
6369 }
6370 break;
6371 default:
6372 gcc_unreachable ();
6373 }
6374
6375 /* You only get two chances. */
6376 if (try_again)
6377 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6378
6379 gcc_assert (vec_cmp_insn != -1);
6380
6381 if (swap_operands)
6382 {
6383 rtx tmp;
6384 tmp = op0;
6385 op0 = op1;
6386 op1 = tmp;
6387 }
6388 }
6389
6390 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6391 if (dmode != dest_mode)
6392 {
6393 rtx temp = gen_reg_rtx (dest_mode);
6394 convert_move (temp, mask, 0);
6395 return temp;
6396 }
6397 return mask;
6398}
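
/* Illustrative sketch only (not used by the port): how the comparisons
   with no direct instruction are composed above from EQ/GT/GTU, shown
   element-wise in scalar form.  Each compare yields an all-ones or
   all-zeros mask:
     a != b  ->  ~(a == b)
     a >= b  ->  (a > b) | (a == b)
     a <  b  ->  (b > a)		(operands swapped)  */
static unsigned int ATTRIBUTE_UNUSED
spu_cmp_ge_sketch (int a, int b)
{
  unsigned int gt = a > b ? 0xffffffffu : 0u;
  unsigned int eq = a == b ? 0xffffffffu : 0u;
  return gt | eq;			/* Mask for a >= b.  */
}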
6399
6400
6401/* Emit vector conditional expression.
6402 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6403 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6404
6405int
6406spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6407 rtx cond, rtx cc_op0, rtx cc_op1)
6408{
6409 enum machine_mode dest_mode = GET_MODE (dest);
6410 enum rtx_code rcode = GET_CODE (cond);
6411 rtx mask;
6412
6413 /* Get the vector mask for the given relational operations. */
6414 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6415
6416 emit_insn(gen_selb (dest, op2, op1, mask));
6417
6418 return 1;
6419}
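
/* Illustrative sketch only (not used by the port): the bitwise select
   performed by the selb above, assuming the result takes bits from OP1
   where the corresponding mask bit is set and from OP2 elsewhere.  With
   the comparison mask this gives the usual cond ? op1 : op2 per element.  */
static unsigned int ATTRIBUTE_UNUSED
spu_selb_sketch (unsigned int op2, unsigned int op1, unsigned int mask)
{
  return (op1 & mask) | (op2 & ~mask);
}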
6420
6352eedf 6421static rtx
6422spu_force_reg (enum machine_mode mode, rtx op)
6423{
6424 rtx x, r;
6425 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6426 {
6427 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6428 || GET_MODE (op) == BLKmode)
6429 return force_reg (mode, convert_to_mode (mode, op, 0));
6430 abort ();
6431 }
6432
6433 r = force_reg (GET_MODE (op), op);
6434 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6435 {
6436 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6437 if (x)
6438 return x;
6439 }
6440
6441 x = gen_reg_rtx (mode);
6442 emit_insn (gen_spu_convert (x, r));
6443 return x;
6444}
6445
6446static void
6447spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6448{
6449 HOST_WIDE_INT v = 0;
6450 int lsbits;
6451 /* Check the range of immediate operands. */
6452 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6453 {
6454 int range = p - SPU_BTI_7;
5df189be 6455
6456 if (!CONSTANT_P (op))
bf776685 6457 error ("%s expects an integer literal in the range [%d, %d]",
6352eedf 6458 d->name,
6459 spu_builtin_range[range].low, spu_builtin_range[range].high);
6460
6461 if (GET_CODE (op) == CONST
6462 && (GET_CODE (XEXP (op, 0)) == PLUS
6463 || GET_CODE (XEXP (op, 0)) == MINUS))
6464 {
6465 v = INTVAL (XEXP (XEXP (op, 0), 1));
6466 op = XEXP (XEXP (op, 0), 0);
6467 }
6468 else if (GET_CODE (op) == CONST_INT)
6469 v = INTVAL (op);
5df189be 6470 else if (GET_CODE (op) == CONST_VECTOR
6471 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6472 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6473
6474 /* The default for v is 0 which is valid in every range. */
6475 if (v < spu_builtin_range[range].low
6476 || v > spu_builtin_range[range].high)
bf776685 6477 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
5df189be 6478 d->name,
6479 spu_builtin_range[range].low, spu_builtin_range[range].high,
6480 v);
6352eedf 6481
6482 switch (p)
6483 {
6484 case SPU_BTI_S10_4:
6485 lsbits = 4;
6486 break;
6487 case SPU_BTI_U16_2:
6488 /* This is only used in lqa and stqa. Even though the insns
6489 encode 16 bits of the address (all but the 2 least
6490 significant), only 14 bits are used because the address is
6491 masked to be 16-byte aligned. */
6492 lsbits = 4;
6493 break;
6494 case SPU_BTI_S16_2:
6495 /* This is used for lqr and stqr. */
6496 lsbits = 2;
6497 break;
6498 default:
6499 lsbits = 0;
6500 }
6501
6502 if (GET_CODE (op) == LABEL_REF
6503 || (GET_CODE (op) == SYMBOL_REF
6504 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6505 || (v & ((1 << lsbits) - 1)) != 0)
bf776685 6506 warning (0, "%d least significant bits of %s are ignored", lsbits,
6352eedf 6507 d->name);
6508 }
6509}
6510
6511
70ca06f8 6512static int
5df189be 6513expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6514 rtx target, rtx ops[])
6515{
bc620c5c 6516 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6517 int i = 0, a;
6352eedf 6518
6519 /* Expand the arguments into rtl. */
6520
6521 if (d->parm[0] != SPU_BTI_VOID)
6522 ops[i++] = target;
6523
70ca06f8 6524 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6525 {
5df189be 6526 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6527 if (arg == 0)
6528 abort ();
b9c74b4d 6529 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6530 }
70ca06f8 6531
6532 /* The insn pattern may have additional operands (SCRATCH).
6533 Return the number of actual non-SCRATCH operands. */
6534 gcc_assert (i <= insn_data[icode].n_operands);
6535 return i;
6352eedf 6536}
6537
6538static rtx
6539spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6540 tree exp, rtx target)
6352eedf 6541{
6542 rtx pat;
6543 rtx ops[8];
bc620c5c 6544 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6545 enum machine_mode mode, tmode;
6546 int i, p;
70ca06f8 6547 int n_operands;
6352eedf 6548 tree return_type;
6549
6550 /* Set up ops[] with values from arglist. */
70ca06f8 6551 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6552
6553 /* Handle the target operand which must be operand 0. */
6554 i = 0;
6555 if (d->parm[0] != SPU_BTI_VOID)
6556 {
6557
6558 /* We prefer the mode specified for the match_operand; otherwise
6559 use the mode from the builtin function prototype. */
6560 tmode = insn_data[d->icode].operand[0].mode;
6561 if (tmode == VOIDmode)
6562 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6563
6564 /* Try to use target, because not using it can lead to extra copies,
6565 and when all of the registers are in use extra copies lead
6566 to extra spills. */
6567 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6568 ops[0] = target;
6569 else
6570 target = ops[0] = gen_reg_rtx (tmode);
6571
6572 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6573 abort ();
6574
6575 i++;
6576 }
6577
a76866d3 6578 if (d->fcode == SPU_MASK_FOR_LOAD)
6579 {
6580 enum machine_mode mode = insn_data[icode].operand[1].mode;
6581 tree arg;
6582 rtx addr, op, pat;
6583
6584 /* get addr */
5df189be 6585 arg = CALL_EXPR_ARG (exp, 0);
4b8ee66a 6586 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
a76866d3 6587 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6588 addr = memory_address (mode, op);
6589
6590 /* negate addr */
6591 op = gen_reg_rtx (GET_MODE (addr));
6592 emit_insn (gen_rtx_SET (VOIDmode, op,
6593 gen_rtx_NEG (GET_MODE (addr), addr)));
6594 op = gen_rtx_MEM (mode, op);
6595
6596 pat = GEN_FCN (icode) (target, op);
6597 if (!pat)
6598 return 0;
6599 emit_insn (pat);
6600 return target;
6601 }
6602
6352eedf 6603 /* Ignore align_hint, but still expand its args in case they have
6604 side effects. */
6605 if (icode == CODE_FOR_spu_align_hint)
6606 return 0;
6607
6608 /* Handle the rest of the operands. */
70ca06f8 6609 for (p = 1; i < n_operands; i++, p++)
6352eedf 6610 {
6611 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6612 mode = insn_data[d->icode].operand[i].mode;
6613 else
6614 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6615
6616 /* mode can be VOIDmode here for labels */
6617
6618 /* For specific intrinsics with an immediate operand, e.g.,
6619 si_ai(), we sometimes need to convert the scalar argument to a
6620 vector argument by splatting the scalar. */
6621 if (VECTOR_MODE_P (mode)
6622 && (GET_CODE (ops[i]) == CONST_INT
6623 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6624 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6625 {
6626 if (GET_CODE (ops[i]) == CONST_INT)
6627 ops[i] = spu_const (mode, INTVAL (ops[i]));
6628 else
6629 {
6630 rtx reg = gen_reg_rtx (mode);
6631 enum machine_mode imode = GET_MODE_INNER (mode);
6632 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6633 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6634 if (imode != GET_MODE (ops[i]))
6635 ops[i] = convert_to_mode (imode, ops[i],
6636 TYPE_UNSIGNED (spu_builtin_types
6637 [d->parm[i]]));
6638 emit_insn (gen_spu_splats (reg, ops[i]));
6639 ops[i] = reg;
6640 }
6641 }
6642
5df189be 6643 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6644
6352eedf 6645 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6646 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6647 }
6648
70ca06f8 6649 switch (n_operands)
6352eedf 6650 {
6651 case 0:
6652 pat = GEN_FCN (icode) (0);
6653 break;
6654 case 1:
6655 pat = GEN_FCN (icode) (ops[0]);
6656 break;
6657 case 2:
6658 pat = GEN_FCN (icode) (ops[0], ops[1]);
6659 break;
6660 case 3:
6661 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6662 break;
6663 case 4:
6664 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6665 break;
6666 case 5:
6667 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6668 break;
6669 case 6:
6670 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6671 break;
6672 default:
6673 abort ();
6674 }
6675
6676 if (!pat)
6677 abort ();
6678
6679 if (d->type == B_CALL || d->type == B_BISLED)
6680 emit_call_insn (pat);
6681 else if (d->type == B_JUMP)
6682 {
6683 emit_jump_insn (pat);
6684 emit_barrier ();
6685 }
6686 else
6687 emit_insn (pat);
6688
6689 return_type = spu_builtin_types[d->parm[0]];
6690 if (d->parm[0] != SPU_BTI_VOID
6691 && GET_MODE (target) != TYPE_MODE (return_type))
6692 {
6693 /* target is the return value. It should always have the mode of
6694 the builtin function prototype. */
6695 target = spu_force_reg (TYPE_MODE (return_type), target);
6696 }
6697
6698 return target;
6699}
6700
6701rtx
6702spu_expand_builtin (tree exp,
6703 rtx target,
6704 rtx subtarget ATTRIBUTE_UNUSED,
6705 enum machine_mode mode ATTRIBUTE_UNUSED,
6706 int ignore ATTRIBUTE_UNUSED)
6707{
5df189be 6708 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3726fe5e 6709 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6352eedf 6710 struct spu_builtin_description *d;
6711
6712 if (fcode < NUM_SPU_BUILTINS)
6713 {
6714 d = &spu_builtins[fcode];
6715
5df189be 6716 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6717 }
6718 abort ();
6719}
6720
e99f512d 6721/* Implement targetm.vectorize.builtin_mul_widen_even. */
6722static tree
6723spu_builtin_mul_widen_even (tree type)
6724{
e99f512d 6725 switch (TYPE_MODE (type))
6726 {
6727 case V8HImode:
6728 if (TYPE_UNSIGNED (type))
0c5c4d59 6729 return spu_builtin_decls[SPU_MULE_0];
e99f512d 6730 else
0c5c4d59 6731 return spu_builtin_decls[SPU_MULE_1];
e99f512d 6732 break;
6733 default:
6734 return NULL_TREE;
6735 }
6736}
6737
6738/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6739static tree
6740spu_builtin_mul_widen_odd (tree type)
6741{
6742 switch (TYPE_MODE (type))
6743 {
6744 case V8HImode:
6745 if (TYPE_UNSIGNED (type))
0c5c4d59 6746 return spu_builtin_decls[SPU_MULO_1];
e99f512d 6747 else
0c5c4d59 6748 return spu_builtin_decls[SPU_MULO_0];
e99f512d 6749 break;
6750 default:
6751 return NULL_TREE;
6752 }
6753}
6754
a76866d3 6755/* Implement targetm.vectorize.builtin_mask_for_load. */
6756static tree
6757spu_builtin_mask_for_load (void)
6758{
0c5c4d59 6759 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
a76866d3 6760}
5df189be 6761
a28df51d 6762/* Implement targetm.vectorize.builtin_vectorization_cost. */
6763static int
0822b158 6764spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6765 tree vectype ATTRIBUTE_UNUSED,
6766 int misalign ATTRIBUTE_UNUSED)
559093aa 6767{
6768 switch (type_of_cost)
6769 {
6770 case scalar_stmt:
6771 case vector_stmt:
6772 case vector_load:
6773 case vector_store:
6774 case vec_to_scalar:
6775 case scalar_to_vec:
6776 case cond_branch_not_taken:
6777 case vec_perm:
6778 return 1;
6779
6780 case scalar_store:
6781 return 10;
6782
6783 case scalar_load:
6784 /* Load + rotate. */
6785 return 2;
6786
6787 case unaligned_load:
6788 return 2;
6789
6790 case cond_branch_taken:
6791 return 6;
6792
6793 default:
6794 gcc_unreachable ();
6795 }
a28df51d 6796}
6797
0e87db76 6798/* Return true iff a data reference of TYPE can reach vector alignment (16)
6799 after applying N iterations. This routine does not determine
6800 how many iterations are required to reach the desired alignment. */
6801
6802static bool
a9f1838b 6803spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6804{
6805 if (is_packed)
6806 return false;
6807
6808 /* All other types are naturally aligned. */
6809 return true;
6810}
6811
a0515226 6812/* Implement targetm.vectorize.builtin_vec_perm. */
6813tree
6814spu_builtin_vec_perm (tree type, tree *mask_element_type)
6815{
a0515226 6816 *mask_element_type = unsigned_char_type_node;
6817
6818 switch (TYPE_MODE (type))
6819 {
6820 case V16QImode:
6821 if (TYPE_UNSIGNED (type))
0c5c4d59 6822 return spu_builtin_decls[SPU_SHUFFLE_0];
a0515226 6823 else
0c5c4d59 6824 return spu_builtin_decls[SPU_SHUFFLE_1];
a0515226 6825
6826 case V8HImode:
6827 if (TYPE_UNSIGNED (type))
0c5c4d59 6828 return spu_builtin_decls[SPU_SHUFFLE_2];
a0515226 6829 else
0c5c4d59 6830 return spu_builtin_decls[SPU_SHUFFLE_3];
a0515226 6831
6832 case V4SImode:
6833 if (TYPE_UNSIGNED (type))
0c5c4d59 6834 return spu_builtin_decls[SPU_SHUFFLE_4];
a0515226 6835 else
0c5c4d59 6836 return spu_builtin_decls[SPU_SHUFFLE_5];
a0515226 6837
6838 case V2DImode:
6839 if (TYPE_UNSIGNED (type))
0c5c4d59 6840 return spu_builtin_decls[SPU_SHUFFLE_6];
a0515226 6841 else
0c5c4d59 6842 return spu_builtin_decls[SPU_SHUFFLE_7];
a0515226 6843
6844 case V4SFmode:
0c5c4d59 6845 return spu_builtin_decls[SPU_SHUFFLE_8];
a0515226 6846
6847 case V2DFmode:
0c5c4d59 6848 return spu_builtin_decls[SPU_SHUFFLE_9];
a0515226 6849
6850 default:
6851 return NULL_TREE;
6852 }
a0515226 6853}
6854
6cf5579e 6855/* Return the appropriate mode for a named address pointer. */
6856static enum machine_mode
6857spu_addr_space_pointer_mode (addr_space_t addrspace)
6858{
6859 switch (addrspace)
6860 {
6861 case ADDR_SPACE_GENERIC:
6862 return ptr_mode;
6863 case ADDR_SPACE_EA:
6864 return EAmode;
6865 default:
6866 gcc_unreachable ();
6867 }
6868}
6869
6870/* Return the appropriate mode for an address in a named address space. */
6871static enum machine_mode
6872spu_addr_space_address_mode (addr_space_t addrspace)
6873{
6874 switch (addrspace)
6875 {
6876 case ADDR_SPACE_GENERIC:
6877 return Pmode;
6878 case ADDR_SPACE_EA:
6879 return EAmode;
6880 default:
6881 gcc_unreachable ();
6882 }
6883}
6884
6885/* Determine if one named address space is a subset of another. */
6886
6887static bool
6888spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6889{
6890 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6891 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6892
6893 if (subset == superset)
6894 return true;
6895
6896 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6897 being subsets but instead as disjoint address spaces. */
6898 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6899 return false;
6900
6901 else
6902 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6903}
6904
6905/* Convert from one address space to another. */
6906static rtx
6907spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6908{
6909 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6910 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6911
6912 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6913 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6914
6915 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6916 {
6917 rtx result, ls;
6918
6919 ls = gen_const_mem (DImode,
6920 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6921 set_mem_align (ls, 128);
6922
6923 result = gen_reg_rtx (Pmode);
6924 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6925 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6926 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6927 ls, const0_rtx, Pmode, 1);
6928
6929 emit_insn (gen_subsi3 (result, op, ls));
6930
6931 return result;
6932 }
6933
6934 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6935 {
6936 rtx result, ls;
6937
6938 ls = gen_const_mem (DImode,
6939 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6940 set_mem_align (ls, 128);
6941
6942 result = gen_reg_rtx (EAmode);
6943 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6944 op = force_reg (Pmode, op);
6945 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6946 ls, const0_rtx, EAmode, 1);
6947 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6948
6949 if (EAmode == SImode)
6950 emit_insn (gen_addsi3 (result, op, ls));
6951 else
6952 emit_insn (gen_adddi3 (result, op, ls));
6953
6954 return result;
6955 }
6956
6957 else
6958 gcc_unreachable ();
6959}
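
/* Illustrative sketch only (not used by the port): the pointer arithmetic
   behind the __ea conversions above, assuming __ea_local_store holds the
   effective address at which the local store is mapped.  A null pointer
   is preserved by substituting 0 for the base (the conditional move
   above); the generic-to-__ea direction adds the base back the same way.  */
static unsigned int ATTRIBUTE_UNUSED
spu_ea_to_generic_sketch (unsigned long long ea, unsigned long long ls_base)
{
  unsigned long long base = ea != 0 ? ls_base : 0;	/* Keep NULL.  */
  return (unsigned int) (ea - base);	/* Generic = __ea - local store.  */
}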
6960
6961
d52fd16a 6962/* Count the total number of instructions in each pipe and return the
6963 maximum, which is used as the Minimum Iteration Interval (MII)
6964 in the modulo scheduler. get_pipe() returns -2, -1, 0, or 1;
6965 a value of -2 means the instruction can go in either pipe0 or pipe1. */
6966static int
6967spu_sms_res_mii (struct ddg *g)
6968{
6969 int i;
6970 unsigned t[4] = {0, 0, 0, 0};
6971
6972 for (i = 0; i < g->num_nodes; i++)
6973 {
6974 rtx insn = g->nodes[i].insn;
6975 int p = get_pipe (insn) + 2;
6976
1e944a0b 6977 gcc_assert (p >= 0);
6978 gcc_assert (p < 4);
d52fd16a 6979
6980 t[p]++;
6981 if (dump_file && INSN_P (insn))
6982 fprintf (dump_file, "i%d %s %d %d\n",
6983 INSN_UID (insn),
6984 insn_data[INSN_CODE(insn)].name,
6985 p, t[p]);
6986 }
6987 if (dump_file)
6988 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6989
6990 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6991}
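
/* Illustrative sketch only (not used by the port): the MII formula above.
   Either-pipe instructions (t[0]) are spread over both pipes, so e.g.
   t = { 3, 0, 4, 2 } gives max ((3 + 4 + 2 + 1) / 2, max (4, 2))
   = max (5, 4) = 5 cycles per iteration at best.  */
static int ATTRIBUTE_UNUSED
spu_mii_sketch (unsigned t0, unsigned t2, unsigned t3)
{
  unsigned both = (t0 + t2 + t3 + 1) / 2;	/* Split across two pipes.  */
  unsigned single = t2 > t3 ? t2 : t3;		/* Busiest single pipe.  */
  return both > single ? both : single;
}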
6992
6993
5df189be 6994void
6995spu_init_expanders (void)
9d98604b 6996{
5df189be 6997 if (cfun)
9d98604b 6998 {
6999 rtx r0, r1;
7000 /* The hard frame pointer register is only 128-bit aligned when
7001 frame_pointer_needed is true. We don't know that until we're
7002 expanding the prologue. */
7003 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
7004
7005 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
7006 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
7007 to be treated as aligned, so generate them here. */
7008 r0 = gen_reg_rtx (SImode);
7009 r1 = gen_reg_rtx (SImode);
7010 mark_reg_pointer (r0, 128);
7011 mark_reg_pointer (r1, 128);
7012 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
7013 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
7014 }
ea32e033 7015}
7016
7017static enum machine_mode
7018spu_libgcc_cmp_return_mode (void)
7019{
7020
7021/* For the SPU, word mode is TImode, so it is better to use SImode
7022 for compare returns. */
7023 return SImode;
7024}
7025
7026static enum machine_mode
7027spu_libgcc_shift_count_mode (void)
7028{
7029/* For the SPU, word mode is TImode, so it is better to use SImode
7030 for shift counts. */
7031 return SImode;
7032}
5a976006 7033
7034/* An early place to adjust some flags after GCC has finished processing
7035 them. */
7036static void
7037asm_file_start (void)
7038{
7039 /* Variable tracking should be run after all optimizations which
0ef14db8 7040 change order of insns. It also needs a valid CFG. Therefore,
7041 *if* we make nontrivial changes in machine-dependent reorg,
7042 run variable tracking after those. However, if we do not run
7043 our machine-dependent reorg pass, we must still run the normal
7044 variable tracking pass (or else we will ICE in final since
7045 debug insns have not been removed). */
7046 if (TARGET_BRANCH_HINTS && optimize)
7047 {
7048 spu_flag_var_tracking = flag_var_tracking;
7049 flag_var_tracking = 0;
7050 }
5a976006 7051
7052 default_file_start ();
7053}
7054
a08dfd55 7055/* Implement targetm.section_type_flags. */
7056static unsigned int
7057spu_section_type_flags (tree decl, const char *name, int reloc)
7058{
7059 /* .toe needs to have type @nobits. */
7060 if (strcmp (name, ".toe") == 0)
7061 return SECTION_BSS;
6cf5579e 7062 /* Don't load _ea into the current address space. */
7063 if (strcmp (name, "._ea") == 0)
7064 return SECTION_WRITE | SECTION_DEBUG;
a08dfd55 7065 return default_section_type_flags (decl, name, reloc);
7066}
c2233b46 7067
6cf5579e 7068/* Implement targetm.select_section. */
7069static section *
7070spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7071{
7072 /* Variables and constants defined in the __ea address space
7073 go into a special section named "._ea". */
7074 if (TREE_TYPE (decl) != error_mark_node
7075 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7076 {
7077 /* We might get called with string constants, but get_named_section
7078 doesn't like them as they are not DECLs. Also, we need to set
7079 flags in that case. */
7080 if (!DECL_P (decl))
7081 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7082
7083 return get_named_section (decl, "._ea", reloc);
7084 }
7085
7086 return default_elf_select_section (decl, reloc, align);
7087}
7088
7089/* Implement targetm.unique_section. */
7090static void
7091spu_unique_section (tree decl, int reloc)
7092{
7093 /* We don't support unique section names in the __ea address
7094 space for now. */
7095 if (TREE_TYPE (decl) != error_mark_node
7096 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7097 return;
7098
7099 default_unique_section (decl, reloc);
7100}
7101
56c7bfc2 7102/* Generate a constant or register which contains 2^SCALE. We assume
7103 the result is valid for MODE. Currently, MODE must be V4SFmode and
7104 SCALE must be SImode. */
7105rtx
7106spu_gen_exp2 (enum machine_mode mode, rtx scale)
7107{
7108 gcc_assert (mode == V4SFmode);
7109 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7110 if (GET_CODE (scale) != CONST_INT)
7111 {
7112 /* unsigned int exp = (127 + scale) << 23;
7113 __vector float m = (__vector float) spu_splats (exp); */
7114 rtx reg = force_reg (SImode, scale);
7115 rtx exp = gen_reg_rtx (SImode);
7116 rtx mul = gen_reg_rtx (mode);
7117 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7118 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7119 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7120 return mul;
7121 }
7122 else
7123 {
7124 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7125 unsigned char arr[16];
7126 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7127 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7128 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7129 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7130 return array_to_constant (mode, arr);
7131 }
7132}
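
/* Stand-alone check of the constant path above (illustrative only, not
   used by the port): for an IEEE single, placing 127 + SCALE in the
   exponent field with a zero sign and mantissa yields exactly 2^SCALE,
   for scales that keep the biased exponent in range.  */
static float ATTRIBUTE_UNUSED
spu_exp2_sketch (int scale)
{
  union { unsigned int u; float f; } cvt;
  cvt.u = (unsigned int) (127 + scale) << 23;	/* Biased exponent only.  */
  return cvt.f;					/* == powf (2.0f, scale)  */
}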
7133
9d98604b 7134/* After reload, just change the convert into a move instruction
7135 or a dead instruction. */
7136void
7137spu_split_convert (rtx ops[])
7138{
7139 if (REGNO (ops[0]) == REGNO (ops[1]))
7140 emit_note (NOTE_INSN_DELETED);
7141 else
7142 {
7143 /* Use TImode always as this might help hard reg copyprop. */
7144 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7145 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7146 emit_insn (gen_move_insn (op0, op1));
7147 }
7148}
7149
b3878a6c 7150void
4cbad5bb 7151spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
b3878a6c 7152{
7153 fprintf (file, "# profile\n");
7154 fprintf (file, "brsl $75, _mcount\n");
7155}
7156
329c1e4e 7157/* Implement targetm.ref_may_alias_errno. */
7158static bool
7159spu_ref_may_alias_errno (ao_ref *ref)
7160{
7161 tree base = ao_ref_base (ref);
7162
7163 /* With SPU newlib, errno is defined as something like
7164 _impure_data._errno
7165 The default implementation of this target macro does not
7166 recognize such expressions, so we special-case it here. */
7167
7168 if (TREE_CODE (base) == VAR_DECL
7169 && !TREE_STATIC (base)
7170 && DECL_EXTERNAL (base)
7171 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7172 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7173 "_impure_data") == 0
7174 /* _errno is the first member of _impure_data. */
7175 && ref->offset == 0)
7176 return true;
7177
7178 return default_ref_may_alias_errno (ref);
7179}
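
/* For reference (illustrative only): with SPU newlib an errno access
   looks roughly like

     extern struct _reent_like { int _errno; ... } _impure_data;
     _impure_data._errno = ENOMEM;

   so the zero-offset check above matches exactly the _errno member.  */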
7180
c2233b46 7181#include "gt-spu.h"