gcc/config/spu/spu.c
cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
644459d0 51#include "machmode.h"
75a70cf9 52#include "gimple.h"
644459d0 53#include "tm-constrs.h"
d52fd16a 54#include "ddg.h"
5a976006 55#include "sbitmap.h"
56#include "timevar.h"
57#include "df.h"
6352eedf 58
59/* Builtin types, data and prototypes. */
c2233b46 60
61enum spu_builtin_type_index
62{
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112};
113
114#define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115#define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116#define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117#define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118#define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119#define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121#define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122#define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123#define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
6352eedf 127struct spu_builtin_range
128{
129 int low, high;
130};
131
132static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145};
146
644459d0 147\f
148/* Target specific attribute specifications. */
149char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151/* Prototypes and external defs. */
152static void spu_init_builtins (void);
153static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
fd50b071 155static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
644459d0 156static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157static rtx get_pic_reg (void);
158static int need_to_save_reg (int regno, int saving);
159static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163static void emit_nop_for_insn (rtx insn);
164static bool insn_clobbers_hbr (rtx insn);
165static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 166 int distance, sbitmap blocks);
5474166e 167static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
644459d0 169static rtx get_branch_target (rtx branch);
644459d0 170static void spu_machine_dependent_reorg (void);
171static int spu_sched_issue_rate (void);
172static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174static int get_pipe (rtx insn);
644459d0 175static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 176static void spu_sched_init_global (FILE *, int, int);
177static void spu_sched_init (FILE *, int, int);
178static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 179static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185static int spu_naked_function_p (tree func);
fb80456a 186static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
644459d0 188static tree spu_build_builtin_va_list (void);
8a58ed0a 189static void spu_va_start (tree, rtx);
75a70cf9 190static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 192static int store_with_one_insn_p (rtx mem);
644459d0 193static int mem_is_padded_component_ref (rtx x);
9d98604b 194static int reg_aligned_for_addr (rtx x);
644459d0 195static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196static void spu_asm_globalize_label (FILE * file, const char *name);
197static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 198 int *total, bool speed);
644459d0 199static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200static void spu_init_libfuncs (void);
fb80456a 201static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 202static void fix_range (const char *);
69ced2d6 203static void spu_encode_section_info (tree, rtx, int);
41e3a0c7 204static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
e99f512d 205static tree spu_builtin_mul_widen_even (tree);
206static tree spu_builtin_mul_widen_odd (tree);
a76866d3 207static tree spu_builtin_mask_for_load (void);
a28df51d 208static int spu_builtin_vectorization_cost (bool);
a9f1838b 209static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 210static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 211static int spu_sms_res_mii (struct ddg *g);
5a976006 212static void asm_file_start (void);
a08dfd55 213static unsigned int spu_section_type_flags (tree, const char *, int);
9d98604b 214static rtx spu_expand_load (rtx, rtx, rtx, int);
e96f2783 215static void spu_trampoline_init (rtx, tree, rtx);
644459d0 216
217extern const char *reg_names[];
644459d0 218
5474166e 219/* Which instruction set architecture to use. */
220int spu_arch;
 221/* Which cpu we are tuning for. */
222int spu_tune;
223
5a976006 224/* The hardware requires 8 insns between a hint and the branch it
 225 affects. This variable describes how many rtl instructions the
226 compiler needs to see before inserting a hint, and then the compiler
227 will insert enough nops to make it at least 8 insns. The default is
 228 for the compiler to allow up to 2 nops to be emitted. The nops are
229 inserted in pairs, so we round down. */
230int spu_hint_dist = (8*4) - (2*4);
231
232/* Determines whether we run variable tracking in machine dependent
233 reorganization. */
234static int spu_flag_var_tracking;
235
644459d0 236enum spu_immediate {
237 SPU_NONE,
238 SPU_IL,
239 SPU_ILA,
240 SPU_ILH,
241 SPU_ILHU,
242 SPU_ORI,
243 SPU_ORHI,
244 SPU_ORBI,
99369027 245 SPU_IOHL
644459d0 246};
dea01258 247enum immediate_class
248{
249 IC_POOL, /* constant pool */
250 IC_IL1, /* one il* instruction */
251 IC_IL2, /* both ilhu and iohl instructions */
252 IC_IL1s, /* one il* instruction */
253 IC_IL2s, /* both ilhu and iohl instructions */
254 IC_FSMBI, /* the fsmbi instruction */
255 IC_CPAT, /* one of the c*d instructions */
5df189be 256 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 257};
644459d0 258
259static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
260static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 261static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
262static enum immediate_class classify_immediate (rtx op,
263 enum machine_mode mode);
644459d0 264
1bd43494 265static enum machine_mode spu_unwind_word_mode (void);
266
ea32e033 267static enum machine_mode
268spu_libgcc_cmp_return_mode (void);
269
270static enum machine_mode
271spu_libgcc_shift_count_mode (void);
ef51d1e3 272\f
273/* Table of machine attributes. */
274static const struct attribute_spec spu_attribute_table[] =
275{
276 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
277 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
278 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
279 { NULL, 0, 0, false, false, false, NULL }
280};
644459d0 281\f
282/* TARGET overrides. */
283
284#undef TARGET_INIT_BUILTINS
285#define TARGET_INIT_BUILTINS spu_init_builtins
286
644459d0 287#undef TARGET_EXPAND_BUILTIN
288#define TARGET_EXPAND_BUILTIN spu_expand_builtin
289
1bd43494 290#undef TARGET_UNWIND_WORD_MODE
291#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 292
41e3a0c7 293#undef TARGET_LEGITIMIZE_ADDRESS
294#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
295
644459d0 296/* The .8byte directive doesn't seem to work well for a 32 bit
297 architecture. */
298#undef TARGET_ASM_UNALIGNED_DI_OP
299#define TARGET_ASM_UNALIGNED_DI_OP NULL
300
301#undef TARGET_RTX_COSTS
302#define TARGET_RTX_COSTS spu_rtx_costs
303
304#undef TARGET_ADDRESS_COST
f529eb25 305#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 306
307#undef TARGET_SCHED_ISSUE_RATE
308#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
309
5a976006 310#undef TARGET_SCHED_INIT_GLOBAL
311#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
312
313#undef TARGET_SCHED_INIT
314#define TARGET_SCHED_INIT spu_sched_init
315
644459d0 316#undef TARGET_SCHED_VARIABLE_ISSUE
317#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
318
5a976006 319#undef TARGET_SCHED_REORDER
320#define TARGET_SCHED_REORDER spu_sched_reorder
321
322#undef TARGET_SCHED_REORDER2
323#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 324
325#undef TARGET_SCHED_ADJUST_COST
326#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
327
644459d0 328#undef TARGET_ATTRIBUTE_TABLE
329#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
330
331#undef TARGET_ASM_INTEGER
332#define TARGET_ASM_INTEGER spu_assemble_integer
333
334#undef TARGET_SCALAR_MODE_SUPPORTED_P
335#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
336
337#undef TARGET_VECTOR_MODE_SUPPORTED_P
338#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
339
340#undef TARGET_FUNCTION_OK_FOR_SIBCALL
341#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
342
343#undef TARGET_ASM_GLOBALIZE_LABEL
344#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
345
346#undef TARGET_PASS_BY_REFERENCE
347#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
348
349#undef TARGET_MUST_PASS_IN_STACK
350#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
351
352#undef TARGET_BUILD_BUILTIN_VA_LIST
353#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
354
8a58ed0a 355#undef TARGET_EXPAND_BUILTIN_VA_START
356#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
357
644459d0 358#undef TARGET_SETUP_INCOMING_VARARGS
359#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
360
361#undef TARGET_MACHINE_DEPENDENT_REORG
362#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
363
364#undef TARGET_GIMPLIFY_VA_ARG_EXPR
365#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
366
367#undef TARGET_DEFAULT_TARGET_FLAGS
368#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
369
370#undef TARGET_INIT_LIBFUNCS
371#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
372
373#undef TARGET_RETURN_IN_MEMORY
374#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
375
69ced2d6 376#undef TARGET_ENCODE_SECTION_INFO
377#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
378
e99f512d 379#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
380#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
381
382#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
383#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
384
a76866d3 385#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
386#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
387
a28df51d 388#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
389#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
390
0e87db76 391#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
392#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
393
a0515226 394#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
395#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
396
ea32e033 397#undef TARGET_LIBGCC_CMP_RETURN_MODE
398#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
399
400#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
401#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
402
d52fd16a 403#undef TARGET_SCHED_SMS_RES_MII
404#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
405
5a976006 406#undef TARGET_ASM_FILE_START
407#define TARGET_ASM_FILE_START asm_file_start
408
a08dfd55 409#undef TARGET_SECTION_TYPE_FLAGS
410#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
411
fd50b071 412#undef TARGET_LEGITIMATE_ADDRESS_P
413#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
414
e96f2783 415#undef TARGET_TRAMPOLINE_INIT
416#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
417
644459d0 418struct gcc_target targetm = TARGET_INITIALIZER;
419
5df189be 420void
421spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
422{
5df189be 423 /* Override some of the default param values. With so many registers
424 larger values are better for these params. */
425 MAX_PENDING_LIST_LENGTH = 128;
426
427 /* With so many registers this is better on by default. */
428 flag_rename_registers = 1;
429}
430
644459d0 431/* Sometimes certain combinations of command options do not make sense
432 on a particular target machine. You can define a macro
433 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
434 executed once just after all the command options have been parsed. */
435void
436spu_override_options (void)
437{
14d408d9 438 /* Small loops will be unpeeled at -O3. For SPU it is more important
439 to keep code small by default. */
440 if (!flag_unroll_loops && !flag_peel_loops
441 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
442 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
443
644459d0 444 flag_omit_frame_pointer = 1;
445
5a976006 446 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
644459d0 447 if (align_functions < 8)
448 align_functions = 8;
c7b91b14 449
5a976006 450 spu_hint_dist = 8*4 - spu_max_nops*4;
451 if (spu_hint_dist < 0)
452 spu_hint_dist = 0;
453
c7b91b14 454 if (spu_fixed_range_string)
455 fix_range (spu_fixed_range_string);
5474166e 456
457 /* Determine processor architectural level. */
458 if (spu_arch_string)
459 {
460 if (strcmp (&spu_arch_string[0], "cell") == 0)
461 spu_arch = PROCESSOR_CELL;
462 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
463 spu_arch = PROCESSOR_CELLEDP;
464 else
465 error ("Unknown architecture '%s'", &spu_arch_string[0]);
466 }
467
468 /* Determine processor to tune for. */
469 if (spu_tune_string)
470 {
471 if (strcmp (&spu_tune_string[0], "cell") == 0)
472 spu_tune = PROCESSOR_CELL;
473 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
474 spu_tune = PROCESSOR_CELLEDP;
475 else
476 error ("Unknown architecture '%s'", &spu_tune_string[0]);
477 }
98bbec1e 478
13684256 479 /* Change defaults according to the processor architecture. */
480 if (spu_arch == PROCESSOR_CELLEDP)
481 {
482 /* If no command line option has been otherwise specified, change
483 the default to -mno-safe-hints on celledp -- only the original
484 Cell/B.E. processors require this workaround. */
485 if (!(target_flags_explicit & MASK_SAFE_HINTS))
486 target_flags &= ~MASK_SAFE_HINTS;
487 }
488
98bbec1e 489 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 490}
491\f
492/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
493 struct attribute_spec.handler. */
494
644459d0 495/* True if MODE is valid for the target. By "valid", we mean able to
496 be manipulated in non-trivial ways. In particular, this means all
497 the arithmetic is supported. */
498static bool
499spu_scalar_mode_supported_p (enum machine_mode mode)
500{
501 switch (mode)
502 {
503 case QImode:
504 case HImode:
505 case SImode:
506 case SFmode:
507 case DImode:
508 case TImode:
509 case DFmode:
510 return true;
511
512 default:
513 return false;
514 }
515}
516
517/* Similarly for vector modes. "Supported" here is less strict. At
518 least some operations are supported; need to check optabs or builtins
519 for further details. */
520static bool
521spu_vector_mode_supported_p (enum machine_mode mode)
522{
523 switch (mode)
524 {
525 case V16QImode:
526 case V8HImode:
527 case V4SImode:
528 case V2DImode:
529 case V4SFmode:
530 case V2DFmode:
531 return true;
532
533 default:
534 return false;
535 }
536}
537
538/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
539 least significant bytes of the outer mode. This function returns
 540 TRUE for the SUBREGs where this is correct. */
541int
542valid_subreg (rtx op)
543{
544 enum machine_mode om = GET_MODE (op);
545 enum machine_mode im = GET_MODE (SUBREG_REG (op));
546 return om != VOIDmode && im != VOIDmode
547 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 548 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
549 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 550}
551
 552/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 553 and adjust the start offset. */
644459d0 554static rtx
555adjust_operand (rtx op, HOST_WIDE_INT * start)
556{
557 enum machine_mode mode;
558 int op_size;
38aca5eb 559 /* Strip any paradoxical SUBREG. */
560 if (GET_CODE (op) == SUBREG
561 && (GET_MODE_BITSIZE (GET_MODE (op))
562 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 563 {
564 if (start)
565 *start -=
566 GET_MODE_BITSIZE (GET_MODE (op)) -
567 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
568 op = SUBREG_REG (op);
569 }
 570 /* If it is smaller than SI, ensure a SUBREG. */
571 op_size = GET_MODE_BITSIZE (GET_MODE (op));
572 if (op_size < 32)
573 {
574 if (start)
575 *start += 32 - op_size;
576 op_size = 32;
577 }
578 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
579 mode = mode_for_size (op_size, MODE_INT, 0);
580 if (mode != GET_MODE (op))
581 op = gen_rtx_SUBREG (mode, op, 0);
582 return op;
583}
584
585void
586spu_expand_extv (rtx ops[], int unsignedp)
587{
9d98604b 588 rtx dst = ops[0], src = ops[1];
644459d0 589 HOST_WIDE_INT width = INTVAL (ops[2]);
590 HOST_WIDE_INT start = INTVAL (ops[3]);
9d98604b 591 HOST_WIDE_INT align_mask;
592 rtx s0, s1, mask, r0;
644459d0 593
9d98604b 594 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
644459d0 595
9d98604b 596 if (MEM_P (src))
644459d0 597 {
9d98604b 598 /* First, determine if we need 1 TImode load or 2. We need only 1
599 if the bits being extracted do not cross the alignment boundary
600 as determined by the MEM and its address. */
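	 /* For example, with a 16-byte (128-bit) aligned MEM, a field
	    covering bits 0..63 lies within one quadword and needs a single
	    load, while one covering bits 120..135 crosses the 16-byte
	    boundary and needs two.  */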
601
602 align_mask = -MEM_ALIGN (src);
603 if ((start & align_mask) == ((start + width - 1) & align_mask))
644459d0 604 {
9d98604b 605 /* Alignment is sufficient for 1 load. */
606 s0 = gen_reg_rtx (TImode);
607 r0 = spu_expand_load (s0, 0, src, start / 8);
608 start &= 7;
609 if (r0)
610 emit_insn (gen_rotqby_ti (s0, s0, r0));
644459d0 611 }
9d98604b 612 else
613 {
614 /* Need 2 loads. */
615 s0 = gen_reg_rtx (TImode);
616 s1 = gen_reg_rtx (TImode);
617 r0 = spu_expand_load (s0, s1, src, start / 8);
618 start &= 7;
619
620 gcc_assert (start + width <= 128);
621 if (r0)
622 {
623 rtx r1 = gen_reg_rtx (SImode);
624 mask = gen_reg_rtx (TImode);
625 emit_move_insn (mask, GEN_INT (-1));
626 emit_insn (gen_rotqby_ti (s0, s0, r0));
627 emit_insn (gen_rotqby_ti (s1, s1, r0));
628 if (GET_CODE (r0) == CONST_INT)
629 r1 = GEN_INT (INTVAL (r0) & 15);
630 else
631 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
632 emit_insn (gen_shlqby_ti (mask, mask, r1));
633 emit_insn (gen_selb (s0, s1, s0, mask));
634 }
635 }
636
637 }
638 else if (GET_CODE (src) == SUBREG)
639 {
640 rtx r = SUBREG_REG (src);
641 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
642 s0 = gen_reg_rtx (TImode);
643 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
644 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
645 else
646 emit_move_insn (s0, src);
647 }
648 else
649 {
650 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
651 s0 = gen_reg_rtx (TImode);
652 emit_move_insn (s0, src);
644459d0 653 }
654
9d98604b 655 /* Now s0 is TImode and contains the bits to extract at start. */
656
657 if (start)
658 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
659
660 if (128 - width)
644459d0 661 {
9d98604b 662 tree c = build_int_cst (NULL_TREE, 128 - width);
663 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
644459d0 664 }
665
9d98604b 666 emit_move_insn (dst, s0);
644459d0 667}
668
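/* Expand an insv pattern: insert the value in ops[3] into a field of
   ops[1] bits of ops[0], starting at bit offset ops[2] (counted from the
   most significant end).  ops[0] may be a REG or a MEM.  */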
669void
670spu_expand_insv (rtx ops[])
671{
672 HOST_WIDE_INT width = INTVAL (ops[1]);
673 HOST_WIDE_INT start = INTVAL (ops[2]);
674 HOST_WIDE_INT maskbits;
675 enum machine_mode dst_mode, src_mode;
676 rtx dst = ops[0], src = ops[3];
677 int dst_size, src_size;
678 rtx mask;
679 rtx shift_reg;
680 int shift;
681
682
683 if (GET_CODE (ops[0]) == MEM)
684 dst = gen_reg_rtx (TImode);
685 else
686 dst = adjust_operand (dst, &start);
687 dst_mode = GET_MODE (dst);
688 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
689
690 if (CONSTANT_P (src))
691 {
692 enum machine_mode m =
693 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
694 src = force_reg (m, convert_to_mode (m, src, 0));
695 }
696 src = adjust_operand (src, 0);
697 src_mode = GET_MODE (src);
698 src_size = GET_MODE_BITSIZE (GET_MODE (src));
699
700 mask = gen_reg_rtx (dst_mode);
701 shift_reg = gen_reg_rtx (dst_mode);
702 shift = dst_size - start - width;
703
704 /* It's not safe to use subreg here because the compiler assumes
705 that the SUBREG_REG is right justified in the SUBREG. */
706 convert_move (shift_reg, src, 1);
707
708 if (shift > 0)
709 {
710 switch (dst_mode)
711 {
712 case SImode:
713 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
714 break;
715 case DImode:
716 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
717 break;
718 case TImode:
719 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
720 break;
721 default:
722 abort ();
723 }
724 }
725 else if (shift < 0)
726 abort ();
727
728 switch (dst_size)
729 {
730 case 32:
731 maskbits = (-1ll << (32 - width - start));
732 if (start)
733 maskbits += (1ll << (32 - start));
734 emit_move_insn (mask, GEN_INT (maskbits));
735 break;
736 case 64:
737 maskbits = (-1ll << (64 - width - start));
738 if (start)
739 maskbits += (1ll << (64 - start));
740 emit_move_insn (mask, GEN_INT (maskbits));
741 break;
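	  /* For the 128-bit case below the mask is built as a byte array;
	     e.g. start = 12, width = 8 gives
	     arr = { 0x00, 0x0f, 0xf0, 0x00, ... }, i.e. a mask with bits
	     12..19 (numbered from the most significant bit) set.  */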
742 case 128:
743 {
744 unsigned char arr[16];
745 int i = start / 8;
746 memset (arr, 0, sizeof (arr));
747 arr[i] = 0xff >> (start & 7);
748 for (i++; i <= (start + width - 1) / 8; i++)
749 arr[i] = 0xff;
750 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
751 emit_move_insn (mask, array_to_constant (TImode, arr));
752 }
753 break;
754 default:
755 abort ();
756 }
757 if (GET_CODE (ops[0]) == MEM)
758 {
644459d0 759 rtx low = gen_reg_rtx (SImode);
644459d0 760 rtx rotl = gen_reg_rtx (SImode);
761 rtx mask0 = gen_reg_rtx (TImode);
9d98604b 762 rtx addr;
763 rtx addr0;
764 rtx addr1;
644459d0 765 rtx mem;
766
9d98604b 767 addr = force_reg (Pmode, XEXP (ops[0], 0));
768 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
644459d0 769 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
770 emit_insn (gen_negsi2 (rotl, low));
771 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
772 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
9d98604b 773 mem = change_address (ops[0], TImode, addr0);
644459d0 774 set_mem_alias_set (mem, 0);
775 emit_move_insn (dst, mem);
776 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
644459d0 777 if (start + width > MEM_ALIGN (ops[0]))
778 {
779 rtx shl = gen_reg_rtx (SImode);
780 rtx mask1 = gen_reg_rtx (TImode);
781 rtx dst1 = gen_reg_rtx (TImode);
782 rtx mem1;
9d98604b 783 addr1 = plus_constant (addr, 16);
784 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
644459d0 785 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
786 emit_insn (gen_shlqby_ti (mask1, mask, shl));
9d98604b 787 mem1 = change_address (ops[0], TImode, addr1);
644459d0 788 set_mem_alias_set (mem1, 0);
789 emit_move_insn (dst1, mem1);
790 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
791 emit_move_insn (mem1, dst1);
792 }
9d98604b 793 emit_move_insn (mem, dst);
644459d0 794 }
795 else
71cd778d 796 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 797}
798
799
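/* Expand a block move: ops[0] and ops[1] are the destination and source
   MEMs, ops[2] the byte count and ops[3] the alignment, both CONST_INTs.
   Only 16-byte aligned copies of small constant size are expanded inline;
   return 1 if code was emitted, 0 to let the caller fall back to the
   default expansion.  */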
800int
801spu_expand_block_move (rtx ops[])
802{
803 HOST_WIDE_INT bytes, align, offset;
804 rtx src, dst, sreg, dreg, target;
805 int i;
806 if (GET_CODE (ops[2]) != CONST_INT
807 || GET_CODE (ops[3]) != CONST_INT
48eb4342 808 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 809 return 0;
810
811 bytes = INTVAL (ops[2]);
812 align = INTVAL (ops[3]);
813
814 if (bytes <= 0)
815 return 1;
816
817 dst = ops[0];
818 src = ops[1];
819
820 if (align == 16)
821 {
822 for (offset = 0; offset + 16 <= bytes; offset += 16)
823 {
824 dst = adjust_address (ops[0], V16QImode, offset);
825 src = adjust_address (ops[1], V16QImode, offset);
826 emit_move_insn (dst, src);
827 }
828 if (offset < bytes)
829 {
830 rtx mask;
831 unsigned char arr[16] = { 0 };
832 for (i = 0; i < bytes - offset; i++)
833 arr[i] = 0xff;
834 dst = adjust_address (ops[0], V16QImode, offset);
835 src = adjust_address (ops[1], V16QImode, offset);
836 mask = gen_reg_rtx (V16QImode);
837 sreg = gen_reg_rtx (V16QImode);
838 dreg = gen_reg_rtx (V16QImode);
839 target = gen_reg_rtx (V16QImode);
840 emit_move_insn (mask, array_to_constant (V16QImode, arr));
841 emit_move_insn (dreg, dst);
842 emit_move_insn (sreg, src);
843 emit_insn (gen_selb (target, dreg, sreg, mask));
844 emit_move_insn (dst, target);
845 }
846 return 1;
847 }
848 return 0;
849}
850
851enum spu_comp_code
852{ SPU_EQ, SPU_GT, SPU_GTU };
853
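/* Insn codes for the compare patterns, indexed by mode (in the order used
   by spu_emit_branch_or_set: QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI,
   V4SI, V4SF, V2DF) and by spu_comp_code.  A zero entry means that
   comparison is not available for that mode.  */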
5474166e 854int spu_comp_icode[12][3] = {
855 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
856 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
857 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
858 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
859 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
860 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
861 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
862 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
863 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
864 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
865 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
866 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 867};
868
869/* Generate a compare for CODE. Return a brand-new rtx that represents
870 the result of the compare. GCC can figure this out too if we don't
 871 provide all variations of compares, but since GCC always wants to use
872 WORD_MODE, we can generate better code in most cases if we do it
873 ourselves. */
874void
74f4459c 875spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
644459d0 876{
877 int reverse_compare = 0;
878 int reverse_test = 0;
5d70b918 879 rtx compare_result, eq_result;
880 rtx comp_rtx, eq_rtx;
644459d0 881 enum machine_mode comp_mode;
882 enum machine_mode op_mode;
b9c74b4d 883 enum spu_comp_code scode, eq_code;
884 enum insn_code ior_code;
74f4459c 885 enum rtx_code code = GET_CODE (cmp);
886 rtx op0 = XEXP (cmp, 0);
887 rtx op1 = XEXP (cmp, 1);
644459d0 888 int index;
5d70b918 889 int eq_test = 0;
644459d0 890
74f4459c 891 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
644459d0 892 and so on, to keep the constant in operand 1. */
74f4459c 893 if (GET_CODE (op1) == CONST_INT)
644459d0 894 {
74f4459c 895 HOST_WIDE_INT val = INTVAL (op1) - 1;
896 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
644459d0 897 switch (code)
898 {
899 case GE:
74f4459c 900 op1 = GEN_INT (val);
644459d0 901 code = GT;
902 break;
903 case LT:
74f4459c 904 op1 = GEN_INT (val);
644459d0 905 code = LE;
906 break;
907 case GEU:
74f4459c 908 op1 = GEN_INT (val);
644459d0 909 code = GTU;
910 break;
911 case LTU:
74f4459c 912 op1 = GEN_INT (val);
644459d0 913 code = LEU;
914 break;
915 default:
916 break;
917 }
918 }
919
5d70b918 920 comp_mode = SImode;
74f4459c 921 op_mode = GET_MODE (op0);
5d70b918 922
644459d0 923 switch (code)
924 {
925 case GE:
644459d0 926 scode = SPU_GT;
07027691 927 if (HONOR_NANS (op_mode))
5d70b918 928 {
929 reverse_compare = 0;
930 reverse_test = 0;
931 eq_test = 1;
932 eq_code = SPU_EQ;
933 }
934 else
935 {
936 reverse_compare = 1;
937 reverse_test = 1;
938 }
644459d0 939 break;
940 case LE:
644459d0 941 scode = SPU_GT;
07027691 942 if (HONOR_NANS (op_mode))
5d70b918 943 {
944 reverse_compare = 1;
945 reverse_test = 0;
946 eq_test = 1;
947 eq_code = SPU_EQ;
948 }
949 else
950 {
951 reverse_compare = 0;
952 reverse_test = 1;
953 }
644459d0 954 break;
955 case LT:
956 reverse_compare = 1;
957 reverse_test = 0;
958 scode = SPU_GT;
959 break;
960 case GEU:
961 reverse_compare = 1;
962 reverse_test = 1;
963 scode = SPU_GTU;
964 break;
965 case LEU:
966 reverse_compare = 0;
967 reverse_test = 1;
968 scode = SPU_GTU;
969 break;
970 case LTU:
971 reverse_compare = 1;
972 reverse_test = 0;
973 scode = SPU_GTU;
974 break;
975 case NE:
976 reverse_compare = 0;
977 reverse_test = 1;
978 scode = SPU_EQ;
979 break;
980
981 case EQ:
982 scode = SPU_EQ;
983 break;
984 case GT:
985 scode = SPU_GT;
986 break;
987 case GTU:
988 scode = SPU_GTU;
989 break;
990 default:
991 scode = SPU_EQ;
992 break;
993 }
994
644459d0 995 switch (op_mode)
996 {
997 case QImode:
998 index = 0;
999 comp_mode = QImode;
1000 break;
1001 case HImode:
1002 index = 1;
1003 comp_mode = HImode;
1004 break;
1005 case SImode:
1006 index = 2;
1007 break;
1008 case DImode:
1009 index = 3;
1010 break;
1011 case TImode:
1012 index = 4;
1013 break;
1014 case SFmode:
1015 index = 5;
1016 break;
1017 case DFmode:
1018 index = 6;
1019 break;
1020 case V16QImode:
5474166e 1021 index = 7;
1022 comp_mode = op_mode;
1023 break;
644459d0 1024 case V8HImode:
5474166e 1025 index = 8;
1026 comp_mode = op_mode;
1027 break;
644459d0 1028 case V4SImode:
5474166e 1029 index = 9;
1030 comp_mode = op_mode;
1031 break;
644459d0 1032 case V4SFmode:
5474166e 1033 index = 10;
1034 comp_mode = V4SImode;
1035 break;
644459d0 1036 case V2DFmode:
5474166e 1037 index = 11;
1038 comp_mode = V2DImode;
644459d0 1039 break;
5474166e 1040 case V2DImode:
644459d0 1041 default:
1042 abort ();
1043 }
1044
74f4459c 1045 if (GET_MODE (op1) == DFmode
07027691 1046 && (scode != SPU_GT && scode != SPU_EQ))
1047 abort ();
644459d0 1048
74f4459c 1049 if (is_set == 0 && op1 == const0_rtx
1050 && (GET_MODE (op0) == SImode
1051 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
644459d0 1052 {
1053 /* Don't need to set a register with the result when we are
1054 comparing against zero and branching. */
1055 reverse_test = !reverse_test;
74f4459c 1056 compare_result = op0;
644459d0 1057 }
1058 else
1059 {
1060 compare_result = gen_reg_rtx (comp_mode);
1061
1062 if (reverse_compare)
1063 {
74f4459c 1064 rtx t = op1;
1065 op1 = op0;
1066 op0 = t;
644459d0 1067 }
1068
1069 if (spu_comp_icode[index][scode] == 0)
1070 abort ();
1071
1072 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
74f4459c 1073 (op0, op_mode))
1074 op0 = force_reg (op_mode, op0);
644459d0 1075 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
74f4459c 1076 (op1, op_mode))
1077 op1 = force_reg (op_mode, op1);
644459d0 1078 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
74f4459c 1079 op0, op1);
644459d0 1080 if (comp_rtx == 0)
1081 abort ();
1082 emit_insn (comp_rtx);
1083
5d70b918 1084 if (eq_test)
1085 {
1086 eq_result = gen_reg_rtx (comp_mode);
1087 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
74f4459c 1088 op0, op1);
5d70b918 1089 if (eq_rtx == 0)
1090 abort ();
1091 emit_insn (eq_rtx);
1092 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1093 gcc_assert (ior_code != CODE_FOR_nothing);
1094 emit_insn (GEN_FCN (ior_code)
1095 (compare_result, compare_result, eq_result));
1096 }
644459d0 1097 }
1098
1099 if (is_set == 0)
1100 {
1101 rtx bcomp;
1102 rtx loc_ref;
1103
1104 /* We don't have branch on QI compare insns, so we convert the
1105 QI compare result to a HI result. */
1106 if (comp_mode == QImode)
1107 {
1108 rtx old_res = compare_result;
1109 compare_result = gen_reg_rtx (HImode);
1110 comp_mode = HImode;
1111 emit_insn (gen_extendqihi2 (compare_result, old_res));
1112 }
1113
1114 if (reverse_test)
1115 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1116 else
1117 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1118
74f4459c 1119 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
644459d0 1120 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1121 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1122 loc_ref, pc_rtx)));
1123 }
1124 else if (is_set == 2)
1125 {
74f4459c 1126 rtx target = operands[0];
644459d0 1127 int compare_size = GET_MODE_BITSIZE (comp_mode);
1128 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1129 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1130 rtx select_mask;
1131 rtx op_t = operands[2];
1132 rtx op_f = operands[3];
1133
1134 /* The result of the comparison can be SI, HI or QI mode. Create a
1135 mask based on that result. */
1136 if (target_size > compare_size)
1137 {
1138 select_mask = gen_reg_rtx (mode);
1139 emit_insn (gen_extend_compare (select_mask, compare_result));
1140 }
1141 else if (target_size < compare_size)
1142 select_mask =
1143 gen_rtx_SUBREG (mode, compare_result,
1144 (compare_size - target_size) / BITS_PER_UNIT);
1145 else if (comp_mode != mode)
1146 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1147 else
1148 select_mask = compare_result;
1149
1150 if (GET_MODE (target) != GET_MODE (op_t)
1151 || GET_MODE (target) != GET_MODE (op_f))
1152 abort ();
1153
1154 if (reverse_test)
1155 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1156 else
1157 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1158 }
1159 else
1160 {
74f4459c 1161 rtx target = operands[0];
644459d0 1162 if (reverse_test)
1163 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1164 gen_rtx_NOT (comp_mode, compare_result)));
1165 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1166 emit_insn (gen_extendhisi2 (target, compare_result));
1167 else if (GET_MODE (target) == SImode
1168 && GET_MODE (compare_result) == QImode)
1169 emit_insn (gen_extend_compare (target, compare_result));
1170 else
1171 emit_move_insn (target, compare_result);
1172 }
1173}
1174
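/* Return the target-format bit image of the SFmode or DFmode CONST_DOUBLE
   X as a HOST_WIDE_INT (the 32-bit single-precision image for SFmode, the
   full 64-bit image for DFmode).  */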
1175HOST_WIDE_INT
1176const_double_to_hwint (rtx x)
1177{
1178 HOST_WIDE_INT val;
1179 REAL_VALUE_TYPE rv;
1180 if (GET_MODE (x) == SFmode)
1181 {
1182 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1183 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1184 }
1185 else if (GET_MODE (x) == DFmode)
1186 {
1187 long l[2];
1188 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1189 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1190 val = l[0];
1191 val = (val << 32) | (l[1] & 0xffffffff);
1192 }
1193 else
1194 abort ();
1195 return val;
1196}
1197
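/* Inverse of const_double_to_hwint: build an SFmode or DFmode CONST_DOUBLE
   whose target-format bit image is V.  */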
1198rtx
1199hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1200{
1201 long tv[2];
1202 REAL_VALUE_TYPE rv;
1203 gcc_assert (mode == SFmode || mode == DFmode);
1204
1205 if (mode == SFmode)
1206 tv[0] = (v << 32) >> 32;
1207 else if (mode == DFmode)
1208 {
1209 tv[1] = (v << 32) >> 32;
1210 tv[0] = v >> 32;
1211 }
1212 real_from_target (&rv, tv, mode);
1213 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1214}
1215
1216void
1217print_operand_address (FILE * file, register rtx addr)
1218{
1219 rtx reg;
1220 rtx offset;
1221
e04cf423 1222 if (GET_CODE (addr) == AND
1223 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1224 && INTVAL (XEXP (addr, 1)) == -16)
1225 addr = XEXP (addr, 0);
1226
644459d0 1227 switch (GET_CODE (addr))
1228 {
1229 case REG:
1230 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1231 break;
1232
1233 case PLUS:
1234 reg = XEXP (addr, 0);
1235 offset = XEXP (addr, 1);
1236 if (GET_CODE (offset) == REG)
1237 {
1238 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1239 reg_names[REGNO (offset)]);
1240 }
1241 else if (GET_CODE (offset) == CONST_INT)
1242 {
1243 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1244 INTVAL (offset), reg_names[REGNO (reg)]);
1245 }
1246 else
1247 abort ();
1248 break;
1249
1250 case CONST:
1251 case LABEL_REF:
1252 case SYMBOL_REF:
1253 case CONST_INT:
1254 output_addr_const (file, addr);
1255 break;
1256
1257 default:
1258 debug_rtx (addr);
1259 abort ();
1260 }
1261}
1262
1263void
1264print_operand (FILE * file, rtx x, int code)
1265{
1266 enum machine_mode mode = GET_MODE (x);
1267 HOST_WIDE_INT val;
1268 unsigned char arr[16];
1269 int xcode = GET_CODE (x);
dea01258 1270 int i, info;
644459d0 1271 if (GET_MODE (x) == VOIDmode)
1272 switch (code)
1273 {
644459d0 1274 case 'L': /* 128 bits, signed */
1275 case 'm': /* 128 bits, signed */
1276 case 'T': /* 128 bits, signed */
1277 case 't': /* 128 bits, signed */
1278 mode = TImode;
1279 break;
644459d0 1280 case 'K': /* 64 bits, signed */
1281 case 'k': /* 64 bits, signed */
1282 case 'D': /* 64 bits, signed */
1283 case 'd': /* 64 bits, signed */
1284 mode = DImode;
1285 break;
644459d0 1286 case 'J': /* 32 bits, signed */
1287 case 'j': /* 32 bits, signed */
1288 case 's': /* 32 bits, signed */
1289 case 'S': /* 32 bits, signed */
1290 mode = SImode;
1291 break;
1292 }
1293 switch (code)
1294 {
1295
1296 case 'j': /* 32 bits, signed */
1297 case 'k': /* 64 bits, signed */
1298 case 'm': /* 128 bits, signed */
1299 if (xcode == CONST_INT
1300 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1301 {
1302 gcc_assert (logical_immediate_p (x, mode));
1303 constant_to_array (mode, x, arr);
1304 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1305 val = trunc_int_for_mode (val, SImode);
1306 switch (which_logical_immediate (val))
1307 {
1308 case SPU_ORI:
1309 break;
1310 case SPU_ORHI:
1311 fprintf (file, "h");
1312 break;
1313 case SPU_ORBI:
1314 fprintf (file, "b");
1315 break;
1316 default:
1317 gcc_unreachable();
1318 }
1319 }
1320 else
1321 gcc_unreachable();
1322 return;
1323
1324 case 'J': /* 32 bits, signed */
1325 case 'K': /* 64 bits, signed */
1326 case 'L': /* 128 bits, signed */
1327 if (xcode == CONST_INT
1328 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1329 {
1330 gcc_assert (logical_immediate_p (x, mode)
1331 || iohl_immediate_p (x, mode));
1332 constant_to_array (mode, x, arr);
1333 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1334 val = trunc_int_for_mode (val, SImode);
1335 switch (which_logical_immediate (val))
1336 {
1337 case SPU_ORI:
1338 case SPU_IOHL:
1339 break;
1340 case SPU_ORHI:
1341 val = trunc_int_for_mode (val, HImode);
1342 break;
1343 case SPU_ORBI:
1344 val = trunc_int_for_mode (val, QImode);
1345 break;
1346 default:
1347 gcc_unreachable();
1348 }
1349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1350 }
1351 else
1352 gcc_unreachable();
1353 return;
1354
1355 case 't': /* 128 bits, signed */
1356 case 'd': /* 64 bits, signed */
1357 case 's': /* 32 bits, signed */
dea01258 1358 if (CONSTANT_P (x))
644459d0 1359 {
dea01258 1360 enum immediate_class c = classify_immediate (x, mode);
1361 switch (c)
1362 {
1363 case IC_IL1:
1364 constant_to_array (mode, x, arr);
1365 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1366 val = trunc_int_for_mode (val, SImode);
1367 switch (which_immediate_load (val))
1368 {
1369 case SPU_IL:
1370 break;
1371 case SPU_ILA:
1372 fprintf (file, "a");
1373 break;
1374 case SPU_ILH:
1375 fprintf (file, "h");
1376 break;
1377 case SPU_ILHU:
1378 fprintf (file, "hu");
1379 break;
1380 default:
1381 gcc_unreachable ();
1382 }
1383 break;
1384 case IC_CPAT:
1385 constant_to_array (mode, x, arr);
1386 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1387 if (info == 1)
1388 fprintf (file, "b");
1389 else if (info == 2)
1390 fprintf (file, "h");
1391 else if (info == 4)
1392 fprintf (file, "w");
1393 else if (info == 8)
1394 fprintf (file, "d");
1395 break;
1396 case IC_IL1s:
1397 if (xcode == CONST_VECTOR)
1398 {
1399 x = CONST_VECTOR_ELT (x, 0);
1400 xcode = GET_CODE (x);
1401 }
1402 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1403 fprintf (file, "a");
1404 else if (xcode == HIGH)
1405 fprintf (file, "hu");
1406 break;
1407 case IC_FSMBI:
5df189be 1408 case IC_FSMBI2:
dea01258 1409 case IC_IL2:
1410 case IC_IL2s:
1411 case IC_POOL:
1412 abort ();
1413 }
644459d0 1414 }
644459d0 1415 else
1416 gcc_unreachable ();
1417 return;
1418
1419 case 'T': /* 128 bits, signed */
1420 case 'D': /* 64 bits, signed */
1421 case 'S': /* 32 bits, signed */
dea01258 1422 if (CONSTANT_P (x))
644459d0 1423 {
dea01258 1424 enum immediate_class c = classify_immediate (x, mode);
1425 switch (c)
644459d0 1426 {
dea01258 1427 case IC_IL1:
1428 constant_to_array (mode, x, arr);
1429 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1430 val = trunc_int_for_mode (val, SImode);
1431 switch (which_immediate_load (val))
1432 {
1433 case SPU_IL:
1434 case SPU_ILA:
1435 break;
1436 case SPU_ILH:
1437 case SPU_ILHU:
1438 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1439 break;
1440 default:
1441 gcc_unreachable ();
1442 }
1443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1444 break;
1445 case IC_FSMBI:
1446 constant_to_array (mode, x, arr);
1447 val = 0;
1448 for (i = 0; i < 16; i++)
1449 {
1450 val <<= 1;
1451 val |= arr[i] & 1;
1452 }
1453 print_operand (file, GEN_INT (val), 0);
1454 break;
1455 case IC_CPAT:
1456 constant_to_array (mode, x, arr);
1457 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1458 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1459 break;
dea01258 1460 case IC_IL1s:
dea01258 1461 if (xcode == HIGH)
5df189be 1462 x = XEXP (x, 0);
1463 if (GET_CODE (x) == CONST_VECTOR)
1464 x = CONST_VECTOR_ELT (x, 0);
1465 output_addr_const (file, x);
1466 if (xcode == HIGH)
1467 fprintf (file, "@h");
644459d0 1468 break;
dea01258 1469 case IC_IL2:
1470 case IC_IL2s:
5df189be 1471 case IC_FSMBI2:
dea01258 1472 case IC_POOL:
1473 abort ();
644459d0 1474 }
c8befdb9 1475 }
644459d0 1476 else
1477 gcc_unreachable ();
1478 return;
1479
644459d0 1480 case 'C':
1481 if (xcode == CONST_INT)
1482 {
 1483 /* Only 4 least significant bits are relevant for generating
1484 control word instructions. */
1485 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1486 return;
1487 }
1488 break;
1489
1490 case 'M': /* print code for c*d */
1491 if (GET_CODE (x) == CONST_INT)
1492 switch (INTVAL (x))
1493 {
1494 case 1:
1495 fprintf (file, "b");
1496 break;
1497 case 2:
1498 fprintf (file, "h");
1499 break;
1500 case 4:
1501 fprintf (file, "w");
1502 break;
1503 case 8:
1504 fprintf (file, "d");
1505 break;
1506 default:
1507 gcc_unreachable();
1508 }
1509 else
1510 gcc_unreachable();
1511 return;
1512
1513 case 'N': /* Negate the operand */
1514 if (xcode == CONST_INT)
1515 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1516 else if (xcode == CONST_VECTOR)
1517 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1518 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1519 return;
1520
1521 case 'I': /* enable/disable interrupts */
1522 if (xcode == CONST_INT)
1523 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1524 return;
1525
1526 case 'b': /* branch modifiers */
1527 if (xcode == REG)
1528 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1529 else if (COMPARISON_P (x))
1530 fprintf (file, "%s", xcode == NE ? "n" : "");
1531 return;
1532
1533 case 'i': /* indirect call */
1534 if (xcode == MEM)
1535 {
1536 if (GET_CODE (XEXP (x, 0)) == REG)
1537 /* Used in indirect function calls. */
1538 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1539 else
1540 output_address (XEXP (x, 0));
1541 }
1542 return;
1543
1544 case 'p': /* load/store */
1545 if (xcode == MEM)
1546 {
1547 x = XEXP (x, 0);
1548 xcode = GET_CODE (x);
1549 }
e04cf423 1550 if (xcode == AND)
1551 {
1552 x = XEXP (x, 0);
1553 xcode = GET_CODE (x);
1554 }
644459d0 1555 if (xcode == REG)
1556 fprintf (file, "d");
1557 else if (xcode == CONST_INT)
1558 fprintf (file, "a");
1559 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1560 fprintf (file, "r");
1561 else if (xcode == PLUS || xcode == LO_SUM)
1562 {
1563 if (GET_CODE (XEXP (x, 1)) == REG)
1564 fprintf (file, "x");
1565 else
1566 fprintf (file, "d");
1567 }
1568 return;
1569
5df189be 1570 case 'e':
1571 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1572 val &= 0x7;
1573 output_addr_const (file, GEN_INT (val));
1574 return;
1575
1576 case 'f':
1577 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1578 val &= 0x1f;
1579 output_addr_const (file, GEN_INT (val));
1580 return;
1581
1582 case 'g':
1583 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1584 val &= 0x3f;
1585 output_addr_const (file, GEN_INT (val));
1586 return;
1587
1588 case 'h':
1589 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1590 val = (val >> 3) & 0x1f;
1591 output_addr_const (file, GEN_INT (val));
1592 return;
1593
1594 case 'E':
1595 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1596 val = -val;
1597 val &= 0x7;
1598 output_addr_const (file, GEN_INT (val));
1599 return;
1600
1601 case 'F':
1602 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1603 val = -val;
1604 val &= 0x1f;
1605 output_addr_const (file, GEN_INT (val));
1606 return;
1607
1608 case 'G':
1609 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1610 val = -val;
1611 val &= 0x3f;
1612 output_addr_const (file, GEN_INT (val));
1613 return;
1614
1615 case 'H':
1616 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1617 val = -(val & -8ll);
1618 val = (val >> 3) & 0x1f;
1619 output_addr_const (file, GEN_INT (val));
1620 return;
1621
56c7bfc2 1622 case 'v':
1623 case 'w':
1624 constant_to_array (mode, x, arr);
1625 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1626 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1627 return;
1628
644459d0 1629 case 0:
1630 if (xcode == REG)
1631 fprintf (file, "%s", reg_names[REGNO (x)]);
1632 else if (xcode == MEM)
1633 output_address (XEXP (x, 0));
1634 else if (xcode == CONST_VECTOR)
dea01258 1635 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1636 else
1637 output_addr_const (file, x);
1638 return;
1639
f6a0d06f 1640 /* unused letters
56c7bfc2 1641 o qr u yz
5df189be 1642 AB OPQR UVWXYZ */
644459d0 1643 default:
1644 output_operand_lossage ("invalid %%xn code");
1645 }
1646 gcc_unreachable ();
1647}
1648
1649extern char call_used_regs[];
644459d0 1650
1651/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1652 caller saved register. For leaf functions it is more efficient to
1653 use a volatile register because we won't need to save and restore the
1654 pic register. This routine is only valid after register allocation
1655 is completed, so we can pick an unused register. */
1656static rtx
1657get_pic_reg (void)
1658{
1659 rtx pic_reg = pic_offset_table_rtx;
1660 if (!reload_completed && !reload_in_progress)
1661 abort ();
1662 return pic_reg;
1663}
1664
5df189be 1665/* Split constant addresses to handle cases that are too large.
1666 Add in the pic register when in PIC mode.
1667 Split immediates that require more than 1 instruction. */
dea01258 1668int
1669spu_split_immediate (rtx * ops)
c8befdb9 1670{
dea01258 1671 enum machine_mode mode = GET_MODE (ops[0]);
1672 enum immediate_class c = classify_immediate (ops[1], mode);
1673
1674 switch (c)
c8befdb9 1675 {
dea01258 1676 case IC_IL2:
1677 {
1678 unsigned char arrhi[16];
1679 unsigned char arrlo[16];
98bbec1e 1680 rtx to, temp, hi, lo;
dea01258 1681 int i;
98bbec1e 1682 enum machine_mode imode = mode;
1683 /* We need to do reals as ints because the constant used in the
1684 IOR might not be a legitimate real constant. */
1685 imode = int_mode_for_mode (mode);
dea01258 1686 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1687 if (imode != mode)
1688 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1689 else
1690 to = ops[0];
1691 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1692 for (i = 0; i < 16; i += 4)
1693 {
1694 arrlo[i + 2] = arrhi[i + 2];
1695 arrlo[i + 3] = arrhi[i + 3];
1696 arrlo[i + 0] = arrlo[i + 1] = 0;
1697 arrhi[i + 2] = arrhi[i + 3] = 0;
1698 }
98bbec1e 1699 hi = array_to_constant (imode, arrhi);
1700 lo = array_to_constant (imode, arrlo);
1701 emit_move_insn (temp, hi);
dea01258 1702 emit_insn (gen_rtx_SET
98bbec1e 1703 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1704 return 1;
1705 }
5df189be 1706 case IC_FSMBI2:
1707 {
1708 unsigned char arr_fsmbi[16];
1709 unsigned char arr_andbi[16];
1710 rtx to, reg_fsmbi, reg_and;
1711 int i;
1712 enum machine_mode imode = mode;
1713 /* We need to do reals as ints because the constant used in the
1714 * AND might not be a legitimate real constant. */
1715 imode = int_mode_for_mode (mode);
1716 constant_to_array (mode, ops[1], arr_fsmbi);
1717 if (imode != mode)
1718 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1719 else
1720 to = ops[0];
1721 for (i = 0; i < 16; i++)
1722 if (arr_fsmbi[i] != 0)
1723 {
1724 arr_andbi[0] = arr_fsmbi[i];
1725 arr_fsmbi[i] = 0xff;
1726 }
1727 for (i = 1; i < 16; i++)
1728 arr_andbi[i] = arr_andbi[0];
1729 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1730 reg_and = array_to_constant (imode, arr_andbi);
1731 emit_move_insn (to, reg_fsmbi);
1732 emit_insn (gen_rtx_SET
1733 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1734 return 1;
1735 }
dea01258 1736 case IC_POOL:
1737 if (reload_in_progress || reload_completed)
1738 {
1739 rtx mem = force_const_mem (mode, ops[1]);
1740 if (TARGET_LARGE_MEM)
1741 {
1742 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1743 emit_move_insn (addr, XEXP (mem, 0));
1744 mem = replace_equiv_address (mem, addr);
1745 }
1746 emit_move_insn (ops[0], mem);
1747 return 1;
1748 }
1749 break;
1750 case IC_IL1s:
1751 case IC_IL2s:
1752 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1753 {
1754 if (c == IC_IL2s)
1755 {
5df189be 1756 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1757 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1758 }
1759 else if (flag_pic)
1760 emit_insn (gen_pic (ops[0], ops[1]));
1761 if (flag_pic)
1762 {
1763 rtx pic_reg = get_pic_reg ();
1764 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1765 crtl->uses_pic_offset_table = 1;
dea01258 1766 }
1767 return flag_pic || c == IC_IL2s;
1768 }
1769 break;
1770 case IC_IL1:
1771 case IC_FSMBI:
1772 case IC_CPAT:
1773 break;
c8befdb9 1774 }
dea01258 1775 return 0;
c8befdb9 1776}
1777
644459d0 1778/* SAVING is TRUE when we are generating the actual load and store
1779 instructions for REGNO. When determining the size of the stack
 1780 needed for saving registers, we must allocate enough space for the
1781 worst case, because we don't always have the information early enough
1782 to not allocate it. But we can at least eliminate the actual loads
1783 and stores during the prologue/epilogue. */
1784static int
1785need_to_save_reg (int regno, int saving)
1786{
3072d30e 1787 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1788 return 1;
1789 if (flag_pic
1790 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1791 && (!saving || crtl->uses_pic_offset_table)
644459d0 1792 && (!saving
3072d30e 1793 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1794 return 1;
1795 return 0;
1796}
1797
1798/* This function is only correct starting with local register
1799 allocation */
1800int
1801spu_saved_regs_size (void)
1802{
1803 int reg_save_size = 0;
1804 int regno;
1805
1806 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1807 if (need_to_save_reg (regno, 0))
1808 reg_save_size += 0x10;
1809 return reg_save_size;
1810}
1811
1812static rtx
1813frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1814{
1815 rtx reg = gen_rtx_REG (V4SImode, regno);
1816 rtx mem =
1817 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1818 return emit_insn (gen_movv4si (mem, reg));
1819}
1820
1821static rtx
1822frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1823{
1824 rtx reg = gen_rtx_REG (V4SImode, regno);
1825 rtx mem =
1826 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1827 return emit_insn (gen_movv4si (reg, mem));
1828}
1829
1830/* This happens after reload, so we need to expand it. */
1831static rtx
1832frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1833{
1834 rtx insn;
1835 if (satisfies_constraint_K (GEN_INT (imm)))
1836 {
1837 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1838 }
1839 else
1840 {
3072d30e 1841 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1842 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1843 if (REGNO (src) == REGNO (scratch))
1844 abort ();
1845 }
644459d0 1846 return insn;
1847}
1848
1849/* Return nonzero if this function is known to have a null epilogue. */
1850
1851int
1852direct_return (void)
1853{
1854 if (reload_completed)
1855 {
1856 if (cfun->static_chain_decl == 0
1857 && (spu_saved_regs_size ()
1858 + get_frame_size ()
abe32cce 1859 + crtl->outgoing_args_size
1860 + crtl->args.pretend_args_size == 0)
644459d0 1861 && current_function_is_leaf)
1862 return 1;
1863 }
1864 return 0;
1865}
1866
1867/*
1868 The stack frame looks like this:
1869 +-------------+
1870 | incoming |
a8e019fa 1871 | args |
1872 AP -> +-------------+
644459d0 1873 | $lr save |
1874 +-------------+
1875 prev SP | back chain |
1876 +-------------+
1877 | var args |
abe32cce 1878 | reg save | crtl->args.pretend_args_size bytes
644459d0 1879 +-------------+
1880 | ... |
1881 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1882 FP -> +-------------+
644459d0 1883 | ... |
a8e019fa 1884 | vars | get_frame_size() bytes
1885 HFP -> +-------------+
644459d0 1886 | ... |
1887 | outgoing |
abe32cce 1888 | args | crtl->outgoing_args_size bytes
644459d0 1889 +-------------+
1890 | $lr of next |
1891 | frame |
1892 +-------------+
a8e019fa 1893 | back chain |
1894 SP -> +-------------+
644459d0 1895
1896*/
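/* As an illustration (hypothetical sizes, not code generated here): a
   non-leaf function with 32 bytes of locals, two registers to save and
   no outgoing or pretend args gets
     total_size = 32 + 2*16 + 0 + 0 + STACK_POINTER_OFFSET.
   The prologue below saves $lr at 16($sp), stores the saved registers
   at negative offsets from the incoming $sp, then decrements $sp by
   total_size and stores the old $sp value as the back chain.  */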
1897void
1898spu_expand_prologue (void)
1899{
1900 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1901 HOST_WIDE_INT total_size;
1902 HOST_WIDE_INT saved_regs_size;
1903 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1904 rtx scratch_reg_0, scratch_reg_1;
1905 rtx insn, real;
1906
1907 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1908 the "toplevel" insn chain. */
1909 emit_note (NOTE_INSN_DELETED);
1910
1911 if (flag_pic && optimize == 0)
18d50ae6 1912 crtl->uses_pic_offset_table = 1;
644459d0 1913
1914 if (spu_naked_function_p (current_function_decl))
1915 return;
1916
1917 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1918 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1919
1920 saved_regs_size = spu_saved_regs_size ();
1921 total_size = size + saved_regs_size
abe32cce 1922 + crtl->outgoing_args_size
1923 + crtl->args.pretend_args_size;
644459d0 1924
1925 if (!current_function_is_leaf
18d50ae6 1926 || cfun->calls_alloca || total_size > 0)
644459d0 1927 total_size += STACK_POINTER_OFFSET;
1928
1929 /* Save this first because code after this might use the link
1930 register as a scratch register. */
1931 if (!current_function_is_leaf)
1932 {
1933 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1934 RTX_FRAME_RELATED_P (insn) = 1;
1935 }
1936
1937 if (total_size > 0)
1938 {
abe32cce 1939 offset = -crtl->args.pretend_args_size;
644459d0 1940 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1941 if (need_to_save_reg (regno, 1))
1942 {
1943 offset -= 16;
1944 insn = frame_emit_store (regno, sp_reg, offset);
1945 RTX_FRAME_RELATED_P (insn) = 1;
1946 }
1947 }
1948
18d50ae6 1949 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1950 {
1951 rtx pic_reg = get_pic_reg ();
1952 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1953 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1954 }
1955
1956 if (total_size > 0)
1957 {
1958 if (flag_stack_check)
1959 {
d819917f 1960 /* We compare against total_size-1 because
644459d0 1961 ($sp >= total_size) <=> ($sp > total_size-1) */
1962 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1963 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1964 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1965 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1966 {
1967 emit_move_insn (scratch_v4si, size_v4si);
1968 size_v4si = scratch_v4si;
1969 }
1970 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1971 emit_insn (gen_vec_extractv4si
1972 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1973 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1974 }
1975
1976 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1977 the value of the previous $sp because we save it as the back
1978 chain. */
1979 if (total_size <= 2000)
1980 {
1981 /* In this case we save the back chain first. */
1982 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1983 insn =
1984 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1985 }
644459d0 1986 else
1987 {
1988 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1989 insn =
1990 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1991 }
1992 RTX_FRAME_RELATED_P (insn) = 1;
1993 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1994 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1995
1996 if (total_size > 2000)
1997 {
1998 /* Save the back chain ptr */
1999 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 2000 }
2001
2002 if (frame_pointer_needed)
2003 {
2004 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2005 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 2006 + crtl->outgoing_args_size;
644459d0 2007 /* Set the new frame_pointer */
d8dfeb55 2008 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2009 RTX_FRAME_RELATED_P (insn) = 1;
2010 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 2011 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 2012 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 2013 }
2014 }
2015
2016 emit_note (NOTE_INSN_DELETED);
2017}
2018
2019void
2020spu_expand_epilogue (bool sibcall_p)
2021{
2022 int size = get_frame_size (), offset, regno;
2023 HOST_WIDE_INT saved_regs_size, total_size;
2024 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2025 rtx jump, scratch_reg_0;
2026
2027 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2028 the "toplevel" insn chain. */
2029 emit_note (NOTE_INSN_DELETED);
2030
2031 if (spu_naked_function_p (current_function_decl))
2032 return;
2033
2034 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2035
2036 saved_regs_size = spu_saved_regs_size ();
2037 total_size = size + saved_regs_size
abe32cce 2038 + crtl->outgoing_args_size
2039 + crtl->args.pretend_args_size;
644459d0 2040
2041 if (!current_function_is_leaf
18d50ae6 2042 || cfun->calls_alloca || total_size > 0)
644459d0 2043 total_size += STACK_POINTER_OFFSET;
2044
2045 if (total_size > 0)
2046 {
18d50ae6 2047 if (cfun->calls_alloca)
644459d0 2048 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2049 else
2050 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2051
2052
2053 if (saved_regs_size > 0)
2054 {
abe32cce 2055 offset = -crtl->args.pretend_args_size;
644459d0 2056 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2057 if (need_to_save_reg (regno, 1))
2058 {
2059 offset -= 0x10;
2060 frame_emit_load (regno, sp_reg, offset);
2061 }
2062 }
2063 }
2064
2065 if (!current_function_is_leaf)
2066 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2067
2068 if (!sibcall_p)
2069 {
18b42941 2070 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 2071 jump = emit_jump_insn (gen__return ());
2072 emit_barrier_after (jump);
2073 }
2074
2075 emit_note (NOTE_INSN_DELETED);
2076}
2077
2078rtx
2079spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2080{
2081 if (count != 0)
2082 return 0;
2083 /* This is inefficient because it ends up copying to a save-register
2084 which then gets saved even though $lr has already been saved. But
2085 it does generate better code for leaf functions and we don't need
2086 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2087 used for __builtin_return_address anyway, so maybe we don't care if
2088 it's inefficient. */
2089 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2090}
2091\f
2092
2093/* Given VAL, generate a constant appropriate for MODE.
2094 If MODE is a vector mode, every element will be VAL.
2095 For TImode, VAL will be zero extended to 128 bits. */
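/* For example (illustrative only), spu_const (V4SImode, 1) returns a
   CONST_VECTOR of four SImode 1s, while spu_const (SImode, -1) is just
   (const_int -1).  */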
2096rtx
2097spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2098{
2099 rtx inner;
2100 rtvec v;
2101 int units, i;
2102
2103 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2104 || GET_MODE_CLASS (mode) == MODE_FLOAT
2105 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2106 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2107
2108 if (GET_MODE_CLASS (mode) == MODE_INT)
2109 return immed_double_const (val, 0, mode);
2110
 2111	  /* VAL is the bit representation of the float.  */
2112 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2113 return hwint_to_const_double (mode, val);
2114
2115 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2116 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2117 else
2118 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2119
2120 units = GET_MODE_NUNITS (mode);
2121
2122 v = rtvec_alloc (units);
2123
2124 for (i = 0; i < units; ++i)
2125 RTVEC_ELT (v, i) = inner;
2126
2127 return gen_rtx_CONST_VECTOR (mode, v);
2128}
644459d0 2129
5474166e 2130/* Create a MODE vector constant from 4 ints. */
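/* For example (illustrative only),
     spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
                          0x08090a0b, 0x0c0d0e0f)
   yields the byte sequence 00 01 02 ... 0f; each int is emitted most
   significant byte first, matching the SPU's big-endian layout.  */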
2131rtx
2132spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2133{
2134 unsigned char arr[16];
2135 arr[0] = (a >> 24) & 0xff;
2136 arr[1] = (a >> 16) & 0xff;
2137 arr[2] = (a >> 8) & 0xff;
2138 arr[3] = (a >> 0) & 0xff;
2139 arr[4] = (b >> 24) & 0xff;
2140 arr[5] = (b >> 16) & 0xff;
2141 arr[6] = (b >> 8) & 0xff;
2142 arr[7] = (b >> 0) & 0xff;
2143 arr[8] = (c >> 24) & 0xff;
2144 arr[9] = (c >> 16) & 0xff;
2145 arr[10] = (c >> 8) & 0xff;
2146 arr[11] = (c >> 0) & 0xff;
2147 arr[12] = (d >> 24) & 0xff;
2148 arr[13] = (d >> 16) & 0xff;
2149 arr[14] = (d >> 8) & 0xff;
2150 arr[15] = (d >> 0) & 0xff;
2151 return array_to_constant(mode, arr);
2152}
5a976006 2153\f
2154/* branch hint stuff */
5474166e 2155
644459d0 2156/* An array of these is used to propagate hints to predecessor blocks. */
2157struct spu_bb_info
2158{
5a976006 2159 rtx prop_jump; /* propagated from another block */
2160 int bb_index; /* the original block. */
644459d0 2161};
5a976006 2162static struct spu_bb_info *spu_bb_info;
644459d0 2163
5a976006 2164#define STOP_HINT_P(INSN) \
2165 (GET_CODE(INSN) == CALL_INSN \
2166 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2167 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2168
2169/* 1 when RTX is a hinted branch or its target. We keep track of
2170 what has been hinted so the safe-hint code can test it easily. */
2171#define HINTED_P(RTX) \
2172 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2173
2174/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2175#define SCHED_ON_EVEN_P(RTX) \
2176 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2177
2178/* Emit a nop for INSN such that the two will dual issue. This assumes
2179 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2180 We check for TImode to handle a MULTI1 insn which has dual issued its
2181 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2182 ADDR_VEC insns. */
2183static void
2184emit_nop_for_insn (rtx insn)
644459d0 2185{
5a976006 2186 int p;
2187 rtx new_insn;
2188 p = get_pipe (insn);
2189 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2190 new_insn = emit_insn_after (gen_lnop (), insn);
2191 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2192 {
5a976006 2193 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2194 PUT_MODE (new_insn, TImode);
2195 PUT_MODE (insn, VOIDmode);
2196 }
2197 else
2198 new_insn = emit_insn_after (gen_lnop (), insn);
2199 recog_memoized (new_insn);
2200}
2201
2202/* Insert nops in basic blocks to meet dual issue alignment
2203 requirements. Also make sure hbrp and hint instructions are at least
2204 one cycle apart, possibly inserting a nop. */
2205static void
2206pad_bb(void)
2207{
2208 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2209 int length;
2210 int addr;
2211
2212 /* This sets up INSN_ADDRESSES. */
2213 shorten_branches (get_insns ());
2214
2215 /* Keep track of length added by nops. */
2216 length = 0;
2217
2218 prev_insn = 0;
2219 insn = get_insns ();
2220 if (!active_insn_p (insn))
2221 insn = next_active_insn (insn);
2222 for (; insn; insn = next_insn)
2223 {
2224 next_insn = next_active_insn (insn);
2225 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2226 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2227 {
5a976006 2228 if (hbr_insn)
2229 {
2230 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2231 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2232 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2233 || (a1 - a0 == 4))
2234 {
2235 prev_insn = emit_insn_before (gen_lnop (), insn);
2236 PUT_MODE (prev_insn, GET_MODE (insn));
2237 PUT_MODE (insn, TImode);
2238 length += 4;
2239 }
2240 }
2241 hbr_insn = insn;
2242 }
2243 if (INSN_CODE (insn) == CODE_FOR_blockage)
2244 {
2245 if (GET_MODE (insn) == TImode)
2246 PUT_MODE (next_insn, TImode);
2247 insn = next_insn;
2248 next_insn = next_active_insn (insn);
2249 }
2250 addr = INSN_ADDRESSES (INSN_UID (insn));
2251 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2252 {
2253 if (((addr + length) & 7) != 0)
2254 {
2255 emit_nop_for_insn (prev_insn);
2256 length += 4;
2257 }
644459d0 2258 }
5a976006 2259 else if (GET_MODE (insn) == TImode
2260 && ((next_insn && GET_MODE (next_insn) != TImode)
2261 || get_attr_type (insn) == TYPE_MULTI0)
2262 && ((addr + length) & 7) != 0)
2263 {
2264 /* prev_insn will always be set because the first insn is
2265 always 8-byte aligned. */
2266 emit_nop_for_insn (prev_insn);
2267 length += 4;
2268 }
2269 prev_insn = insn;
644459d0 2270 }
644459d0 2271}
2272
5a976006 2273\f
2274/* Routines for branch hints. */
2275
644459d0 2276static void
5a976006 2277spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2278 int distance, sbitmap blocks)
644459d0 2279{
5a976006 2280 rtx branch_label = 0;
2281 rtx hint;
2282 rtx insn;
2283 rtx table;
644459d0 2284
2285 if (before == 0 || branch == 0 || target == 0)
2286 return;
2287
5a976006 2288 /* While scheduling we require hints to be no further than 600, so
2289 we need to enforce that here too */
644459d0 2290 if (distance > 600)
2291 return;
2292
5a976006 2293  /* If BEFORE is a basic block note, emit the hint after it.  */
2294 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2295 before = NEXT_INSN (before);
644459d0 2296
2297 branch_label = gen_label_rtx ();
2298 LABEL_NUSES (branch_label)++;
2299 LABEL_PRESERVE_P (branch_label) = 1;
2300 insn = emit_label_before (branch_label, branch);
2301 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2302 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2303
2304 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2305 recog_memoized (hint);
2306 HINTED_P (branch) = 1;
644459d0 2307
5a976006 2308 if (GET_CODE (target) == LABEL_REF)
2309 HINTED_P (XEXP (target, 0)) = 1;
2310 else if (tablejump_p (branch, 0, &table))
644459d0 2311 {
5a976006 2312 rtvec vec;
2313 int j;
2314 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2315 vec = XVEC (PATTERN (table), 0);
2316 else
2317 vec = XVEC (PATTERN (table), 1);
2318 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2319 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2320 }
5a976006 2321
2322 if (distance >= 588)
644459d0 2323 {
5a976006 2324 /* Make sure the hint isn't scheduled any earlier than this point,
 2325	 which could make it too far for the branch offset to fit.  */
2326 recog_memoized (emit_insn_before (gen_blockage (), hint));
2327 }
2328 else if (distance <= 8 * 4)
2329 {
2330 /* To guarantee at least 8 insns between the hint and branch we
2331 insert nops. */
2332 int d;
2333 for (d = distance; d < 8 * 4; d += 4)
2334 {
2335 insn =
2336 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2337 recog_memoized (insn);
2338 }
2339
2340 /* Make sure any nops inserted aren't scheduled before the hint. */
2341 recog_memoized (emit_insn_after (gen_blockage (), hint));
2342
2343 /* Make sure any nops inserted aren't scheduled after the call. */
2344 if (CALL_P (branch) && distance < 8 * 4)
2345 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2346 }
644459d0 2347}
2348
2349/* Returns 0 if we don't want a hint for this branch. Otherwise return
2350 the rtx for the branch target. */
2351static rtx
2352get_branch_target (rtx branch)
2353{
2354 if (GET_CODE (branch) == JUMP_INSN)
2355 {
2356 rtx set, src;
2357
2358 /* Return statements */
2359 if (GET_CODE (PATTERN (branch)) == RETURN)
2360 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2361
2362 /* jump table */
2363 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2364 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2365 return 0;
2366
2367 set = single_set (branch);
2368 src = SET_SRC (set);
2369 if (GET_CODE (SET_DEST (set)) != PC)
2370 abort ();
2371
2372 if (GET_CODE (src) == IF_THEN_ELSE)
2373 {
2374 rtx lab = 0;
2375 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2376 if (note)
2377 {
2378 /* If the more probable case is not a fall through, then
2379 try a branch hint. */
2380 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2381 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2382 && GET_CODE (XEXP (src, 1)) != PC)
2383 lab = XEXP (src, 1);
2384 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2385 && GET_CODE (XEXP (src, 2)) != PC)
2386 lab = XEXP (src, 2);
2387 }
2388 if (lab)
2389 {
2390 if (GET_CODE (lab) == RETURN)
2391 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2392 return lab;
2393 }
2394 return 0;
2395 }
2396
2397 return src;
2398 }
2399 else if (GET_CODE (branch) == CALL_INSN)
2400 {
2401 rtx call;
2402 /* All of our call patterns are in a PARALLEL and the CALL is
2403 the first pattern in the PARALLEL. */
2404 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2405 abort ();
2406 call = XVECEXP (PATTERN (branch), 0, 0);
2407 if (GET_CODE (call) == SET)
2408 call = SET_SRC (call);
2409 if (GET_CODE (call) != CALL)
2410 abort ();
2411 return XEXP (XEXP (call, 0), 0);
2412 }
2413 return 0;
2414}
2415
5a976006 2416/* The special $hbr register is used to prevent the insn scheduler from
2417 moving hbr insns across instructions which invalidate them. It
2418 should only be used in a clobber, and this function searches for
2419 insns which clobber it. */
2420static bool
2421insn_clobbers_hbr (rtx insn)
2422{
2423 if (INSN_P (insn)
2424 && GET_CODE (PATTERN (insn)) == PARALLEL)
2425 {
2426 rtx parallel = PATTERN (insn);
2427 rtx clobber;
2428 int j;
2429 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2430 {
2431 clobber = XVECEXP (parallel, 0, j);
2432 if (GET_CODE (clobber) == CLOBBER
2433 && GET_CODE (XEXP (clobber, 0)) == REG
2434 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2435 return 1;
2436 }
2437 }
2438 return 0;
2439}
2440
2441/* Search up to 32 insns starting at FIRST:
2442 - at any kind of hinted branch, just return
2443 - at any unconditional branch in the first 15 insns, just return
2444 - at a call or indirect branch, after the first 15 insns, force it to
2445 an even address and return
2446 - at any unconditional branch, after the first 15 insns, force it to
2447 an even address.
 2448   At the end of the search, insert an hbrp within 4 insns of FIRST,
2449 and an hbrp within 16 instructions of FIRST.
2450 */
644459d0 2451static void
5a976006 2452insert_hbrp_for_ilb_runout (rtx first)
644459d0 2453{
5a976006 2454 rtx insn, before_4 = 0, before_16 = 0;
2455 int addr = 0, length, first_addr = -1;
2456 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2457 int insert_lnop_after = 0;
2458 for (insn = first; insn; insn = NEXT_INSN (insn))
2459 if (INSN_P (insn))
2460 {
2461 if (first_addr == -1)
2462 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2463 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2464 length = get_attr_length (insn);
2465
2466 if (before_4 == 0 && addr + length >= 4 * 4)
2467 before_4 = insn;
2468 /* We test for 14 instructions because the first hbrp will add
2469 up to 2 instructions. */
2470 if (before_16 == 0 && addr + length >= 14 * 4)
2471 before_16 = insn;
2472
2473 if (INSN_CODE (insn) == CODE_FOR_hbr)
2474 {
2475 /* Make sure an hbrp is at least 2 cycles away from a hint.
2476 Insert an lnop after the hbrp when necessary. */
2477 if (before_4 == 0 && addr > 0)
2478 {
2479 before_4 = insn;
2480 insert_lnop_after |= 1;
2481 }
2482 else if (before_4 && addr <= 4 * 4)
2483 insert_lnop_after |= 1;
2484 if (before_16 == 0 && addr > 10 * 4)
2485 {
2486 before_16 = insn;
2487 insert_lnop_after |= 2;
2488 }
2489 else if (before_16 && addr <= 14 * 4)
2490 insert_lnop_after |= 2;
2491 }
644459d0 2492
5a976006 2493 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2494 {
2495 if (addr < hbrp_addr0)
2496 hbrp_addr0 = addr;
2497 else if (addr < hbrp_addr1)
2498 hbrp_addr1 = addr;
2499 }
644459d0 2500
5a976006 2501 if (CALL_P (insn) || JUMP_P (insn))
2502 {
2503 if (HINTED_P (insn))
2504 return;
2505
2506 /* Any branch after the first 15 insns should be on an even
2507 address to avoid a special case branch. There might be
2508 some nops and/or hbrps inserted, so we test after 10
2509 insns. */
2510 if (addr > 10 * 4)
2511 SCHED_ON_EVEN_P (insn) = 1;
2512 }
644459d0 2513
5a976006 2514 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2515 return;
2516
2517
2518 if (addr + length >= 32 * 4)
644459d0 2519 {
5a976006 2520 gcc_assert (before_4 && before_16);
2521 if (hbrp_addr0 > 4 * 4)
644459d0 2522 {
5a976006 2523 insn =
2524 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2525 recog_memoized (insn);
2526 INSN_ADDRESSES_NEW (insn,
2527 INSN_ADDRESSES (INSN_UID (before_4)));
2528 PUT_MODE (insn, GET_MODE (before_4));
2529 PUT_MODE (before_4, TImode);
2530 if (insert_lnop_after & 1)
644459d0 2531 {
5a976006 2532 insn = emit_insn_before (gen_lnop (), before_4);
2533 recog_memoized (insn);
2534 INSN_ADDRESSES_NEW (insn,
2535 INSN_ADDRESSES (INSN_UID (before_4)));
2536 PUT_MODE (insn, TImode);
644459d0 2537 }
644459d0 2538 }
5a976006 2539 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2540 && hbrp_addr1 > 16 * 4)
644459d0 2541 {
5a976006 2542 insn =
2543 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2544 recog_memoized (insn);
2545 INSN_ADDRESSES_NEW (insn,
2546 INSN_ADDRESSES (INSN_UID (before_16)));
2547 PUT_MODE (insn, GET_MODE (before_16));
2548 PUT_MODE (before_16, TImode);
2549 if (insert_lnop_after & 2)
644459d0 2550 {
5a976006 2551 insn = emit_insn_before (gen_lnop (), before_16);
2552 recog_memoized (insn);
2553 INSN_ADDRESSES_NEW (insn,
2554 INSN_ADDRESSES (INSN_UID
2555 (before_16)));
2556 PUT_MODE (insn, TImode);
644459d0 2557 }
2558 }
5a976006 2559 return;
644459d0 2560 }
644459d0 2561 }
5a976006 2562 else if (BARRIER_P (insn))
2563 return;
644459d0 2564
644459d0 2565}
5a976006 2566
2567/* The SPU might hang when it executes 48 inline instructions after a
2568 hinted branch jumps to its hinted target. The beginning of a
2569 function and the return from a call might have been hinted, and must
2570 be handled as well. To prevent a hang we insert 2 hbrps. The first
2571 should be within 6 insns of the branch target. The second should be
2572 within 22 insns of the branch target. When determining if hbrps are
 2573   necessary, we look for only 32 inline instructions, because up to
 2574   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2575 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2576static void
5a976006 2577insert_hbrp (void)
644459d0 2578{
5a976006 2579 rtx insn;
2580 if (TARGET_SAFE_HINTS)
644459d0 2581 {
5a976006 2582 shorten_branches (get_insns ());
2583 /* Insert hbrp at beginning of function */
2584 insn = next_active_insn (get_insns ());
2585 if (insn)
2586 insert_hbrp_for_ilb_runout (insn);
2587 /* Insert hbrp after hinted targets. */
2588 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2589 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2590 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2591 }
644459d0 2592}
2593
5a976006 2594static int in_spu_reorg;
2595
2596/* Insert branch hints. There are no branch optimizations after this
2597 pass, so it's safe to set our branch hints now. */
644459d0 2598static void
5a976006 2599spu_machine_dependent_reorg (void)
644459d0 2600{
5a976006 2601 sbitmap blocks;
2602 basic_block bb;
2603 rtx branch, insn;
2604 rtx branch_target = 0;
2605 int branch_addr = 0, insn_addr, required_dist = 0;
2606 int i;
2607 unsigned int j;
644459d0 2608
5a976006 2609 if (!TARGET_BRANCH_HINTS || optimize == 0)
2610 {
2611 /* We still do it for unoptimized code because an external
2612 function might have hinted a call or return. */
2613 insert_hbrp ();
2614 pad_bb ();
2615 return;
2616 }
644459d0 2617
5a976006 2618 blocks = sbitmap_alloc (last_basic_block);
2619 sbitmap_zero (blocks);
644459d0 2620
5a976006 2621 in_spu_reorg = 1;
2622 compute_bb_for_insn ();
2623
2624 compact_blocks ();
2625
2626 spu_bb_info =
2627 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2628 sizeof (struct spu_bb_info));
2629
2630 /* We need exact insn addresses and lengths. */
2631 shorten_branches (get_insns ());
2632
2633 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2634 {
5a976006 2635 bb = BASIC_BLOCK (i);
2636 branch = 0;
2637 if (spu_bb_info[i].prop_jump)
644459d0 2638 {
5a976006 2639 branch = spu_bb_info[i].prop_jump;
2640 branch_target = get_branch_target (branch);
2641 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2642 required_dist = spu_hint_dist;
2643 }
2644 /* Search from end of a block to beginning. In this loop, find
 2645	 jumps which need a branch hint and emit the hint only when:
2646 - it's an indirect branch and we're at the insn which sets
2647 the register
2648 - we're at an insn that will invalidate the hint. e.g., a
2649 call, another hint insn, inline asm that clobbers $hbr, and
2650 some inlined operations (divmodsi4). Don't consider jumps
2651 because they are only at the end of a block and are
2652 considered when we are deciding whether to propagate
2653 - we're getting too far away from the branch. The hbr insns
2654 only have a signed 10 bit offset
2655 We go back as far as possible so the branch will be considered
2656 for propagation when we get to the beginning of the block. */
2657 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2658 {
2659 if (INSN_P (insn))
2660 {
2661 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2662 if (branch
2663 && ((GET_CODE (branch_target) == REG
2664 && set_of (branch_target, insn) != NULL_RTX)
2665 || insn_clobbers_hbr (insn)
2666 || branch_addr - insn_addr > 600))
2667 {
2668 rtx next = NEXT_INSN (insn);
2669 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2670 if (insn != BB_END (bb)
2671 && branch_addr - next_addr >= required_dist)
2672 {
2673 if (dump_file)
2674 fprintf (dump_file,
2675 "hint for %i in block %i before %i\n",
2676 INSN_UID (branch), bb->index,
2677 INSN_UID (next));
2678 spu_emit_branch_hint (next, branch, branch_target,
2679 branch_addr - next_addr, blocks);
2680 }
2681 branch = 0;
2682 }
2683
2684 /* JUMP_P will only be true at the end of a block. When
2685 branch is already set it means we've previously decided
2686 to propagate a hint for that branch into this block. */
2687 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2688 {
2689 branch = 0;
2690 if ((branch_target = get_branch_target (insn)))
2691 {
2692 branch = insn;
2693 branch_addr = insn_addr;
2694 required_dist = spu_hint_dist;
2695 }
2696 }
2697 }
2698 if (insn == BB_HEAD (bb))
2699 break;
2700 }
2701
2702 if (branch)
2703 {
2704 /* If we haven't emitted a hint for this branch yet, it might
2705 be profitable to emit it in one of the predecessor blocks,
2706 especially for loops. */
2707 rtx bbend;
2708 basic_block prev = 0, prop = 0, prev2 = 0;
2709 int loop_exit = 0, simple_loop = 0;
2710 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2711
2712 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2713 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2714 prev = EDGE_PRED (bb, j)->src;
2715 else
2716 prev2 = EDGE_PRED (bb, j)->src;
2717
2718 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2719 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2720 loop_exit = 1;
2721 else if (EDGE_SUCC (bb, j)->dest == bb)
2722 simple_loop = 1;
2723
2724 /* If this branch is a loop exit then propagate to previous
2725 fallthru block. This catches the cases when it is a simple
2726 loop or when there is an initial branch into the loop. */
2727 if (prev && (loop_exit || simple_loop)
2728 && prev->loop_depth <= bb->loop_depth)
2729 prop = prev;
2730
 2731	  /* If there is only one adjacent predecessor, don't propagate
2732 outside this loop. This loop_depth test isn't perfect, but
2733 I'm not sure the loop_father member is valid at this point. */
2734 else if (prev && single_pred_p (bb)
2735 && prev->loop_depth == bb->loop_depth)
2736 prop = prev;
2737
2738 /* If this is the JOIN block of a simple IF-THEN then
 2739	     propagate the hint to the HEADER block.  */
2740 else if (prev && prev2
2741 && EDGE_COUNT (bb->preds) == 2
2742 && EDGE_COUNT (prev->preds) == 1
2743 && EDGE_PRED (prev, 0)->src == prev2
2744 && prev2->loop_depth == bb->loop_depth
2745 && GET_CODE (branch_target) != REG)
2746 prop = prev;
2747
2748 /* Don't propagate when:
2749 - this is a simple loop and the hint would be too far
2750 - this is not a simple loop and there are 16 insns in
2751 this block already
2752 - the predecessor block ends in a branch that will be
2753 hinted
2754 - the predecessor block ends in an insn that invalidates
2755 the hint */
2756 if (prop
2757 && prop->index >= 0
2758 && (bbend = BB_END (prop))
2759 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2760 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2761 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2762 {
2763 if (dump_file)
2764 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2765 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2766 bb->index, prop->index, bb->loop_depth,
2767 INSN_UID (branch), loop_exit, simple_loop,
2768 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2769
2770 spu_bb_info[prop->index].prop_jump = branch;
2771 spu_bb_info[prop->index].bb_index = i;
2772 }
2773 else if (branch_addr - next_addr >= required_dist)
2774 {
2775 if (dump_file)
2776 fprintf (dump_file, "hint for %i in block %i before %i\n",
2777 INSN_UID (branch), bb->index,
2778 INSN_UID (NEXT_INSN (insn)));
2779 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2780 branch_addr - next_addr, blocks);
2781 }
2782 branch = 0;
644459d0 2783 }
644459d0 2784 }
5a976006 2785 free (spu_bb_info);
644459d0 2786
5a976006 2787 if (!sbitmap_empty_p (blocks))
2788 find_many_sub_basic_blocks (blocks);
2789
2790 /* We have to schedule to make sure alignment is ok. */
2791 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2792
2793 /* The hints need to be scheduled, so call it again. */
2794 schedule_insns ();
2795
2796 insert_hbrp ();
2797
2798 pad_bb ();
2799
8f1d58ad 2800 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2801 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2802 {
2803 /* Adjust the LABEL_REF in a hint when we have inserted a nop
 2804	 between its branch label and the branch.  We don't move the
2805 label because GCC expects it at the beginning of the block. */
2806 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2807 rtx label_ref = XVECEXP (unspec, 0, 0);
2808 rtx label = XEXP (label_ref, 0);
2809 rtx branch;
2810 int offset = 0;
2811 for (branch = NEXT_INSN (label);
2812 !JUMP_P (branch) && !CALL_P (branch);
2813 branch = NEXT_INSN (branch))
2814 if (NONJUMP_INSN_P (branch))
2815 offset += get_attr_length (branch);
2816 if (offset > 0)
2817 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2818 }
5a976006 2819
2820 if (spu_flag_var_tracking)
644459d0 2821 {
5a976006 2822 df_analyze ();
2823 timevar_push (TV_VAR_TRACKING);
2824 variable_tracking_main ();
2825 timevar_pop (TV_VAR_TRACKING);
2826 df_finish_pass (false);
644459d0 2827 }
5a976006 2828
2829 free_bb_for_insn ();
2830
2831 in_spu_reorg = 0;
644459d0 2832}
2833\f
2834
2835/* Insn scheduling routines, primarily for dual issue. */
2836static int
2837spu_sched_issue_rate (void)
2838{
2839 return 2;
2840}
2841
2842static int
5a976006 2843uses_ls_unit(rtx insn)
644459d0 2844{
5a976006 2845 rtx set = single_set (insn);
2846 if (set != 0
2847 && (GET_CODE (SET_DEST (set)) == MEM
2848 || GET_CODE (SET_SRC (set)) == MEM))
2849 return 1;
2850 return 0;
644459d0 2851}
2852
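/* Return the pipe INSN issues to, based on its "type" attribute:
   0 for pipe0 (the FX2/FX3/FXB, SPR, NOP and FP types), 1 for pipe1
   (load, store, shuffle, branch, hint, lnop, MULTI1, IPREFETCH),
   -1 for MULTI0 and inline asm, and -2 for TYPE_CONVERT.  */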
2853static int
2854get_pipe (rtx insn)
2855{
2856 enum attr_type t;
2857 /* Handle inline asm */
2858 if (INSN_CODE (insn) == -1)
2859 return -1;
2860 t = get_attr_type (insn);
2861 switch (t)
2862 {
2863 case TYPE_CONVERT:
2864 return -2;
2865 case TYPE_MULTI0:
2866 return -1;
2867
2868 case TYPE_FX2:
2869 case TYPE_FX3:
2870 case TYPE_SPR:
2871 case TYPE_NOP:
2872 case TYPE_FXB:
2873 case TYPE_FPD:
2874 case TYPE_FP6:
2875 case TYPE_FP7:
644459d0 2876 return 0;
2877
2878 case TYPE_LNOP:
2879 case TYPE_SHUF:
2880 case TYPE_LOAD:
2881 case TYPE_STORE:
2882 case TYPE_BR:
2883 case TYPE_MULTI1:
2884 case TYPE_HBR:
5a976006 2885 case TYPE_IPREFETCH:
644459d0 2886 return 1;
2887 default:
2888 abort ();
2889 }
2890}
2891
5a976006 2892
2893/* haifa-sched.c has a static variable that keeps track of the current
2894 cycle. It is passed to spu_sched_reorder, and we record it here for
2895 use by spu_sched_variable_issue. It won't be accurate if the
 2896   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
2897static int clock_var;
2898
2899/* This is used to keep track of insn alignment. Set to 0 at the
2900 beginning of each block and increased by the "length" attr of each
2901 insn scheduled. */
2902static int spu_sched_length;
2903
2904/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2905 ready list appropriately in spu_sched_reorder(). */
2906static int pipe0_clock;
2907static int pipe1_clock;
2908
2909static int prev_clock_var;
2910
2911static int prev_priority;
2912
2913/* The SPU needs to load the next ilb sometime during the execution of
2914 the previous ilb. There is a potential conflict if every cycle has a
2915 load or store. To avoid the conflict we make sure the load/store
2916 unit is free for at least one cycle during the execution of insns in
2917 the previous ilb. */
2918static int spu_ls_first;
2919static int prev_ls_clock;
2920
2921static void
2922spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2923 int max_ready ATTRIBUTE_UNUSED)
2924{
2925 spu_sched_length = 0;
2926}
2927
2928static void
2929spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2930 int max_ready ATTRIBUTE_UNUSED)
2931{
2932 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2933 {
2934 /* When any block might be at least 8-byte aligned, assume they
2935 will all be at least 8-byte aligned to make sure dual issue
2936 works out correctly. */
2937 spu_sched_length = 0;
2938 }
2939 spu_ls_first = INT_MAX;
2940 clock_var = -1;
2941 prev_ls_clock = -1;
2942 pipe0_clock = -1;
2943 pipe1_clock = -1;
2944 prev_clock_var = -1;
2945 prev_priority = -1;
2946}
2947
644459d0 2948static int
5a976006 2949spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2950 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2951{
5a976006 2952 int len;
2953 int p;
644459d0 2954 if (GET_CODE (PATTERN (insn)) == USE
2955 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2956 || (len = get_attr_length (insn)) == 0)
2957 return more;
2958
2959 spu_sched_length += len;
2960
2961 /* Reset on inline asm */
2962 if (INSN_CODE (insn) == -1)
2963 {
2964 spu_ls_first = INT_MAX;
2965 pipe0_clock = -1;
2966 pipe1_clock = -1;
2967 return 0;
2968 }
2969 p = get_pipe (insn);
2970 if (p == 0)
2971 pipe0_clock = clock_var;
2972 else
2973 pipe1_clock = clock_var;
2974
2975 if (in_spu_reorg)
2976 {
2977 if (clock_var - prev_ls_clock > 1
2978 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2979 spu_ls_first = INT_MAX;
2980 if (uses_ls_unit (insn))
2981 {
2982 if (spu_ls_first == INT_MAX)
2983 spu_ls_first = spu_sched_length;
2984 prev_ls_clock = clock_var;
2985 }
2986
2987 /* The scheduler hasn't inserted the nop, but we will later on.
2988 Include those nops in spu_sched_length. */
2989 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2990 spu_sched_length += 4;
2991 prev_clock_var = clock_var;
2992
2993 /* more is -1 when called from spu_sched_reorder for new insns
2994 that don't have INSN_PRIORITY */
2995 if (more >= 0)
2996 prev_priority = INSN_PRIORITY (insn);
2997 }
2998
2999 /* Always try issueing more insns. spu_sched_reorder will decide
3000 when the cycle should be advanced. */
3001 return 1;
3002}
3003
3004/* This function is called for both TARGET_SCHED_REORDER and
3005 TARGET_SCHED_REORDER2. */
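/* The value returned is the number of insns the scheduler may issue on
   this cycle: returning 0 forces the cycle to advance, returning 1
   issues the insn we have moved to the end of the ready list (the
   scheduler takes insns from the end).  */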
3006static int
3007spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3008 rtx *ready, int *nreadyp, int clock)
3009{
3010 int i, nready = *nreadyp;
3011 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3012 rtx insn;
3013
3014 clock_var = clock;
3015
3016 if (nready <= 0 || pipe1_clock >= clock)
3017 return 0;
3018
3019 /* Find any rtl insns that don't generate assembly insns and schedule
3020 them first. */
3021 for (i = nready - 1; i >= 0; i--)
3022 {
3023 insn = ready[i];
3024 if (INSN_CODE (insn) == -1
3025 || INSN_CODE (insn) == CODE_FOR_blockage
9d98604b 3026 || (INSN_P (insn) && get_attr_length (insn) == 0))
5a976006 3027 {
3028 ready[i] = ready[nready - 1];
3029 ready[nready - 1] = insn;
3030 return 1;
3031 }
3032 }
3033
3034 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3035 for (i = 0; i < nready; i++)
3036 if (INSN_CODE (ready[i]) != -1)
3037 {
3038 insn = ready[i];
3039 switch (get_attr_type (insn))
3040 {
3041 default:
3042 case TYPE_MULTI0:
3043 case TYPE_CONVERT:
3044 case TYPE_FX2:
3045 case TYPE_FX3:
3046 case TYPE_SPR:
3047 case TYPE_NOP:
3048 case TYPE_FXB:
3049 case TYPE_FPD:
3050 case TYPE_FP6:
3051 case TYPE_FP7:
3052 pipe_0 = i;
3053 break;
3054 case TYPE_LOAD:
3055 case TYPE_STORE:
3056 pipe_ls = i;
3057 case TYPE_LNOP:
3058 case TYPE_SHUF:
3059 case TYPE_BR:
3060 case TYPE_MULTI1:
3061 case TYPE_HBR:
3062 pipe_1 = i;
3063 break;
3064 case TYPE_IPREFETCH:
3065 pipe_hbrp = i;
3066 break;
3067 }
3068 }
3069
3070 /* In the first scheduling phase, schedule loads and stores together
3071 to increase the chance they will get merged during postreload CSE. */
3072 if (!reload_completed && pipe_ls >= 0)
3073 {
3074 insn = ready[pipe_ls];
3075 ready[pipe_ls] = ready[nready - 1];
3076 ready[nready - 1] = insn;
3077 return 1;
3078 }
3079
3080 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3081 if (pipe_hbrp >= 0)
3082 pipe_1 = pipe_hbrp;
3083
3084 /* When we have loads/stores in every cycle of the last 15 insns and
3085 we are about to schedule another load/store, emit an hbrp insn
3086 instead. */
3087 if (in_spu_reorg
3088 && spu_sched_length - spu_ls_first >= 4 * 15
3089 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3090 {
3091 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3092 recog_memoized (insn);
3093 if (pipe0_clock < clock)
3094 PUT_MODE (insn, TImode);
3095 spu_sched_variable_issue (file, verbose, insn, -1);
3096 return 0;
3097 }
3098
3099 /* In general, we want to emit nops to increase dual issue, but dual
3100 issue isn't faster when one of the insns could be scheduled later
 3101      without affecting the critical path.  We look at INSN_PRIORITY to
 3102      make a good guess, but it isn't perfect so -mdual-nops=n can be
 3103      used to control it.  */
3104 if (in_spu_reorg && spu_dual_nops < 10)
3105 {
 3106       /* When we are at an even address and we are not issuing nops to
3107 improve scheduling then we need to advance the cycle. */
3108 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3109 && (spu_dual_nops == 0
3110 || (pipe_1 != -1
3111 && prev_priority >
3112 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3113 return 0;
3114
3115 /* When at an odd address, schedule the highest priority insn
3116 without considering pipeline. */
3117 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3118 && (spu_dual_nops == 0
3119 || (prev_priority >
3120 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3121 return 1;
3122 }
3123
3124
3125 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3126 pipe0 insn in the ready list, schedule it. */
3127 if (pipe0_clock < clock && pipe_0 >= 0)
3128 schedule_i = pipe_0;
3129
3130 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3131 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3132 else
3133 schedule_i = pipe_1;
3134
3135 if (schedule_i > -1)
3136 {
3137 insn = ready[schedule_i];
3138 ready[schedule_i] = ready[nready - 1];
3139 ready[nready - 1] = insn;
3140 return 1;
3141 }
3142 return 0;
644459d0 3143}
3144
3145/* INSN is dependent on DEP_INSN. */
3146static int
5a976006 3147spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3148{
5a976006 3149 rtx set;
3150
3151 /* The blockage pattern is used to prevent instructions from being
3152 moved across it and has no cost. */
3153 if (INSN_CODE (insn) == CODE_FOR_blockage
3154 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3155 return 0;
3156
9d98604b 3157 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3158 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
5a976006 3159 return 0;
3160
3161 /* Make sure hbrps are spread out. */
3162 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3163 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3164 return 8;
3165
3166 /* Make sure hints and hbrps are 2 cycles apart. */
3167 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3168 || INSN_CODE (insn) == CODE_FOR_hbr)
3169 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3170 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3171 return 2;
3172
3173 /* An hbrp has no real dependency on other insns. */
3174 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3175 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3176 return 0;
3177
3178 /* Assuming that it is unlikely an argument register will be used in
3179 the first cycle of the called function, we reduce the cost for
3180 slightly better scheduling of dep_insn. When not hinted, the
3181 mispredicted branch would hide the cost as well. */
3182 if (CALL_P (insn))
3183 {
3184 rtx target = get_branch_target (insn);
3185 if (GET_CODE (target) != REG || !set_of (target, insn))
3186 return cost - 2;
3187 return cost;
3188 }
3189
3190 /* And when returning from a function, let's assume the return values
3191 are completed sooner too. */
3192 if (CALL_P (dep_insn))
644459d0 3193 return cost - 2;
5a976006 3194
 3195  /* Make sure an instruction that loads from the back chain is scheduled
3196 away from the return instruction so a hint is more likely to get
3197 issued. */
3198 if (INSN_CODE (insn) == CODE_FOR__return
3199 && (set = single_set (dep_insn))
3200 && GET_CODE (SET_DEST (set)) == REG
3201 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3202 return 20;
3203
644459d0 3204 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3205 scheduler makes every insn in a block anti-dependent on the final
3206 jump_insn. We adjust here so higher cost insns will get scheduled
3207 earlier. */
5a976006 3208 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3209 return insn_cost (dep_insn) - 3;
5a976006 3210
644459d0 3211 return cost;
3212}
3213\f
3214/* Create a CONST_DOUBLE from a string. */
3215struct rtx_def *
3216spu_float_const (const char *string, enum machine_mode mode)
3217{
3218 REAL_VALUE_TYPE value;
3219 value = REAL_VALUE_ATOF (string, mode);
3220 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3221}
3222
644459d0 3223int
3224spu_constant_address_p (rtx x)
3225{
3226 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3227 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3228 || GET_CODE (x) == HIGH);
3229}
3230
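/* Classify which single il* instruction can load VAL, e.g.
   (illustrative) 0x1234 -> SPU_IL, 0x12345 -> SPU_ILA,
   0x00050005 -> SPU_ILH, 0x12340000 -> SPU_ILHU, and SPU_NONE when
   no single instruction suffices.  */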
3231static enum spu_immediate
3232which_immediate_load (HOST_WIDE_INT val)
3233{
3234 gcc_assert (val == trunc_int_for_mode (val, SImode));
3235
3236 if (val >= -0x8000 && val <= 0x7fff)
3237 return SPU_IL;
3238 if (val >= 0 && val <= 0x3ffff)
3239 return SPU_ILA;
3240 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3241 return SPU_ILH;
3242 if ((val & 0xffff) == 0)
3243 return SPU_ILHU;
3244
3245 return SPU_NONE;
3246}
3247
dea01258 3248/* Return true when OP can be loaded by one of the il instructions, or
3249 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3250int
3251immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3252{
3253 if (CONSTANT_P (op))
3254 {
3255 enum immediate_class c = classify_immediate (op, mode);
5df189be 3256 return c == IC_IL1 || c == IC_IL1s
3072d30e 3257 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3258 }
3259 return 0;
3260}
3261
 3262/* Return true if the first SIZE bytes of ARR form a constant that can be
3263 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3264 represent the size and offset of the instruction to use. */
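/* A sketch of one accepted pattern (assuming the usual cwd shuffle
   mask): a word insert at offset 4 corresponds to
     arr = { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18, ..., 0x1f }
   for which this function returns run == 4 and start == 4.  */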
3265static int
3266cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3267{
3268 int cpat, run, i, start;
3269 cpat = 1;
3270 run = 0;
3271 start = -1;
3272 for (i = 0; i < size && cpat; i++)
3273 if (arr[i] != i+16)
3274 {
3275 if (!run)
3276 {
3277 start = i;
3278 if (arr[i] == 3)
3279 run = 1;
3280 else if (arr[i] == 2 && arr[i+1] == 3)
3281 run = 2;
3282 else if (arr[i] == 0)
3283 {
3284 while (arr[i+run] == run && i+run < 16)
3285 run++;
3286 if (run != 4 && run != 8)
3287 cpat = 0;
3288 }
3289 else
3290 cpat = 0;
3291 if ((i & (run-1)) != 0)
3292 cpat = 0;
3293 i += run;
3294 }
3295 else
3296 cpat = 0;
3297 }
b01a6dc3 3298 if (cpat && (run || size < 16))
dea01258 3299 {
3300 if (run == 0)
3301 run = 1;
3302 if (prun)
3303 *prun = run;
3304 if (pstart)
3305 *pstart = start == -1 ? 16-run : start;
3306 return 1;
3307 }
3308 return 0;
3309}
3310
3311/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3312 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3313static enum immediate_class
3314classify_immediate (rtx op, enum machine_mode mode)
644459d0 3315{
3316 HOST_WIDE_INT val;
3317 unsigned char arr[16];
5df189be 3318 int i, j, repeated, fsmbi, repeat;
dea01258 3319
3320 gcc_assert (CONSTANT_P (op));
3321
644459d0 3322 if (GET_MODE (op) != VOIDmode)
3323 mode = GET_MODE (op);
3324
dea01258 3325 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3326 if (!flag_pic
3327 && mode == V4SImode
dea01258 3328 && GET_CODE (op) == CONST_VECTOR
3329 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3330 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3331 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3332 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3333 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3334 op = CONST_VECTOR_ELT (op, 0);
644459d0 3335
dea01258 3336 switch (GET_CODE (op))
3337 {
3338 case SYMBOL_REF:
3339 case LABEL_REF:
3340 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3341
dea01258 3342 case CONST:
0cfc65d4 3343 /* We can never know if the resulting address fits in 18 bits and can be
3344 loaded with ila. For now, assume the address will not overflow if
3345 the displacement is "small" (fits 'K' constraint). */
3346 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3347 {
3348 rtx sym = XEXP (XEXP (op, 0), 0);
3349 rtx cst = XEXP (XEXP (op, 0), 1);
3350
3351 if (GET_CODE (sym) == SYMBOL_REF
3352 && GET_CODE (cst) == CONST_INT
3353 && satisfies_constraint_K (cst))
3354 return IC_IL1s;
3355 }
3356 return IC_IL2s;
644459d0 3357
dea01258 3358 case HIGH:
3359 return IC_IL1s;
3360
3361 case CONST_VECTOR:
3362 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3363 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3364 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3365 return IC_POOL;
3366 /* Fall through. */
3367
3368 case CONST_INT:
3369 case CONST_DOUBLE:
3370 constant_to_array (mode, op, arr);
644459d0 3371
dea01258 3372 /* Check that each 4-byte slot is identical. */
3373 repeated = 1;
3374 for (i = 4; i < 16; i += 4)
3375 for (j = 0; j < 4; j++)
3376 if (arr[j] != arr[i + j])
3377 repeated = 0;
3378
3379 if (repeated)
3380 {
3381 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3382 val = trunc_int_for_mode (val, SImode);
3383
3384 if (which_immediate_load (val) != SPU_NONE)
3385 return IC_IL1;
3386 }
3387
3388 /* Any mode of 2 bytes or smaller can be loaded with an il
3389 instruction. */
3390 gcc_assert (GET_MODE_SIZE (mode) > 2);
3391
3392 fsmbi = 1;
5df189be 3393 repeat = 0;
dea01258 3394 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3395 if (arr[i] != 0 && repeat == 0)
3396 repeat = arr[i];
3397 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3398 fsmbi = 0;
3399 if (fsmbi)
5df189be 3400 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3401
3402 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3403 return IC_CPAT;
3404
3405 if (repeated)
3406 return IC_IL2;
3407
3408 return IC_POOL;
3409 default:
3410 break;
3411 }
3412 gcc_unreachable ();
644459d0 3413}
3414
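/* Classify which or-immediate instruction can produce VAL, e.g.
   (illustrative) 0x1ff -> SPU_ORI, 0x00030003 -> SPU_ORHI,
   0x05050505 -> SPU_ORBI, 0x1234 -> SPU_IOHL, otherwise SPU_NONE.  */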
3415static enum spu_immediate
3416which_logical_immediate (HOST_WIDE_INT val)
3417{
3418 gcc_assert (val == trunc_int_for_mode (val, SImode));
3419
3420 if (val >= -0x200 && val <= 0x1ff)
3421 return SPU_ORI;
3422 if (val >= 0 && val <= 0xffff)
3423 return SPU_IOHL;
3424 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3425 {
3426 val = trunc_int_for_mode (val, HImode);
3427 if (val >= -0x200 && val <= 0x1ff)
3428 return SPU_ORHI;
3429 if ((val & 0xff) == ((val >> 8) & 0xff))
3430 {
3431 val = trunc_int_for_mode (val, QImode);
3432 if (val >= -0x200 && val <= 0x1ff)
3433 return SPU_ORBI;
3434 }
3435 }
3436 return SPU_NONE;
3437}
3438
5df189be 3439/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3440 CONST_DOUBLEs. */
3441static int
3442const_vector_immediate_p (rtx x)
3443{
3444 int i;
3445 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3446 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3447 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3448 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3449 return 0;
3450 return 1;
3451}
3452
644459d0 3453int
3454logical_immediate_p (rtx op, enum machine_mode mode)
3455{
3456 HOST_WIDE_INT val;
3457 unsigned char arr[16];
3458 int i, j;
3459
3460 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3461 || GET_CODE (op) == CONST_VECTOR);
3462
5df189be 3463 if (GET_CODE (op) == CONST_VECTOR
3464 && !const_vector_immediate_p (op))
3465 return 0;
3466
644459d0 3467 if (GET_MODE (op) != VOIDmode)
3468 mode = GET_MODE (op);
3469
3470 constant_to_array (mode, op, arr);
3471
3472 /* Check that bytes are repeated. */
3473 for (i = 4; i < 16; i += 4)
3474 for (j = 0; j < 4; j++)
3475 if (arr[j] != arr[i + j])
3476 return 0;
3477
3478 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3479 val = trunc_int_for_mode (val, SImode);
3480
3481 i = which_logical_immediate (val);
3482 return i != SPU_NONE && i != SPU_IOHL;
3483}
3484
3485int
3486iohl_immediate_p (rtx op, enum machine_mode mode)
3487{
3488 HOST_WIDE_INT val;
3489 unsigned char arr[16];
3490 int i, j;
3491
3492 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3493 || GET_CODE (op) == CONST_VECTOR);
3494
5df189be 3495 if (GET_CODE (op) == CONST_VECTOR
3496 && !const_vector_immediate_p (op))
3497 return 0;
3498
644459d0 3499 if (GET_MODE (op) != VOIDmode)
3500 mode = GET_MODE (op);
3501
3502 constant_to_array (mode, op, arr);
3503
3504 /* Check that bytes are repeated. */
3505 for (i = 4; i < 16; i += 4)
3506 for (j = 0; j < 4; j++)
3507 if (arr[j] != arr[i + j])
3508 return 0;
3509
3510 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3511 val = trunc_int_for_mode (val, SImode);
3512
3513 return val >= 0 && val <= 0xffff;
3514}
3515
3516int
3517arith_immediate_p (rtx op, enum machine_mode mode,
3518 HOST_WIDE_INT low, HOST_WIDE_INT high)
3519{
3520 HOST_WIDE_INT val;
3521 unsigned char arr[16];
3522 int bytes, i, j;
3523
3524 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3525 || GET_CODE (op) == CONST_VECTOR);
3526
5df189be 3527 if (GET_CODE (op) == CONST_VECTOR
3528 && !const_vector_immediate_p (op))
3529 return 0;
3530
644459d0 3531 if (GET_MODE (op) != VOIDmode)
3532 mode = GET_MODE (op);
3533
3534 constant_to_array (mode, op, arr);
3535
3536 if (VECTOR_MODE_P (mode))
3537 mode = GET_MODE_INNER (mode);
3538
3539 bytes = GET_MODE_SIZE (mode);
3540 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3541
3542 /* Check that bytes are repeated. */
3543 for (i = bytes; i < 16; i += bytes)
3544 for (j = 0; j < bytes; j++)
3545 if (arr[j] != arr[i + j])
3546 return 0;
3547
3548 val = arr[0];
3549 for (j = 1; j < bytes; j++)
3550 val = (val << 8) | arr[j];
3551
3552 val = trunc_int_for_mode (val, mode);
3553
3554 return val >= low && val <= high;
3555}
3556
56c7bfc2 3557/* Return TRUE when OP is an immediate that is an exact power of 2 and,
 3558   writing OP as 2^SCALE, LOW <= SCALE && SCALE <= HIGH.  When OP is a
 3559   vector, all entries must be the same.  */
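/* For example, the SFmode constant 4.0f has the bit pattern 0x40800000:
   the mantissa bits are zero and (0x40800000 >> 23) - 127 == 2, so it
   is accepted whenever LOW <= 2 and 2 <= HIGH (illustrative).  */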
3560bool
3561exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3562{
3563 enum machine_mode int_mode;
3564 HOST_WIDE_INT val;
3565 unsigned char arr[16];
3566 int bytes, i, j;
3567
3568 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3569 || GET_CODE (op) == CONST_VECTOR);
3570
3571 if (GET_CODE (op) == CONST_VECTOR
3572 && !const_vector_immediate_p (op))
3573 return 0;
3574
3575 if (GET_MODE (op) != VOIDmode)
3576 mode = GET_MODE (op);
3577
3578 constant_to_array (mode, op, arr);
3579
3580 if (VECTOR_MODE_P (mode))
3581 mode = GET_MODE_INNER (mode);
3582
3583 bytes = GET_MODE_SIZE (mode);
3584 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3585
3586 /* Check that bytes are repeated. */
3587 for (i = bytes; i < 16; i += bytes)
3588 for (j = 0; j < bytes; j++)
3589 if (arr[j] != arr[i + j])
3590 return 0;
3591
3592 val = arr[0];
3593 for (j = 1; j < bytes; j++)
3594 val = (val << 8) | arr[j];
3595
3596 val = trunc_int_for_mode (val, int_mode);
3597
3598 /* Currently, we only handle SFmode */
3599 gcc_assert (mode == SFmode);
3600 if (mode == SFmode)
3601 {
3602 int exp = (val >> 23) - 127;
3603 return val > 0 && (val & 0x007fffff) == 0
3604 && exp >= low && exp <= high;
3605 }
3606 return FALSE;
3607}
3608
644459d0 3609/* We accept:
5b865faf 3610 - any 32-bit constant (SImode, SFmode)
644459d0 3611 - any constant that can be generated with fsmbi (any mode)
5b865faf 3612 - a 64-bit constant where the high and low bits are identical
644459d0 3613 (DImode, DFmode)
5b865faf 3614 - a 128-bit constant where the four 32-bit words match. */
644459d0 3615int
3616spu_legitimate_constant_p (rtx x)
3617{
5df189be 3618 if (GET_CODE (x) == HIGH)
3619 x = XEXP (x, 0);
644459d0 3620 /* V4SI with all identical symbols is valid. */
5df189be 3621 if (!flag_pic
3622 && GET_MODE (x) == V4SImode
644459d0 3623 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3624 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3625 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3626 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3627 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3628 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3629
5df189be 3630 if (GET_CODE (x) == CONST_VECTOR
3631 && !const_vector_immediate_p (x))
3632 return 0;
644459d0 3633 return 1;
3634}
3635
 3636/* Valid addresses are:
3637 - symbol_ref, label_ref, const
3638 - reg
9d98604b 3639 - reg + const_int, where const_int is 16 byte aligned
644459d0 3640 - reg + reg, alignment doesn't matter
3641 The alignment matters in the reg+const case because lqd and stqd
9d98604b 3642 ignore the 4 least significant bits of the const. We only care about
3643 16 byte modes because the expand phase will change all smaller MEM
3644 references to TImode. */
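/* For instance (illustrative), with a 16-byte mode an address of the
   form $sp + 32 is accepted while $sp + 20 is rejected, because lqd
   and stqd would silently drop the low 4 bits of the displacement.  */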
3645static bool
3646spu_legitimate_address_p (enum machine_mode mode,
fd50b071 3647 rtx x, bool reg_ok_strict)
644459d0 3648{
9d98604b 3649 int aligned = GET_MODE_SIZE (mode) >= 16;
3650 if (aligned
3651 && GET_CODE (x) == AND
644459d0 3652 && GET_CODE (XEXP (x, 1)) == CONST_INT
9d98604b 3653 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
644459d0 3654 x = XEXP (x, 0);
3655 switch (GET_CODE (x))
3656 {
644459d0 3657 case LABEL_REF:
9d98604b 3658 case SYMBOL_REF:
644459d0 3659 case CONST:
9d98604b 3660 return !TARGET_LARGE_MEM;
644459d0 3661
3662 case CONST_INT:
3663 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3664
3665 case SUBREG:
3666 x = XEXP (x, 0);
9d98604b 3667 if (REG_P (x))
3668 return 0;
644459d0 3669
3670 case REG:
3671 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3672
3673 case PLUS:
3674 case LO_SUM:
3675 {
3676 rtx op0 = XEXP (x, 0);
3677 rtx op1 = XEXP (x, 1);
3678 if (GET_CODE (op0) == SUBREG)
3679 op0 = XEXP (op0, 0);
3680 if (GET_CODE (op1) == SUBREG)
3681 op1 = XEXP (op1, 0);
644459d0 3682 if (GET_CODE (op0) == REG
3683 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3684 && GET_CODE (op1) == CONST_INT
3685 && INTVAL (op1) >= -0x2000
3686 && INTVAL (op1) <= 0x1fff
9d98604b 3687 && (!aligned || (INTVAL (op1) & 15) == 0))
3688 return TRUE;
644459d0 3689 if (GET_CODE (op0) == REG
3690 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3691 && GET_CODE (op1) == REG
3692 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
9d98604b 3693 return TRUE;
644459d0 3694 }
3695 break;
3696
3697 default:
3698 break;
3699 }
9d98604b 3700 return FALSE;
644459d0 3701}
3702
3703/* When the address is reg + const_int, force the const_int into a
fa7637bd 3704 register. */
644459d0 3705rtx
3706spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
41e3a0c7 3707 enum machine_mode mode ATTRIBUTE_UNUSED)
644459d0 3708{
3709 rtx op0, op1;
3710 /* Make sure both operands are registers. */
3711 if (GET_CODE (x) == PLUS)
3712 {
3713 op0 = XEXP (x, 0);
3714 op1 = XEXP (x, 1);
3715 if (ALIGNED_SYMBOL_REF_P (op0))
3716 {
3717 op0 = force_reg (Pmode, op0);
3718 mark_reg_pointer (op0, 128);
3719 }
3720 else if (GET_CODE (op0) != REG)
3721 op0 = force_reg (Pmode, op0);
3722 if (ALIGNED_SYMBOL_REF_P (op1))
3723 {
3724 op1 = force_reg (Pmode, op1);
3725 mark_reg_pointer (op1, 128);
3726 }
3727 else if (GET_CODE (op1) != REG)
3728 op1 = force_reg (Pmode, op1);
3729 x = gen_rtx_PLUS (Pmode, op0, op1);
644459d0 3730 }
41e3a0c7 3731 return x;
644459d0 3732}
3733
3734/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3735 struct attribute_spec.handler. */
3736static tree
3737spu_handle_fndecl_attribute (tree * node,
3738 tree name,
3739 tree args ATTRIBUTE_UNUSED,
3740 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3741{
3742 if (TREE_CODE (*node) != FUNCTION_DECL)
3743 {
67a779df 3744 warning (0, "%qE attribute only applies to functions",
3745 name);
644459d0 3746 *no_add_attrs = true;
3747 }
3748
3749 return NULL_TREE;
3750}
3751
3752/* Handle the "vector" attribute. */
3753static tree
3754spu_handle_vector_attribute (tree * node, tree name,
3755 tree args ATTRIBUTE_UNUSED,
3756 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3757{
3758 tree type = *node, result = NULL_TREE;
3759 enum machine_mode mode;
3760 int unsigned_p;
3761
3762 while (POINTER_TYPE_P (type)
3763 || TREE_CODE (type) == FUNCTION_TYPE
3764 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3765 type = TREE_TYPE (type);
3766
3767 mode = TYPE_MODE (type);
3768
3769 unsigned_p = TYPE_UNSIGNED (type);
3770 switch (mode)
3771 {
3772 case DImode:
3773 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3774 break;
3775 case SImode:
3776 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3777 break;
3778 case HImode:
3779 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3780 break;
3781 case QImode:
3782 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3783 break;
3784 case SFmode:
3785 result = V4SF_type_node;
3786 break;
3787 case DFmode:
3788 result = V2DF_type_node;
3789 break;
3790 default:
3791 break;
3792 }
3793
3794 /* Propagate qualifiers attached to the element type
3795 onto the vector type. */
3796 if (result && result != type && TYPE_QUALS (type))
3797 result = build_qualified_type (result, TYPE_QUALS (type));
3798
3799 *no_add_attrs = true; /* No need to hang on to the attribute. */
3800
3801 if (!result)
67a779df 3802 warning (0, "%qE attribute ignored", name);
644459d0 3803 else
d991e6e8 3804 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3805
3806 return NULL_TREE;
3807}
3808
f2b32076 3809/* Return nonzero if FUNC is a naked function. */
644459d0 3810static int
3811spu_naked_function_p (tree func)
3812{
3813 tree a;
3814
3815 if (TREE_CODE (func) != FUNCTION_DECL)
3816 abort ();
3817
3818 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3819 return a != NULL_TREE;
3820}
3821
3822int
3823spu_initial_elimination_offset (int from, int to)
3824{
3825 int saved_regs_size = spu_saved_regs_size ();
3826 int sp_offset = 0;
abe32cce 3827 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3828 || get_frame_size () || saved_regs_size)
3829 sp_offset = STACK_POINTER_OFFSET;
3830 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3831 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3832 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3833 return get_frame_size ();
644459d0 3834 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3835 return sp_offset + crtl->outgoing_args_size
644459d0 3836 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3837 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3838 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3839 else
3840 gcc_unreachable ();
644459d0 3841}
3842
3843rtx
fb80456a 3844spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3845{
3846 enum machine_mode mode = TYPE_MODE (type);
3847 int byte_size = ((mode == BLKmode)
3848 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3849
3850 /* Make sure small structs are left justified in a register. */
3851 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3852 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3853 {
3854 enum machine_mode smode;
3855 rtvec v;
3856 int i;
3857 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3858 int n = byte_size / UNITS_PER_WORD;
3859 v = rtvec_alloc (nregs);
3860 for (i = 0; i < n; i++)
3861 {
3862 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3863 gen_rtx_REG (TImode,
3864 FIRST_RETURN_REGNUM
3865 + i),
3866 GEN_INT (UNITS_PER_WORD * i));
3867 byte_size -= UNITS_PER_WORD;
3868 }
3869
3870 if (n < nregs)
3871 {
3872 if (byte_size < 4)
3873 byte_size = 4;
3874 smode =
3875 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3876 RTVEC_ELT (v, n) =
3877 gen_rtx_EXPR_LIST (VOIDmode,
3878 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3879 GEN_INT (UNITS_PER_WORD * n));
3880 }
3881 return gen_rtx_PARALLEL (mode, v);
3882 }
3883 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3884}
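
/* Example (illustrative, not from the original source): an 8-byte
   struct is returned left-justified, as a single DImode piece at byte
   offset 0 of the first return register, which is exactly what the
   PARALLEL built above expresses.  */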
3885
3886rtx
3887spu_function_arg (CUMULATIVE_ARGS cum,
3888 enum machine_mode mode,
3889 tree type, int named ATTRIBUTE_UNUSED)
3890{
3891 int byte_size;
3892
3893 if (cum >= MAX_REGISTER_ARGS)
3894 return 0;
3895
3896 byte_size = ((mode == BLKmode)
3897 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3898
3899 /* The ABI does not allow parameters to be passed partially in
3900 reg and partially in stack. */
3901 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3902 return 0;
3903
3904 /* Make sure small structs are left justified in a register. */
3905 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3906 && byte_size < UNITS_PER_WORD && byte_size > 0)
3907 {
3908 enum machine_mode smode;
3909 rtx gr_reg;
3910 if (byte_size < 4)
3911 byte_size = 4;
3912 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3913 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3914 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3915 const0_rtx);
3916 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3917 }
3918 else
3919 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3920}
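
/* Example (illustrative, not from the original source): a 6-byte
   struct passed by value is wrapped in a DImode piece (the smallest
   integer mode holding 6 bytes) at offset 0, so it occupies the
   left-most 8 bytes of its argument register, mirroring the
   left-justification used for return values above.  */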
3921
3922/* Variable sized types are passed by reference. */
3923static bool
3924spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3925 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3926 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3927{
3928 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3929}
3930\f
3931
3932/* Var args. */
3933
3934/* Create and return the va_list datatype.
3935
3936 On SPU, va_list is an array type equivalent to
3937
3938 typedef struct __va_list_tag
3939 {
3940 void *__args __attribute__((__aligned(16)));
3941 void *__skip __attribute__((__aligned(16)));
3942
3943 } va_list[1];
3944
fa7637bd 3945 where __args points to the arg that will be returned by the next
644459d0 3946 va_arg(), and __skip points to the previous stack frame such that
3947 when __args == __skip we should advance __args by 32 bytes. */
3948static tree
3949spu_build_builtin_va_list (void)
3950{
3951 tree f_args, f_skip, record, type_decl;
3952 bool owp;
3953
3954 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3955
3956 type_decl =
54e46243 3957 build_decl (BUILTINS_LOCATION,
3958 TYPE_DECL, get_identifier ("__va_list_tag"), record);
644459d0 3959
54e46243 3960 f_args = build_decl (BUILTINS_LOCATION,
3961 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3962 f_skip = build_decl (BUILTINS_LOCATION,
3963 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
644459d0 3964
3965 DECL_FIELD_CONTEXT (f_args) = record;
3966 DECL_ALIGN (f_args) = 128;
3967 DECL_USER_ALIGN (f_args) = 1;
3968
3969 DECL_FIELD_CONTEXT (f_skip) = record;
3970 DECL_ALIGN (f_skip) = 128;
3971 DECL_USER_ALIGN (f_skip) = 1;
3972
3973 TREE_CHAIN (record) = type_decl;
3974 TYPE_NAME (record) = type_decl;
3975 TYPE_FIELDS (record) = f_args;
3976 TREE_CHAIN (f_args) = f_skip;
3977
 3978  /* We know this is being padded and we want it that way.  It is an internal
 3979     type so hide the warnings from the user. */
3980 owp = warn_padded;
3981 warn_padded = false;
3982
3983 layout_type (record);
3984
3985 warn_padded = owp;
3986
3987 /* The correct type is an array type of one element. */
3988 return build_array_type (record, build_index_type (size_zero_node));
3989}
3990
3991/* Implement va_start by filling the va_list structure VALIST.
3992 NEXTARG points to the first anonymous stack argument.
3993
3994 The following global variables are used to initialize
3995 the va_list structure:
3996
abe32cce 3997 crtl->args.info;
644459d0 3998 the CUMULATIVE_ARGS for this function
3999
abe32cce 4000 crtl->args.arg_offset_rtx:
644459d0 4001 holds the offset of the first anonymous stack argument
4002 (relative to the virtual arg pointer). */
4003
8a58ed0a 4004static void
644459d0 4005spu_va_start (tree valist, rtx nextarg)
4006{
4007 tree f_args, f_skip;
4008 tree args, skip, t;
4009
4010 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4011 f_skip = TREE_CHAIN (f_args);
4012
4013 valist = build_va_arg_indirect_ref (valist);
4014 args =
4015 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4016 skip =
4017 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4018
4019 /* Find the __args area. */
4020 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 4021 if (crtl->args.pretend_args_size > 0)
0de36bdb 4022 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4023 size_int (-STACK_POINTER_OFFSET));
75a70cf9 4024 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 4025 TREE_SIDE_EFFECTS (t) = 1;
4026 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4027
4028 /* Find the __skip area. */
4029 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 4030 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 4031 size_int (crtl->args.pretend_args_size
0de36bdb 4032 - STACK_POINTER_OFFSET));
75a70cf9 4033 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 4034 TREE_SIDE_EFFECTS (t) = 1;
4035 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4036}
4037
4038/* Gimplify va_arg by updating the va_list structure
4039 VALIST as required to retrieve an argument of type
4040 TYPE, and returning that argument.
4041
4042 ret = va_arg(VALIST, TYPE);
4043
4044 generates code equivalent to:
4045
4046 paddedsize = (sizeof(TYPE) + 15) & -16;
4047 if (VALIST.__args + paddedsize > VALIST.__skip
4048 && VALIST.__args <= VALIST.__skip)
4049 addr = VALIST.__skip + 32;
4050 else
4051 addr = VALIST.__args;
4052 VALIST.__args = addr + paddedsize;
4053 ret = *(TYPE *)addr;
4054 */
4055static tree
75a70cf9 4056spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4057 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 4058{
4059 tree f_args, f_skip;
4060 tree args, skip;
4061 HOST_WIDE_INT size, rsize;
4062 tree paddedsize, addr, tmp;
4063 bool pass_by_reference_p;
4064
4065 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4066 f_skip = TREE_CHAIN (f_args);
4067
4068 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4069 args =
4070 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4071 skip =
4072 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4073
4074 addr = create_tmp_var (ptr_type_node, "va_arg");
644459d0 4075
4076 /* if an object is dynamically sized, a pointer to it is passed
4077 instead of the object itself. */
4078 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4079 false);
4080 if (pass_by_reference_p)
4081 type = build_pointer_type (type);
4082 size = int_size_in_bytes (type);
4083 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4084
4085 /* build conditional expression to calculate addr. The expression
4086 will be gimplified later. */
0de36bdb 4087 paddedsize = size_int (rsize);
75a70cf9 4088 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 4089 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 4090 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4091 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4092 unshare_expr (skip)));
644459d0 4093
4094 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 4095 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4096 size_int (32)), unshare_expr (args));
644459d0 4097
75a70cf9 4098 gimplify_assign (addr, tmp, pre_p);
644459d0 4099
4100 /* update VALIST.__args */
0de36bdb 4101 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 4102 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 4103
8115f0af 4104 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4105 addr);
644459d0 4106
4107 if (pass_by_reference_p)
4108 addr = build_va_arg_indirect_ref (addr);
4109
4110 return build_va_arg_indirect_ref (addr);
4111}
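
/* Worked example of the expansion above (illustrative, not from the
   original source): for va_arg (ap, double) the padded size is 16.
   If ap.__args and ap.__skip are both 0x1000, the condition holds,
   so the value is read from ap.__skip + 32 = 0x1020 and ap.__args is
   advanced to 0x1030.  If ap.__args were 0x0ff0 instead, the
   condition would be false, the value would be read from 0x0ff0 and
   ap.__args would become 0x1000.  */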
4112
4113/* Save parameter registers starting with the register that corresponds
4114 to the first unnamed parameters. If the first unnamed parameter is
4115 in the stack then save no registers. Set pretend_args_size to the
4116 amount of space needed to save the registers. */
4117void
4118spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4119 tree type, int *pretend_size, int no_rtl)
4120{
4121 if (!no_rtl)
4122 {
4123 rtx tmp;
4124 int regno;
4125 int offset;
4126 int ncum = *cum;
4127
 4128      /* cum currently points to the last named argument; we want to
 4129         start at the next argument. */
4130 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4131
4132 offset = -STACK_POINTER_OFFSET;
4133 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4134 {
4135 tmp = gen_frame_mem (V4SImode,
4136 plus_constant (virtual_incoming_args_rtx,
4137 offset));
4138 emit_move_insn (tmp,
4139 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4140 offset += 16;
4141 }
4142 *pretend_size = offset + STACK_POINTER_OFFSET;
4143 }
4144}
4145\f
4146void
4147spu_conditional_register_usage (void)
4148{
4149 if (flag_pic)
4150 {
4151 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4152 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4153 }
644459d0 4154}
4155
9d98604b 4156/* This is called any time we inspect the alignment of a register for
4157 addresses. */
644459d0 4158static int
9d98604b 4159reg_aligned_for_addr (rtx x)
644459d0 4160{
9d98604b 4161 int regno =
4162 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4163 return REGNO_POINTER_ALIGN (regno) >= 128;
644459d0 4164}
4165
69ced2d6 4166/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4167 into its SYMBOL_REF_FLAGS. */
4168static void
4169spu_encode_section_info (tree decl, rtx rtl, int first)
4170{
4171 default_encode_section_info (decl, rtl, first);
4172
4173 /* If a variable has a forced alignment to < 16 bytes, mark it with
4174 SYMBOL_FLAG_ALIGN1. */
4175 if (TREE_CODE (decl) == VAR_DECL
4176 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4177 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4178}
4179
644459d0 4180/* Return TRUE if we are certain the mem refers to a complete object
4181 which is both 16-byte aligned and padded to a 16-byte boundary. This
4182 would make it safe to store with a single instruction.
4183 We guarantee the alignment and padding for static objects by aligning
4184 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4185 FIXME: We currently cannot guarantee this for objects on the stack
4186 because assign_parm_setup_stack calls assign_stack_local with the
4187 alignment of the parameter mode and in that case the alignment never
4188 gets adjusted by LOCAL_ALIGNMENT. */
4189static int
4190store_with_one_insn_p (rtx mem)
4191{
9d98604b 4192 enum machine_mode mode = GET_MODE (mem);
644459d0 4193 rtx addr = XEXP (mem, 0);
9d98604b 4194 if (mode == BLKmode)
644459d0 4195 return 0;
9d98604b 4196 if (GET_MODE_SIZE (mode) >= 16)
4197 return 1;
644459d0 4198 /* Only static objects. */
4199 if (GET_CODE (addr) == SYMBOL_REF)
4200 {
4201 /* We use the associated declaration to make sure the access is
fa7637bd 4202 referring to the whole object.
644459d0 4203	 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4204 if it is necessary. Will there be cases where one exists, and
4205 the other does not? Will there be cases where both exist, but
4206 have different types? */
4207 tree decl = MEM_EXPR (mem);
4208 if (decl
4209 && TREE_CODE (decl) == VAR_DECL
4210 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4211 return 1;
4212 decl = SYMBOL_REF_DECL (addr);
4213 if (decl
4214 && TREE_CODE (decl) == VAR_DECL
4215 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4216 return 1;
4217 }
4218 return 0;
4219}
4220
9d98604b 4221/* Return 1 when the address is not valid for a simple load and store as
4222 required by the '_mov*' patterns. We could make this less strict
 4223   for loads, but we prefer MEMs to look the same so they are more
4224 likely to be merged. */
4225static int
4226address_needs_split (rtx mem)
4227{
4228 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4229 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4230 || !(store_with_one_insn_p (mem)
4231 || mem_is_padded_component_ref (mem))))
4232 return 1;
4233
4234 return 0;
4235}
4236
644459d0 4237int
4238spu_expand_mov (rtx * ops, enum machine_mode mode)
4239{
4240 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4241 abort ();
4242
4243 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4244 {
4245 rtx from = SUBREG_REG (ops[1]);
8d72495d 4246 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4247
4248 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4249 && GET_MODE_CLASS (imode) == MODE_INT
4250 && subreg_lowpart_p (ops[1]));
4251
4252 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4253 imode = SImode;
4254 if (imode != GET_MODE (from))
4255 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4256
4257 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4258 {
99bdde56 4259 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4260 emit_insn (GEN_FCN (icode) (ops[0], from));
4261 }
4262 else
4263 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4264 return 1;
4265 }
4266
4267 /* At least one of the operands needs to be a register. */
4268 if ((reload_in_progress | reload_completed) == 0
4269 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4270 {
4271 rtx temp = force_reg (mode, ops[1]);
4272 emit_move_insn (ops[0], temp);
4273 return 1;
4274 }
4275 if (reload_in_progress || reload_completed)
4276 {
dea01258 4277 if (CONSTANT_P (ops[1]))
4278 return spu_split_immediate (ops);
644459d0 4279 return 0;
4280 }
9d98604b 4281
4282 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4283 extend them. */
4284 if (GET_CODE (ops[1]) == CONST_INT)
644459d0 4285 {
9d98604b 4286 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4287 if (val != INTVAL (ops[1]))
644459d0 4288 {
9d98604b 4289 emit_move_insn (ops[0], GEN_INT (val));
4290 return 1;
644459d0 4291 }
4292 }
9d98604b 4293 if (MEM_P (ops[0]))
4294 return spu_split_store (ops);
4295 if (MEM_P (ops[1]))
4296 return spu_split_load (ops);
4297
644459d0 4298 return 0;
4299}
4300
9d98604b 4301static void
4302spu_convert_move (rtx dst, rtx src)
644459d0 4303{
9d98604b 4304 enum machine_mode mode = GET_MODE (dst);
4305 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4306 rtx reg;
4307 gcc_assert (GET_MODE (src) == TImode);
4308 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4309 emit_insn (gen_rtx_SET (VOIDmode, reg,
4310 gen_rtx_TRUNCATE (int_mode,
4311 gen_rtx_LSHIFTRT (TImode, src,
4312 GEN_INT (int_mode == DImode ? 64 : 96)))));
4313 if (int_mode != mode)
4314 {
4315 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4316 emit_move_insn (dst, reg);
4317 }
4318}
644459d0 4319
9d98604b 4320/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4321 the address from SRC and SRC+16. Return a REG or CONST_INT that
4322 specifies how many bytes to rotate the loaded registers, plus any
4323 extra from EXTRA_ROTQBY. The address and rotate amounts are
4324 normalized to improve merging of loads and rotate computations. */
4325static rtx
4326spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4327{
4328 rtx addr = XEXP (src, 0);
4329 rtx p0, p1, rot, addr0, addr1;
4330 int rot_amt;
644459d0 4331
4332 rot = 0;
4333 rot_amt = 0;
9d98604b 4334
4335 if (MEM_ALIGN (src) >= 128)
4336 /* Address is already aligned; simply perform a TImode load. */ ;
4337 else if (GET_CODE (addr) == PLUS)
644459d0 4338 {
4339 /* 8 cases:
4340 aligned reg + aligned reg => lqx
4341 aligned reg + unaligned reg => lqx, rotqby
4342 aligned reg + aligned const => lqd
4343 aligned reg + unaligned const => lqd, rotqbyi
4344 unaligned reg + aligned reg => lqx, rotqby
4345 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4346 unaligned reg + aligned const => lqd, rotqby
4347 unaligned reg + unaligned const -> not allowed by legitimate address
4348 */
4349 p0 = XEXP (addr, 0);
4350 p1 = XEXP (addr, 1);
9d98604b 4351 if (!reg_aligned_for_addr (p0))
644459d0 4352 {
9d98604b 4353 if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4354 {
9d98604b 4355 rot = gen_reg_rtx (SImode);
4356 emit_insn (gen_addsi3 (rot, p0, p1));
4357 }
4358 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4359 {
4360 if (INTVAL (p1) > 0
4361 && REG_POINTER (p0)
4362 && INTVAL (p1) * BITS_PER_UNIT
4363 < REGNO_POINTER_ALIGN (REGNO (p0)))
4364 {
4365 rot = gen_reg_rtx (SImode);
4366 emit_insn (gen_addsi3 (rot, p0, p1));
4367 addr = p0;
4368 }
4369 else
4370 {
4371 rtx x = gen_reg_rtx (SImode);
4372 emit_move_insn (x, p1);
4373 if (!spu_arith_operand (p1, SImode))
4374 p1 = x;
4375 rot = gen_reg_rtx (SImode);
4376 emit_insn (gen_addsi3 (rot, p0, p1));
4377 addr = gen_rtx_PLUS (Pmode, p0, x);
4378 }
644459d0 4379 }
4380 else
4381 rot = p0;
4382 }
4383 else
4384 {
4385 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4386 {
4387 rot_amt = INTVAL (p1) & 15;
9d98604b 4388 if (INTVAL (p1) & -16)
4389 {
4390 p1 = GEN_INT (INTVAL (p1) & -16);
4391 addr = gen_rtx_PLUS (SImode, p0, p1);
4392 }
4393 else
4394 addr = p0;
644459d0 4395 }
9d98604b 4396 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
644459d0 4397 rot = p1;
4398 }
4399 }
9d98604b 4400 else if (REG_P (addr))
644459d0 4401 {
9d98604b 4402 if (!reg_aligned_for_addr (addr))
644459d0 4403 rot = addr;
4404 }
4405 else if (GET_CODE (addr) == CONST)
4406 {
4407 if (GET_CODE (XEXP (addr, 0)) == PLUS
4408 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4409 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4410 {
4411 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4412 if (rot_amt & -16)
4413 addr = gen_rtx_CONST (Pmode,
4414 gen_rtx_PLUS (Pmode,
4415 XEXP (XEXP (addr, 0), 0),
4416 GEN_INT (rot_amt & -16)));
4417 else
4418 addr = XEXP (XEXP (addr, 0), 0);
4419 }
4420 else
9d98604b 4421 {
4422 rot = gen_reg_rtx (Pmode);
4423 emit_move_insn (rot, addr);
4424 }
644459d0 4425 }
4426 else if (GET_CODE (addr) == CONST_INT)
4427 {
4428 rot_amt = INTVAL (addr);
4429 addr = GEN_INT (rot_amt & -16);
4430 }
4431 else if (!ALIGNED_SYMBOL_REF_P (addr))
9d98604b 4432 {
4433 rot = gen_reg_rtx (Pmode);
4434 emit_move_insn (rot, addr);
4435 }
644459d0 4436
9d98604b 4437 rot_amt += extra_rotby;
644459d0 4438
4439 rot_amt &= 15;
4440
4441 if (rot && rot_amt)
4442 {
9d98604b 4443 rtx x = gen_reg_rtx (SImode);
4444 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4445 rot = x;
644459d0 4446 rot_amt = 0;
4447 }
9d98604b 4448 if (!rot && rot_amt)
4449 rot = GEN_INT (rot_amt);
4450
4451 addr0 = copy_rtx (addr);
4452 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4453 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4454
4455 if (dst1)
4456 {
4457 addr1 = plus_constant (copy_rtx (addr), 16);
4458 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4459 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4460 }
644459d0 4461
9d98604b 4462 return rot;
4463}
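
/* Illustrative example (not from the original source): an SImode load
   from (plus (reg R) (const_int 6)), with R known to be 16-byte
   aligned, is the "aligned reg + unaligned const" case above: the
   quadword at R is loaded (the low address bits are masked off), ROT
   stays 0 and ROT_AMT becomes 6, so the caller emits an immediate
   byte rotate that brings the word at byte offset 6 into the
   preferred slot.  EXTRA_ROTBY is simply added to ROT_AMT before the
   final reduction modulo 16.  */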
4464
4465int
4466spu_split_load (rtx * ops)
4467{
4468 enum machine_mode mode = GET_MODE (ops[0]);
4469 rtx addr, load, rot;
4470 int rot_amt;
644459d0 4471
9d98604b 4472 if (GET_MODE_SIZE (mode) >= 16)
4473 return 0;
644459d0 4474
9d98604b 4475 addr = XEXP (ops[1], 0);
4476 gcc_assert (GET_CODE (addr) != AND);
4477
4478 if (!address_needs_split (ops[1]))
4479 {
4480 ops[1] = change_address (ops[1], TImode, addr);
4481 load = gen_reg_rtx (TImode);
4482 emit_insn (gen__movti (load, ops[1]));
4483 spu_convert_move (ops[0], load);
4484 return 1;
4485 }
4486
4487 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4488
4489 load = gen_reg_rtx (TImode);
4490 rot = spu_expand_load (load, 0, ops[1], rot_amt);
644459d0 4491
4492 if (rot)
4493 emit_insn (gen_rotqby_ti (load, load, rot));
644459d0 4494
9d98604b 4495 spu_convert_move (ops[0], load);
4496 return 1;
644459d0 4497}
4498
9d98604b 4499int
644459d0 4500spu_split_store (rtx * ops)
4501{
4502 enum machine_mode mode = GET_MODE (ops[0]);
9d98604b 4503 rtx reg;
644459d0 4504 rtx addr, p0, p1, p1_lo, smem;
4505 int aform;
4506 int scalar;
4507
9d98604b 4508 if (GET_MODE_SIZE (mode) >= 16)
4509 return 0;
4510
644459d0 4511 addr = XEXP (ops[0], 0);
9d98604b 4512 gcc_assert (GET_CODE (addr) != AND);
4513
4514 if (!address_needs_split (ops[0]))
4515 {
4516 reg = gen_reg_rtx (TImode);
4517 emit_insn (gen_spu_convert (reg, ops[1]));
4518 ops[0] = change_address (ops[0], TImode, addr);
4519 emit_move_insn (ops[0], reg);
4520 return 1;
4521 }
644459d0 4522
4523 if (GET_CODE (addr) == PLUS)
4524 {
4525 /* 8 cases:
4526 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4527 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4528 aligned reg + aligned const => lqd, c?d, shuf, stqx
4529 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4530 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4531 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4532 unaligned reg + aligned const => lqd, c?d, shuf, stqx
9d98604b 4533 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
644459d0 4534 */
4535 aform = 0;
4536 p0 = XEXP (addr, 0);
4537 p1 = p1_lo = XEXP (addr, 1);
9d98604b 4538 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
644459d0 4539 {
4540 p1_lo = GEN_INT (INTVAL (p1) & 15);
9d98604b 4541 if (reg_aligned_for_addr (p0))
4542 {
4543 p1 = GEN_INT (INTVAL (p1) & -16);
4544 if (p1 == const0_rtx)
4545 addr = p0;
4546 else
4547 addr = gen_rtx_PLUS (SImode, p0, p1);
4548 }
4549 else
4550 {
4551 rtx x = gen_reg_rtx (SImode);
4552 emit_move_insn (x, p1);
4553 addr = gen_rtx_PLUS (SImode, p0, x);
4554 }
644459d0 4555 }
4556 }
9d98604b 4557 else if (REG_P (addr))
644459d0 4558 {
4559 aform = 0;
4560 p0 = addr;
4561 p1 = p1_lo = const0_rtx;
4562 }
4563 else
4564 {
4565 aform = 1;
4566 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4567 p1 = 0; /* aform doesn't use p1 */
4568 p1_lo = addr;
4569 if (ALIGNED_SYMBOL_REF_P (addr))
4570 p1_lo = const0_rtx;
9d98604b 4571 else if (GET_CODE (addr) == CONST
4572 && GET_CODE (XEXP (addr, 0)) == PLUS
4573 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4574 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
644459d0 4575 {
9d98604b 4576 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4577 if ((v & -16) != 0)
4578 addr = gen_rtx_CONST (Pmode,
4579 gen_rtx_PLUS (Pmode,
4580 XEXP (XEXP (addr, 0), 0),
4581 GEN_INT (v & -16)));
4582 else
4583 addr = XEXP (XEXP (addr, 0), 0);
4584 p1_lo = GEN_INT (v & 15);
644459d0 4585 }
4586 else if (GET_CODE (addr) == CONST_INT)
4587 {
4588 p1_lo = GEN_INT (INTVAL (addr) & 15);
4589 addr = GEN_INT (INTVAL (addr) & -16);
4590 }
9d98604b 4591 else
4592 {
4593 p1_lo = gen_reg_rtx (SImode);
4594 emit_move_insn (p1_lo, addr);
4595 }
644459d0 4596 }
4597
9d98604b 4598 reg = gen_reg_rtx (TImode);
e04cf423 4599
644459d0 4600 scalar = store_with_one_insn_p (ops[0]);
4601 if (!scalar)
4602 {
 4603	  /* We could copy the flags from the ops[0] MEM to mem here, but
 4604	     we don't because we want this load to be optimized away if
4605 possible, and copying the flags will prevent that in certain
4606 cases, e.g. consider the volatile flag. */
4607
9d98604b 4608 rtx pat = gen_reg_rtx (TImode);
e04cf423 4609 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4610 set_mem_alias_set (lmem, 0);
4611 emit_insn (gen_movti (reg, lmem));
644459d0 4612
9d98604b 4613 if (!p0 || reg_aligned_for_addr (p0))
644459d0 4614 p0 = stack_pointer_rtx;
4615 if (!p1_lo)
4616 p1_lo = const0_rtx;
4617
4618 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4619 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4620 }
644459d0 4621 else
4622 {
4623 if (GET_CODE (ops[1]) == REG)
4624 emit_insn (gen_spu_convert (reg, ops[1]));
4625 else if (GET_CODE (ops[1]) == SUBREG)
4626 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4627 else
4628 abort ();
4629 }
4630
4631 if (GET_MODE_SIZE (mode) < 4 && scalar)
9d98604b 4632 emit_insn (gen_ashlti3
4633 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
644459d0 4634
9d98604b 4635 smem = change_address (ops[0], TImode, copy_rtx (addr));
644459d0 4636 /* We can't use the previous alias set because the memory has changed
4637 size and can potentially overlap objects of other types. */
4638 set_mem_alias_set (smem, 0);
4639
e04cf423 4640 emit_insn (gen_movti (smem, reg));
9d98604b 4641 return 1;
644459d0 4642}
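
/* Illustrative example (not from the original source): storing a
   QImode value R to (plus (reg P) (const_int 5)), with P 16-byte
   aligned, follows the "aligned reg + unaligned const" row above:
   the containing quadword is loaded from P, a cbd builds the control
   word for inserting one byte at offset 5, shufb merges R into the
   loaded quadword, and the result is written back as a full quadword
   store.  */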
4643
4644/* Return TRUE if X is MEM which is a struct member reference
4645 and the member can safely be loaded and stored with a single
4646 instruction because it is padded. */
4647static int
4648mem_is_padded_component_ref (rtx x)
4649{
4650 tree t = MEM_EXPR (x);
4651 tree r;
4652 if (!t || TREE_CODE (t) != COMPONENT_REF)
4653 return 0;
4654 t = TREE_OPERAND (t, 1);
4655 if (!t || TREE_CODE (t) != FIELD_DECL
4656 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4657 return 0;
4658 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4659 r = DECL_FIELD_CONTEXT (t);
4660 if (!r || TREE_CODE (r) != RECORD_TYPE)
4661 return 0;
4662 /* Make sure they are the same mode */
4663 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4664 return 0;
4665 /* If there are no following fields then the field alignment assures
fa7637bd 4666 the structure is padded to the alignment which means this field is
4667 padded too. */
644459d0 4668 if (TREE_CHAIN (t) == 0)
4669 return 1;
4670 /* If the following field is also aligned then this field will be
4671 padded. */
4672 t = TREE_CHAIN (t);
4673 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4674 return 1;
4675 return 0;
4676}
4677
c7b91b14 4678/* Parse the -mfixed-range= option string. */
4679static void
4680fix_range (const char *const_str)
4681{
4682 int i, first, last;
4683 char *str, *dash, *comma;
4684
 4685   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4686 REG2 are either register names or register numbers. The effect
4687 of this option is to mark the registers in the range from REG1 to
4688 REG2 as ``fixed'' so they won't be used by the compiler. */
4689
4690 i = strlen (const_str);
4691 str = (char *) alloca (i + 1);
4692 memcpy (str, const_str, i + 1);
4693
4694 while (1)
4695 {
4696 dash = strchr (str, '-');
4697 if (!dash)
4698 {
4699 warning (0, "value of -mfixed-range must have form REG1-REG2");
4700 return;
4701 }
4702 *dash = '\0';
4703 comma = strchr (dash + 1, ',');
4704 if (comma)
4705 *comma = '\0';
4706
4707 first = decode_reg_name (str);
4708 if (first < 0)
4709 {
4710 warning (0, "unknown register name: %s", str);
4711 return;
4712 }
4713
4714 last = decode_reg_name (dash + 1);
4715 if (last < 0)
4716 {
4717 warning (0, "unknown register name: %s", dash + 1);
4718 return;
4719 }
4720
4721 *dash = '-';
4722
4723 if (first > last)
4724 {
4725 warning (0, "%s-%s is an empty range", str, dash + 1);
4726 return;
4727 }
4728
4729 for (i = first; i <= last; ++i)
4730 fixed_regs[i] = call_used_regs[i] = 1;
4731
4732 if (!comma)
4733 break;
4734
4735 *comma = ',';
4736 str = comma + 1;
4737 }
4738}
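
/* Example (illustrative, not from the original source):
   -mfixed-range=80-90 marks registers 80 through 90 as fixed and
   call-used, so the register allocator never uses them; several
   ranges can be combined with commas, e.g.
   -mfixed-range=80-90,100-110.  */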
4739
644459d0 4740/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4741 can be generated using the fsmbi instruction. */
4742int
4743fsmbi_const_p (rtx x)
4744{
dea01258 4745 if (CONSTANT_P (x))
4746 {
5df189be 4747 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4748 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4749 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4750 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4751 }
4752 return 0;
4753}
4754
4755/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4756 can be generated using the cbd, chd, cwd or cdd instruction. */
4757int
4758cpat_const_p (rtx x, enum machine_mode mode)
4759{
4760 if (CONSTANT_P (x))
4761 {
4762 enum immediate_class c = classify_immediate (x, mode);
4763 return c == IC_CPAT;
4764 }
4765 return 0;
4766}
644459d0 4767
dea01258 4768rtx
4769gen_cpat_const (rtx * ops)
4770{
4771 unsigned char dst[16];
4772 int i, offset, shift, isize;
4773 if (GET_CODE (ops[3]) != CONST_INT
4774 || GET_CODE (ops[2]) != CONST_INT
4775 || (GET_CODE (ops[1]) != CONST_INT
4776 && GET_CODE (ops[1]) != REG))
4777 return 0;
4778 if (GET_CODE (ops[1]) == REG
4779 && (!REG_POINTER (ops[1])
4780 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4781 return 0;
644459d0 4782
4783 for (i = 0; i < 16; i++)
dea01258 4784 dst[i] = i + 16;
4785 isize = INTVAL (ops[3]);
4786 if (isize == 1)
4787 shift = 3;
4788 else if (isize == 2)
4789 shift = 2;
4790 else
4791 shift = 0;
4792 offset = (INTVAL (ops[2]) +
4793 (GET_CODE (ops[1]) ==
4794 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4795 for (i = 0; i < isize; i++)
4796 dst[offset + i] = i + shift;
4797 return array_to_constant (TImode, dst);
644459d0 4798}
4799
4800/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4801 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4802 than 16 bytes, the value is repeated across the rest of the array. */
4803void
4804constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4805{
4806 HOST_WIDE_INT val;
4807 int i, j, first;
4808
4809 memset (arr, 0, 16);
4810 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4811 if (GET_CODE (x) == CONST_INT
4812 || (GET_CODE (x) == CONST_DOUBLE
4813 && (mode == SFmode || mode == DFmode)))
4814 {
4815 gcc_assert (mode != VOIDmode && mode != BLKmode);
4816
4817 if (GET_CODE (x) == CONST_DOUBLE)
4818 val = const_double_to_hwint (x);
4819 else
4820 val = INTVAL (x);
4821 first = GET_MODE_SIZE (mode) - 1;
4822 for (i = first; i >= 0; i--)
4823 {
4824 arr[i] = val & 0xff;
4825 val >>= 8;
4826 }
4827 /* Splat the constant across the whole array. */
4828 for (j = 0, i = first + 1; i < 16; i++)
4829 {
4830 arr[i] = arr[j];
4831 j = (j == first) ? 0 : j + 1;
4832 }
4833 }
4834 else if (GET_CODE (x) == CONST_DOUBLE)
4835 {
4836 val = CONST_DOUBLE_LOW (x);
4837 for (i = 15; i >= 8; i--)
4838 {
4839 arr[i] = val & 0xff;
4840 val >>= 8;
4841 }
4842 val = CONST_DOUBLE_HIGH (x);
4843 for (i = 7; i >= 0; i--)
4844 {
4845 arr[i] = val & 0xff;
4846 val >>= 8;
4847 }
4848 }
4849 else if (GET_CODE (x) == CONST_VECTOR)
4850 {
4851 int units;
4852 rtx elt;
4853 mode = GET_MODE_INNER (mode);
4854 units = CONST_VECTOR_NUNITS (x);
4855 for (i = 0; i < units; i++)
4856 {
4857 elt = CONST_VECTOR_ELT (x, i);
4858 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4859 {
4860 if (GET_CODE (elt) == CONST_DOUBLE)
4861 val = const_double_to_hwint (elt);
4862 else
4863 val = INTVAL (elt);
4864 first = GET_MODE_SIZE (mode) - 1;
4865 if (first + i * GET_MODE_SIZE (mode) > 16)
4866 abort ();
4867 for (j = first; j >= 0; j--)
4868 {
4869 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4870 val >>= 8;
4871 }
4872 }
4873 }
4874 }
4875 else
4876 gcc_unreachable();
4877}
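
/* Example (illustrative, not from the original source):
   constant_to_array (SImode, GEN_INT (0x12345678), arr) first fills
   arr[0..3] with the big-endian bytes 12 34 56 78 and then repeats
   them, so arr ends up as 12 34 56 78 repeated four times -- the byte
   image of a V4SI splat of that value.  */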
4878
4879/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4880 smaller than 16 bytes, use the bytes that would represent that value
4881 in a register, e.g., for QImode return the value of arr[3]. */
4882rtx
e96f2783 4883array_to_constant (enum machine_mode mode, const unsigned char arr[16])
644459d0 4884{
4885 enum machine_mode inner_mode;
4886 rtvec v;
4887 int units, size, i, j, k;
4888 HOST_WIDE_INT val;
4889
4890 if (GET_MODE_CLASS (mode) == MODE_INT
4891 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4892 {
4893 j = GET_MODE_SIZE (mode);
4894 i = j < 4 ? 4 - j : 0;
4895 for (val = 0; i < j; i++)
4896 val = (val << 8) | arr[i];
4897 val = trunc_int_for_mode (val, mode);
4898 return GEN_INT (val);
4899 }
4900
4901 if (mode == TImode)
4902 {
4903 HOST_WIDE_INT high;
4904 for (i = high = 0; i < 8; i++)
4905 high = (high << 8) | arr[i];
4906 for (i = 8, val = 0; i < 16; i++)
4907 val = (val << 8) | arr[i];
4908 return immed_double_const (val, high, TImode);
4909 }
4910 if (mode == SFmode)
4911 {
4912 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4913 val = trunc_int_for_mode (val, SImode);
171b6d22 4914 return hwint_to_const_double (SFmode, val);
644459d0 4915 }
4916 if (mode == DFmode)
4917 {
1f915911 4918 for (i = 0, val = 0; i < 8; i++)
4919 val = (val << 8) | arr[i];
171b6d22 4920 return hwint_to_const_double (DFmode, val);
644459d0 4921 }
4922
4923 if (!VECTOR_MODE_P (mode))
4924 abort ();
4925
4926 units = GET_MODE_NUNITS (mode);
4927 size = GET_MODE_UNIT_SIZE (mode);
4928 inner_mode = GET_MODE_INNER (mode);
4929 v = rtvec_alloc (units);
4930
4931 for (k = i = 0; i < units; ++i)
4932 {
4933 val = 0;
4934 for (j = 0; j < size; j++, k++)
4935 val = (val << 8) | arr[k];
4936
4937 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4938 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4939 else
4940 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4941 }
4942 if (k > 16)
4943 abort ();
4944
4945 return gen_rtx_CONST_VECTOR (mode, v);
4946}
4947
4948static void
4949reloc_diagnostic (rtx x)
4950{
712d2297 4951 tree decl = 0;
644459d0 4952 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4953 return;
4954
4955 if (GET_CODE (x) == SYMBOL_REF)
4956 decl = SYMBOL_REF_DECL (x);
4957 else if (GET_CODE (x) == CONST
4958 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4959 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4960
4961 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4962 if (decl && !DECL_P (decl))
4963 decl = 0;
4964
644459d0 4965 /* The decl could be a string constant. */
4966 if (decl && DECL_P (decl))
712d2297 4967 {
4968 location_t loc;
4969 /* We use last_assemble_variable_decl to get line information. It's
4970 not always going to be right and might not even be close, but will
4971 be right for the more common cases. */
4972 if (!last_assemble_variable_decl || in_section == ctors_section)
4973 loc = DECL_SOURCE_LOCATION (decl);
4974 else
4975 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
644459d0 4976
712d2297 4977 if (TARGET_WARN_RELOC)
4978 warning_at (loc, 0,
4979 "creating run-time relocation for %qD", decl);
4980 else
4981 error_at (loc,
4982 "creating run-time relocation for %qD", decl);
4983 }
4984 else
4985 {
4986 if (TARGET_WARN_RELOC)
4987 warning_at (input_location, 0, "creating run-time relocation");
4988 else
4989 error_at (input_location, "creating run-time relocation");
4990 }
644459d0 4991}
4992
4993/* Hook into assemble_integer so we can generate an error for run-time
4994 relocations. The SPU ABI disallows them. */
4995static bool
4996spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4997{
4998 /* By default run-time relocations aren't supported, but we allow them
4999 in case users support it in their own run-time loader. And we provide
5000 a warning for those users that don't. */
5001 if ((GET_CODE (x) == SYMBOL_REF)
5002 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5003 reloc_diagnostic (x);
5004
5005 return default_assemble_integer (x, size, aligned_p);
5006}
5007
5008static void
5009spu_asm_globalize_label (FILE * file, const char *name)
5010{
5011 fputs ("\t.global\t", file);
5012 assemble_name (file, name);
5013 fputs ("\n", file);
5014}
5015
5016static bool
f529eb25 5017spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5018 bool speed ATTRIBUTE_UNUSED)
644459d0 5019{
5020 enum machine_mode mode = GET_MODE (x);
5021 int cost = COSTS_N_INSNS (2);
5022
5023 /* Folding to a CONST_VECTOR will use extra space but there might
5024 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 5025 only if it allows us to fold away multiple insns. Changing the cost
644459d0 5026 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5027 because this cost will only be compared against a single insn.
5028 if (code == CONST_VECTOR)
5029 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5030 */
5031
5032 /* Use defaults for float operations. Not accurate but good enough. */
5033 if (mode == DFmode)
5034 {
5035 *total = COSTS_N_INSNS (13);
5036 return true;
5037 }
5038 if (mode == SFmode)
5039 {
5040 *total = COSTS_N_INSNS (6);
5041 return true;
5042 }
5043 switch (code)
5044 {
5045 case CONST_INT:
5046 if (satisfies_constraint_K (x))
5047 *total = 0;
5048 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5049 *total = COSTS_N_INSNS (1);
5050 else
5051 *total = COSTS_N_INSNS (3);
5052 return true;
5053
5054 case CONST:
5055 *total = COSTS_N_INSNS (3);
5056 return true;
5057
5058 case LABEL_REF:
5059 case SYMBOL_REF:
5060 *total = COSTS_N_INSNS (0);
5061 return true;
5062
5063 case CONST_DOUBLE:
5064 *total = COSTS_N_INSNS (5);
5065 return true;
5066
5067 case FLOAT_EXTEND:
5068 case FLOAT_TRUNCATE:
5069 case FLOAT:
5070 case UNSIGNED_FLOAT:
5071 case FIX:
5072 case UNSIGNED_FIX:
5073 *total = COSTS_N_INSNS (7);
5074 return true;
5075
5076 case PLUS:
5077 if (mode == TImode)
5078 {
5079 *total = COSTS_N_INSNS (9);
5080 return true;
5081 }
5082 break;
5083
5084 case MULT:
5085 cost =
5086 GET_CODE (XEXP (x, 0)) ==
5087 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5088 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5089 {
5090 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5091 {
5092 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5093 cost = COSTS_N_INSNS (14);
5094 if ((val & 0xffff) == 0)
5095 cost = COSTS_N_INSNS (9);
5096 else if (val > 0 && val < 0x10000)
5097 cost = COSTS_N_INSNS (11);
5098 }
5099 }
5100 *total = cost;
5101 return true;
5102 case DIV:
5103 case UDIV:
5104 case MOD:
5105 case UMOD:
5106 *total = COSTS_N_INSNS (20);
5107 return true;
5108 case ROTATE:
5109 case ROTATERT:
5110 case ASHIFT:
5111 case ASHIFTRT:
5112 case LSHIFTRT:
5113 *total = COSTS_N_INSNS (4);
5114 return true;
5115 case UNSPEC:
5116 if (XINT (x, 1) == UNSPEC_CONVERT)
5117 *total = COSTS_N_INSNS (0);
5118 else
5119 *total = COSTS_N_INSNS (4);
5120 return true;
5121 }
5122 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5123 if (GET_MODE_CLASS (mode) == MODE_INT
5124 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5125 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5126 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5127 *total = cost;
5128 return true;
5129}
5130
1bd43494 5131static enum machine_mode
5132spu_unwind_word_mode (void)
644459d0 5133{
1bd43494 5134 return SImode;
644459d0 5135}
5136
5137/* Decide whether we can make a sibling call to a function. DECL is the
5138 declaration of the function being targeted by the call and EXP is the
5139 CALL_EXPR representing the call. */
5140static bool
5141spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5142{
5143 return decl && !TARGET_LARGE_MEM;
5144}
5145
5146/* We need to correctly update the back chain pointer and the Available
 5147   Stack Size (which is in the second slot of the sp register). */
5148void
5149spu_allocate_stack (rtx op0, rtx op1)
5150{
5151 HOST_WIDE_INT v;
5152 rtx chain = gen_reg_rtx (V4SImode);
5153 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5154 rtx sp = gen_reg_rtx (V4SImode);
5155 rtx splatted = gen_reg_rtx (V4SImode);
5156 rtx pat = gen_reg_rtx (TImode);
5157
5158 /* copy the back chain so we can save it back again. */
5159 emit_move_insn (chain, stack_bot);
5160
5161 op1 = force_reg (SImode, op1);
5162
5163 v = 0x1020300010203ll;
5164 emit_move_insn (pat, immed_double_const (v, v, TImode));
5165 emit_insn (gen_shufb (splatted, op1, op1, pat));
5166
5167 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5168 emit_insn (gen_subv4si3 (sp, sp, splatted));
5169
5170 if (flag_stack_check)
5171 {
5172 rtx avail = gen_reg_rtx(SImode);
5173 rtx result = gen_reg_rtx(SImode);
5174 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5175 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5176 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5177 }
5178
5179 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5180
5181 emit_move_insn (stack_bot, chain);
5182
5183 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5184}
5185
5186void
5187spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5188{
5189 static unsigned char arr[16] =
5190 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5191 rtx temp = gen_reg_rtx (SImode);
5192 rtx temp2 = gen_reg_rtx (SImode);
5193 rtx temp3 = gen_reg_rtx (V4SImode);
5194 rtx temp4 = gen_reg_rtx (V4SImode);
5195 rtx pat = gen_reg_rtx (TImode);
5196 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5197
5198 /* Restore the backchain from the first word, sp from the second. */
5199 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5200 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5201
5202 emit_move_insn (pat, array_to_constant (TImode, arr));
5203
5204 /* Compute Available Stack Size for sp */
5205 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5206 emit_insn (gen_shufb (temp3, temp, temp, pat));
5207
5208 /* Compute Available Stack Size for back chain */
5209 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5210 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5211 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5212
5213 emit_insn (gen_addv4si3 (sp, sp, temp3));
5214 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5215}
5216
5217static void
5218spu_init_libfuncs (void)
5219{
5220 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5221 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5222 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5223 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5224 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5225 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5226 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5227 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5228 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5229 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5230 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5231
5232 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5233 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5234
5235 set_optab_libfunc (smul_optab, TImode, "__multi3");
5236 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5237 set_optab_libfunc (smod_optab, TImode, "__modti3");
5238 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5239 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5240 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5241}
5242
5243/* Make a subreg, stripping any existing subreg. We could possibly just
5244 call simplify_subreg, but in this case we know what we want. */
5245rtx
5246spu_gen_subreg (enum machine_mode mode, rtx x)
5247{
5248 if (GET_CODE (x) == SUBREG)
5249 x = SUBREG_REG (x);
5250 if (GET_MODE (x) == mode)
5251 return x;
5252 return gen_rtx_SUBREG (mode, x, 0);
5253}
5254
5255static bool
fb80456a 5256spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5257{
5258 return (TYPE_MODE (type) == BLKmode
5259 && ((type) == 0
5260 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5261 || int_size_in_bytes (type) >
5262 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5263}
5264\f
5265/* Create the built-in types and functions */
5266
c2233b46 5267enum spu_function_code
5268{
5269#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5270#include "spu-builtins.def"
5271#undef DEF_BUILTIN
5272 NUM_SPU_BUILTINS
5273};
5274
5275extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5276
644459d0 5277struct spu_builtin_description spu_builtins[] = {
5278#define DEF_BUILTIN(fcode, icode, name, type, params) \
5279 {fcode, icode, name, type, params, NULL_TREE},
5280#include "spu-builtins.def"
5281#undef DEF_BUILTIN
5282};
5283
5284static void
5285spu_init_builtins (void)
5286{
5287 struct spu_builtin_description *d;
5288 unsigned int i;
5289
5290 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5291 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5292 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5293 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5294 V4SF_type_node = build_vector_type (float_type_node, 4);
5295 V2DF_type_node = build_vector_type (double_type_node, 2);
5296
5297 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5298 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5299 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5300 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5301
c4ecce0c 5302 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5303
5304 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5305 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5306 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5307 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5308 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5309 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5310 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5311 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5312 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5313 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5314 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5315 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5316
5317 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5318 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5319 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5320 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5321 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5322 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5323 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5324 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5325
5326 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5327 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5328
5329 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5330
5331 spu_builtin_types[SPU_BTI_PTR] =
5332 build_pointer_type (build_qualified_type
5333 (void_type_node,
5334 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5335
5336 /* For each builtin we build a new prototype. The tree code will make
5337 sure nodes are shared. */
5338 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5339 {
5340 tree p;
5341 char name[64]; /* build_function will make a copy. */
5342 int parm;
5343
5344 if (d->name == 0)
5345 continue;
5346
5dfbd18f 5347 /* Find last parm. */
644459d0 5348 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5349 ;
644459d0 5350
5351 p = void_list_node;
5352 while (parm > 1)
5353 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5354
5355 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5356
5357 sprintf (name, "__builtin_%s", d->name);
5358 d->fndecl =
5359 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5360 NULL, NULL_TREE);
a76866d3 5361 if (d->fcode == SPU_MASK_FOR_LOAD)
5362 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5363
5364 /* These builtins don't throw. */
5365 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5366 }
5367}
5368
cf31d486 5369void
5370spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5371{
5372 static unsigned char arr[16] =
5373 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5374
5375 rtx temp = gen_reg_rtx (Pmode);
5376 rtx temp2 = gen_reg_rtx (V4SImode);
5377 rtx temp3 = gen_reg_rtx (V4SImode);
5378 rtx pat = gen_reg_rtx (TImode);
5379 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5380
5381 emit_move_insn (pat, array_to_constant (TImode, arr));
5382
5383 /* Restore the sp. */
5384 emit_move_insn (temp, op1);
5385 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5386
5387 /* Compute available stack size for sp. */
5388 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5389 emit_insn (gen_shufb (temp3, temp, temp, pat));
5390
5391 emit_insn (gen_addv4si3 (sp, sp, temp3));
5392 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5393}
5394
644459d0 5395int
5396spu_safe_dma (HOST_WIDE_INT channel)
5397{
006e4b96 5398 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5399}
5400
5401void
5402spu_builtin_splats (rtx ops[])
5403{
5404 enum machine_mode mode = GET_MODE (ops[0]);
5405 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5406 {
5407 unsigned char arr[16];
5408 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5409 emit_move_insn (ops[0], array_to_constant (mode, arr));
5410 }
644459d0 5411 else
5412 {
5413 rtx reg = gen_reg_rtx (TImode);
5414 rtx shuf;
5415 if (GET_CODE (ops[1]) != REG
5416 && GET_CODE (ops[1]) != SUBREG)
5417 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5418 switch (mode)
5419 {
5420 case V2DImode:
5421 case V2DFmode:
5422 shuf =
5423 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5424 TImode);
5425 break;
5426 case V4SImode:
5427 case V4SFmode:
5428 shuf =
5429 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5430 TImode);
5431 break;
5432 case V8HImode:
5433 shuf =
5434 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5435 TImode);
5436 break;
5437 case V16QImode:
5438 shuf =
5439 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5440 TImode);
5441 break;
5442 default:
5443 abort ();
5444 }
5445 emit_move_insn (reg, shuf);
5446 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5447 }
5448}
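
/* Note on the shuffle constants above (illustrative, not from the
   original source): shufb control bytes 0x00-0x0f select bytes of the
   first source operand, so the V4SImode pattern 00 01 02 03 repeated
   four times copies the four bytes of the scalar in the preferred
   slot into every word of the result, i.e. a splat; the V8HImode and
   V16QImode patterns do the same with bytes 02 03 and byte 03, which
   is where sub-word scalars live within the preferred slot.  */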
5449
5450void
5451spu_builtin_extract (rtx ops[])
5452{
5453 enum machine_mode mode;
5454 rtx rot, from, tmp;
5455
5456 mode = GET_MODE (ops[1]);
5457
5458 if (GET_CODE (ops[2]) == CONST_INT)
5459 {
5460 switch (mode)
5461 {
5462 case V16QImode:
5463 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5464 break;
5465 case V8HImode:
5466 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5467 break;
5468 case V4SFmode:
5469 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5470 break;
5471 case V4SImode:
5472 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5473 break;
5474 case V2DImode:
5475 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5476 break;
5477 case V2DFmode:
5478 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5479 break;
5480 default:
5481 abort ();
5482 }
5483 return;
5484 }
5485
5486 from = spu_gen_subreg (TImode, ops[1]);
5487 rot = gen_reg_rtx (TImode);
5488 tmp = gen_reg_rtx (SImode);
5489
5490 switch (mode)
5491 {
5492 case V16QImode:
5493 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5494 break;
5495 case V8HImode:
5496 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5497 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5498 break;
5499 case V4SFmode:
5500 case V4SImode:
5501 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5502 break;
5503 case V2DImode:
5504 case V2DFmode:
5505 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5506 break;
5507 default:
5508 abort ();
5509 }
5510 emit_insn (gen_rotqby_ti (rot, from, tmp));
5511
5512 emit_insn (gen_spu_convert (ops[0], rot));
5513}
5514
5515void
5516spu_builtin_insert (rtx ops[])
5517{
5518 enum machine_mode mode = GET_MODE (ops[0]);
5519 enum machine_mode imode = GET_MODE_INNER (mode);
5520 rtx mask = gen_reg_rtx (TImode);
5521 rtx offset;
5522
5523 if (GET_CODE (ops[3]) == CONST_INT)
5524 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5525 else
5526 {
5527 offset = gen_reg_rtx (SImode);
5528 emit_insn (gen_mulsi3
5529 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5530 }
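 /* Illustrative note (added commentary, not in the original source):
 gen_cpat builds the same kind of insertion mask the cwd/chd/cbd
 instructions produce, so the shufb below copies ops[1] unchanged
 except for the element at the requested position, which is taken
 from the preferred slot of ops[2]. */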
5531 emit_insn (gen_cpat
5532 (mask, stack_pointer_rtx, offset,
5533 GEN_INT (GET_MODE_SIZE (imode))));
5534 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5535}
5536
5537void
5538spu_builtin_promote (rtx ops[])
5539{
5540 enum machine_mode mode, imode;
5541 rtx rot, from, offset;
5542 HOST_WIDE_INT pos;
5543
5544 mode = GET_MODE (ops[0]);
5545 imode = GET_MODE_INNER (mode);
5546
5547 from = gen_reg_rtx (TImode);
5548 rot = spu_gen_subreg (TImode, ops[0]);
5549
5550 emit_insn (gen_spu_convert (from, ops[1]));
5551
5552 if (GET_CODE (ops[2]) == CONST_INT)
5553 {
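 /* Illustrative note (added commentary, not in the original source):
 for V8HImode element 2, pos = -2*2 = -4, the sub-word adjustment
 adds 2 giving -2, and -2 & 15 = 14; rotating by 14 bytes moves the
 scalar from its preferred slot (bytes 2-3) into bytes 4-5, which is
 element 2 of the result. */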
5554 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5555 if (GET_MODE_SIZE (imode) < 4)
5556 pos += 4 - GET_MODE_SIZE (imode);
5557 offset = GEN_INT (pos & 15);
5558 }
5559 else
5560 {
5561 offset = gen_reg_rtx (SImode);
5562 switch (mode)
5563 {
5564 case V16QImode:
5565 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5566 break;
5567 case V8HImode:
5568 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5569 emit_insn (gen_addsi3 (offset, offset, offset));
5570 break;
5571 case V4SFmode:
5572 case V4SImode:
5573 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5574 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5575 break;
5576 case V2DImode:
5577 case V2DFmode:
5578 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5579 break;
5580 default:
5581 abort ();
5582 }
5583 }
5584 emit_insn (gen_rotqby_ti (rot, from, offset));
5585}
5586
e96f2783 5587static void
5588spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
644459d0 5589{
e96f2783 5590 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
644459d0 5591 rtx shuf = gen_reg_rtx (V4SImode);
5592 rtx insn = gen_reg_rtx (V4SImode);
5593 rtx shufc;
5594 rtx insnc;
5595 rtx mem;
5596
5597 fnaddr = force_reg (SImode, fnaddr);
5598 cxt = force_reg (SImode, cxt);
5599
5600 if (TARGET_LARGE_MEM)
5601 {
5602 rtx rotl = gen_reg_rtx (V4SImode);
5603 rtx mask = gen_reg_rtx (V4SImode);
5604 rtx bi = gen_reg_rtx (SImode);
e96f2783 5605 static unsigned char const shufa[16] = {
644459d0 5606 2, 3, 0, 1, 18, 19, 16, 17,
5607 0, 1, 2, 3, 16, 17, 18, 19
5608 };
e96f2783 5609 static unsigned char const insna[16] = {
644459d0 5610 0x41, 0, 0, 79,
5611 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5612 0x60, 0x80, 0, 79,
5613 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5614 };
5615
5616 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5617 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5618
5619 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5620 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5621 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5622 emit_insn (gen_selb (insn, insnc, rotl, mask));
5623
e96f2783 5624 mem = adjust_address (m_tramp, V4SImode, 0);
5625 emit_move_insn (mem, insn);
644459d0 5626
5627 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
e96f2783 5628 mem = adjust_address (m_tramp, Pmode, 16);
5629 emit_move_insn (mem, bi);
644459d0 5630 }
5631 else
5632 {
5633 rtx scxt = gen_reg_rtx (SImode);
5634 rtx sfnaddr = gen_reg_rtx (SImode);
e96f2783 5635 static unsigned char const insna[16] = {
644459d0 5636 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5637 0x30, 0, 0, 0,
5638 0, 0, 0, 0,
5639 0, 0, 0, 0
5640 };
5641
5642 shufc = gen_reg_rtx (TImode);
5643 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5644
5645 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5646 fits in 18 bits and that its last 4 bits are zeros. This will be
5647 true if the stack pointer is initialized to 0x3fff0 at program
5648 start; otherwise the ila instruction will be garbage. */
5649
5650 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5651 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5652 emit_insn (gen_cpat
5653 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5654 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5655 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5656
e96f2783 5657 mem = adjust_address (m_tramp, V4SImode, 0);
5658 emit_move_insn (mem, insn);
644459d0 5659 }
5660 emit_insn (gen_sync ());
5661}
5662
5663void
5664spu_expand_sign_extend (rtx ops[])
5665{
5666 unsigned char arr[16];
5667 rtx pat = gen_reg_rtx (TImode);
5668 rtx sign, c;
5669 int i, last;
5670 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5671 if (GET_MODE (ops[1]) == QImode)
5672 {
5673 sign = gen_reg_rtx (HImode);
5674 emit_insn (gen_extendqihi2 (sign, ops[1]));
5675 for (i = 0; i < 16; i++)
5676 arr[i] = 0x12;
5677 arr[last] = 0x13;
5678 }
5679 else
5680 {
5681 for (i = 0; i < 16; i++)
5682 arr[i] = 0x10;
5683 switch (GET_MODE (ops[1]))
5684 {
5685 case HImode:
5686 sign = gen_reg_rtx (SImode);
5687 emit_insn (gen_extendhisi2 (sign, ops[1]));
5688 arr[last] = 0x03;
5689 arr[last - 1] = 0x02;
5690 break;
5691 case SImode:
5692 sign = gen_reg_rtx (SImode);
5693 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5694 for (i = 0; i < 4; i++)
5695 arr[last - i] = 3 - i;
5696 break;
5697 case DImode:
5698 sign = gen_reg_rtx (SImode);
5699 c = gen_reg_rtx (SImode);
5700 emit_insn (gen_spu_convert (c, ops[1]));
5701 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5702 for (i = 0; i < 8; i++)
5703 arr[last - i] = 7 - i;
5704 break;
5705 default:
5706 abort ();
5707 }
5708 }
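 /* Illustrative note (added commentary, not in the original source):
 for an HImode source and DImode destination the selector built above
 is { 0x10 x 6, 0x02, 0x03, 0x10 x 8 }; the 0x10 bytes replicate the
 sign byte of SIGN while 0x02/0x03 copy the original halfword from
 its preferred slot in ops[1]. */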
5709 emit_move_insn (pat, array_to_constant (TImode, arr));
5710 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5711}
5712
5713/* Expand vector initialization. If there are any constant parts,
5714 load the constant parts first, then load any non-constant parts. */
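/* Illustrative example (added commentary, not in the original source):
 initializing a V4SI vector with { 1, x, 2, y } first loads the
 constant vector { 1, 1, 2, 1 } (non-constant slots filled with the
 first constant) and then inserts x into element 1 and y into
 element 3. */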
5715void
5716spu_expand_vector_init (rtx target, rtx vals)
5717{
5718 enum machine_mode mode = GET_MODE (target);
5719 int n_elts = GET_MODE_NUNITS (mode);
5720 int n_var = 0;
5721 bool all_same = true;
790c536c 5722 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5723 int i;
5724
5725 first = XVECEXP (vals, 0, 0);
5726 for (i = 0; i < n_elts; ++i)
5727 {
5728 x = XVECEXP (vals, 0, i);
e442af0b 5729 if (!(CONST_INT_P (x)
5730 || GET_CODE (x) == CONST_DOUBLE
5731 || GET_CODE (x) == CONST_FIXED))
644459d0 5732 ++n_var;
5733 else
5734 {
5735 if (first_constant == NULL_RTX)
5736 first_constant = x;
5737 }
5738 if (i > 0 && !rtx_equal_p (x, first))
5739 all_same = false;
5740 }
5741
5742 /* If all elements are the same, use splats to repeat the element. */
5743 if (all_same)
5744 {
5745 if (!CONSTANT_P (first)
5746 && !register_operand (first, GET_MODE (x)))
5747 first = force_reg (GET_MODE (first), first);
5748 emit_insn (gen_spu_splats (target, first));
5749 return;
5750 }
5751
5752 /* Load the constant parts. */
5753 if (n_var != n_elts)
5754 {
5755 if (n_var == 0)
5756 {
5757 emit_move_insn (target,
5758 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5759 }
5760 else
5761 {
5762 rtx constant_parts_rtx = copy_rtx (vals);
5763
5764 gcc_assert (first_constant != NULL_RTX);
5765 /* Fill empty slots with the first constant; this increases
5766 our chance of using splats in the recursive call below. */
5767 for (i = 0; i < n_elts; ++i)
e442af0b 5768 {
5769 x = XVECEXP (constant_parts_rtx, 0, i);
5770 if (!(CONST_INT_P (x)
5771 || GET_CODE (x) == CONST_DOUBLE
5772 || GET_CODE (x) == CONST_FIXED))
5773 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5774 }
644459d0 5775
5776 spu_expand_vector_init (target, constant_parts_rtx);
5777 }
5778 }
5779
5780 /* Load the variable parts. */
5781 if (n_var != 0)
5782 {
5783 rtx insert_operands[4];
5784
5785 insert_operands[0] = target;
5786 insert_operands[2] = target;
5787 for (i = 0; i < n_elts; ++i)
5788 {
5789 x = XVECEXP (vals, 0, i);
e442af0b 5790 if (!(CONST_INT_P (x)
5791 || GET_CODE (x) == CONST_DOUBLE
5792 || GET_CODE (x) == CONST_FIXED))
644459d0 5793 {
5794 if (!register_operand (x, GET_MODE (x)))
5795 x = force_reg (GET_MODE (x), x);
5796 insert_operands[1] = x;
5797 insert_operands[3] = GEN_INT (i);
5798 spu_builtin_insert (insert_operands);
5799 }
5800 }
5801 }
5802}
6352eedf 5803
5474166e 5804/* Return the insn code of the vector compare instruction for the given
5805 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
5806
5807static int
5808get_vec_cmp_insn (enum rtx_code code,
5809 enum machine_mode dest_mode,
5810 enum machine_mode op_mode)
5811
5812{
5813 switch (code)
5814 {
5815 case EQ:
5816 if (dest_mode == V16QImode && op_mode == V16QImode)
5817 return CODE_FOR_ceq_v16qi;
5818 if (dest_mode == V8HImode && op_mode == V8HImode)
5819 return CODE_FOR_ceq_v8hi;
5820 if (dest_mode == V4SImode && op_mode == V4SImode)
5821 return CODE_FOR_ceq_v4si;
5822 if (dest_mode == V4SImode && op_mode == V4SFmode)
5823 return CODE_FOR_ceq_v4sf;
5824 if (dest_mode == V2DImode && op_mode == V2DFmode)
5825 return CODE_FOR_ceq_v2df;
5826 break;
5827 case GT:
5828 if (dest_mode == V16QImode && op_mode == V16QImode)
5829 return CODE_FOR_cgt_v16qi;
5830 if (dest_mode == V8HImode && op_mode == V8HImode)
5831 return CODE_FOR_cgt_v8hi;
5832 if (dest_mode == V4SImode && op_mode == V4SImode)
5833 return CODE_FOR_cgt_v4si;
5834 if (dest_mode == V4SImode && op_mode == V4SFmode)
5835 return CODE_FOR_cgt_v4sf;
5836 if (dest_mode == V2DImode && op_mode == V2DFmode)
5837 return CODE_FOR_cgt_v2df;
5838 break;
5839 case GTU:
5840 if (dest_mode == V16QImode && op_mode == V16QImode)
5841 return CODE_FOR_clgt_v16qi;
5842 if (dest_mode == V8HImode && op_mode == V8HImode)
5843 return CODE_FOR_clgt_v8hi;
5844 if (dest_mode == V4SImode && op_mode == V4SImode)
5845 return CODE_FOR_clgt_v4si;
5846 break;
5847 default:
5848 break;
5849 }
5850 return -1;
5851}
5852
5853/* Emit a vector compare of operands OP0 and OP1 using code RCODE.
5854 DMODE is the expected destination mode. This is a recursive function. */
5855
5856static rtx
5857spu_emit_vector_compare (enum rtx_code rcode,
5858 rtx op0, rtx op1,
5859 enum machine_mode dmode)
5860{
5861 int vec_cmp_insn;
5862 rtx mask;
5863 enum machine_mode dest_mode;
5864 enum machine_mode op_mode = GET_MODE (op1);
5865
5866 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5867
5868 /* Single precision floating point vector compare instructions use a
5869 V4SImode destination; double precision ones use a V2DImode destination.
5870 Move the result to the appropriate mode later. */
5871 if (dmode == V4SFmode)
5872 dest_mode = V4SImode;
5873 else if (dmode == V2DFmode)
5874 dest_mode = V2DImode;
5875 else
5876 dest_mode = dmode;
5877
5878 mask = gen_reg_rtx (dest_mode);
5879 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5880
5881 if (vec_cmp_insn == -1)
5882 {
5883 bool swap_operands = false;
5884 bool try_again = false;
5885 switch (rcode)
5886 {
5887 case LT:
5888 rcode = GT;
5889 swap_operands = true;
5890 try_again = true;
5891 break;
5892 case LTU:
5893 rcode = GTU;
5894 swap_operands = true;
5895 try_again = true;
5896 break;
5897 case NE:
5898 /* Treat A != B as ~(A==B). */
5899 {
5900 enum insn_code nor_code;
5901 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5902 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5903 gcc_assert (nor_code != CODE_FOR_nothing);
5904 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5905 if (dmode != dest_mode)
5906 {
5907 rtx temp = gen_reg_rtx (dest_mode);
5908 convert_move (temp, mask, 0);
5909 return temp;
5910 }
5911 return mask;
5912 }
5913 break;
5914 case GE:
5915 case GEU:
5916 case LE:
5917 case LEU:
5918 /* Try GT/GTU/LT/LTU OR EQ */
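 /* Illustrative note (added commentary, not in the original source):
 for example, a V4SImode LE compare is emitted as a GT compare with
 the operands swapped, OR'ed with an EQ compare of the original
 operands. */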
5919 {
5920 rtx c_rtx, eq_rtx;
5921 enum insn_code ior_code;
5922 enum rtx_code new_code;
5923
5924 switch (rcode)
5925 {
5926 case GE: new_code = GT; break;
5927 case GEU: new_code = GTU; break;
5928 case LE: new_code = LT; break;
5929 case LEU: new_code = LTU; break;
5930 default:
5931 gcc_unreachable ();
5932 }
5933
5934 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5935 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5936
99bdde56 5937 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5938 gcc_assert (ior_code != CODE_FOR_nothing);
5939 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5940 if (dmode != dest_mode)
5941 {
5942 rtx temp = gen_reg_rtx (dest_mode);
5943 convert_move (temp, mask, 0);
5944 return temp;
5945 }
5946 return mask;
5947 }
5948 break;
5949 default:
5950 gcc_unreachable ();
5951 }
5952
5953 /* You only get two chances. */
5954 if (try_again)
5955 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5956
5957 gcc_assert (vec_cmp_insn != -1);
5958
5959 if (swap_operands)
5960 {
5961 rtx tmp;
5962 tmp = op0;
5963 op0 = op1;
5964 op1 = tmp;
5965 }
5966 }
5967
5968 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5969 if (dmode != dest_mode)
5970 {
5971 rtx temp = gen_reg_rtx (dest_mode);
5972 convert_move (temp, mask, 0);
5973 return temp;
5974 }
5975 return mask;
5976}
5977
5978
5979/* Emit a vector conditional expression.
5980 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
5981 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
5982
5983int
5984spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5985 rtx cond, rtx cc_op0, rtx cc_op1)
5986{
5987 enum machine_mode dest_mode = GET_MODE (dest);
5988 enum rtx_code rcode = GET_CODE (cond);
5989 rtx mask;
5990
5991 /* Get the vector mask for the given relational operations. */
5992 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5993
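 /* Illustrative note (added commentary, not in the original source):
 selb picks op1 where the mask bytes are all-ones (comparison true)
 and op2 where they are zero, giving dest = cond ? op1 : op2. */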
5994 emit_insn (gen_selb (dest, op2, op1, mask));
5995
5996 return 1;
5997}
5998
6352eedf 5999static rtx
6000spu_force_reg (enum machine_mode mode, rtx op)
6001{
6002 rtx x, r;
6003 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6004 {
6005 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6006 || GET_MODE (op) == BLKmode)
6007 return force_reg (mode, convert_to_mode (mode, op, 0));
6008 abort ();
6009 }
6010
6011 r = force_reg (GET_MODE (op), op);
6012 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6013 {
6014 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6015 if (x)
6016 return x;
6017 }
6018
6019 x = gen_reg_rtx (mode);
6020 emit_insn (gen_spu_convert (x, r));
6021 return x;
6022}
6023
6024static void
6025spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6026{
6027 HOST_WIDE_INT v = 0;
6028 int lsbits;
6029 /* Check the range of immediate operands. */
6030 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6031 {
6032 int range = p - SPU_BTI_7;
5df189be 6033
6034 if (!CONSTANT_P (op))
6352eedf 6035 error ("%s expects an integer literal in the range [%d, %d].",
6036 d->name,
6037 spu_builtin_range[range].low, spu_builtin_range[range].high);
6038
6039 if (GET_CODE (op) == CONST
6040 && (GET_CODE (XEXP (op, 0)) == PLUS
6041 || GET_CODE (XEXP (op, 0)) == MINUS))
6042 {
6043 v = INTVAL (XEXP (XEXP (op, 0), 1));
6044 op = XEXP (XEXP (op, 0), 0);
6045 }
6046 else if (GET_CODE (op) == CONST_INT)
6047 v = INTVAL (op);
5df189be 6048 else if (GET_CODE (op) == CONST_VECTOR
6049 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6050 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6051
6052 /* The default for v is 0 which is valid in every range. */
6053 if (v < spu_builtin_range[range].low
6054 || v > spu_builtin_range[range].high)
6055 error ("%s expects an integer literal in the range [%d, %d]. ("
6056 HOST_WIDE_INT_PRINT_DEC ")",
6057 d->name,
6058 spu_builtin_range[range].low, spu_builtin_range[range].high,
6059 v);
6352eedf 6060
6061 switch (p)
6062 {
6063 case SPU_BTI_S10_4:
6064 lsbits = 4;
6065 break;
6066 case SPU_BTI_U16_2:
6067 /* This is only used in lqa and stqa. Even though the insns
6068 encode 16 bits of the address (all but the 2 least
6069 significant), only 14 bits are used because the address is
6070 masked to be 16-byte aligned. */
6071 lsbits = 4;
6072 break;
6073 case SPU_BTI_S16_2:
6074 /* This is used for lqr and stqr. */
6075 lsbits = 2;
6076 break;
6077 default:
6078 lsbits = 0;
6079 }
6080
6081 if (GET_CODE (op) == LABEL_REF
6082 || (GET_CODE (op) == SYMBOL_REF
6083 && SYMBOL_REF_FUNCTION_P (op))
5df189be 6084 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 6085 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6086 d->name);
6087 }
6088}
6089
6090
70ca06f8 6091static int
5df189be 6092expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 6093 rtx target, rtx ops[])
6094{
bc620c5c 6095 enum insn_code icode = (enum insn_code) d->icode;
5df189be 6096 int i = 0, a;
6352eedf 6097
6098 /* Expand the arguments into rtl. */
6099
6100 if (d->parm[0] != SPU_BTI_VOID)
6101 ops[i++] = target;
6102
70ca06f8 6103 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 6104 {
5df189be 6105 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 6106 if (arg == 0)
6107 abort ();
b9c74b4d 6108 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 6109 }
70ca06f8 6110
6111 /* The insn pattern may have additional operands (SCRATCH).
6112 Return the number of actual non-SCRATCH operands. */
6113 gcc_assert (i <= insn_data[icode].n_operands);
6114 return i;
6352eedf 6115}
6116
6117static rtx
6118spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 6119 tree exp, rtx target)
6352eedf 6120{
6121 rtx pat;
6122 rtx ops[8];
bc620c5c 6123 enum insn_code icode = (enum insn_code) d->icode;
6352eedf 6124 enum machine_mode mode, tmode;
6125 int i, p;
70ca06f8 6126 int n_operands;
6352eedf 6127 tree return_type;
6128
6129 /* Set up ops[] with values from the call expression EXP. */
70ca06f8 6130 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 6131
6132 /* Handle the target operand which must be operand 0. */
6133 i = 0;
6134 if (d->parm[0] != SPU_BTI_VOID)
6135 {
6136
6137 /* We prefer the mode specified for the match_operand; otherwise
6138 use the mode from the builtin function prototype. */
6139 tmode = insn_data[d->icode].operand[0].mode;
6140 if (tmode == VOIDmode)
6141 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6142
6143 /* Try to use target because not using it can lead to extra copies,
6144 and when all of the registers are in use those extra copies lead
6145 to extra spills. */
6146 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6147 ops[0] = target;
6148 else
6149 target = ops[0] = gen_reg_rtx (tmode);
6150
6151 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6152 abort ();
6153
6154 i++;
6155 }
6156
a76866d3 6157 if (d->fcode == SPU_MASK_FOR_LOAD)
6158 {
6159 enum machine_mode mode = insn_data[icode].operand[1].mode;
6160 tree arg;
6161 rtx addr, op, pat;
6162
6163 /* get addr */
5df189be 6164 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 6165 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6166 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6167 addr = memory_address (mode, op);
6168
6169 /* negate addr */
6170 op = gen_reg_rtx (GET_MODE (addr));
6171 emit_insn (gen_rtx_SET (VOIDmode, op,
6172 gen_rtx_NEG (GET_MODE (addr), addr)));
6173 op = gen_rtx_MEM (mode, op);
6174
6175 pat = GEN_FCN (icode) (target, op);
6176 if (!pat)
6177 return 0;
6178 emit_insn (pat);
6179 return target;
6180 }
6181
6352eedf 6182 /* Ignore align_hint, but still expand its args in case they have
6183 side effects. */
6184 if (icode == CODE_FOR_spu_align_hint)
6185 return 0;
6186
6187 /* Handle the rest of the operands. */
70ca06f8 6188 for (p = 1; i < n_operands; i++, p++)
6352eedf 6189 {
6190 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6191 mode = insn_data[d->icode].operand[i].mode;
6192 else
6193 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6194
6195 /* mode can be VOIDmode here for labels */
6196
6197 /* For specific intrinsics with an immediate operand, e.g.,
6198 si_ai(), we sometimes need to convert the scalar argument to a
6199 vector argument by splatting the scalar. */
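 /* Illustrative note (added commentary, not in the original source):
 e.g. an immediate 10 passed to such an intrinsic is splatted by
 spu_const below into a vector constant with 10 in every element. */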
6200 if (VECTOR_MODE_P (mode)
6201 && (GET_CODE (ops[i]) == CONST_INT
6202 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 6203 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 6204 {
6205 if (GET_CODE (ops[i]) == CONST_INT)
6206 ops[i] = spu_const (mode, INTVAL (ops[i]));
6207 else
6208 {
6209 rtx reg = gen_reg_rtx (mode);
6210 enum machine_mode imode = GET_MODE_INNER (mode);
6211 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6212 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6213 if (imode != GET_MODE (ops[i]))
6214 ops[i] = convert_to_mode (imode, ops[i],
6215 TYPE_UNSIGNED (spu_builtin_types
6216 [d->parm[i]]));
6217 emit_insn (gen_spu_splats (reg, ops[i]));
6218 ops[i] = reg;
6219 }
6220 }
6221
5df189be 6222 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6223
6352eedf 6224 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6225 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6226 }
6227
70ca06f8 6228 switch (n_operands)
6352eedf 6229 {
6230 case 0:
6231 pat = GEN_FCN (icode) (0);
6232 break;
6233 case 1:
6234 pat = GEN_FCN (icode) (ops[0]);
6235 break;
6236 case 2:
6237 pat = GEN_FCN (icode) (ops[0], ops[1]);
6238 break;
6239 case 3:
6240 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6241 break;
6242 case 4:
6243 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6244 break;
6245 case 5:
6246 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6247 break;
6248 case 6:
6249 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6250 break;
6251 default:
6252 abort ();
6253 }
6254
6255 if (!pat)
6256 abort ();
6257
6258 if (d->type == B_CALL || d->type == B_BISLED)
6259 emit_call_insn (pat);
6260 else if (d->type == B_JUMP)
6261 {
6262 emit_jump_insn (pat);
6263 emit_barrier ();
6264 }
6265 else
6266 emit_insn (pat);
6267
6268 return_type = spu_builtin_types[d->parm[0]];
6269 if (d->parm[0] != SPU_BTI_VOID
6270 && GET_MODE (target) != TYPE_MODE (return_type))
6271 {
6272 /* target is the return value. It should always have the mode of
6273 the builtin function prototype. */
6274 target = spu_force_reg (TYPE_MODE (return_type), target);
6275 }
6276
6277 return target;
6278}
6279
6280rtx
6281spu_expand_builtin (tree exp,
6282 rtx target,
6283 rtx subtarget ATTRIBUTE_UNUSED,
6284 enum machine_mode mode ATTRIBUTE_UNUSED,
6285 int ignore ATTRIBUTE_UNUSED)
6286{
5df189be 6287 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6288 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6289 struct spu_builtin_description *d;
6290
6291 if (fcode < NUM_SPU_BUILTINS)
6292 {
6293 d = &spu_builtins[fcode];
6294
5df189be 6295 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6296 }
6297 abort ();
6298}
6299
e99f512d 6300/* Implement targetm.vectorize.builtin_mul_widen_even. */
6301static tree
6302spu_builtin_mul_widen_even (tree type)
6303{
e99f512d 6304 switch (TYPE_MODE (type))
6305 {
6306 case V8HImode:
6307 if (TYPE_UNSIGNED (type))
6308 return spu_builtins[SPU_MULE_0].fndecl;
6309 else
6310 return spu_builtins[SPU_MULE_1].fndecl;
6311 break;
6312 default:
6313 return NULL_TREE;
6314 }
6315}
6316
6317/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6318static tree
6319spu_builtin_mul_widen_odd (tree type)
6320{
6321 switch (TYPE_MODE (type))
6322 {
6323 case V8HImode:
6324 if (TYPE_UNSIGNED (type))
6325 return spu_builtins[SPU_MULO_1].fndecl;
6326 else
6327 return spu_builtins[SPU_MULO_0].fndecl;
6328 break;
6329 default:
6330 return NULL_TREE;
6331 }
6332}
6333
a76866d3 6334/* Implement targetm.vectorize.builtin_mask_for_load. */
6335static tree
6336spu_builtin_mask_for_load (void)
6337{
6338 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6339 gcc_assert (d);
6340 return d->fndecl;
6341}
5df189be 6342
a28df51d 6343/* Implement targetm.vectorize.builtin_vectorization_cost. */
6344static int
6345spu_builtin_vectorization_cost (bool runtime_test)
6346{
6347 /* If the branch of the runtime test is taken - i.e. - the vectorized
6348 version is skipped - this incurs a misprediction cost (because the
6349 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6350 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6351 when the vectorized version is executed. */
6352 if (runtime_test)
6353 return -19;
6354 else
6355 return 0;
6356}
6357
0e87db76 6358/* Return true iff a data reference of TYPE can reach vector alignment (16)
6359 after applying N iterations. This routine does not determine how many
6360 iterations are required to reach the desired alignment. */
6361
6362static bool
a9f1838b 6363spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6364{
6365 if (is_packed)
6366 return false;
6367
6368 /* All other types are naturally aligned. */
6369 return true;
6370}
6371
a0515226 6372/* Implement targetm.vectorize.builtin_vec_perm. */
6373tree
6374spu_builtin_vec_perm (tree type, tree *mask_element_type)
6375{
6376 struct spu_builtin_description *d;
6377
6378 *mask_element_type = unsigned_char_type_node;
6379
6380 switch (TYPE_MODE (type))
6381 {
6382 case V16QImode:
6383 if (TYPE_UNSIGNED (type))
6384 d = &spu_builtins[SPU_SHUFFLE_0];
6385 else
6386 d = &spu_builtins[SPU_SHUFFLE_1];
6387 break;
6388
6389 case V8HImode:
6390 if (TYPE_UNSIGNED (type))
6391 d = &spu_builtins[SPU_SHUFFLE_2];
6392 else
6393 d = &spu_builtins[SPU_SHUFFLE_3];
6394 break;
6395
6396 case V4SImode:
6397 if (TYPE_UNSIGNED (type))
6398 d = &spu_builtins[SPU_SHUFFLE_4];
6399 else
6400 d = &spu_builtins[SPU_SHUFFLE_5];
6401 break;
6402
6403 case V2DImode:
6404 if (TYPE_UNSIGNED (type))
6405 d = &spu_builtins[SPU_SHUFFLE_6];
6406 else
6407 d = &spu_builtins[SPU_SHUFFLE_7];
6408 break;
6409
6410 case V4SFmode:
6411 d = &spu_builtins[SPU_SHUFFLE_8];
6412 break;
6413
6414 case V2DFmode:
6415 d = &spu_builtins[SPU_SHUFFLE_9];
6416 break;
6417
6418 default:
6419 return NULL_TREE;
6420 }
6421
6422 gcc_assert (d);
6423 return d->fndecl;
6424}
6425
d52fd16a 6426/* Count the total number of instructions in each pipe and return the
6427 maximum, which is used as the Minimum Iteration Interval (MII)
6428 in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
6429 -2 means the instruction can go in either pipe0 or pipe1. */
6430static int
6431spu_sms_res_mii (struct ddg *g)
6432{
6433 int i;
6434 unsigned t[4] = {0, 0, 0, 0};
6435
6436 for (i = 0; i < g->num_nodes; i++)
6437 {
6438 rtx insn = g->nodes[i].insn;
6439 int p = get_pipe (insn) + 2;
6440
6441 assert (p >= 0);
6442 assert (p < 4);
6443
6444 t[p]++;
6445 if (dump_file && INSN_P (insn))
6446 fprintf (dump_file, "i%d %s %d %d\n",
6447 INSN_UID (insn),
6448 insn_data[INSN_CODE(insn)].name,
6449 p, t[p]);
6450 }
6451 if (dump_file)
6452 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6453
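 /* Illustrative note (added commentary, not in the original source):
 with 3 dual-pipe insns, 4 pipe0 insns and 2 pipe1 insns the value
 below is MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5. */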
6454 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6455}
6456
6457
5df189be 6458void
6459spu_init_expanders (void)
9d98604b 6460{
5df189be 6461 if (cfun)
9d98604b 6462 {
6463 rtx r0, r1;
6464 /* The register HARD_FRAME_POINTER_REGNUM is only 128-bit aligned
6465 when frame_pointer_needed is true. We don't know that until we're
6466 expanding the prologue. */
6467 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6468
6469 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6470 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6471 to be treated as aligned, so generate them here. */
6472 r0 = gen_reg_rtx (SImode);
6473 r1 = gen_reg_rtx (SImode);
6474 mark_reg_pointer (r0, 128);
6475 mark_reg_pointer (r1, 128);
6476 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6477 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6478 }
ea32e033 6479}
6480
6481static enum machine_mode
6482spu_libgcc_cmp_return_mode (void)
6483{
6484
6485/* For SPU, word_mode is TImode, so it is better to use SImode
6486 for compare returns. */
6487 return SImode;
6488}
6489
6490static enum machine_mode
6491spu_libgcc_shift_count_mode (void)
6492{
6493/* For SPU, word_mode is TImode, so it is better to use SImode
6494 for shift counts. */
6495 return SImode;
6496}
5a976006 6497
6498/* An early place to adjust some flags after GCC has finished
6499 processing them. */
6500static void
6501asm_file_start (void)
6502{
6503 /* Variable tracking should be run after all optimizations which
6504 change order of insns. It also needs a valid CFG. */
6505 spu_flag_var_tracking = flag_var_tracking;
6506 flag_var_tracking = 0;
6507
6508 default_file_start ();
6509}
6510
a08dfd55 6511/* Implement targetm.section_type_flags. */
6512static unsigned int
6513spu_section_type_flags (tree decl, const char *name, int reloc)
6514{
6515 /* .toe needs to have type @nobits. */
6516 if (strcmp (name, ".toe") == 0)
6517 return SECTION_BSS;
6518 return default_section_type_flags (decl, name, reloc);
6519}
c2233b46 6520
56c7bfc2 6521/* Generate a constant or register which contains 2^SCALE. We assume
6522 the result is valid for MODE. Currently, MODE must be V4SFmode and
6523 SCALE must be SImode. */
6524rtx
6525spu_gen_exp2 (enum machine_mode mode, rtx scale)
6526{
6527 gcc_assert (mode == V4SFmode);
6528 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6529 if (GET_CODE (scale) != CONST_INT)
6530 {
6531 /* unsigned int exp = (127 + scale) << 23;
6532 __vector float m = (__vector float) spu_splats (exp); */
6533 rtx reg = force_reg (SImode, scale);
6534 rtx exp = gen_reg_rtx (SImode);
6535 rtx mul = gen_reg_rtx (mode);
6536 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6537 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6538 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6539 return mul;
6540 }
6541 else
6542 {
6543 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6544 unsigned char arr[16];
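 /* Illustrative note (added commentary, not in the original source):
 SCALE == 1 gives exp == 128, so each word below is packed as
 0x40000000, the single-precision encoding of 2.0f. */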
6545 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6546 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6547 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6548 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6549 return array_to_constant (mode, arr);
6550 }
6551}
6552
9d98604b 6553/* After reload, just change the convert into a move instruction
6554 or a dead instruction. */
6555void
6556spu_split_convert (rtx ops[])
6557{
6558 if (REGNO (ops[0]) == REGNO (ops[1]))
6559 emit_note (NOTE_INSN_DELETED);
6560 else
6561 {
6562 /* Use TImode always as this might help hard reg copyprop. */
6563 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6564 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6565 emit_insn (gen_move_insn (op0, op1));
6566 }
6567}
6568
c2233b46 6569#include "gt-spu.h"