]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/spu/spu.c
* gimplify.c (gimplify_modify_expr_rhs) <VAR_DECL>: Do not do a direct
[thirdparty/gcc.git] / gcc / config / spu / spu.c
CommitLineData
cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
5a976006 57#include "sbitmap.h"
58#include "timevar.h"
59#include "df.h"
6352eedf 60
61/* Builtin types, data and prototypes. */
62struct spu_builtin_range
63{
64 int low, high;
65};
66
67static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80};
81
644459d0 82\f
83/* Target specific attribute specifications. */
84char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
85
86/* Prototypes and external defs. */
87static void spu_init_builtins (void);
88static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91static rtx get_pic_reg (void);
92static int need_to_save_reg (int regno, int saving);
93static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97static void emit_nop_for_insn (rtx insn);
98static bool insn_clobbers_hbr (rtx insn);
99static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 100 int distance, sbitmap blocks);
5474166e 101static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
644459d0 103static rtx get_branch_target (rtx branch);
644459d0 104static void spu_machine_dependent_reorg (void);
105static int spu_sched_issue_rate (void);
106static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108static int get_pipe (rtx insn);
644459d0 109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 110static void spu_sched_init_global (FILE *, int, int);
111static void spu_sched_init (FILE *, int, int);
112static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 113static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119static int spu_naked_function_p (tree func);
fb80456a 120static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
644459d0 122static tree spu_build_builtin_va_list (void);
8a58ed0a 123static void spu_va_start (tree, rtx);
75a70cf9 124static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 126static int regno_aligned_for_load (int regno);
127static int store_with_one_insn_p (rtx mem);
644459d0 128static int mem_is_padded_component_ref (rtx x);
129static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130static void spu_asm_globalize_label (FILE * file, const char *name);
131static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 132 int *total, bool speed);
644459d0 133static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134static void spu_init_libfuncs (void);
fb80456a 135static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 136static void fix_range (const char *);
69ced2d6 137static void spu_encode_section_info (tree, rtx, int);
e99f512d 138static tree spu_builtin_mul_widen_even (tree);
139static tree spu_builtin_mul_widen_odd (tree);
a76866d3 140static tree spu_builtin_mask_for_load (void);
a28df51d 141static int spu_builtin_vectorization_cost (bool);
a9f1838b 142static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 143static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 144static int spu_sms_res_mii (struct ddg *g);
5a976006 145static void asm_file_start (void);
a08dfd55 146static unsigned int spu_section_type_flags (tree, const char *, int);
644459d0 147
148extern const char *reg_names[];
149rtx spu_compare_op0, spu_compare_op1;
150
5474166e 151/* Which instruction set architecture to use. */
152int spu_arch;
153/* Which cpu are we tuning for. */
154int spu_tune;
155
5a976006 156/* The hardware requires 8 insns between a hint and the branch it
157 effects. This variable describes how many rtl instructions the
158 compiler needs to see before inserting a hint, and then the compiler
159 will insert enough nops to make it at least 8 insns. The default is
160 for the compiler to allow up to 2 nops be emitted. The nops are
161 inserted in pairs, so we round down. */
162int spu_hint_dist = (8*4) - (2*4);
163
164/* Determines whether we run variable tracking in machine dependent
165 reorganization. */
166static int spu_flag_var_tracking;
167
644459d0 168enum spu_immediate {
169 SPU_NONE,
170 SPU_IL,
171 SPU_ILA,
172 SPU_ILH,
173 SPU_ILHU,
174 SPU_ORI,
175 SPU_ORHI,
176 SPU_ORBI,
99369027 177 SPU_IOHL
644459d0 178};
dea01258 179enum immediate_class
180{
181 IC_POOL, /* constant pool */
182 IC_IL1, /* one il* instruction */
183 IC_IL2, /* both ilhu and iohl instructions */
184 IC_IL1s, /* one il* instruction */
185 IC_IL2s, /* both ilhu and iohl instructions */
186 IC_FSMBI, /* the fsmbi instruction */
187 IC_CPAT, /* one of the c*d instructions */
5df189be 188 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 189};
644459d0 190
191static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
192static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 193static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
194static enum immediate_class classify_immediate (rtx op,
195 enum machine_mode mode);
644459d0 196
1bd43494 197static enum machine_mode spu_unwind_word_mode (void);
198
ea32e033 199static enum machine_mode
200spu_libgcc_cmp_return_mode (void);
201
202static enum machine_mode
203spu_libgcc_shift_count_mode (void);
204
644459d0 205/* Built in types. */
206tree spu_builtin_types[SPU_BTI_MAX];
207\f
208/* TARGET overrides. */
209
210#undef TARGET_INIT_BUILTINS
211#define TARGET_INIT_BUILTINS spu_init_builtins
212
644459d0 213#undef TARGET_EXPAND_BUILTIN
214#define TARGET_EXPAND_BUILTIN spu_expand_builtin
215
1bd43494 216#undef TARGET_UNWIND_WORD_MODE
217#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 218
219/* The .8byte directive doesn't seem to work well for a 32 bit
220 architecture. */
221#undef TARGET_ASM_UNALIGNED_DI_OP
222#define TARGET_ASM_UNALIGNED_DI_OP NULL
223
224#undef TARGET_RTX_COSTS
225#define TARGET_RTX_COSTS spu_rtx_costs
226
227#undef TARGET_ADDRESS_COST
f529eb25 228#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 229
230#undef TARGET_SCHED_ISSUE_RATE
231#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
232
5a976006 233#undef TARGET_SCHED_INIT_GLOBAL
234#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
235
236#undef TARGET_SCHED_INIT
237#define TARGET_SCHED_INIT spu_sched_init
238
644459d0 239#undef TARGET_SCHED_VARIABLE_ISSUE
240#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
241
5a976006 242#undef TARGET_SCHED_REORDER
243#define TARGET_SCHED_REORDER spu_sched_reorder
244
245#undef TARGET_SCHED_REORDER2
246#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 247
248#undef TARGET_SCHED_ADJUST_COST
249#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
250
251const struct attribute_spec spu_attribute_table[];
252#undef TARGET_ATTRIBUTE_TABLE
253#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
254
255#undef TARGET_ASM_INTEGER
256#define TARGET_ASM_INTEGER spu_assemble_integer
257
258#undef TARGET_SCALAR_MODE_SUPPORTED_P
259#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
260
261#undef TARGET_VECTOR_MODE_SUPPORTED_P
262#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
263
264#undef TARGET_FUNCTION_OK_FOR_SIBCALL
265#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
266
267#undef TARGET_ASM_GLOBALIZE_LABEL
268#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
269
270#undef TARGET_PASS_BY_REFERENCE
271#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
272
273#undef TARGET_MUST_PASS_IN_STACK
274#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
275
276#undef TARGET_BUILD_BUILTIN_VA_LIST
277#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
278
8a58ed0a 279#undef TARGET_EXPAND_BUILTIN_VA_START
280#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
281
644459d0 282#undef TARGET_SETUP_INCOMING_VARARGS
283#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
284
285#undef TARGET_MACHINE_DEPENDENT_REORG
286#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
287
288#undef TARGET_GIMPLIFY_VA_ARG_EXPR
289#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
290
291#undef TARGET_DEFAULT_TARGET_FLAGS
292#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
293
294#undef TARGET_INIT_LIBFUNCS
295#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
296
297#undef TARGET_RETURN_IN_MEMORY
298#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
299
69ced2d6 300#undef TARGET_ENCODE_SECTION_INFO
301#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
302
e99f512d 303#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
304#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
305
306#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
307#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
308
a76866d3 309#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
310#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
311
a28df51d 312#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
313#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
314
0e87db76 315#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
316#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
317
a0515226 318#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
319#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
320
ea32e033 321#undef TARGET_LIBGCC_CMP_RETURN_MODE
322#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
323
324#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
325#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
326
d52fd16a 327#undef TARGET_SCHED_SMS_RES_MII
328#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
329
5a976006 330#undef TARGET_ASM_FILE_START
331#define TARGET_ASM_FILE_START asm_file_start
332
a08dfd55 333#undef TARGET_SECTION_TYPE_FLAGS
334#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
335
644459d0 336struct gcc_target targetm = TARGET_INITIALIZER;
337
5df189be 338void
339spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
340{
5df189be 341 /* Override some of the default param values. With so many registers
342 larger values are better for these params. */
343 MAX_PENDING_LIST_LENGTH = 128;
344
345 /* With so many registers this is better on by default. */
346 flag_rename_registers = 1;
347}
348
644459d0 349/* Sometimes certain combinations of command options do not make sense
350 on a particular target machine. You can define a macro
351 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
352 executed once just after all the command options have been parsed. */
353void
354spu_override_options (void)
355{
14d408d9 356 /* Small loops will be unpeeled at -O3. For SPU it is more important
357 to keep code small by default. */
358 if (!flag_unroll_loops && !flag_peel_loops
359 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
360 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
361
644459d0 362 flag_omit_frame_pointer = 1;
363
5a976006 364 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 365 if (align_functions < 8)
366 align_functions = 8;
c7b91b14 367
5a976006 368 spu_hint_dist = 8*4 - spu_max_nops*4;
369 if (spu_hint_dist < 0)
370 spu_hint_dist = 0;
371
c7b91b14 372 if (spu_fixed_range_string)
373 fix_range (spu_fixed_range_string);
5474166e 374
375 /* Determine processor architectural level. */
376 if (spu_arch_string)
377 {
378 if (strcmp (&spu_arch_string[0], "cell") == 0)
379 spu_arch = PROCESSOR_CELL;
380 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
381 spu_arch = PROCESSOR_CELLEDP;
382 else
383 error ("Unknown architecture '%s'", &spu_arch_string[0]);
384 }
385
386 /* Determine processor to tune for. */
387 if (spu_tune_string)
388 {
389 if (strcmp (&spu_tune_string[0], "cell") == 0)
390 spu_tune = PROCESSOR_CELL;
391 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
392 spu_tune = PROCESSOR_CELLEDP;
393 else
394 error ("Unknown architecture '%s'", &spu_tune_string[0]);
395 }
98bbec1e 396
13684256 397 /* Change defaults according to the processor architecture. */
398 if (spu_arch == PROCESSOR_CELLEDP)
399 {
400 /* If no command line option has been otherwise specified, change
401 the default to -mno-safe-hints on celledp -- only the original
402 Cell/B.E. processors require this workaround. */
403 if (!(target_flags_explicit & MASK_SAFE_HINTS))
404 target_flags &= ~MASK_SAFE_HINTS;
405 }
406
98bbec1e 407 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 408}
409\f
410/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
411 struct attribute_spec.handler. */
412
413/* Table of machine attributes. */
414const struct attribute_spec spu_attribute_table[] =
415{
416 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
417 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
418 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
419 { NULL, 0, 0, false, false, false, NULL }
420};
421
422/* True if MODE is valid for the target. By "valid", we mean able to
423 be manipulated in non-trivial ways. In particular, this means all
424 the arithmetic is supported. */
425static bool
426spu_scalar_mode_supported_p (enum machine_mode mode)
427{
428 switch (mode)
429 {
430 case QImode:
431 case HImode:
432 case SImode:
433 case SFmode:
434 case DImode:
435 case TImode:
436 case DFmode:
437 return true;
438
439 default:
440 return false;
441 }
442}
443
444/* Similarly for vector modes. "Supported" here is less strict. At
445 least some operations are supported; need to check optabs or builtins
446 for further details. */
447static bool
448spu_vector_mode_supported_p (enum machine_mode mode)
449{
450 switch (mode)
451 {
452 case V16QImode:
453 case V8HImode:
454 case V4SImode:
455 case V2DImode:
456 case V4SFmode:
457 case V2DFmode:
458 return true;
459
460 default:
461 return false;
462 }
463}
464
465/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
466 least significant bytes of the outer mode. This function returns
467 TRUE for the SUBREG's where this is correct. */
468int
469valid_subreg (rtx op)
470{
471 enum machine_mode om = GET_MODE (op);
472 enum machine_mode im = GET_MODE (SUBREG_REG (op));
473 return om != VOIDmode && im != VOIDmode
474 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 475 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
476 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 477}
478
479/* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
fa7637bd 480 and adjust the start offset. */
644459d0 481static rtx
482adjust_operand (rtx op, HOST_WIDE_INT * start)
483{
484 enum machine_mode mode;
485 int op_size;
38aca5eb 486 /* Strip any paradoxical SUBREG. */
487 if (GET_CODE (op) == SUBREG
488 && (GET_MODE_BITSIZE (GET_MODE (op))
489 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 490 {
491 if (start)
492 *start -=
493 GET_MODE_BITSIZE (GET_MODE (op)) -
494 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
495 op = SUBREG_REG (op);
496 }
497 /* If it is smaller than SI, assure a SUBREG */
498 op_size = GET_MODE_BITSIZE (GET_MODE (op));
499 if (op_size < 32)
500 {
501 if (start)
502 *start += 32 - op_size;
503 op_size = 32;
504 }
505 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
506 mode = mode_for_size (op_size, MODE_INT, 0);
507 if (mode != GET_MODE (op))
508 op = gen_rtx_SUBREG (mode, op, 0);
509 return op;
510}
511
512void
513spu_expand_extv (rtx ops[], int unsignedp)
514{
515 HOST_WIDE_INT width = INTVAL (ops[2]);
516 HOST_WIDE_INT start = INTVAL (ops[3]);
517 HOST_WIDE_INT src_size, dst_size;
518 enum machine_mode src_mode, dst_mode;
519 rtx dst = ops[0], src = ops[1];
520 rtx s;
521
522 dst = adjust_operand (ops[0], 0);
523 dst_mode = GET_MODE (dst);
524 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
525
644459d0 526 src = adjust_operand (src, &start);
527 src_mode = GET_MODE (src);
528 src_size = GET_MODE_BITSIZE (GET_MODE (src));
529
530 if (start > 0)
531 {
532 s = gen_reg_rtx (src_mode);
533 switch (src_mode)
534 {
535 case SImode:
536 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
537 break;
538 case DImode:
539 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
540 break;
541 case TImode:
542 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
543 break;
544 default:
545 abort ();
546 }
547 src = s;
548 }
549
550 if (width < src_size)
551 {
552 rtx pat;
553 int icode;
554 switch (src_mode)
555 {
556 case SImode:
557 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
558 break;
559 case DImode:
560 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
561 break;
562 case TImode:
563 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
564 break;
565 default:
566 abort ();
567 }
568 s = gen_reg_rtx (src_mode);
569 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
570 emit_insn (pat);
571 src = s;
572 }
573
574 convert_move (dst, src, unsignedp);
575}
576
577void
578spu_expand_insv (rtx ops[])
579{
580 HOST_WIDE_INT width = INTVAL (ops[1]);
581 HOST_WIDE_INT start = INTVAL (ops[2]);
582 HOST_WIDE_INT maskbits;
583 enum machine_mode dst_mode, src_mode;
584 rtx dst = ops[0], src = ops[3];
585 int dst_size, src_size;
586 rtx mask;
587 rtx shift_reg;
588 int shift;
589
590
591 if (GET_CODE (ops[0]) == MEM)
592 dst = gen_reg_rtx (TImode);
593 else
594 dst = adjust_operand (dst, &start);
595 dst_mode = GET_MODE (dst);
596 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
597
598 if (CONSTANT_P (src))
599 {
600 enum machine_mode m =
601 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
602 src = force_reg (m, convert_to_mode (m, src, 0));
603 }
604 src = adjust_operand (src, 0);
605 src_mode = GET_MODE (src);
606 src_size = GET_MODE_BITSIZE (GET_MODE (src));
607
608 mask = gen_reg_rtx (dst_mode);
609 shift_reg = gen_reg_rtx (dst_mode);
610 shift = dst_size - start - width;
611
612 /* It's not safe to use subreg here because the compiler assumes
613 that the SUBREG_REG is right justified in the SUBREG. */
614 convert_move (shift_reg, src, 1);
615
616 if (shift > 0)
617 {
618 switch (dst_mode)
619 {
620 case SImode:
621 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
622 break;
623 case DImode:
624 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
625 break;
626 case TImode:
627 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
628 break;
629 default:
630 abort ();
631 }
632 }
633 else if (shift < 0)
634 abort ();
635
636 switch (dst_size)
637 {
638 case 32:
639 maskbits = (-1ll << (32 - width - start));
640 if (start)
641 maskbits += (1ll << (32 - start));
642 emit_move_insn (mask, GEN_INT (maskbits));
643 break;
644 case 64:
645 maskbits = (-1ll << (64 - width - start));
646 if (start)
647 maskbits += (1ll << (64 - start));
648 emit_move_insn (mask, GEN_INT (maskbits));
649 break;
650 case 128:
651 {
652 unsigned char arr[16];
653 int i = start / 8;
654 memset (arr, 0, sizeof (arr));
655 arr[i] = 0xff >> (start & 7);
656 for (i++; i <= (start + width - 1) / 8; i++)
657 arr[i] = 0xff;
658 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
659 emit_move_insn (mask, array_to_constant (TImode, arr));
660 }
661 break;
662 default:
663 abort ();
664 }
665 if (GET_CODE (ops[0]) == MEM)
666 {
667 rtx aligned = gen_reg_rtx (SImode);
668 rtx low = gen_reg_rtx (SImode);
669 rtx addr = gen_reg_rtx (SImode);
670 rtx rotl = gen_reg_rtx (SImode);
671 rtx mask0 = gen_reg_rtx (TImode);
672 rtx mem;
673
674 emit_move_insn (addr, XEXP (ops[0], 0));
675 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
676 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
677 emit_insn (gen_negsi2 (rotl, low));
678 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
679 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
680 mem = change_address (ops[0], TImode, aligned);
681 set_mem_alias_set (mem, 0);
682 emit_move_insn (dst, mem);
683 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
684 emit_move_insn (mem, dst);
685 if (start + width > MEM_ALIGN (ops[0]))
686 {
687 rtx shl = gen_reg_rtx (SImode);
688 rtx mask1 = gen_reg_rtx (TImode);
689 rtx dst1 = gen_reg_rtx (TImode);
690 rtx mem1;
691 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
692 emit_insn (gen_shlqby_ti (mask1, mask, shl));
693 mem1 = adjust_address (mem, TImode, 16);
694 set_mem_alias_set (mem1, 0);
695 emit_move_insn (dst1, mem1);
696 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
697 emit_move_insn (mem1, dst1);
698 }
699 }
700 else
71cd778d 701 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 702}
703
704
705int
706spu_expand_block_move (rtx ops[])
707{
708 HOST_WIDE_INT bytes, align, offset;
709 rtx src, dst, sreg, dreg, target;
710 int i;
711 if (GET_CODE (ops[2]) != CONST_INT
712 || GET_CODE (ops[3]) != CONST_INT
48eb4342 713 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 714 return 0;
715
716 bytes = INTVAL (ops[2]);
717 align = INTVAL (ops[3]);
718
719 if (bytes <= 0)
720 return 1;
721
722 dst = ops[0];
723 src = ops[1];
724
725 if (align == 16)
726 {
727 for (offset = 0; offset + 16 <= bytes; offset += 16)
728 {
729 dst = adjust_address (ops[0], V16QImode, offset);
730 src = adjust_address (ops[1], V16QImode, offset);
731 emit_move_insn (dst, src);
732 }
733 if (offset < bytes)
734 {
735 rtx mask;
736 unsigned char arr[16] = { 0 };
737 for (i = 0; i < bytes - offset; i++)
738 arr[i] = 0xff;
739 dst = adjust_address (ops[0], V16QImode, offset);
740 src = adjust_address (ops[1], V16QImode, offset);
741 mask = gen_reg_rtx (V16QImode);
742 sreg = gen_reg_rtx (V16QImode);
743 dreg = gen_reg_rtx (V16QImode);
744 target = gen_reg_rtx (V16QImode);
745 emit_move_insn (mask, array_to_constant (V16QImode, arr));
746 emit_move_insn (dreg, dst);
747 emit_move_insn (sreg, src);
748 emit_insn (gen_selb (target, dreg, sreg, mask));
749 emit_move_insn (dst, target);
750 }
751 return 1;
752 }
753 return 0;
754}
755
756enum spu_comp_code
757{ SPU_EQ, SPU_GT, SPU_GTU };
758
5474166e 759int spu_comp_icode[12][3] = {
760 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
761 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
762 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
763 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
764 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
765 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
766 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
767 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
768 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
769 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
770 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
771 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 772};
773
774/* Generate a compare for CODE. Return a brand-new rtx that represents
775 the result of the compare. GCC can figure this out too if we don't
776 provide all variations of compares, but GCC always wants to use
777 WORD_MODE, we can generate better code in most cases if we do it
778 ourselves. */
779void
780spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
781{
782 int reverse_compare = 0;
783 int reverse_test = 0;
5d70b918 784 rtx compare_result, eq_result;
785 rtx comp_rtx, eq_rtx;
644459d0 786 rtx target = operands[0];
787 enum machine_mode comp_mode;
788 enum machine_mode op_mode;
b9c74b4d 789 enum spu_comp_code scode, eq_code;
790 enum insn_code ior_code;
644459d0 791 int index;
5d70b918 792 int eq_test = 0;
644459d0 793
794 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
795 and so on, to keep the constant in operand 1. */
796 if (GET_CODE (spu_compare_op1) == CONST_INT)
797 {
798 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
799 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
800 switch (code)
801 {
802 case GE:
803 spu_compare_op1 = GEN_INT (val);
804 code = GT;
805 break;
806 case LT:
807 spu_compare_op1 = GEN_INT (val);
808 code = LE;
809 break;
810 case GEU:
811 spu_compare_op1 = GEN_INT (val);
812 code = GTU;
813 break;
814 case LTU:
815 spu_compare_op1 = GEN_INT (val);
816 code = LEU;
817 break;
818 default:
819 break;
820 }
821 }
822
5d70b918 823 comp_mode = SImode;
824 op_mode = GET_MODE (spu_compare_op0);
825
644459d0 826 switch (code)
827 {
828 case GE:
644459d0 829 scode = SPU_GT;
07027691 830 if (HONOR_NANS (op_mode))
5d70b918 831 {
832 reverse_compare = 0;
833 reverse_test = 0;
834 eq_test = 1;
835 eq_code = SPU_EQ;
836 }
837 else
838 {
839 reverse_compare = 1;
840 reverse_test = 1;
841 }
644459d0 842 break;
843 case LE:
644459d0 844 scode = SPU_GT;
07027691 845 if (HONOR_NANS (op_mode))
5d70b918 846 {
847 reverse_compare = 1;
848 reverse_test = 0;
849 eq_test = 1;
850 eq_code = SPU_EQ;
851 }
852 else
853 {
854 reverse_compare = 0;
855 reverse_test = 1;
856 }
644459d0 857 break;
858 case LT:
859 reverse_compare = 1;
860 reverse_test = 0;
861 scode = SPU_GT;
862 break;
863 case GEU:
864 reverse_compare = 1;
865 reverse_test = 1;
866 scode = SPU_GTU;
867 break;
868 case LEU:
869 reverse_compare = 0;
870 reverse_test = 1;
871 scode = SPU_GTU;
872 break;
873 case LTU:
874 reverse_compare = 1;
875 reverse_test = 0;
876 scode = SPU_GTU;
877 break;
878 case NE:
879 reverse_compare = 0;
880 reverse_test = 1;
881 scode = SPU_EQ;
882 break;
883
884 case EQ:
885 scode = SPU_EQ;
886 break;
887 case GT:
888 scode = SPU_GT;
889 break;
890 case GTU:
891 scode = SPU_GTU;
892 break;
893 default:
894 scode = SPU_EQ;
895 break;
896 }
897
644459d0 898 switch (op_mode)
899 {
900 case QImode:
901 index = 0;
902 comp_mode = QImode;
903 break;
904 case HImode:
905 index = 1;
906 comp_mode = HImode;
907 break;
908 case SImode:
909 index = 2;
910 break;
911 case DImode:
912 index = 3;
913 break;
914 case TImode:
915 index = 4;
916 break;
917 case SFmode:
918 index = 5;
919 break;
920 case DFmode:
921 index = 6;
922 break;
923 case V16QImode:
5474166e 924 index = 7;
925 comp_mode = op_mode;
926 break;
644459d0 927 case V8HImode:
5474166e 928 index = 8;
929 comp_mode = op_mode;
930 break;
644459d0 931 case V4SImode:
5474166e 932 index = 9;
933 comp_mode = op_mode;
934 break;
644459d0 935 case V4SFmode:
5474166e 936 index = 10;
937 comp_mode = V4SImode;
938 break;
644459d0 939 case V2DFmode:
5474166e 940 index = 11;
941 comp_mode = V2DImode;
644459d0 942 break;
5474166e 943 case V2DImode:
644459d0 944 default:
945 abort ();
946 }
947
07027691 948 if (GET_MODE (spu_compare_op1) == DFmode
949 && (scode != SPU_GT && scode != SPU_EQ))
950 abort ();
644459d0 951
952 if (is_set == 0 && spu_compare_op1 == const0_rtx
953 && (GET_MODE (spu_compare_op0) == SImode
954 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
955 {
956 /* Don't need to set a register with the result when we are
957 comparing against zero and branching. */
958 reverse_test = !reverse_test;
959 compare_result = spu_compare_op0;
960 }
961 else
962 {
963 compare_result = gen_reg_rtx (comp_mode);
964
965 if (reverse_compare)
966 {
967 rtx t = spu_compare_op1;
968 spu_compare_op1 = spu_compare_op0;
969 spu_compare_op0 = t;
970 }
971
972 if (spu_comp_icode[index][scode] == 0)
973 abort ();
974
975 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
976 (spu_compare_op0, op_mode))
977 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
978 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
979 (spu_compare_op1, op_mode))
980 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
981 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
982 spu_compare_op0,
983 spu_compare_op1);
984 if (comp_rtx == 0)
985 abort ();
986 emit_insn (comp_rtx);
987
5d70b918 988 if (eq_test)
989 {
990 eq_result = gen_reg_rtx (comp_mode);
991 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
992 spu_compare_op0,
993 spu_compare_op1);
994 if (eq_rtx == 0)
995 abort ();
996 emit_insn (eq_rtx);
997 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
998 gcc_assert (ior_code != CODE_FOR_nothing);
999 emit_insn (GEN_FCN (ior_code)
1000 (compare_result, compare_result, eq_result));
1001 }
644459d0 1002 }
1003
1004 if (is_set == 0)
1005 {
1006 rtx bcomp;
1007 rtx loc_ref;
1008
1009 /* We don't have branch on QI compare insns, so we convert the
1010 QI compare result to a HI result. */
1011 if (comp_mode == QImode)
1012 {
1013 rtx old_res = compare_result;
1014 compare_result = gen_reg_rtx (HImode);
1015 comp_mode = HImode;
1016 emit_insn (gen_extendqihi2 (compare_result, old_res));
1017 }
1018
1019 if (reverse_test)
1020 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1021 else
1022 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1023
1024 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1025 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1026 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1027 loc_ref, pc_rtx)));
1028 }
1029 else if (is_set == 2)
1030 {
1031 int compare_size = GET_MODE_BITSIZE (comp_mode);
1032 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1033 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1034 rtx select_mask;
1035 rtx op_t = operands[2];
1036 rtx op_f = operands[3];
1037
1038 /* The result of the comparison can be SI, HI or QI mode. Create a
1039 mask based on that result. */
1040 if (target_size > compare_size)
1041 {
1042 select_mask = gen_reg_rtx (mode);
1043 emit_insn (gen_extend_compare (select_mask, compare_result));
1044 }
1045 else if (target_size < compare_size)
1046 select_mask =
1047 gen_rtx_SUBREG (mode, compare_result,
1048 (compare_size - target_size) / BITS_PER_UNIT);
1049 else if (comp_mode != mode)
1050 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1051 else
1052 select_mask = compare_result;
1053
1054 if (GET_MODE (target) != GET_MODE (op_t)
1055 || GET_MODE (target) != GET_MODE (op_f))
1056 abort ();
1057
1058 if (reverse_test)
1059 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1060 else
1061 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1062 }
1063 else
1064 {
1065 if (reverse_test)
1066 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1067 gen_rtx_NOT (comp_mode, compare_result)));
1068 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1069 emit_insn (gen_extendhisi2 (target, compare_result));
1070 else if (GET_MODE (target) == SImode
1071 && GET_MODE (compare_result) == QImode)
1072 emit_insn (gen_extend_compare (target, compare_result));
1073 else
1074 emit_move_insn (target, compare_result);
1075 }
1076}
1077
1078HOST_WIDE_INT
1079const_double_to_hwint (rtx x)
1080{
1081 HOST_WIDE_INT val;
1082 REAL_VALUE_TYPE rv;
1083 if (GET_MODE (x) == SFmode)
1084 {
1085 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1086 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1087 }
1088 else if (GET_MODE (x) == DFmode)
1089 {
1090 long l[2];
1091 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1092 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1093 val = l[0];
1094 val = (val << 32) | (l[1] & 0xffffffff);
1095 }
1096 else
1097 abort ();
1098 return val;
1099}
1100
1101rtx
1102hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1103{
1104 long tv[2];
1105 REAL_VALUE_TYPE rv;
1106 gcc_assert (mode == SFmode || mode == DFmode);
1107
1108 if (mode == SFmode)
1109 tv[0] = (v << 32) >> 32;
1110 else if (mode == DFmode)
1111 {
1112 tv[1] = (v << 32) >> 32;
1113 tv[0] = v >> 32;
1114 }
1115 real_from_target (&rv, tv, mode);
1116 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1117}
1118
1119void
1120print_operand_address (FILE * file, register rtx addr)
1121{
1122 rtx reg;
1123 rtx offset;
1124
e04cf423 1125 if (GET_CODE (addr) == AND
1126 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1127 && INTVAL (XEXP (addr, 1)) == -16)
1128 addr = XEXP (addr, 0);
1129
644459d0 1130 switch (GET_CODE (addr))
1131 {
1132 case REG:
1133 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1134 break;
1135
1136 case PLUS:
1137 reg = XEXP (addr, 0);
1138 offset = XEXP (addr, 1);
1139 if (GET_CODE (offset) == REG)
1140 {
1141 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1142 reg_names[REGNO (offset)]);
1143 }
1144 else if (GET_CODE (offset) == CONST_INT)
1145 {
1146 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1147 INTVAL (offset), reg_names[REGNO (reg)]);
1148 }
1149 else
1150 abort ();
1151 break;
1152
1153 case CONST:
1154 case LABEL_REF:
1155 case SYMBOL_REF:
1156 case CONST_INT:
1157 output_addr_const (file, addr);
1158 break;
1159
1160 default:
1161 debug_rtx (addr);
1162 abort ();
1163 }
1164}
1165
1166void
1167print_operand (FILE * file, rtx x, int code)
1168{
1169 enum machine_mode mode = GET_MODE (x);
1170 HOST_WIDE_INT val;
1171 unsigned char arr[16];
1172 int xcode = GET_CODE (x);
dea01258 1173 int i, info;
644459d0 1174 if (GET_MODE (x) == VOIDmode)
1175 switch (code)
1176 {
644459d0 1177 case 'L': /* 128 bits, signed */
1178 case 'm': /* 128 bits, signed */
1179 case 'T': /* 128 bits, signed */
1180 case 't': /* 128 bits, signed */
1181 mode = TImode;
1182 break;
644459d0 1183 case 'K': /* 64 bits, signed */
1184 case 'k': /* 64 bits, signed */
1185 case 'D': /* 64 bits, signed */
1186 case 'd': /* 64 bits, signed */
1187 mode = DImode;
1188 break;
644459d0 1189 case 'J': /* 32 bits, signed */
1190 case 'j': /* 32 bits, signed */
1191 case 's': /* 32 bits, signed */
1192 case 'S': /* 32 bits, signed */
1193 mode = SImode;
1194 break;
1195 }
1196 switch (code)
1197 {
1198
1199 case 'j': /* 32 bits, signed */
1200 case 'k': /* 64 bits, signed */
1201 case 'm': /* 128 bits, signed */
1202 if (xcode == CONST_INT
1203 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1204 {
1205 gcc_assert (logical_immediate_p (x, mode));
1206 constant_to_array (mode, x, arr);
1207 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1208 val = trunc_int_for_mode (val, SImode);
1209 switch (which_logical_immediate (val))
1210 {
1211 case SPU_ORI:
1212 break;
1213 case SPU_ORHI:
1214 fprintf (file, "h");
1215 break;
1216 case SPU_ORBI:
1217 fprintf (file, "b");
1218 break;
1219 default:
1220 gcc_unreachable();
1221 }
1222 }
1223 else
1224 gcc_unreachable();
1225 return;
1226
1227 case 'J': /* 32 bits, signed */
1228 case 'K': /* 64 bits, signed */
1229 case 'L': /* 128 bits, signed */
1230 if (xcode == CONST_INT
1231 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1232 {
1233 gcc_assert (logical_immediate_p (x, mode)
1234 || iohl_immediate_p (x, mode));
1235 constant_to_array (mode, x, arr);
1236 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1237 val = trunc_int_for_mode (val, SImode);
1238 switch (which_logical_immediate (val))
1239 {
1240 case SPU_ORI:
1241 case SPU_IOHL:
1242 break;
1243 case SPU_ORHI:
1244 val = trunc_int_for_mode (val, HImode);
1245 break;
1246 case SPU_ORBI:
1247 val = trunc_int_for_mode (val, QImode);
1248 break;
1249 default:
1250 gcc_unreachable();
1251 }
1252 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1253 }
1254 else
1255 gcc_unreachable();
1256 return;
1257
1258 case 't': /* 128 bits, signed */
1259 case 'd': /* 64 bits, signed */
1260 case 's': /* 32 bits, signed */
dea01258 1261 if (CONSTANT_P (x))
644459d0 1262 {
dea01258 1263 enum immediate_class c = classify_immediate (x, mode);
1264 switch (c)
1265 {
1266 case IC_IL1:
1267 constant_to_array (mode, x, arr);
1268 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1269 val = trunc_int_for_mode (val, SImode);
1270 switch (which_immediate_load (val))
1271 {
1272 case SPU_IL:
1273 break;
1274 case SPU_ILA:
1275 fprintf (file, "a");
1276 break;
1277 case SPU_ILH:
1278 fprintf (file, "h");
1279 break;
1280 case SPU_ILHU:
1281 fprintf (file, "hu");
1282 break;
1283 default:
1284 gcc_unreachable ();
1285 }
1286 break;
1287 case IC_CPAT:
1288 constant_to_array (mode, x, arr);
1289 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1290 if (info == 1)
1291 fprintf (file, "b");
1292 else if (info == 2)
1293 fprintf (file, "h");
1294 else if (info == 4)
1295 fprintf (file, "w");
1296 else if (info == 8)
1297 fprintf (file, "d");
1298 break;
1299 case IC_IL1s:
1300 if (xcode == CONST_VECTOR)
1301 {
1302 x = CONST_VECTOR_ELT (x, 0);
1303 xcode = GET_CODE (x);
1304 }
1305 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1306 fprintf (file, "a");
1307 else if (xcode == HIGH)
1308 fprintf (file, "hu");
1309 break;
1310 case IC_FSMBI:
5df189be 1311 case IC_FSMBI2:
dea01258 1312 case IC_IL2:
1313 case IC_IL2s:
1314 case IC_POOL:
1315 abort ();
1316 }
644459d0 1317 }
644459d0 1318 else
1319 gcc_unreachable ();
1320 return;
1321
1322 case 'T': /* 128 bits, signed */
1323 case 'D': /* 64 bits, signed */
1324 case 'S': /* 32 bits, signed */
dea01258 1325 if (CONSTANT_P (x))
644459d0 1326 {
dea01258 1327 enum immediate_class c = classify_immediate (x, mode);
1328 switch (c)
644459d0 1329 {
dea01258 1330 case IC_IL1:
1331 constant_to_array (mode, x, arr);
1332 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1333 val = trunc_int_for_mode (val, SImode);
1334 switch (which_immediate_load (val))
1335 {
1336 case SPU_IL:
1337 case SPU_ILA:
1338 break;
1339 case SPU_ILH:
1340 case SPU_ILHU:
1341 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1342 break;
1343 default:
1344 gcc_unreachable ();
1345 }
1346 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1347 break;
1348 case IC_FSMBI:
1349 constant_to_array (mode, x, arr);
1350 val = 0;
1351 for (i = 0; i < 16; i++)
1352 {
1353 val <<= 1;
1354 val |= arr[i] & 1;
1355 }
1356 print_operand (file, GEN_INT (val), 0);
1357 break;
1358 case IC_CPAT:
1359 constant_to_array (mode, x, arr);
1360 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1362 break;
dea01258 1363 case IC_IL1s:
dea01258 1364 if (xcode == HIGH)
5df189be 1365 x = XEXP (x, 0);
1366 if (GET_CODE (x) == CONST_VECTOR)
1367 x = CONST_VECTOR_ELT (x, 0);
1368 output_addr_const (file, x);
1369 if (xcode == HIGH)
1370 fprintf (file, "@h");
644459d0 1371 break;
dea01258 1372 case IC_IL2:
1373 case IC_IL2s:
5df189be 1374 case IC_FSMBI2:
dea01258 1375 case IC_POOL:
1376 abort ();
644459d0 1377 }
c8befdb9 1378 }
644459d0 1379 else
1380 gcc_unreachable ();
1381 return;
1382
644459d0 1383 case 'C':
1384 if (xcode == CONST_INT)
1385 {
1386 /* Only 4 least significant bits are relevant for generate
1387 control word instructions. */
1388 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1389 return;
1390 }
1391 break;
1392
1393 case 'M': /* print code for c*d */
1394 if (GET_CODE (x) == CONST_INT)
1395 switch (INTVAL (x))
1396 {
1397 case 1:
1398 fprintf (file, "b");
1399 break;
1400 case 2:
1401 fprintf (file, "h");
1402 break;
1403 case 4:
1404 fprintf (file, "w");
1405 break;
1406 case 8:
1407 fprintf (file, "d");
1408 break;
1409 default:
1410 gcc_unreachable();
1411 }
1412 else
1413 gcc_unreachable();
1414 return;
1415
1416 case 'N': /* Negate the operand */
1417 if (xcode == CONST_INT)
1418 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1419 else if (xcode == CONST_VECTOR)
1420 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1421 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1422 return;
1423
1424 case 'I': /* enable/disable interrupts */
1425 if (xcode == CONST_INT)
1426 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1427 return;
1428
1429 case 'b': /* branch modifiers */
1430 if (xcode == REG)
1431 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1432 else if (COMPARISON_P (x))
1433 fprintf (file, "%s", xcode == NE ? "n" : "");
1434 return;
1435
1436 case 'i': /* indirect call */
1437 if (xcode == MEM)
1438 {
1439 if (GET_CODE (XEXP (x, 0)) == REG)
1440 /* Used in indirect function calls. */
1441 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1442 else
1443 output_address (XEXP (x, 0));
1444 }
1445 return;
1446
1447 case 'p': /* load/store */
1448 if (xcode == MEM)
1449 {
1450 x = XEXP (x, 0);
1451 xcode = GET_CODE (x);
1452 }
e04cf423 1453 if (xcode == AND)
1454 {
1455 x = XEXP (x, 0);
1456 xcode = GET_CODE (x);
1457 }
644459d0 1458 if (xcode == REG)
1459 fprintf (file, "d");
1460 else if (xcode == CONST_INT)
1461 fprintf (file, "a");
1462 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1463 fprintf (file, "r");
1464 else if (xcode == PLUS || xcode == LO_SUM)
1465 {
1466 if (GET_CODE (XEXP (x, 1)) == REG)
1467 fprintf (file, "x");
1468 else
1469 fprintf (file, "d");
1470 }
1471 return;
1472
5df189be 1473 case 'e':
1474 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1475 val &= 0x7;
1476 output_addr_const (file, GEN_INT (val));
1477 return;
1478
1479 case 'f':
1480 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1481 val &= 0x1f;
1482 output_addr_const (file, GEN_INT (val));
1483 return;
1484
1485 case 'g':
1486 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1487 val &= 0x3f;
1488 output_addr_const (file, GEN_INT (val));
1489 return;
1490
1491 case 'h':
1492 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1493 val = (val >> 3) & 0x1f;
1494 output_addr_const (file, GEN_INT (val));
1495 return;
1496
1497 case 'E':
1498 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1499 val = -val;
1500 val &= 0x7;
1501 output_addr_const (file, GEN_INT (val));
1502 return;
1503
1504 case 'F':
1505 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1506 val = -val;
1507 val &= 0x1f;
1508 output_addr_const (file, GEN_INT (val));
1509 return;
1510
1511 case 'G':
1512 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1513 val = -val;
1514 val &= 0x3f;
1515 output_addr_const (file, GEN_INT (val));
1516 return;
1517
1518 case 'H':
1519 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1520 val = -(val & -8ll);
1521 val = (val >> 3) & 0x1f;
1522 output_addr_const (file, GEN_INT (val));
1523 return;
1524
644459d0 1525 case 0:
1526 if (xcode == REG)
1527 fprintf (file, "%s", reg_names[REGNO (x)]);
1528 else if (xcode == MEM)
1529 output_address (XEXP (x, 0));
1530 else if (xcode == CONST_VECTOR)
dea01258 1531 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1532 else
1533 output_addr_const (file, x);
1534 return;
1535
f6a0d06f 1536 /* unused letters
5df189be 1537 o qr uvw yz
1538 AB OPQR UVWXYZ */
644459d0 1539 default:
1540 output_operand_lossage ("invalid %%xn code");
1541 }
1542 gcc_unreachable ();
1543}
1544
1545extern char call_used_regs[];
644459d0 1546
1547/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1548 caller saved register. For leaf functions it is more efficient to
1549 use a volatile register because we won't need to save and restore the
1550 pic register. This routine is only valid after register allocation
1551 is completed, so we can pick an unused register. */
1552static rtx
1553get_pic_reg (void)
1554{
1555 rtx pic_reg = pic_offset_table_rtx;
1556 if (!reload_completed && !reload_in_progress)
1557 abort ();
1558 return pic_reg;
1559}
1560
5df189be 1561/* Split constant addresses to handle cases that are too large.
1562 Add in the pic register when in PIC mode.
1563 Split immediates that require more than 1 instruction. */
dea01258 1564int
1565spu_split_immediate (rtx * ops)
c8befdb9 1566{
dea01258 1567 enum machine_mode mode = GET_MODE (ops[0]);
1568 enum immediate_class c = classify_immediate (ops[1], mode);
1569
1570 switch (c)
c8befdb9 1571 {
dea01258 1572 case IC_IL2:
1573 {
1574 unsigned char arrhi[16];
1575 unsigned char arrlo[16];
98bbec1e 1576 rtx to, temp, hi, lo;
dea01258 1577 int i;
98bbec1e 1578 enum machine_mode imode = mode;
1579 /* We need to do reals as ints because the constant used in the
1580 IOR might not be a legitimate real constant. */
1581 imode = int_mode_for_mode (mode);
dea01258 1582 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1583 if (imode != mode)
1584 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1585 else
1586 to = ops[0];
1587 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1588 for (i = 0; i < 16; i += 4)
1589 {
1590 arrlo[i + 2] = arrhi[i + 2];
1591 arrlo[i + 3] = arrhi[i + 3];
1592 arrlo[i + 0] = arrlo[i + 1] = 0;
1593 arrhi[i + 2] = arrhi[i + 3] = 0;
1594 }
98bbec1e 1595 hi = array_to_constant (imode, arrhi);
1596 lo = array_to_constant (imode, arrlo);
1597 emit_move_insn (temp, hi);
dea01258 1598 emit_insn (gen_rtx_SET
98bbec1e 1599 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1600 return 1;
1601 }
5df189be 1602 case IC_FSMBI2:
1603 {
1604 unsigned char arr_fsmbi[16];
1605 unsigned char arr_andbi[16];
1606 rtx to, reg_fsmbi, reg_and;
1607 int i;
1608 enum machine_mode imode = mode;
1609 /* We need to do reals as ints because the constant used in the
1610 * AND might not be a legitimate real constant. */
1611 imode = int_mode_for_mode (mode);
1612 constant_to_array (mode, ops[1], arr_fsmbi);
1613 if (imode != mode)
1614 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1615 else
1616 to = ops[0];
1617 for (i = 0; i < 16; i++)
1618 if (arr_fsmbi[i] != 0)
1619 {
1620 arr_andbi[0] = arr_fsmbi[i];
1621 arr_fsmbi[i] = 0xff;
1622 }
1623 for (i = 1; i < 16; i++)
1624 arr_andbi[i] = arr_andbi[0];
1625 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1626 reg_and = array_to_constant (imode, arr_andbi);
1627 emit_move_insn (to, reg_fsmbi);
1628 emit_insn (gen_rtx_SET
1629 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1630 return 1;
1631 }
dea01258 1632 case IC_POOL:
1633 if (reload_in_progress || reload_completed)
1634 {
1635 rtx mem = force_const_mem (mode, ops[1]);
1636 if (TARGET_LARGE_MEM)
1637 {
1638 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1639 emit_move_insn (addr, XEXP (mem, 0));
1640 mem = replace_equiv_address (mem, addr);
1641 }
1642 emit_move_insn (ops[0], mem);
1643 return 1;
1644 }
1645 break;
1646 case IC_IL1s:
1647 case IC_IL2s:
1648 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1649 {
1650 if (c == IC_IL2s)
1651 {
5df189be 1652 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1653 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1654 }
1655 else if (flag_pic)
1656 emit_insn (gen_pic (ops[0], ops[1]));
1657 if (flag_pic)
1658 {
1659 rtx pic_reg = get_pic_reg ();
1660 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1661 crtl->uses_pic_offset_table = 1;
dea01258 1662 }
1663 return flag_pic || c == IC_IL2s;
1664 }
1665 break;
1666 case IC_IL1:
1667 case IC_FSMBI:
1668 case IC_CPAT:
1669 break;
c8befdb9 1670 }
dea01258 1671 return 0;
c8befdb9 1672}
1673
644459d0 1674/* SAVING is TRUE when we are generating the actual load and store
1675 instructions for REGNO. When determining the size of the stack
1676 needed for saving register we must allocate enough space for the
1677 worst case, because we don't always have the information early enough
1678 to not allocate it. But we can at least eliminate the actual loads
1679 and stores during the prologue/epilogue. */
1680static int
1681need_to_save_reg (int regno, int saving)
1682{
3072d30e 1683 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1684 return 1;
1685 if (flag_pic
1686 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1687 && (!saving || crtl->uses_pic_offset_table)
644459d0 1688 && (!saving
3072d30e 1689 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1690 return 1;
1691 return 0;
1692}
1693
1694/* This function is only correct starting with local register
1695 allocation */
1696int
1697spu_saved_regs_size (void)
1698{
1699 int reg_save_size = 0;
1700 int regno;
1701
1702 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1703 if (need_to_save_reg (regno, 0))
1704 reg_save_size += 0x10;
1705 return reg_save_size;
1706}
1707
1708static rtx
1709frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1710{
1711 rtx reg = gen_rtx_REG (V4SImode, regno);
1712 rtx mem =
1713 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1714 return emit_insn (gen_movv4si (mem, reg));
1715}
1716
1717static rtx
1718frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1719{
1720 rtx reg = gen_rtx_REG (V4SImode, regno);
1721 rtx mem =
1722 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1723 return emit_insn (gen_movv4si (reg, mem));
1724}
1725
1726/* This happens after reload, so we need to expand it. */
1727static rtx
1728frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1729{
1730 rtx insn;
1731 if (satisfies_constraint_K (GEN_INT (imm)))
1732 {
1733 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1734 }
1735 else
1736 {
3072d30e 1737 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1738 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1739 if (REGNO (src) == REGNO (scratch))
1740 abort ();
1741 }
644459d0 1742 return insn;
1743}
1744
1745/* Return nonzero if this function is known to have a null epilogue. */
1746
1747int
1748direct_return (void)
1749{
1750 if (reload_completed)
1751 {
1752 if (cfun->static_chain_decl == 0
1753 && (spu_saved_regs_size ()
1754 + get_frame_size ()
abe32cce 1755 + crtl->outgoing_args_size
1756 + crtl->args.pretend_args_size == 0)
644459d0 1757 && current_function_is_leaf)
1758 return 1;
1759 }
1760 return 0;
1761}
1762
1763/*
1764 The stack frame looks like this:
1765 +-------------+
1766 | incoming |
a8e019fa 1767 | args |
1768 AP -> +-------------+
644459d0 1769 | $lr save |
1770 +-------------+
1771 prev SP | back chain |
1772 +-------------+
1773 | var args |
abe32cce 1774 | reg save | crtl->args.pretend_args_size bytes
644459d0 1775 +-------------+
1776 | ... |
1777 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1778 FP -> +-------------+
644459d0 1779 | ... |
a8e019fa 1780 | vars | get_frame_size() bytes
1781 HFP -> +-------------+
644459d0 1782 | ... |
1783 | outgoing |
abe32cce 1784 | args | crtl->outgoing_args_size bytes
644459d0 1785 +-------------+
1786 | $lr of next |
1787 | frame |
1788 +-------------+
a8e019fa 1789 | back chain |
1790 SP -> +-------------+
644459d0 1791
1792*/
1793void
1794spu_expand_prologue (void)
1795{
1796 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1797 HOST_WIDE_INT total_size;
1798 HOST_WIDE_INT saved_regs_size;
1799 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1800 rtx scratch_reg_0, scratch_reg_1;
1801 rtx insn, real;
1802
1803 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1804 the "toplevel" insn chain. */
1805 emit_note (NOTE_INSN_DELETED);
1806
1807 if (flag_pic && optimize == 0)
18d50ae6 1808 crtl->uses_pic_offset_table = 1;
644459d0 1809
1810 if (spu_naked_function_p (current_function_decl))
1811 return;
1812
1813 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1814 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1815
1816 saved_regs_size = spu_saved_regs_size ();
1817 total_size = size + saved_regs_size
abe32cce 1818 + crtl->outgoing_args_size
1819 + crtl->args.pretend_args_size;
644459d0 1820
1821 if (!current_function_is_leaf
18d50ae6 1822 || cfun->calls_alloca || total_size > 0)
644459d0 1823 total_size += STACK_POINTER_OFFSET;
1824
1825 /* Save this first because code after this might use the link
1826 register as a scratch register. */
1827 if (!current_function_is_leaf)
1828 {
1829 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1830 RTX_FRAME_RELATED_P (insn) = 1;
1831 }
1832
1833 if (total_size > 0)
1834 {
abe32cce 1835 offset = -crtl->args.pretend_args_size;
644459d0 1836 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1837 if (need_to_save_reg (regno, 1))
1838 {
1839 offset -= 16;
1840 insn = frame_emit_store (regno, sp_reg, offset);
1841 RTX_FRAME_RELATED_P (insn) = 1;
1842 }
1843 }
1844
18d50ae6 1845 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1846 {
1847 rtx pic_reg = get_pic_reg ();
1848 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1849 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1850 }
1851
1852 if (total_size > 0)
1853 {
1854 if (flag_stack_check)
1855 {
d819917f 1856 /* We compare against total_size-1 because
644459d0 1857 ($sp >= total_size) <=> ($sp > total_size-1) */
1858 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1859 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1860 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1861 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1862 {
1863 emit_move_insn (scratch_v4si, size_v4si);
1864 size_v4si = scratch_v4si;
1865 }
1866 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1867 emit_insn (gen_vec_extractv4si
1868 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1869 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1870 }
1871
1872 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1873 the value of the previous $sp because we save it as the back
1874 chain. */
1875 if (total_size <= 2000)
1876 {
1877 /* In this case we save the back chain first. */
1878 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1879 insn =
1880 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1881 }
644459d0 1882 else
1883 {
1884 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1885 insn =
1886 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1887 }
1888 RTX_FRAME_RELATED_P (insn) = 1;
1889 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
b9c74b4d 1890 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
644459d0 1891
1892 if (total_size > 2000)
1893 {
1894 /* Save the back chain ptr */
1895 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1896 }
1897
1898 if (frame_pointer_needed)
1899 {
1900 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1901 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1902 + crtl->outgoing_args_size;
644459d0 1903 /* Set the new frame_pointer */
d8dfeb55 1904 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1905 RTX_FRAME_RELATED_P (insn) = 1;
1906 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
b9c74b4d 1907 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
5df189be 1908 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1909 }
1910 }
1911
1912 emit_note (NOTE_INSN_DELETED);
1913}
1914
1915void
1916spu_expand_epilogue (bool sibcall_p)
1917{
1918 int size = get_frame_size (), offset, regno;
1919 HOST_WIDE_INT saved_regs_size, total_size;
1920 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1921 rtx jump, scratch_reg_0;
1922
1923 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1924 the "toplevel" insn chain. */
1925 emit_note (NOTE_INSN_DELETED);
1926
1927 if (spu_naked_function_p (current_function_decl))
1928 return;
1929
1930 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1931
1932 saved_regs_size = spu_saved_regs_size ();
1933 total_size = size + saved_regs_size
abe32cce 1934 + crtl->outgoing_args_size
1935 + crtl->args.pretend_args_size;
644459d0 1936
1937 if (!current_function_is_leaf
18d50ae6 1938 || cfun->calls_alloca || total_size > 0)
644459d0 1939 total_size += STACK_POINTER_OFFSET;
1940
1941 if (total_size > 0)
1942 {
18d50ae6 1943 if (cfun->calls_alloca)
644459d0 1944 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1945 else
1946 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1947
1948
1949 if (saved_regs_size > 0)
1950 {
abe32cce 1951 offset = -crtl->args.pretend_args_size;
644459d0 1952 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1953 if (need_to_save_reg (regno, 1))
1954 {
1955 offset -= 0x10;
1956 frame_emit_load (regno, sp_reg, offset);
1957 }
1958 }
1959 }
1960
1961 if (!current_function_is_leaf)
1962 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1963
1964 if (!sibcall_p)
1965 {
18b42941 1966 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1967 jump = emit_jump_insn (gen__return ());
1968 emit_barrier_after (jump);
1969 }
1970
1971 emit_note (NOTE_INSN_DELETED);
1972}
1973
1974rtx
1975spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1976{
1977 if (count != 0)
1978 return 0;
1979 /* This is inefficient because it ends up copying to a save-register
1980 which then gets saved even though $lr has already been saved. But
1981 it does generate better code for leaf functions and we don't need
1982 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1983 used for __builtin_return_address anyway, so maybe we don't care if
1984 it's inefficient. */
1985 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1986}
1987\f
1988
1989/* Given VAL, generate a constant appropriate for MODE.
1990 If MODE is a vector mode, every element will be VAL.
1991 For TImode, VAL will be zero extended to 128 bits. */
1992rtx
1993spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1994{
1995 rtx inner;
1996 rtvec v;
1997 int units, i;
1998
1999 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2000 || GET_MODE_CLASS (mode) == MODE_FLOAT
2001 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2002 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2003
2004 if (GET_MODE_CLASS (mode) == MODE_INT)
2005 return immed_double_const (val, 0, mode);
2006
2007 /* val is the bit representation of the float */
2008 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2009 return hwint_to_const_double (mode, val);
2010
2011 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2012 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2013 else
2014 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2015
2016 units = GET_MODE_NUNITS (mode);
2017
2018 v = rtvec_alloc (units);
2019
2020 for (i = 0; i < units; ++i)
2021 RTVEC_ELT (v, i) = inner;
2022
2023 return gen_rtx_CONST_VECTOR (mode, v);
2024}
644459d0 2025
5474166e 2026/* Create a MODE vector constant from 4 ints. */
2027rtx
2028spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2029{
2030 unsigned char arr[16];
2031 arr[0] = (a >> 24) & 0xff;
2032 arr[1] = (a >> 16) & 0xff;
2033 arr[2] = (a >> 8) & 0xff;
2034 arr[3] = (a >> 0) & 0xff;
2035 arr[4] = (b >> 24) & 0xff;
2036 arr[5] = (b >> 16) & 0xff;
2037 arr[6] = (b >> 8) & 0xff;
2038 arr[7] = (b >> 0) & 0xff;
2039 arr[8] = (c >> 24) & 0xff;
2040 arr[9] = (c >> 16) & 0xff;
2041 arr[10] = (c >> 8) & 0xff;
2042 arr[11] = (c >> 0) & 0xff;
2043 arr[12] = (d >> 24) & 0xff;
2044 arr[13] = (d >> 16) & 0xff;
2045 arr[14] = (d >> 8) & 0xff;
2046 arr[15] = (d >> 0) & 0xff;
2047 return array_to_constant(mode, arr);
2048}
5a976006 2049\f
2050/* branch hint stuff */
5474166e 2051
644459d0 2052/* An array of these is used to propagate hints to predecessor blocks. */
2053struct spu_bb_info
2054{
5a976006 2055 rtx prop_jump; /* propagated from another block */
2056 int bb_index; /* the original block. */
644459d0 2057};
5a976006 2058static struct spu_bb_info *spu_bb_info;
644459d0 2059
5a976006 2060#define STOP_HINT_P(INSN) \
2061 (GET_CODE(INSN) == CALL_INSN \
2062 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2063 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2064
2065/* 1 when RTX is a hinted branch or its target. We keep track of
2066 what has been hinted so the safe-hint code can test it easily. */
2067#define HINTED_P(RTX) \
2068 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2069
2070/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2071#define SCHED_ON_EVEN_P(RTX) \
2072 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2073
2074/* Emit a nop for INSN such that the two will dual issue. This assumes
2075 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2076 We check for TImode to handle a MULTI1 insn which has dual issued its
2077 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2078 ADDR_VEC insns. */
2079static void
2080emit_nop_for_insn (rtx insn)
644459d0 2081{
5a976006 2082 int p;
2083 rtx new_insn;
2084 p = get_pipe (insn);
2085 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2086 new_insn = emit_insn_after (gen_lnop (), insn);
2087 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2088 {
5a976006 2089 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2090 PUT_MODE (new_insn, TImode);
2091 PUT_MODE (insn, VOIDmode);
2092 }
2093 else
2094 new_insn = emit_insn_after (gen_lnop (), insn);
2095 recog_memoized (new_insn);
2096}
2097
2098/* Insert nops in basic blocks to meet dual issue alignment
2099 requirements. Also make sure hbrp and hint instructions are at least
2100 one cycle apart, possibly inserting a nop. */
2101static void
2102pad_bb(void)
2103{
2104 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2105 int length;
2106 int addr;
2107
2108 /* This sets up INSN_ADDRESSES. */
2109 shorten_branches (get_insns ());
2110
2111 /* Keep track of length added by nops. */
2112 length = 0;
2113
2114 prev_insn = 0;
2115 insn = get_insns ();
2116 if (!active_insn_p (insn))
2117 insn = next_active_insn (insn);
2118 for (; insn; insn = next_insn)
2119 {
2120 next_insn = next_active_insn (insn);
2121 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2122 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2123 {
5a976006 2124 if (hbr_insn)
2125 {
2126 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2127 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2128 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2129 || (a1 - a0 == 4))
2130 {
2131 prev_insn = emit_insn_before (gen_lnop (), insn);
2132 PUT_MODE (prev_insn, GET_MODE (insn));
2133 PUT_MODE (insn, TImode);
2134 length += 4;
2135 }
2136 }
2137 hbr_insn = insn;
2138 }
2139 if (INSN_CODE (insn) == CODE_FOR_blockage)
2140 {
2141 if (GET_MODE (insn) == TImode)
2142 PUT_MODE (next_insn, TImode);
2143 insn = next_insn;
2144 next_insn = next_active_insn (insn);
2145 }
2146 addr = INSN_ADDRESSES (INSN_UID (insn));
2147 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2148 {
2149 if (((addr + length) & 7) != 0)
2150 {
2151 emit_nop_for_insn (prev_insn);
2152 length += 4;
2153 }
644459d0 2154 }
5a976006 2155 else if (GET_MODE (insn) == TImode
2156 && ((next_insn && GET_MODE (next_insn) != TImode)
2157 || get_attr_type (insn) == TYPE_MULTI0)
2158 && ((addr + length) & 7) != 0)
2159 {
2160 /* prev_insn will always be set because the first insn is
2161 always 8-byte aligned. */
2162 emit_nop_for_insn (prev_insn);
2163 length += 4;
2164 }
2165 prev_insn = insn;
644459d0 2166 }
644459d0 2167}
2168
5a976006 2169\f
2170/* Routines for branch hints. */
2171
644459d0 2172static void
5a976006 2173spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2174 int distance, sbitmap blocks)
644459d0 2175{
5a976006 2176 rtx branch_label = 0;
2177 rtx hint;
2178 rtx insn;
2179 rtx table;
644459d0 2180
2181 if (before == 0 || branch == 0 || target == 0)
2182 return;
2183
5a976006 2184 /* While scheduling we require hints to be no further than 600, so
2185 we need to enforce that here too */
644459d0 2186 if (distance > 600)
2187 return;
2188
5a976006 2189 /* If we have a Basic block note, emit it after the basic block note. */
2190 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2191 before = NEXT_INSN (before);
644459d0 2192
2193 branch_label = gen_label_rtx ();
2194 LABEL_NUSES (branch_label)++;
2195 LABEL_PRESERVE_P (branch_label) = 1;
2196 insn = emit_label_before (branch_label, branch);
2197 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2198 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2199
2200 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2201 recog_memoized (hint);
2202 HINTED_P (branch) = 1;
644459d0 2203
5a976006 2204 if (GET_CODE (target) == LABEL_REF)
2205 HINTED_P (XEXP (target, 0)) = 1;
2206 else if (tablejump_p (branch, 0, &table))
644459d0 2207 {
5a976006 2208 rtvec vec;
2209 int j;
2210 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2211 vec = XVEC (PATTERN (table), 0);
2212 else
2213 vec = XVEC (PATTERN (table), 1);
2214 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2215 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2216 }
5a976006 2217
2218 if (distance >= 588)
644459d0 2219 {
5a976006 2220 /* Make sure the hint isn't scheduled any earlier than this point,
2221 which could make it too far for the branch offest to fit */
2222 recog_memoized (emit_insn_before (gen_blockage (), hint));
2223 }
2224 else if (distance <= 8 * 4)
2225 {
2226 /* To guarantee at least 8 insns between the hint and branch we
2227 insert nops. */
2228 int d;
2229 for (d = distance; d < 8 * 4; d += 4)
2230 {
2231 insn =
2232 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2233 recog_memoized (insn);
2234 }
2235
2236 /* Make sure any nops inserted aren't scheduled before the hint. */
2237 recog_memoized (emit_insn_after (gen_blockage (), hint));
2238
2239 /* Make sure any nops inserted aren't scheduled after the call. */
2240 if (CALL_P (branch) && distance < 8 * 4)
2241 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2242 }
644459d0 2243}
2244
2245/* Returns 0 if we don't want a hint for this branch. Otherwise return
2246 the rtx for the branch target. */
2247static rtx
2248get_branch_target (rtx branch)
2249{
2250 if (GET_CODE (branch) == JUMP_INSN)
2251 {
2252 rtx set, src;
2253
2254 /* Return statements */
2255 if (GET_CODE (PATTERN (branch)) == RETURN)
2256 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2257
2258 /* jump table */
2259 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2260 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2261 return 0;
2262
2263 set = single_set (branch);
2264 src = SET_SRC (set);
2265 if (GET_CODE (SET_DEST (set)) != PC)
2266 abort ();
2267
2268 if (GET_CODE (src) == IF_THEN_ELSE)
2269 {
2270 rtx lab = 0;
2271 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2272 if (note)
2273 {
2274 /* If the more probable case is not a fall through, then
2275 try a branch hint. */
2276 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2277 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2278 && GET_CODE (XEXP (src, 1)) != PC)
2279 lab = XEXP (src, 1);
2280 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2281 && GET_CODE (XEXP (src, 2)) != PC)
2282 lab = XEXP (src, 2);
2283 }
2284 if (lab)
2285 {
2286 if (GET_CODE (lab) == RETURN)
2287 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2288 return lab;
2289 }
2290 return 0;
2291 }
2292
2293 return src;
2294 }
2295 else if (GET_CODE (branch) == CALL_INSN)
2296 {
2297 rtx call;
2298 /* All of our call patterns are in a PARALLEL and the CALL is
2299 the first pattern in the PARALLEL. */
2300 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2301 abort ();
2302 call = XVECEXP (PATTERN (branch), 0, 0);
2303 if (GET_CODE (call) == SET)
2304 call = SET_SRC (call);
2305 if (GET_CODE (call) != CALL)
2306 abort ();
2307 return XEXP (XEXP (call, 0), 0);
2308 }
2309 return 0;
2310}
2311
5a976006 2312/* The special $hbr register is used to prevent the insn scheduler from
2313 moving hbr insns across instructions which invalidate them. It
2314 should only be used in a clobber, and this function searches for
2315 insns which clobber it. */
2316static bool
2317insn_clobbers_hbr (rtx insn)
2318{
2319 if (INSN_P (insn)
2320 && GET_CODE (PATTERN (insn)) == PARALLEL)
2321 {
2322 rtx parallel = PATTERN (insn);
2323 rtx clobber;
2324 int j;
2325 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2326 {
2327 clobber = XVECEXP (parallel, 0, j);
2328 if (GET_CODE (clobber) == CLOBBER
2329 && GET_CODE (XEXP (clobber, 0)) == REG
2330 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2331 return 1;
2332 }
2333 }
2334 return 0;
2335}
2336
2337/* Search up to 32 insns starting at FIRST:
2338 - at any kind of hinted branch, just return
2339 - at any unconditional branch in the first 15 insns, just return
2340 - at a call or indirect branch, after the first 15 insns, force it to
2341 an even address and return
2342 - at any unconditional branch, after the first 15 insns, force it to
2343 an even address.
2344 At then end of the search, insert an hbrp within 4 insns of FIRST,
2345 and an hbrp within 16 instructions of FIRST.
2346 */
644459d0 2347static void
5a976006 2348insert_hbrp_for_ilb_runout (rtx first)
644459d0 2349{
5a976006 2350 rtx insn, before_4 = 0, before_16 = 0;
2351 int addr = 0, length, first_addr = -1;
2352 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2353 int insert_lnop_after = 0;
2354 for (insn = first; insn; insn = NEXT_INSN (insn))
2355 if (INSN_P (insn))
2356 {
2357 if (first_addr == -1)
2358 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2359 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2360 length = get_attr_length (insn);
2361
2362 if (before_4 == 0 && addr + length >= 4 * 4)
2363 before_4 = insn;
2364 /* We test for 14 instructions because the first hbrp will add
2365 up to 2 instructions. */
2366 if (before_16 == 0 && addr + length >= 14 * 4)
2367 before_16 = insn;
2368
2369 if (INSN_CODE (insn) == CODE_FOR_hbr)
2370 {
2371 /* Make sure an hbrp is at least 2 cycles away from a hint.
2372 Insert an lnop after the hbrp when necessary. */
2373 if (before_4 == 0 && addr > 0)
2374 {
2375 before_4 = insn;
2376 insert_lnop_after |= 1;
2377 }
2378 else if (before_4 && addr <= 4 * 4)
2379 insert_lnop_after |= 1;
2380 if (before_16 == 0 && addr > 10 * 4)
2381 {
2382 before_16 = insn;
2383 insert_lnop_after |= 2;
2384 }
2385 else if (before_16 && addr <= 14 * 4)
2386 insert_lnop_after |= 2;
2387 }
644459d0 2388
5a976006 2389 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2390 {
2391 if (addr < hbrp_addr0)
2392 hbrp_addr0 = addr;
2393 else if (addr < hbrp_addr1)
2394 hbrp_addr1 = addr;
2395 }
644459d0 2396
5a976006 2397 if (CALL_P (insn) || JUMP_P (insn))
2398 {
2399 if (HINTED_P (insn))
2400 return;
2401
2402 /* Any branch after the first 15 insns should be on an even
2403 address to avoid a special case branch. There might be
2404 some nops and/or hbrps inserted, so we test after 10
2405 insns. */
2406 if (addr > 10 * 4)
2407 SCHED_ON_EVEN_P (insn) = 1;
2408 }
644459d0 2409
5a976006 2410 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2411 return;
2412
2413
2414 if (addr + length >= 32 * 4)
644459d0 2415 {
5a976006 2416 gcc_assert (before_4 && before_16);
2417 if (hbrp_addr0 > 4 * 4)
644459d0 2418 {
5a976006 2419 insn =
2420 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2421 recog_memoized (insn);
2422 INSN_ADDRESSES_NEW (insn,
2423 INSN_ADDRESSES (INSN_UID (before_4)));
2424 PUT_MODE (insn, GET_MODE (before_4));
2425 PUT_MODE (before_4, TImode);
2426 if (insert_lnop_after & 1)
644459d0 2427 {
5a976006 2428 insn = emit_insn_before (gen_lnop (), before_4);
2429 recog_memoized (insn);
2430 INSN_ADDRESSES_NEW (insn,
2431 INSN_ADDRESSES (INSN_UID (before_4)));
2432 PUT_MODE (insn, TImode);
644459d0 2433 }
644459d0 2434 }
5a976006 2435 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2436 && hbrp_addr1 > 16 * 4)
644459d0 2437 {
5a976006 2438 insn =
2439 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2440 recog_memoized (insn);
2441 INSN_ADDRESSES_NEW (insn,
2442 INSN_ADDRESSES (INSN_UID (before_16)));
2443 PUT_MODE (insn, GET_MODE (before_16));
2444 PUT_MODE (before_16, TImode);
2445 if (insert_lnop_after & 2)
644459d0 2446 {
5a976006 2447 insn = emit_insn_before (gen_lnop (), before_16);
2448 recog_memoized (insn);
2449 INSN_ADDRESSES_NEW (insn,
2450 INSN_ADDRESSES (INSN_UID
2451 (before_16)));
2452 PUT_MODE (insn, TImode);
644459d0 2453 }
2454 }
5a976006 2455 return;
644459d0 2456 }
644459d0 2457 }
5a976006 2458 else if (BARRIER_P (insn))
2459 return;
644459d0 2460
644459d0 2461}
5a976006 2462
2463/* The SPU might hang when it executes 48 inline instructions after a
2464 hinted branch jumps to its hinted target. The beginning of a
2465 function and the return from a call might have been hinted, and must
2466 be handled as well. To prevent a hang we insert 2 hbrps. The first
2467 should be within 6 insns of the branch target. The second should be
2468 within 22 insns of the branch target. When determining if hbrps are
2469 necessary, we look for only 32 inline instructions, because up to to
2470 12 nops and 4 hbrps could be inserted. Similarily, when inserting
2471 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2472static void
5a976006 2473insert_hbrp (void)
644459d0 2474{
5a976006 2475 rtx insn;
2476 if (TARGET_SAFE_HINTS)
644459d0 2477 {
5a976006 2478 shorten_branches (get_insns ());
2479 /* Insert hbrp at beginning of function */
2480 insn = next_active_insn (get_insns ());
2481 if (insn)
2482 insert_hbrp_for_ilb_runout (insn);
2483 /* Insert hbrp after hinted targets. */
2484 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2485 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2486 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2487 }
644459d0 2488}
2489
5a976006 2490static int in_spu_reorg;
2491
2492/* Insert branch hints. There are no branch optimizations after this
2493 pass, so it's safe to set our branch hints now. */
644459d0 2494static void
5a976006 2495spu_machine_dependent_reorg (void)
644459d0 2496{
5a976006 2497 sbitmap blocks;
2498 basic_block bb;
2499 rtx branch, insn;
2500 rtx branch_target = 0;
2501 int branch_addr = 0, insn_addr, required_dist = 0;
2502 int i;
2503 unsigned int j;
644459d0 2504
5a976006 2505 if (!TARGET_BRANCH_HINTS || optimize == 0)
2506 {
2507 /* We still do it for unoptimized code because an external
2508 function might have hinted a call or return. */
2509 insert_hbrp ();
2510 pad_bb ();
2511 return;
2512 }
644459d0 2513
5a976006 2514 blocks = sbitmap_alloc (last_basic_block);
2515 sbitmap_zero (blocks);
644459d0 2516
5a976006 2517 in_spu_reorg = 1;
2518 compute_bb_for_insn ();
2519
2520 compact_blocks ();
2521
2522 spu_bb_info =
2523 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2524 sizeof (struct spu_bb_info));
2525
2526 /* We need exact insn addresses and lengths. */
2527 shorten_branches (get_insns ());
2528
2529 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2530 {
5a976006 2531 bb = BASIC_BLOCK (i);
2532 branch = 0;
2533 if (spu_bb_info[i].prop_jump)
644459d0 2534 {
5a976006 2535 branch = spu_bb_info[i].prop_jump;
2536 branch_target = get_branch_target (branch);
2537 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2538 required_dist = spu_hint_dist;
2539 }
2540 /* Search from end of a block to beginning. In this loop, find
2541 jumps which need a branch and emit them only when:
2542 - it's an indirect branch and we're at the insn which sets
2543 the register
2544 - we're at an insn that will invalidate the hint. e.g., a
2545 call, another hint insn, inline asm that clobbers $hbr, and
2546 some inlined operations (divmodsi4). Don't consider jumps
2547 because they are only at the end of a block and are
2548 considered when we are deciding whether to propagate
2549 - we're getting too far away from the branch. The hbr insns
2550 only have a signed 10 bit offset
2551 We go back as far as possible so the branch will be considered
2552 for propagation when we get to the beginning of the block. */
2553 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2554 {
2555 if (INSN_P (insn))
2556 {
2557 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2558 if (branch
2559 && ((GET_CODE (branch_target) == REG
2560 && set_of (branch_target, insn) != NULL_RTX)
2561 || insn_clobbers_hbr (insn)
2562 || branch_addr - insn_addr > 600))
2563 {
2564 rtx next = NEXT_INSN (insn);
2565 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2566 if (insn != BB_END (bb)
2567 && branch_addr - next_addr >= required_dist)
2568 {
2569 if (dump_file)
2570 fprintf (dump_file,
2571 "hint for %i in block %i before %i\n",
2572 INSN_UID (branch), bb->index,
2573 INSN_UID (next));
2574 spu_emit_branch_hint (next, branch, branch_target,
2575 branch_addr - next_addr, blocks);
2576 }
2577 branch = 0;
2578 }
2579
2580 /* JUMP_P will only be true at the end of a block. When
2581 branch is already set it means we've previously decided
2582 to propagate a hint for that branch into this block. */
2583 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2584 {
2585 branch = 0;
2586 if ((branch_target = get_branch_target (insn)))
2587 {
2588 branch = insn;
2589 branch_addr = insn_addr;
2590 required_dist = spu_hint_dist;
2591 }
2592 }
2593 }
2594 if (insn == BB_HEAD (bb))
2595 break;
2596 }
2597
2598 if (branch)
2599 {
2600 /* If we haven't emitted a hint for this branch yet, it might
2601 be profitable to emit it in one of the predecessor blocks,
2602 especially for loops. */
2603 rtx bbend;
2604 basic_block prev = 0, prop = 0, prev2 = 0;
2605 int loop_exit = 0, simple_loop = 0;
2606 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2607
2608 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2609 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2610 prev = EDGE_PRED (bb, j)->src;
2611 else
2612 prev2 = EDGE_PRED (bb, j)->src;
2613
2614 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2615 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2616 loop_exit = 1;
2617 else if (EDGE_SUCC (bb, j)->dest == bb)
2618 simple_loop = 1;
2619
2620 /* If this branch is a loop exit then propagate to previous
2621 fallthru block. This catches the cases when it is a simple
2622 loop or when there is an initial branch into the loop. */
2623 if (prev && (loop_exit || simple_loop)
2624 && prev->loop_depth <= bb->loop_depth)
2625 prop = prev;
2626
2627 /* If there is only one adjacent predecessor. Don't propagate
2628 outside this loop. This loop_depth test isn't perfect, but
2629 I'm not sure the loop_father member is valid at this point. */
2630 else if (prev && single_pred_p (bb)
2631 && prev->loop_depth == bb->loop_depth)
2632 prop = prev;
2633
2634 /* If this is the JOIN block of a simple IF-THEN then
2635 propogate the hint to the HEADER block. */
2636 else if (prev && prev2
2637 && EDGE_COUNT (bb->preds) == 2
2638 && EDGE_COUNT (prev->preds) == 1
2639 && EDGE_PRED (prev, 0)->src == prev2
2640 && prev2->loop_depth == bb->loop_depth
2641 && GET_CODE (branch_target) != REG)
2642 prop = prev;
2643
2644 /* Don't propagate when:
2645 - this is a simple loop and the hint would be too far
2646 - this is not a simple loop and there are 16 insns in
2647 this block already
2648 - the predecessor block ends in a branch that will be
2649 hinted
2650 - the predecessor block ends in an insn that invalidates
2651 the hint */
2652 if (prop
2653 && prop->index >= 0
2654 && (bbend = BB_END (prop))
2655 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2656 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2657 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2658 {
2659 if (dump_file)
2660 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2661 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2662 bb->index, prop->index, bb->loop_depth,
2663 INSN_UID (branch), loop_exit, simple_loop,
2664 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2665
2666 spu_bb_info[prop->index].prop_jump = branch;
2667 spu_bb_info[prop->index].bb_index = i;
2668 }
2669 else if (branch_addr - next_addr >= required_dist)
2670 {
2671 if (dump_file)
2672 fprintf (dump_file, "hint for %i in block %i before %i\n",
2673 INSN_UID (branch), bb->index,
2674 INSN_UID (NEXT_INSN (insn)));
2675 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2676 branch_addr - next_addr, blocks);
2677 }
2678 branch = 0;
644459d0 2679 }
644459d0 2680 }
5a976006 2681 free (spu_bb_info);
644459d0 2682
5a976006 2683 if (!sbitmap_empty_p (blocks))
2684 find_many_sub_basic_blocks (blocks);
2685
2686 /* We have to schedule to make sure alignment is ok. */
2687 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2688
2689 /* The hints need to be scheduled, so call it again. */
2690 schedule_insns ();
2691
2692 insert_hbrp ();
2693
2694 pad_bb ();
2695
2696
2697 if (spu_flag_var_tracking)
644459d0 2698 {
5a976006 2699 df_analyze ();
2700 timevar_push (TV_VAR_TRACKING);
2701 variable_tracking_main ();
2702 timevar_pop (TV_VAR_TRACKING);
2703 df_finish_pass (false);
644459d0 2704 }
5a976006 2705
2706 free_bb_for_insn ();
2707
2708 in_spu_reorg = 0;
644459d0 2709}
2710\f
2711
2712/* Insn scheduling routines, primarily for dual issue. */
2713static int
2714spu_sched_issue_rate (void)
2715{
2716 return 2;
2717}
2718
2719static int
5a976006 2720uses_ls_unit(rtx insn)
644459d0 2721{
5a976006 2722 rtx set = single_set (insn);
2723 if (set != 0
2724 && (GET_CODE (SET_DEST (set)) == MEM
2725 || GET_CODE (SET_SRC (set)) == MEM))
2726 return 1;
2727 return 0;
644459d0 2728}
2729
2730static int
2731get_pipe (rtx insn)
2732{
2733 enum attr_type t;
2734 /* Handle inline asm */
2735 if (INSN_CODE (insn) == -1)
2736 return -1;
2737 t = get_attr_type (insn);
2738 switch (t)
2739 {
2740 case TYPE_CONVERT:
2741 return -2;
2742 case TYPE_MULTI0:
2743 return -1;
2744
2745 case TYPE_FX2:
2746 case TYPE_FX3:
2747 case TYPE_SPR:
2748 case TYPE_NOP:
2749 case TYPE_FXB:
2750 case TYPE_FPD:
2751 case TYPE_FP6:
2752 case TYPE_FP7:
644459d0 2753 return 0;
2754
2755 case TYPE_LNOP:
2756 case TYPE_SHUF:
2757 case TYPE_LOAD:
2758 case TYPE_STORE:
2759 case TYPE_BR:
2760 case TYPE_MULTI1:
2761 case TYPE_HBR:
5a976006 2762 case TYPE_IPREFETCH:
644459d0 2763 return 1;
2764 default:
2765 abort ();
2766 }
2767}
2768
5a976006 2769
2770/* haifa-sched.c has a static variable that keeps track of the current
2771 cycle. It is passed to spu_sched_reorder, and we record it here for
2772 use by spu_sched_variable_issue. It won't be accurate if the
2773 scheduler updates it's clock_var between the two calls. */
2774static int clock_var;
2775
2776/* This is used to keep track of insn alignment. Set to 0 at the
2777 beginning of each block and increased by the "length" attr of each
2778 insn scheduled. */
2779static int spu_sched_length;
2780
2781/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2782 ready list appropriately in spu_sched_reorder(). */
2783static int pipe0_clock;
2784static int pipe1_clock;
2785
2786static int prev_clock_var;
2787
2788static int prev_priority;
2789
2790/* The SPU needs to load the next ilb sometime during the execution of
2791 the previous ilb. There is a potential conflict if every cycle has a
2792 load or store. To avoid the conflict we make sure the load/store
2793 unit is free for at least one cycle during the execution of insns in
2794 the previous ilb. */
2795static int spu_ls_first;
2796static int prev_ls_clock;
2797
2798static void
2799spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2800 int max_ready ATTRIBUTE_UNUSED)
2801{
2802 spu_sched_length = 0;
2803}
2804
2805static void
2806spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2807 int max_ready ATTRIBUTE_UNUSED)
2808{
2809 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2810 {
2811 /* When any block might be at least 8-byte aligned, assume they
2812 will all be at least 8-byte aligned to make sure dual issue
2813 works out correctly. */
2814 spu_sched_length = 0;
2815 }
2816 spu_ls_first = INT_MAX;
2817 clock_var = -1;
2818 prev_ls_clock = -1;
2819 pipe0_clock = -1;
2820 pipe1_clock = -1;
2821 prev_clock_var = -1;
2822 prev_priority = -1;
2823}
2824
644459d0 2825static int
5a976006 2826spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2827 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2828{
5a976006 2829 int len;
2830 int p;
644459d0 2831 if (GET_CODE (PATTERN (insn)) == USE
2832 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2833 || (len = get_attr_length (insn)) == 0)
2834 return more;
2835
2836 spu_sched_length += len;
2837
2838 /* Reset on inline asm */
2839 if (INSN_CODE (insn) == -1)
2840 {
2841 spu_ls_first = INT_MAX;
2842 pipe0_clock = -1;
2843 pipe1_clock = -1;
2844 return 0;
2845 }
2846 p = get_pipe (insn);
2847 if (p == 0)
2848 pipe0_clock = clock_var;
2849 else
2850 pipe1_clock = clock_var;
2851
2852 if (in_spu_reorg)
2853 {
2854 if (clock_var - prev_ls_clock > 1
2855 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2856 spu_ls_first = INT_MAX;
2857 if (uses_ls_unit (insn))
2858 {
2859 if (spu_ls_first == INT_MAX)
2860 spu_ls_first = spu_sched_length;
2861 prev_ls_clock = clock_var;
2862 }
2863
2864 /* The scheduler hasn't inserted the nop, but we will later on.
2865 Include those nops in spu_sched_length. */
2866 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2867 spu_sched_length += 4;
2868 prev_clock_var = clock_var;
2869
2870 /* more is -1 when called from spu_sched_reorder for new insns
2871 that don't have INSN_PRIORITY */
2872 if (more >= 0)
2873 prev_priority = INSN_PRIORITY (insn);
2874 }
2875
2876 /* Always try issueing more insns. spu_sched_reorder will decide
2877 when the cycle should be advanced. */
2878 return 1;
2879}
2880
2881/* This function is called for both TARGET_SCHED_REORDER and
2882 TARGET_SCHED_REORDER2. */
2883static int
2884spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2885 rtx *ready, int *nreadyp, int clock)
2886{
2887 int i, nready = *nreadyp;
2888 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2889 rtx insn;
2890
2891 clock_var = clock;
2892
2893 if (nready <= 0 || pipe1_clock >= clock)
2894 return 0;
2895
2896 /* Find any rtl insns that don't generate assembly insns and schedule
2897 them first. */
2898 for (i = nready - 1; i >= 0; i--)
2899 {
2900 insn = ready[i];
2901 if (INSN_CODE (insn) == -1
2902 || INSN_CODE (insn) == CODE_FOR_blockage
2903 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2904 {
2905 ready[i] = ready[nready - 1];
2906 ready[nready - 1] = insn;
2907 return 1;
2908 }
2909 }
2910
2911 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2912 for (i = 0; i < nready; i++)
2913 if (INSN_CODE (ready[i]) != -1)
2914 {
2915 insn = ready[i];
2916 switch (get_attr_type (insn))
2917 {
2918 default:
2919 case TYPE_MULTI0:
2920 case TYPE_CONVERT:
2921 case TYPE_FX2:
2922 case TYPE_FX3:
2923 case TYPE_SPR:
2924 case TYPE_NOP:
2925 case TYPE_FXB:
2926 case TYPE_FPD:
2927 case TYPE_FP6:
2928 case TYPE_FP7:
2929 pipe_0 = i;
2930 break;
2931 case TYPE_LOAD:
2932 case TYPE_STORE:
2933 pipe_ls = i;
2934 case TYPE_LNOP:
2935 case TYPE_SHUF:
2936 case TYPE_BR:
2937 case TYPE_MULTI1:
2938 case TYPE_HBR:
2939 pipe_1 = i;
2940 break;
2941 case TYPE_IPREFETCH:
2942 pipe_hbrp = i;
2943 break;
2944 }
2945 }
2946
2947 /* In the first scheduling phase, schedule loads and stores together
2948 to increase the chance they will get merged during postreload CSE. */
2949 if (!reload_completed && pipe_ls >= 0)
2950 {
2951 insn = ready[pipe_ls];
2952 ready[pipe_ls] = ready[nready - 1];
2953 ready[nready - 1] = insn;
2954 return 1;
2955 }
2956
2957 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2958 if (pipe_hbrp >= 0)
2959 pipe_1 = pipe_hbrp;
2960
2961 /* When we have loads/stores in every cycle of the last 15 insns and
2962 we are about to schedule another load/store, emit an hbrp insn
2963 instead. */
2964 if (in_spu_reorg
2965 && spu_sched_length - spu_ls_first >= 4 * 15
2966 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2967 {
2968 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2969 recog_memoized (insn);
2970 if (pipe0_clock < clock)
2971 PUT_MODE (insn, TImode);
2972 spu_sched_variable_issue (file, verbose, insn, -1);
2973 return 0;
2974 }
2975
2976 /* In general, we want to emit nops to increase dual issue, but dual
2977 issue isn't faster when one of the insns could be scheduled later
2978 without effecting the critical path. We look at INSN_PRIORITY to
2979 make a good guess, but it isn't perfect so -mdual-nops=n can be
2980 used to effect it. */
2981 if (in_spu_reorg && spu_dual_nops < 10)
2982 {
2983 /* When we are at an even address and we are not issueing nops to
2984 improve scheduling then we need to advance the cycle. */
2985 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2986 && (spu_dual_nops == 0
2987 || (pipe_1 != -1
2988 && prev_priority >
2989 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2990 return 0;
2991
2992 /* When at an odd address, schedule the highest priority insn
2993 without considering pipeline. */
2994 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2995 && (spu_dual_nops == 0
2996 || (prev_priority >
2997 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2998 return 1;
2999 }
3000
3001
3002 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3003 pipe0 insn in the ready list, schedule it. */
3004 if (pipe0_clock < clock && pipe_0 >= 0)
3005 schedule_i = pipe_0;
3006
3007 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3008 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3009 else
3010 schedule_i = pipe_1;
3011
3012 if (schedule_i > -1)
3013 {
3014 insn = ready[schedule_i];
3015 ready[schedule_i] = ready[nready - 1];
3016 ready[nready - 1] = insn;
3017 return 1;
3018 }
3019 return 0;
644459d0 3020}
3021
3022/* INSN is dependent on DEP_INSN. */
3023static int
5a976006 3024spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3025{
5a976006 3026 rtx set;
3027
3028 /* The blockage pattern is used to prevent instructions from being
3029 moved across it and has no cost. */
3030 if (INSN_CODE (insn) == CODE_FOR_blockage
3031 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3032 return 0;
3033
3034 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3035 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3036 return 0;
3037
3038 /* Make sure hbrps are spread out. */
3039 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3040 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3041 return 8;
3042
3043 /* Make sure hints and hbrps are 2 cycles apart. */
3044 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3045 || INSN_CODE (insn) == CODE_FOR_hbr)
3046 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3047 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3048 return 2;
3049
3050 /* An hbrp has no real dependency on other insns. */
3051 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3052 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3053 return 0;
3054
3055 /* Assuming that it is unlikely an argument register will be used in
3056 the first cycle of the called function, we reduce the cost for
3057 slightly better scheduling of dep_insn. When not hinted, the
3058 mispredicted branch would hide the cost as well. */
3059 if (CALL_P (insn))
3060 {
3061 rtx target = get_branch_target (insn);
3062 if (GET_CODE (target) != REG || !set_of (target, insn))
3063 return cost - 2;
3064 return cost;
3065 }
3066
3067 /* And when returning from a function, let's assume the return values
3068 are completed sooner too. */
3069 if (CALL_P (dep_insn))
644459d0 3070 return cost - 2;
5a976006 3071
3072 /* Make sure an instruction that loads from the back chain is schedule
3073 away from the return instruction so a hint is more likely to get
3074 issued. */
3075 if (INSN_CODE (insn) == CODE_FOR__return
3076 && (set = single_set (dep_insn))
3077 && GET_CODE (SET_DEST (set)) == REG
3078 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3079 return 20;
3080
644459d0 3081 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3082 scheduler makes every insn in a block anti-dependent on the final
3083 jump_insn. We adjust here so higher cost insns will get scheduled
3084 earlier. */
5a976006 3085 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3086 return insn_cost (dep_insn) - 3;
5a976006 3087
644459d0 3088 return cost;
3089}
3090\f
3091/* Create a CONST_DOUBLE from a string. */
3092struct rtx_def *
3093spu_float_const (const char *string, enum machine_mode mode)
3094{
3095 REAL_VALUE_TYPE value;
3096 value = REAL_VALUE_ATOF (string, mode);
3097 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3098}
3099
644459d0 3100int
3101spu_constant_address_p (rtx x)
3102{
3103 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3104 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3105 || GET_CODE (x) == HIGH);
3106}
3107
3108static enum spu_immediate
3109which_immediate_load (HOST_WIDE_INT val)
3110{
3111 gcc_assert (val == trunc_int_for_mode (val, SImode));
3112
3113 if (val >= -0x8000 && val <= 0x7fff)
3114 return SPU_IL;
3115 if (val >= 0 && val <= 0x3ffff)
3116 return SPU_ILA;
3117 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3118 return SPU_ILH;
3119 if ((val & 0xffff) == 0)
3120 return SPU_ILHU;
3121
3122 return SPU_NONE;
3123}
3124
dea01258 3125/* Return true when OP can be loaded by one of the il instructions, or
3126 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3127int
3128immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3129{
3130 if (CONSTANT_P (op))
3131 {
3132 enum immediate_class c = classify_immediate (op, mode);
5df189be 3133 return c == IC_IL1 || c == IC_IL1s
3072d30e 3134 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3135 }
3136 return 0;
3137}
3138
3139/* Return true if the first SIZE bytes of arr is a constant that can be
3140 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3141 represent the size and offset of the instruction to use. */
3142static int
3143cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3144{
3145 int cpat, run, i, start;
3146 cpat = 1;
3147 run = 0;
3148 start = -1;
3149 for (i = 0; i < size && cpat; i++)
3150 if (arr[i] != i+16)
3151 {
3152 if (!run)
3153 {
3154 start = i;
3155 if (arr[i] == 3)
3156 run = 1;
3157 else if (arr[i] == 2 && arr[i+1] == 3)
3158 run = 2;
3159 else if (arr[i] == 0)
3160 {
3161 while (arr[i+run] == run && i+run < 16)
3162 run++;
3163 if (run != 4 && run != 8)
3164 cpat = 0;
3165 }
3166 else
3167 cpat = 0;
3168 if ((i & (run-1)) != 0)
3169 cpat = 0;
3170 i += run;
3171 }
3172 else
3173 cpat = 0;
3174 }
b01a6dc3 3175 if (cpat && (run || size < 16))
dea01258 3176 {
3177 if (run == 0)
3178 run = 1;
3179 if (prun)
3180 *prun = run;
3181 if (pstart)
3182 *pstart = start == -1 ? 16-run : start;
3183 return 1;
3184 }
3185 return 0;
3186}
3187
3188/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3189 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3190static enum immediate_class
3191classify_immediate (rtx op, enum machine_mode mode)
644459d0 3192{
3193 HOST_WIDE_INT val;
3194 unsigned char arr[16];
5df189be 3195 int i, j, repeated, fsmbi, repeat;
dea01258 3196
3197 gcc_assert (CONSTANT_P (op));
3198
644459d0 3199 if (GET_MODE (op) != VOIDmode)
3200 mode = GET_MODE (op);
3201
dea01258 3202 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3203 if (!flag_pic
3204 && mode == V4SImode
dea01258 3205 && GET_CODE (op) == CONST_VECTOR
3206 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3207 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3208 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3209 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3210 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3211 op = CONST_VECTOR_ELT (op, 0);
644459d0 3212
dea01258 3213 switch (GET_CODE (op))
3214 {
3215 case SYMBOL_REF:
3216 case LABEL_REF:
3217 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3218
dea01258 3219 case CONST:
0cfc65d4 3220 /* We can never know if the resulting address fits in 18 bits and can be
3221 loaded with ila. For now, assume the address will not overflow if
3222 the displacement is "small" (fits 'K' constraint). */
3223 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3224 {
3225 rtx sym = XEXP (XEXP (op, 0), 0);
3226 rtx cst = XEXP (XEXP (op, 0), 1);
3227
3228 if (GET_CODE (sym) == SYMBOL_REF
3229 && GET_CODE (cst) == CONST_INT
3230 && satisfies_constraint_K (cst))
3231 return IC_IL1s;
3232 }
3233 return IC_IL2s;
644459d0 3234
dea01258 3235 case HIGH:
3236 return IC_IL1s;
3237
3238 case CONST_VECTOR:
3239 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3240 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3241 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3242 return IC_POOL;
3243 /* Fall through. */
3244
3245 case CONST_INT:
3246 case CONST_DOUBLE:
3247 constant_to_array (mode, op, arr);
644459d0 3248
dea01258 3249 /* Check that each 4-byte slot is identical. */
3250 repeated = 1;
3251 for (i = 4; i < 16; i += 4)
3252 for (j = 0; j < 4; j++)
3253 if (arr[j] != arr[i + j])
3254 repeated = 0;
3255
3256 if (repeated)
3257 {
3258 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3259 val = trunc_int_for_mode (val, SImode);
3260
3261 if (which_immediate_load (val) != SPU_NONE)
3262 return IC_IL1;
3263 }
3264
3265 /* Any mode of 2 bytes or smaller can be loaded with an il
3266 instruction. */
3267 gcc_assert (GET_MODE_SIZE (mode) > 2);
3268
3269 fsmbi = 1;
5df189be 3270 repeat = 0;
dea01258 3271 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3272 if (arr[i] != 0 && repeat == 0)
3273 repeat = arr[i];
3274 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3275 fsmbi = 0;
3276 if (fsmbi)
5df189be 3277 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3278
3279 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3280 return IC_CPAT;
3281
3282 if (repeated)
3283 return IC_IL2;
3284
3285 return IC_POOL;
3286 default:
3287 break;
3288 }
3289 gcc_unreachable ();
644459d0 3290}
3291
3292static enum spu_immediate
3293which_logical_immediate (HOST_WIDE_INT val)
3294{
3295 gcc_assert (val == trunc_int_for_mode (val, SImode));
3296
3297 if (val >= -0x200 && val <= 0x1ff)
3298 return SPU_ORI;
3299 if (val >= 0 && val <= 0xffff)
3300 return SPU_IOHL;
3301 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3302 {
3303 val = trunc_int_for_mode (val, HImode);
3304 if (val >= -0x200 && val <= 0x1ff)
3305 return SPU_ORHI;
3306 if ((val & 0xff) == ((val >> 8) & 0xff))
3307 {
3308 val = trunc_int_for_mode (val, QImode);
3309 if (val >= -0x200 && val <= 0x1ff)
3310 return SPU_ORBI;
3311 }
3312 }
3313 return SPU_NONE;
3314}
3315
5df189be 3316/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3317 CONST_DOUBLEs. */
3318static int
3319const_vector_immediate_p (rtx x)
3320{
3321 int i;
3322 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3323 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3324 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3325 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3326 return 0;
3327 return 1;
3328}
3329
644459d0 3330int
3331logical_immediate_p (rtx op, enum machine_mode mode)
3332{
3333 HOST_WIDE_INT val;
3334 unsigned char arr[16];
3335 int i, j;
3336
3337 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3338 || GET_CODE (op) == CONST_VECTOR);
3339
5df189be 3340 if (GET_CODE (op) == CONST_VECTOR
3341 && !const_vector_immediate_p (op))
3342 return 0;
3343
644459d0 3344 if (GET_MODE (op) != VOIDmode)
3345 mode = GET_MODE (op);
3346
3347 constant_to_array (mode, op, arr);
3348
3349 /* Check that bytes are repeated. */
3350 for (i = 4; i < 16; i += 4)
3351 for (j = 0; j < 4; j++)
3352 if (arr[j] != arr[i + j])
3353 return 0;
3354
3355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3356 val = trunc_int_for_mode (val, SImode);
3357
3358 i = which_logical_immediate (val);
3359 return i != SPU_NONE && i != SPU_IOHL;
3360}
3361
3362int
3363iohl_immediate_p (rtx op, enum machine_mode mode)
3364{
3365 HOST_WIDE_INT val;
3366 unsigned char arr[16];
3367 int i, j;
3368
3369 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3370 || GET_CODE (op) == CONST_VECTOR);
3371
5df189be 3372 if (GET_CODE (op) == CONST_VECTOR
3373 && !const_vector_immediate_p (op))
3374 return 0;
3375
644459d0 3376 if (GET_MODE (op) != VOIDmode)
3377 mode = GET_MODE (op);
3378
3379 constant_to_array (mode, op, arr);
3380
3381 /* Check that bytes are repeated. */
3382 for (i = 4; i < 16; i += 4)
3383 for (j = 0; j < 4; j++)
3384 if (arr[j] != arr[i + j])
3385 return 0;
3386
3387 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3388 val = trunc_int_for_mode (val, SImode);
3389
3390 return val >= 0 && val <= 0xffff;
3391}
3392
3393int
3394arith_immediate_p (rtx op, enum machine_mode mode,
3395 HOST_WIDE_INT low, HOST_WIDE_INT high)
3396{
3397 HOST_WIDE_INT val;
3398 unsigned char arr[16];
3399 int bytes, i, j;
3400
3401 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3402 || GET_CODE (op) == CONST_VECTOR);
3403
5df189be 3404 if (GET_CODE (op) == CONST_VECTOR
3405 && !const_vector_immediate_p (op))
3406 return 0;
3407
644459d0 3408 if (GET_MODE (op) != VOIDmode)
3409 mode = GET_MODE (op);
3410
3411 constant_to_array (mode, op, arr);
3412
3413 if (VECTOR_MODE_P (mode))
3414 mode = GET_MODE_INNER (mode);
3415
3416 bytes = GET_MODE_SIZE (mode);
3417 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3418
3419 /* Check that bytes are repeated. */
3420 for (i = bytes; i < 16; i += bytes)
3421 for (j = 0; j < bytes; j++)
3422 if (arr[j] != arr[i + j])
3423 return 0;
3424
3425 val = arr[0];
3426 for (j = 1; j < bytes; j++)
3427 val = (val << 8) | arr[j];
3428
3429 val = trunc_int_for_mode (val, mode);
3430
3431 return val >= low && val <= high;
3432}
3433
3434/* We accept:
5b865faf 3435 - any 32-bit constant (SImode, SFmode)
644459d0 3436 - any constant that can be generated with fsmbi (any mode)
5b865faf 3437 - a 64-bit constant where the high and low bits are identical
644459d0 3438 (DImode, DFmode)
5b865faf 3439 - a 128-bit constant where the four 32-bit words match. */
644459d0 3440int
3441spu_legitimate_constant_p (rtx x)
3442{
5df189be 3443 if (GET_CODE (x) == HIGH)
3444 x = XEXP (x, 0);
644459d0 3445 /* V4SI with all identical symbols is valid. */
5df189be 3446 if (!flag_pic
3447 && GET_MODE (x) == V4SImode
644459d0 3448 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3449 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3450 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3451 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3452 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3453 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3454
5df189be 3455 if (GET_CODE (x) == CONST_VECTOR
3456 && !const_vector_immediate_p (x))
3457 return 0;
644459d0 3458 return 1;
3459}
3460
3461/* Valid address are:
3462 - symbol_ref, label_ref, const
3463 - reg
3464 - reg + const, where either reg or const is 16 byte aligned
3465 - reg + reg, alignment doesn't matter
3466 The alignment matters in the reg+const case because lqd and stqd
3467 ignore the 4 least significant bits of the const. (TODO: It might be
3468 preferable to allow any alignment and fix it up when splitting.) */
3469int
3470spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3471 rtx x, int reg_ok_strict)
3472{
3473 if (mode == TImode && GET_CODE (x) == AND
3474 && GET_CODE (XEXP (x, 1)) == CONST_INT
3475 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3476 x = XEXP (x, 0);
3477 switch (GET_CODE (x))
3478 {
3479 case SYMBOL_REF:
3480 case LABEL_REF:
3481 return !TARGET_LARGE_MEM;
3482
3483 case CONST:
0cfc65d4 3484 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3485 {
3486 rtx sym = XEXP (XEXP (x, 0), 0);
3487 rtx cst = XEXP (XEXP (x, 0), 1);
3488
3489 /* Accept any symbol_ref + constant, assuming it does not
3490 wrap around the local store addressability limit. */
3491 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3492 return 1;
3493 }
3494 return 0;
644459d0 3495
3496 case CONST_INT:
3497 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3498
3499 case SUBREG:
3500 x = XEXP (x, 0);
3501 gcc_assert (GET_CODE (x) == REG);
3502
3503 case REG:
3504 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3505
3506 case PLUS:
3507 case LO_SUM:
3508 {
3509 rtx op0 = XEXP (x, 0);
3510 rtx op1 = XEXP (x, 1);
3511 if (GET_CODE (op0) == SUBREG)
3512 op0 = XEXP (op0, 0);
3513 if (GET_CODE (op1) == SUBREG)
3514 op1 = XEXP (op1, 0);
3515 /* We can't just accept any aligned register because CSE can
3516 change it to a register that is not marked aligned and then
3517 recog will fail. So we only accept frame registers because
3518 they will only be changed to other frame registers. */
3519 if (GET_CODE (op0) == REG
3520 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3521 && GET_CODE (op1) == CONST_INT
3522 && INTVAL (op1) >= -0x2000
3523 && INTVAL (op1) <= 0x1fff
5df189be 3524 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3525 return 1;
3526 if (GET_CODE (op0) == REG
3527 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3528 && GET_CODE (op1) == REG
3529 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3530 return 1;
3531 }
3532 break;
3533
3534 default:
3535 break;
3536 }
3537 return 0;
3538}
3539
3540/* When the address is reg + const_int, force the const_int into a
fa7637bd 3541 register. */
644459d0 3542rtx
3543spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3544 enum machine_mode mode)
3545{
3546 rtx op0, op1;
3547 /* Make sure both operands are registers. */
3548 if (GET_CODE (x) == PLUS)
3549 {
3550 op0 = XEXP (x, 0);
3551 op1 = XEXP (x, 1);
3552 if (ALIGNED_SYMBOL_REF_P (op0))
3553 {
3554 op0 = force_reg (Pmode, op0);
3555 mark_reg_pointer (op0, 128);
3556 }
3557 else if (GET_CODE (op0) != REG)
3558 op0 = force_reg (Pmode, op0);
3559 if (ALIGNED_SYMBOL_REF_P (op1))
3560 {
3561 op1 = force_reg (Pmode, op1);
3562 mark_reg_pointer (op1, 128);
3563 }
3564 else if (GET_CODE (op1) != REG)
3565 op1 = force_reg (Pmode, op1);
3566 x = gen_rtx_PLUS (Pmode, op0, op1);
3567 if (spu_legitimate_address (mode, x, 0))
3568 return x;
3569 }
3570 return NULL_RTX;
3571}
3572
3573/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3574 struct attribute_spec.handler. */
3575static tree
3576spu_handle_fndecl_attribute (tree * node,
3577 tree name,
3578 tree args ATTRIBUTE_UNUSED,
3579 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3580{
3581 if (TREE_CODE (*node) != FUNCTION_DECL)
3582 {
3583 warning (0, "`%s' attribute only applies to functions",
3584 IDENTIFIER_POINTER (name));
3585 *no_add_attrs = true;
3586 }
3587
3588 return NULL_TREE;
3589}
3590
3591/* Handle the "vector" attribute. */
3592static tree
3593spu_handle_vector_attribute (tree * node, tree name,
3594 tree args ATTRIBUTE_UNUSED,
3595 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3596{
3597 tree type = *node, result = NULL_TREE;
3598 enum machine_mode mode;
3599 int unsigned_p;
3600
3601 while (POINTER_TYPE_P (type)
3602 || TREE_CODE (type) == FUNCTION_TYPE
3603 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3604 type = TREE_TYPE (type);
3605
3606 mode = TYPE_MODE (type);
3607
3608 unsigned_p = TYPE_UNSIGNED (type);
3609 switch (mode)
3610 {
3611 case DImode:
3612 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3613 break;
3614 case SImode:
3615 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3616 break;
3617 case HImode:
3618 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3619 break;
3620 case QImode:
3621 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3622 break;
3623 case SFmode:
3624 result = V4SF_type_node;
3625 break;
3626 case DFmode:
3627 result = V2DF_type_node;
3628 break;
3629 default:
3630 break;
3631 }
3632
3633 /* Propagate qualifiers attached to the element type
3634 onto the vector type. */
3635 if (result && result != type && TYPE_QUALS (type))
3636 result = build_qualified_type (result, TYPE_QUALS (type));
3637
3638 *no_add_attrs = true; /* No need to hang on to the attribute. */
3639
3640 if (!result)
3641 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3642 else
d991e6e8 3643 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3644
3645 return NULL_TREE;
3646}
3647
f2b32076 3648/* Return nonzero if FUNC is a naked function. */
644459d0 3649static int
3650spu_naked_function_p (tree func)
3651{
3652 tree a;
3653
3654 if (TREE_CODE (func) != FUNCTION_DECL)
3655 abort ();
3656
3657 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3658 return a != NULL_TREE;
3659}
3660
3661int
3662spu_initial_elimination_offset (int from, int to)
3663{
3664 int saved_regs_size = spu_saved_regs_size ();
3665 int sp_offset = 0;
abe32cce 3666 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3667 || get_frame_size () || saved_regs_size)
3668 sp_offset = STACK_POINTER_OFFSET;
3669 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3670 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3671 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3672 return get_frame_size ();
644459d0 3673 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3674 return sp_offset + crtl->outgoing_args_size
644459d0 3675 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3676 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3677 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3678 else
3679 gcc_unreachable ();
644459d0 3680}
3681
3682rtx
fb80456a 3683spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3684{
3685 enum machine_mode mode = TYPE_MODE (type);
3686 int byte_size = ((mode == BLKmode)
3687 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3688
3689 /* Make sure small structs are left justified in a register. */
3690 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3691 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3692 {
3693 enum machine_mode smode;
3694 rtvec v;
3695 int i;
3696 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3697 int n = byte_size / UNITS_PER_WORD;
3698 v = rtvec_alloc (nregs);
3699 for (i = 0; i < n; i++)
3700 {
3701 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3702 gen_rtx_REG (TImode,
3703 FIRST_RETURN_REGNUM
3704 + i),
3705 GEN_INT (UNITS_PER_WORD * i));
3706 byte_size -= UNITS_PER_WORD;
3707 }
3708
3709 if (n < nregs)
3710 {
3711 if (byte_size < 4)
3712 byte_size = 4;
3713 smode =
3714 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3715 RTVEC_ELT (v, n) =
3716 gen_rtx_EXPR_LIST (VOIDmode,
3717 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3718 GEN_INT (UNITS_PER_WORD * n));
3719 }
3720 return gen_rtx_PARALLEL (mode, v);
3721 }
3722 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3723}
3724
3725rtx
3726spu_function_arg (CUMULATIVE_ARGS cum,
3727 enum machine_mode mode,
3728 tree type, int named ATTRIBUTE_UNUSED)
3729{
3730 int byte_size;
3731
3732 if (cum >= MAX_REGISTER_ARGS)
3733 return 0;
3734
3735 byte_size = ((mode == BLKmode)
3736 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3737
3738 /* The ABI does not allow parameters to be passed partially in
3739 reg and partially in stack. */
3740 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3741 return 0;
3742
3743 /* Make sure small structs are left justified in a register. */
3744 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3745 && byte_size < UNITS_PER_WORD && byte_size > 0)
3746 {
3747 enum machine_mode smode;
3748 rtx gr_reg;
3749 if (byte_size < 4)
3750 byte_size = 4;
3751 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3752 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3753 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3754 const0_rtx);
3755 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3756 }
3757 else
3758 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3759}
3760
3761/* Variable sized types are passed by reference. */
3762static bool
3763spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3764 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3765 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3766{
3767 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3768}
3769\f
3770
3771/* Var args. */
3772
3773/* Create and return the va_list datatype.
3774
3775 On SPU, va_list is an array type equivalent to
3776
3777 typedef struct __va_list_tag
3778 {
3779 void *__args __attribute__((__aligned(16)));
3780 void *__skip __attribute__((__aligned(16)));
3781
3782 } va_list[1];
3783
fa7637bd 3784 where __args points to the arg that will be returned by the next
644459d0 3785 va_arg(), and __skip points to the previous stack frame such that
3786 when __args == __skip we should advance __args by 32 bytes. */
3787static tree
3788spu_build_builtin_va_list (void)
3789{
3790 tree f_args, f_skip, record, type_decl;
3791 bool owp;
3792
3793 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3794
3795 type_decl =
3796 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3797
3798 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3799 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3800
3801 DECL_FIELD_CONTEXT (f_args) = record;
3802 DECL_ALIGN (f_args) = 128;
3803 DECL_USER_ALIGN (f_args) = 1;
3804
3805 DECL_FIELD_CONTEXT (f_skip) = record;
3806 DECL_ALIGN (f_skip) = 128;
3807 DECL_USER_ALIGN (f_skip) = 1;
3808
3809 TREE_CHAIN (record) = type_decl;
3810 TYPE_NAME (record) = type_decl;
3811 TYPE_FIELDS (record) = f_args;
3812 TREE_CHAIN (f_args) = f_skip;
3813
3814 /* We know this is being padded and we want it too. It is an internal
3815 type so hide the warnings from the user. */
3816 owp = warn_padded;
3817 warn_padded = false;
3818
3819 layout_type (record);
3820
3821 warn_padded = owp;
3822
3823 /* The correct type is an array type of one element. */
3824 return build_array_type (record, build_index_type (size_zero_node));
3825}
3826
3827/* Implement va_start by filling the va_list structure VALIST.
3828 NEXTARG points to the first anonymous stack argument.
3829
3830 The following global variables are used to initialize
3831 the va_list structure:
3832
abe32cce 3833 crtl->args.info;
644459d0 3834 the CUMULATIVE_ARGS for this function
3835
abe32cce 3836 crtl->args.arg_offset_rtx:
644459d0 3837 holds the offset of the first anonymous stack argument
3838 (relative to the virtual arg pointer). */
3839
8a58ed0a 3840static void
644459d0 3841spu_va_start (tree valist, rtx nextarg)
3842{
3843 tree f_args, f_skip;
3844 tree args, skip, t;
3845
3846 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3847 f_skip = TREE_CHAIN (f_args);
3848
3849 valist = build_va_arg_indirect_ref (valist);
3850 args =
3851 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3852 skip =
3853 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3854
3855 /* Find the __args area. */
3856 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3857 if (crtl->args.pretend_args_size > 0)
0de36bdb 3858 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3859 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3860 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3861 TREE_SIDE_EFFECTS (t) = 1;
3862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3863
3864 /* Find the __skip area. */
3865 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3866 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3867 size_int (crtl->args.pretend_args_size
0de36bdb 3868 - STACK_POINTER_OFFSET));
75a70cf9 3869 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3870 TREE_SIDE_EFFECTS (t) = 1;
3871 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3872}
3873
3874/* Gimplify va_arg by updating the va_list structure
3875 VALIST as required to retrieve an argument of type
3876 TYPE, and returning that argument.
3877
3878 ret = va_arg(VALIST, TYPE);
3879
3880 generates code equivalent to:
3881
3882 paddedsize = (sizeof(TYPE) + 15) & -16;
3883 if (VALIST.__args + paddedsize > VALIST.__skip
3884 && VALIST.__args <= VALIST.__skip)
3885 addr = VALIST.__skip + 32;
3886 else
3887 addr = VALIST.__args;
3888 VALIST.__args = addr + paddedsize;
3889 ret = *(TYPE *)addr;
3890 */
3891static tree
75a70cf9 3892spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3893 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3894{
3895 tree f_args, f_skip;
3896 tree args, skip;
3897 HOST_WIDE_INT size, rsize;
3898 tree paddedsize, addr, tmp;
3899 bool pass_by_reference_p;
3900
3901 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3902 f_skip = TREE_CHAIN (f_args);
3903
3904 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3905 args =
3906 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3907 skip =
3908 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3909
3910 addr = create_tmp_var (ptr_type_node, "va_arg");
3911 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3912
3913 /* if an object is dynamically sized, a pointer to it is passed
3914 instead of the object itself. */
3915 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3916 false);
3917 if (pass_by_reference_p)
3918 type = build_pointer_type (type);
3919 size = int_size_in_bytes (type);
3920 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3921
3922 /* build conditional expression to calculate addr. The expression
3923 will be gimplified later. */
0de36bdb 3924 paddedsize = size_int (rsize);
75a70cf9 3925 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3926 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3927 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3928 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3929 unshare_expr (skip)));
644459d0 3930
3931 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3932 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3933 size_int (32)), unshare_expr (args));
644459d0 3934
75a70cf9 3935 gimplify_assign (addr, tmp, pre_p);
644459d0 3936
3937 /* update VALIST.__args */
0de36bdb 3938 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3939 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3940
3941 addr = fold_convert (build_pointer_type (type), addr);
3942
3943 if (pass_by_reference_p)
3944 addr = build_va_arg_indirect_ref (addr);
3945
3946 return build_va_arg_indirect_ref (addr);
3947}
3948
3949/* Save parameter registers starting with the register that corresponds
3950 to the first unnamed parameters. If the first unnamed parameter is
3951 in the stack then save no registers. Set pretend_args_size to the
3952 amount of space needed to save the registers. */
3953void
3954spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3955 tree type, int *pretend_size, int no_rtl)
3956{
3957 if (!no_rtl)
3958 {
3959 rtx tmp;
3960 int regno;
3961 int offset;
3962 int ncum = *cum;
3963
3964 /* cum currently points to the last named argument, we want to
3965 start at the next argument. */
3966 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3967
3968 offset = -STACK_POINTER_OFFSET;
3969 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3970 {
3971 tmp = gen_frame_mem (V4SImode,
3972 plus_constant (virtual_incoming_args_rtx,
3973 offset));
3974 emit_move_insn (tmp,
3975 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3976 offset += 16;
3977 }
3978 *pretend_size = offset + STACK_POINTER_OFFSET;
3979 }
3980}
3981\f
3982void
3983spu_conditional_register_usage (void)
3984{
3985 if (flag_pic)
3986 {
3987 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3988 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3989 }
644459d0 3990}
3991
3992/* This is called to decide when we can simplify a load instruction. We
3993 must only return true for registers which we know will always be
3994 aligned. Taking into account that CSE might replace this reg with
3995 another one that has not been marked aligned.
3996 So this is really only true for frame, stack and virtual registers,
fa7637bd 3997 which we know are always aligned and should not be adversely effected
3998 by CSE. */
644459d0 3999static int
4000regno_aligned_for_load (int regno)
4001{
4002 return regno == FRAME_POINTER_REGNUM
5df189be 4003 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4004 || regno == ARG_POINTER_REGNUM
644459d0 4005 || regno == STACK_POINTER_REGNUM
5df189be 4006 || (regno >= FIRST_VIRTUAL_REGISTER
4007 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4008}
4009
4010/* Return TRUE when mem is known to be 16-byte aligned. */
4011int
4012aligned_mem_p (rtx mem)
4013{
4014 if (MEM_ALIGN (mem) >= 128)
4015 return 1;
4016 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4017 return 1;
4018 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4019 {
4020 rtx p0 = XEXP (XEXP (mem, 0), 0);
4021 rtx p1 = XEXP (XEXP (mem, 0), 1);
4022 if (regno_aligned_for_load (REGNO (p0)))
4023 {
4024 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4025 return 1;
4026 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4027 return 1;
4028 }
4029 }
4030 else if (GET_CODE (XEXP (mem, 0)) == REG)
4031 {
4032 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4033 return 1;
4034 }
4035 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4036 return 1;
4037 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4038 {
4039 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4040 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4041 if (GET_CODE (p0) == SYMBOL_REF
4042 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4043 return 1;
4044 }
4045 return 0;
4046}
4047
69ced2d6 4048/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4049 into its SYMBOL_REF_FLAGS. */
4050static void
4051spu_encode_section_info (tree decl, rtx rtl, int first)
4052{
4053 default_encode_section_info (decl, rtl, first);
4054
4055 /* If a variable has a forced alignment to < 16 bytes, mark it with
4056 SYMBOL_FLAG_ALIGN1. */
4057 if (TREE_CODE (decl) == VAR_DECL
4058 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4059 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4060}
4061
644459d0 4062/* Return TRUE if we are certain the mem refers to a complete object
4063 which is both 16-byte aligned and padded to a 16-byte boundary. This
4064 would make it safe to store with a single instruction.
4065 We guarantee the alignment and padding for static objects by aligning
4066 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4067 FIXME: We currently cannot guarantee this for objects on the stack
4068 because assign_parm_setup_stack calls assign_stack_local with the
4069 alignment of the parameter mode and in that case the alignment never
4070 gets adjusted by LOCAL_ALIGNMENT. */
4071static int
4072store_with_one_insn_p (rtx mem)
4073{
4074 rtx addr = XEXP (mem, 0);
4075 if (GET_MODE (mem) == BLKmode)
4076 return 0;
4077 /* Only static objects. */
4078 if (GET_CODE (addr) == SYMBOL_REF)
4079 {
4080 /* We use the associated declaration to make sure the access is
fa7637bd 4081 referring to the whole object.
644459d0 4082 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4083 if it is necessary. Will there be cases where one exists, and
4084 the other does not? Will there be cases where both exist, but
4085 have different types? */
4086 tree decl = MEM_EXPR (mem);
4087 if (decl
4088 && TREE_CODE (decl) == VAR_DECL
4089 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4090 return 1;
4091 decl = SYMBOL_REF_DECL (addr);
4092 if (decl
4093 && TREE_CODE (decl) == VAR_DECL
4094 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4095 return 1;
4096 }
4097 return 0;
4098}
4099
4100int
4101spu_expand_mov (rtx * ops, enum machine_mode mode)
4102{
4103 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4104 abort ();
4105
4106 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4107 {
4108 rtx from = SUBREG_REG (ops[1]);
8d72495d 4109 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4110
4111 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4112 && GET_MODE_CLASS (imode) == MODE_INT
4113 && subreg_lowpart_p (ops[1]));
4114
4115 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4116 imode = SImode;
4117 if (imode != GET_MODE (from))
4118 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4119
4120 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4121 {
99bdde56 4122 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4123 emit_insn (GEN_FCN (icode) (ops[0], from));
4124 }
4125 else
4126 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4127 return 1;
4128 }
4129
4130 /* At least one of the operands needs to be a register. */
4131 if ((reload_in_progress | reload_completed) == 0
4132 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4133 {
4134 rtx temp = force_reg (mode, ops[1]);
4135 emit_move_insn (ops[0], temp);
4136 return 1;
4137 }
4138 if (reload_in_progress || reload_completed)
4139 {
dea01258 4140 if (CONSTANT_P (ops[1]))
4141 return spu_split_immediate (ops);
644459d0 4142 return 0;
4143 }
4144 else
4145 {
4146 if (GET_CODE (ops[0]) == MEM)
4147 {
4148 if (!spu_valid_move (ops))
4149 {
4150 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4151 gen_reg_rtx (TImode)));
4152 return 1;
4153 }
4154 }
4155 else if (GET_CODE (ops[1]) == MEM)
4156 {
4157 if (!spu_valid_move (ops))
4158 {
4159 emit_insn (gen_load
4160 (ops[0], ops[1], gen_reg_rtx (TImode),
4161 gen_reg_rtx (SImode)));
4162 return 1;
4163 }
4164 }
4165 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4166 extend them. */
4167 if (GET_CODE (ops[1]) == CONST_INT)
4168 {
4169 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4170 if (val != INTVAL (ops[1]))
4171 {
4172 emit_move_insn (ops[0], GEN_INT (val));
4173 return 1;
4174 }
4175 }
4176 }
4177 return 0;
4178}
4179
644459d0 4180void
4181spu_split_load (rtx * ops)
4182{
4183 enum machine_mode mode = GET_MODE (ops[0]);
4184 rtx addr, load, rot, mem, p0, p1;
4185 int rot_amt;
4186
4187 addr = XEXP (ops[1], 0);
4188
4189 rot = 0;
4190 rot_amt = 0;
4191 if (GET_CODE (addr) == PLUS)
4192 {
4193 /* 8 cases:
4194 aligned reg + aligned reg => lqx
4195 aligned reg + unaligned reg => lqx, rotqby
4196 aligned reg + aligned const => lqd
4197 aligned reg + unaligned const => lqd, rotqbyi
4198 unaligned reg + aligned reg => lqx, rotqby
4199 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4200 unaligned reg + aligned const => lqd, rotqby
4201 unaligned reg + unaligned const -> not allowed by legitimate address
4202 */
4203 p0 = XEXP (addr, 0);
4204 p1 = XEXP (addr, 1);
aa71ecd4 4205 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4206 {
aa71ecd4 4207 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4208 {
4209 emit_insn (gen_addsi3 (ops[3], p0, p1));
4210 rot = ops[3];
4211 }
4212 else
4213 rot = p0;
4214 }
4215 else
4216 {
4217 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4218 {
4219 rot_amt = INTVAL (p1) & 15;
4220 p1 = GEN_INT (INTVAL (p1) & -16);
4221 addr = gen_rtx_PLUS (SImode, p0, p1);
4222 }
aa71ecd4 4223 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4224 rot = p1;
4225 }
4226 }
4227 else if (GET_CODE (addr) == REG)
4228 {
aa71ecd4 4229 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4230 rot = addr;
4231 }
4232 else if (GET_CODE (addr) == CONST)
4233 {
4234 if (GET_CODE (XEXP (addr, 0)) == PLUS
4235 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4236 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4237 {
4238 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4239 if (rot_amt & -16)
4240 addr = gen_rtx_CONST (Pmode,
4241 gen_rtx_PLUS (Pmode,
4242 XEXP (XEXP (addr, 0), 0),
4243 GEN_INT (rot_amt & -16)));
4244 else
4245 addr = XEXP (XEXP (addr, 0), 0);
4246 }
4247 else
4248 rot = addr;
4249 }
4250 else if (GET_CODE (addr) == CONST_INT)
4251 {
4252 rot_amt = INTVAL (addr);
4253 addr = GEN_INT (rot_amt & -16);
4254 }
4255 else if (!ALIGNED_SYMBOL_REF_P (addr))
4256 rot = addr;
4257
4258 if (GET_MODE_SIZE (mode) < 4)
4259 rot_amt += GET_MODE_SIZE (mode) - 4;
4260
4261 rot_amt &= 15;
4262
4263 if (rot && rot_amt)
4264 {
4265 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4266 rot = ops[3];
4267 rot_amt = 0;
4268 }
4269
4270 load = ops[2];
4271
4272 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4273 mem = change_address (ops[1], TImode, addr);
4274
e04cf423 4275 emit_insn (gen_movti (load, mem));
644459d0 4276
4277 if (rot)
4278 emit_insn (gen_rotqby_ti (load, load, rot));
4279 else if (rot_amt)
4280 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4281
4282 if (reload_completed)
4283 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4284 else
4285 emit_insn (gen_spu_convert (ops[0], load));
4286}
4287
4288void
4289spu_split_store (rtx * ops)
4290{
4291 enum machine_mode mode = GET_MODE (ops[0]);
4292 rtx pat = ops[2];
4293 rtx reg = ops[3];
4294 rtx addr, p0, p1, p1_lo, smem;
4295 int aform;
4296 int scalar;
4297
4298 addr = XEXP (ops[0], 0);
4299
4300 if (GET_CODE (addr) == PLUS)
4301 {
4302 /* 8 cases:
4303 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4304 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4305 aligned reg + aligned const => lqd, c?d, shuf, stqx
4306 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4307 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4308 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4309 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4310 unaligned reg + unaligned const -> not allowed by legitimate address
4311 */
4312 aform = 0;
4313 p0 = XEXP (addr, 0);
4314 p1 = p1_lo = XEXP (addr, 1);
4315 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4316 {
4317 p1_lo = GEN_INT (INTVAL (p1) & 15);
4318 p1 = GEN_INT (INTVAL (p1) & -16);
4319 addr = gen_rtx_PLUS (SImode, p0, p1);
4320 }
4321 }
4322 else if (GET_CODE (addr) == REG)
4323 {
4324 aform = 0;
4325 p0 = addr;
4326 p1 = p1_lo = const0_rtx;
4327 }
4328 else
4329 {
4330 aform = 1;
4331 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4332 p1 = 0; /* aform doesn't use p1 */
4333 p1_lo = addr;
4334 if (ALIGNED_SYMBOL_REF_P (addr))
4335 p1_lo = const0_rtx;
4336 else if (GET_CODE (addr) == CONST)
4337 {
4338 if (GET_CODE (XEXP (addr, 0)) == PLUS
4339 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4340 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4341 {
4342 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4343 if ((v & -16) != 0)
4344 addr = gen_rtx_CONST (Pmode,
4345 gen_rtx_PLUS (Pmode,
4346 XEXP (XEXP (addr, 0), 0),
4347 GEN_INT (v & -16)));
4348 else
4349 addr = XEXP (XEXP (addr, 0), 0);
4350 p1_lo = GEN_INT (v & 15);
4351 }
4352 }
4353 else if (GET_CODE (addr) == CONST_INT)
4354 {
4355 p1_lo = GEN_INT (INTVAL (addr) & 15);
4356 addr = GEN_INT (INTVAL (addr) & -16);
4357 }
4358 }
4359
e04cf423 4360 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4361
644459d0 4362 scalar = store_with_one_insn_p (ops[0]);
4363 if (!scalar)
4364 {
4365 /* We could copy the flags from the ops[0] MEM to mem here,
4366 We don't because we want this load to be optimized away if
4367 possible, and copying the flags will prevent that in certain
4368 cases, e.g. consider the volatile flag. */
4369
e04cf423 4370 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4371 set_mem_alias_set (lmem, 0);
4372 emit_insn (gen_movti (reg, lmem));
644459d0 4373
aa71ecd4 4374 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4375 p0 = stack_pointer_rtx;
4376 if (!p1_lo)
4377 p1_lo = const0_rtx;
4378
4379 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4380 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4381 }
4382 else if (reload_completed)
4383 {
4384 if (GET_CODE (ops[1]) == REG)
4385 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4386 else if (GET_CODE (ops[1]) == SUBREG)
4387 emit_move_insn (reg,
4388 gen_rtx_REG (GET_MODE (reg),
4389 REGNO (SUBREG_REG (ops[1]))));
4390 else
4391 abort ();
4392 }
4393 else
4394 {
4395 if (GET_CODE (ops[1]) == REG)
4396 emit_insn (gen_spu_convert (reg, ops[1]));
4397 else if (GET_CODE (ops[1]) == SUBREG)
4398 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4399 else
4400 abort ();
4401 }
4402
4403 if (GET_MODE_SIZE (mode) < 4 && scalar)
4404 emit_insn (gen_shlqby_ti
4405 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4406
644459d0 4407 smem = change_address (ops[0], TImode, addr);
4408 /* We can't use the previous alias set because the memory has changed
4409 size and can potentially overlap objects of other types. */
4410 set_mem_alias_set (smem, 0);
4411
e04cf423 4412 emit_insn (gen_movti (smem, reg));
644459d0 4413}
4414
4415/* Return TRUE if X is MEM which is a struct member reference
4416 and the member can safely be loaded and stored with a single
4417 instruction because it is padded. */
4418static int
4419mem_is_padded_component_ref (rtx x)
4420{
4421 tree t = MEM_EXPR (x);
4422 tree r;
4423 if (!t || TREE_CODE (t) != COMPONENT_REF)
4424 return 0;
4425 t = TREE_OPERAND (t, 1);
4426 if (!t || TREE_CODE (t) != FIELD_DECL
4427 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4428 return 0;
4429 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4430 r = DECL_FIELD_CONTEXT (t);
4431 if (!r || TREE_CODE (r) != RECORD_TYPE)
4432 return 0;
4433 /* Make sure they are the same mode */
4434 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4435 return 0;
4436 /* If there are no following fields then the field alignment assures
fa7637bd 4437 the structure is padded to the alignment which means this field is
4438 padded too. */
644459d0 4439 if (TREE_CHAIN (t) == 0)
4440 return 1;
4441 /* If the following field is also aligned then this field will be
4442 padded. */
4443 t = TREE_CHAIN (t);
4444 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4445 return 1;
4446 return 0;
4447}
4448
c7b91b14 4449/* Parse the -mfixed-range= option string. */
4450static void
4451fix_range (const char *const_str)
4452{
4453 int i, first, last;
4454 char *str, *dash, *comma;
4455
4456 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4457 REG2 are either register names or register numbers. The effect
4458 of this option is to mark the registers in the range from REG1 to
4459 REG2 as ``fixed'' so they won't be used by the compiler. */
4460
4461 i = strlen (const_str);
4462 str = (char *) alloca (i + 1);
4463 memcpy (str, const_str, i + 1);
4464
4465 while (1)
4466 {
4467 dash = strchr (str, '-');
4468 if (!dash)
4469 {
4470 warning (0, "value of -mfixed-range must have form REG1-REG2");
4471 return;
4472 }
4473 *dash = '\0';
4474 comma = strchr (dash + 1, ',');
4475 if (comma)
4476 *comma = '\0';
4477
4478 first = decode_reg_name (str);
4479 if (first < 0)
4480 {
4481 warning (0, "unknown register name: %s", str);
4482 return;
4483 }
4484
4485 last = decode_reg_name (dash + 1);
4486 if (last < 0)
4487 {
4488 warning (0, "unknown register name: %s", dash + 1);
4489 return;
4490 }
4491
4492 *dash = '-';
4493
4494 if (first > last)
4495 {
4496 warning (0, "%s-%s is an empty range", str, dash + 1);
4497 return;
4498 }
4499
4500 for (i = first; i <= last; ++i)
4501 fixed_regs[i] = call_used_regs[i] = 1;
4502
4503 if (!comma)
4504 break;
4505
4506 *comma = ',';
4507 str = comma + 1;
4508 }
4509}
4510
644459d0 4511int
4512spu_valid_move (rtx * ops)
4513{
4514 enum machine_mode mode = GET_MODE (ops[0]);
4515 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4516 return 0;
4517
4518 /* init_expr_once tries to recog against load and store insns to set
4519 the direct_load[] and direct_store[] arrays. We always want to
4520 consider those loads and stores valid. init_expr_once is called in
4521 the context of a dummy function which does not have a decl. */
4522 if (cfun->decl == 0)
4523 return 1;
4524
4525 /* Don't allows loads/stores which would require more than 1 insn.
4526 During and after reload we assume loads and stores only take 1
4527 insn. */
4528 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4529 {
4530 if (GET_CODE (ops[0]) == MEM
4531 && (GET_MODE_SIZE (mode) < 4
4532 || !(store_with_one_insn_p (ops[0])
4533 || mem_is_padded_component_ref (ops[0]))))
4534 return 0;
4535 if (GET_CODE (ops[1]) == MEM
4536 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4537 return 0;
4538 }
4539 return 1;
4540}
4541
4542/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4543 can be generated using the fsmbi instruction. */
4544int
4545fsmbi_const_p (rtx x)
4546{
dea01258 4547 if (CONSTANT_P (x))
4548 {
5df189be 4549 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4550 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4551 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4552 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4553 }
4554 return 0;
4555}
4556
4557/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4558 can be generated using the cbd, chd, cwd or cdd instruction. */
4559int
4560cpat_const_p (rtx x, enum machine_mode mode)
4561{
4562 if (CONSTANT_P (x))
4563 {
4564 enum immediate_class c = classify_immediate (x, mode);
4565 return c == IC_CPAT;
4566 }
4567 return 0;
4568}
644459d0 4569
dea01258 4570rtx
4571gen_cpat_const (rtx * ops)
4572{
4573 unsigned char dst[16];
4574 int i, offset, shift, isize;
4575 if (GET_CODE (ops[3]) != CONST_INT
4576 || GET_CODE (ops[2]) != CONST_INT
4577 || (GET_CODE (ops[1]) != CONST_INT
4578 && GET_CODE (ops[1]) != REG))
4579 return 0;
4580 if (GET_CODE (ops[1]) == REG
4581 && (!REG_POINTER (ops[1])
4582 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4583 return 0;
644459d0 4584
4585 for (i = 0; i < 16; i++)
dea01258 4586 dst[i] = i + 16;
4587 isize = INTVAL (ops[3]);
4588 if (isize == 1)
4589 shift = 3;
4590 else if (isize == 2)
4591 shift = 2;
4592 else
4593 shift = 0;
4594 offset = (INTVAL (ops[2]) +
4595 (GET_CODE (ops[1]) ==
4596 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4597 for (i = 0; i < isize; i++)
4598 dst[offset + i] = i + shift;
4599 return array_to_constant (TImode, dst);
644459d0 4600}
4601
4602/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4603 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4604 than 16 bytes, the value is repeated across the rest of the array. */
4605void
4606constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4607{
4608 HOST_WIDE_INT val;
4609 int i, j, first;
4610
4611 memset (arr, 0, 16);
4612 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4613 if (GET_CODE (x) == CONST_INT
4614 || (GET_CODE (x) == CONST_DOUBLE
4615 && (mode == SFmode || mode == DFmode)))
4616 {
4617 gcc_assert (mode != VOIDmode && mode != BLKmode);
4618
4619 if (GET_CODE (x) == CONST_DOUBLE)
4620 val = const_double_to_hwint (x);
4621 else
4622 val = INTVAL (x);
4623 first = GET_MODE_SIZE (mode) - 1;
4624 for (i = first; i >= 0; i--)
4625 {
4626 arr[i] = val & 0xff;
4627 val >>= 8;
4628 }
4629 /* Splat the constant across the whole array. */
4630 for (j = 0, i = first + 1; i < 16; i++)
4631 {
4632 arr[i] = arr[j];
4633 j = (j == first) ? 0 : j + 1;
4634 }
4635 }
4636 else if (GET_CODE (x) == CONST_DOUBLE)
4637 {
4638 val = CONST_DOUBLE_LOW (x);
4639 for (i = 15; i >= 8; i--)
4640 {
4641 arr[i] = val & 0xff;
4642 val >>= 8;
4643 }
4644 val = CONST_DOUBLE_HIGH (x);
4645 for (i = 7; i >= 0; i--)
4646 {
4647 arr[i] = val & 0xff;
4648 val >>= 8;
4649 }
4650 }
4651 else if (GET_CODE (x) == CONST_VECTOR)
4652 {
4653 int units;
4654 rtx elt;
4655 mode = GET_MODE_INNER (mode);
4656 units = CONST_VECTOR_NUNITS (x);
4657 for (i = 0; i < units; i++)
4658 {
4659 elt = CONST_VECTOR_ELT (x, i);
4660 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4661 {
4662 if (GET_CODE (elt) == CONST_DOUBLE)
4663 val = const_double_to_hwint (elt);
4664 else
4665 val = INTVAL (elt);
4666 first = GET_MODE_SIZE (mode) - 1;
4667 if (first + i * GET_MODE_SIZE (mode) > 16)
4668 abort ();
4669 for (j = first; j >= 0; j--)
4670 {
4671 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4672 val >>= 8;
4673 }
4674 }
4675 }
4676 }
4677 else
4678 gcc_unreachable();
4679}
4680
4681/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4682 smaller than 16 bytes, use the bytes that would represent that value
4683 in a register, e.g., for QImode return the value of arr[3]. */
4684rtx
4685array_to_constant (enum machine_mode mode, unsigned char arr[16])
4686{
4687 enum machine_mode inner_mode;
4688 rtvec v;
4689 int units, size, i, j, k;
4690 HOST_WIDE_INT val;
4691
4692 if (GET_MODE_CLASS (mode) == MODE_INT
4693 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4694 {
4695 j = GET_MODE_SIZE (mode);
4696 i = j < 4 ? 4 - j : 0;
4697 for (val = 0; i < j; i++)
4698 val = (val << 8) | arr[i];
4699 val = trunc_int_for_mode (val, mode);
4700 return GEN_INT (val);
4701 }
4702
4703 if (mode == TImode)
4704 {
4705 HOST_WIDE_INT high;
4706 for (i = high = 0; i < 8; i++)
4707 high = (high << 8) | arr[i];
4708 for (i = 8, val = 0; i < 16; i++)
4709 val = (val << 8) | arr[i];
4710 return immed_double_const (val, high, TImode);
4711 }
4712 if (mode == SFmode)
4713 {
4714 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4715 val = trunc_int_for_mode (val, SImode);
171b6d22 4716 return hwint_to_const_double (SFmode, val);
644459d0 4717 }
4718 if (mode == DFmode)
4719 {
1f915911 4720 for (i = 0, val = 0; i < 8; i++)
4721 val = (val << 8) | arr[i];
171b6d22 4722 return hwint_to_const_double (DFmode, val);
644459d0 4723 }
4724
4725 if (!VECTOR_MODE_P (mode))
4726 abort ();
4727
4728 units = GET_MODE_NUNITS (mode);
4729 size = GET_MODE_UNIT_SIZE (mode);
4730 inner_mode = GET_MODE_INNER (mode);
4731 v = rtvec_alloc (units);
4732
4733 for (k = i = 0; i < units; ++i)
4734 {
4735 val = 0;
4736 for (j = 0; j < size; j++, k++)
4737 val = (val << 8) | arr[k];
4738
4739 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4740 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4741 else
4742 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4743 }
4744 if (k > 16)
4745 abort ();
4746
4747 return gen_rtx_CONST_VECTOR (mode, v);
4748}
4749
4750static void
4751reloc_diagnostic (rtx x)
4752{
4753 tree loc_decl, decl = 0;
4754 const char *msg;
4755 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4756 return;
4757
4758 if (GET_CODE (x) == SYMBOL_REF)
4759 decl = SYMBOL_REF_DECL (x);
4760 else if (GET_CODE (x) == CONST
4761 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4762 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4763
4764 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4765 if (decl && !DECL_P (decl))
4766 decl = 0;
4767
4768 /* We use last_assemble_variable_decl to get line information. It's
4769 not always going to be right and might not even be close, but will
4770 be right for the more common cases. */
5df189be 4771 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4772 loc_decl = decl;
4773 else
4774 loc_decl = last_assemble_variable_decl;
4775
4776 /* The decl could be a string constant. */
4777 if (decl && DECL_P (decl))
4778 msg = "%Jcreating run-time relocation for %qD";
4779 else
4780 msg = "creating run-time relocation";
4781
99369027 4782 if (TARGET_WARN_RELOC)
644459d0 4783 warning (0, msg, loc_decl, decl);
99369027 4784 else
4785 error (msg, loc_decl, decl);
644459d0 4786}
4787
4788/* Hook into assemble_integer so we can generate an error for run-time
4789 relocations. The SPU ABI disallows them. */
4790static bool
4791spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4792{
4793 /* By default run-time relocations aren't supported, but we allow them
4794 in case users support it in their own run-time loader. And we provide
4795 a warning for those users that don't. */
4796 if ((GET_CODE (x) == SYMBOL_REF)
4797 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4798 reloc_diagnostic (x);
4799
4800 return default_assemble_integer (x, size, aligned_p);
4801}
4802
4803static void
4804spu_asm_globalize_label (FILE * file, const char *name)
4805{
4806 fputs ("\t.global\t", file);
4807 assemble_name (file, name);
4808 fputs ("\n", file);
4809}
4810
4811static bool
f529eb25 4812spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4813 bool speed ATTRIBUTE_UNUSED)
644459d0 4814{
4815 enum machine_mode mode = GET_MODE (x);
4816 int cost = COSTS_N_INSNS (2);
4817
4818 /* Folding to a CONST_VECTOR will use extra space but there might
4819 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4820 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4821 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4822 because this cost will only be compared against a single insn.
4823 if (code == CONST_VECTOR)
4824 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4825 */
4826
4827 /* Use defaults for float operations. Not accurate but good enough. */
4828 if (mode == DFmode)
4829 {
4830 *total = COSTS_N_INSNS (13);
4831 return true;
4832 }
4833 if (mode == SFmode)
4834 {
4835 *total = COSTS_N_INSNS (6);
4836 return true;
4837 }
4838 switch (code)
4839 {
4840 case CONST_INT:
4841 if (satisfies_constraint_K (x))
4842 *total = 0;
4843 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4844 *total = COSTS_N_INSNS (1);
4845 else
4846 *total = COSTS_N_INSNS (3);
4847 return true;
4848
4849 case CONST:
4850 *total = COSTS_N_INSNS (3);
4851 return true;
4852
4853 case LABEL_REF:
4854 case SYMBOL_REF:
4855 *total = COSTS_N_INSNS (0);
4856 return true;
4857
4858 case CONST_DOUBLE:
4859 *total = COSTS_N_INSNS (5);
4860 return true;
4861
4862 case FLOAT_EXTEND:
4863 case FLOAT_TRUNCATE:
4864 case FLOAT:
4865 case UNSIGNED_FLOAT:
4866 case FIX:
4867 case UNSIGNED_FIX:
4868 *total = COSTS_N_INSNS (7);
4869 return true;
4870
4871 case PLUS:
4872 if (mode == TImode)
4873 {
4874 *total = COSTS_N_INSNS (9);
4875 return true;
4876 }
4877 break;
4878
4879 case MULT:
4880 cost =
4881 GET_CODE (XEXP (x, 0)) ==
4882 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4883 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4884 {
4885 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4886 {
4887 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4888 cost = COSTS_N_INSNS (14);
4889 if ((val & 0xffff) == 0)
4890 cost = COSTS_N_INSNS (9);
4891 else if (val > 0 && val < 0x10000)
4892 cost = COSTS_N_INSNS (11);
4893 }
4894 }
4895 *total = cost;
4896 return true;
4897 case DIV:
4898 case UDIV:
4899 case MOD:
4900 case UMOD:
4901 *total = COSTS_N_INSNS (20);
4902 return true;
4903 case ROTATE:
4904 case ROTATERT:
4905 case ASHIFT:
4906 case ASHIFTRT:
4907 case LSHIFTRT:
4908 *total = COSTS_N_INSNS (4);
4909 return true;
4910 case UNSPEC:
4911 if (XINT (x, 1) == UNSPEC_CONVERT)
4912 *total = COSTS_N_INSNS (0);
4913 else
4914 *total = COSTS_N_INSNS (4);
4915 return true;
4916 }
4917 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4918 if (GET_MODE_CLASS (mode) == MODE_INT
4919 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4920 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4921 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4922 *total = cost;
4923 return true;
4924}
4925
1bd43494 4926static enum machine_mode
4927spu_unwind_word_mode (void)
644459d0 4928{
1bd43494 4929 return SImode;
644459d0 4930}
4931
4932/* Decide whether we can make a sibling call to a function. DECL is the
4933 declaration of the function being targeted by the call and EXP is the
4934 CALL_EXPR representing the call. */
4935static bool
4936spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4937{
4938 return decl && !TARGET_LARGE_MEM;
4939}
4940
4941/* We need to correctly update the back chain pointer and the Available
4942 Stack Size (which is in the second slot of the sp register.) */
4943void
4944spu_allocate_stack (rtx op0, rtx op1)
4945{
4946 HOST_WIDE_INT v;
4947 rtx chain = gen_reg_rtx (V4SImode);
4948 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4949 rtx sp = gen_reg_rtx (V4SImode);
4950 rtx splatted = gen_reg_rtx (V4SImode);
4951 rtx pat = gen_reg_rtx (TImode);
4952
4953 /* copy the back chain so we can save it back again. */
4954 emit_move_insn (chain, stack_bot);
4955
4956 op1 = force_reg (SImode, op1);
4957
4958 v = 0x1020300010203ll;
4959 emit_move_insn (pat, immed_double_const (v, v, TImode));
4960 emit_insn (gen_shufb (splatted, op1, op1, pat));
4961
4962 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4963 emit_insn (gen_subv4si3 (sp, sp, splatted));
4964
4965 if (flag_stack_check)
4966 {
4967 rtx avail = gen_reg_rtx(SImode);
4968 rtx result = gen_reg_rtx(SImode);
4969 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4970 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4971 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4972 }
4973
4974 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4975
4976 emit_move_insn (stack_bot, chain);
4977
4978 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4979}
4980
4981void
4982spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4983{
4984 static unsigned char arr[16] =
4985 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4986 rtx temp = gen_reg_rtx (SImode);
4987 rtx temp2 = gen_reg_rtx (SImode);
4988 rtx temp3 = gen_reg_rtx (V4SImode);
4989 rtx temp4 = gen_reg_rtx (V4SImode);
4990 rtx pat = gen_reg_rtx (TImode);
4991 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4992
4993 /* Restore the backchain from the first word, sp from the second. */
4994 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4995 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4996
4997 emit_move_insn (pat, array_to_constant (TImode, arr));
4998
4999 /* Compute Available Stack Size for sp */
5000 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5001 emit_insn (gen_shufb (temp3, temp, temp, pat));
5002
5003 /* Compute Available Stack Size for back chain */
5004 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5005 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5006 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5007
5008 emit_insn (gen_addv4si3 (sp, sp, temp3));
5009 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5010}
5011
5012static void
5013spu_init_libfuncs (void)
5014{
5015 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5016 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5017 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5018 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5019 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5020 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5021 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5022 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5023 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5024 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5025 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5026
5027 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5028 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5029
5030 set_optab_libfunc (smul_optab, TImode, "__multi3");
5031 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5032 set_optab_libfunc (smod_optab, TImode, "__modti3");
5033 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5034 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5035 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5036}
5037
5038/* Make a subreg, stripping any existing subreg. We could possibly just
5039 call simplify_subreg, but in this case we know what we want. */
5040rtx
5041spu_gen_subreg (enum machine_mode mode, rtx x)
5042{
5043 if (GET_CODE (x) == SUBREG)
5044 x = SUBREG_REG (x);
5045 if (GET_MODE (x) == mode)
5046 return x;
5047 return gen_rtx_SUBREG (mode, x, 0);
5048}
5049
5050static bool
fb80456a 5051spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5052{
5053 return (TYPE_MODE (type) == BLKmode
5054 && ((type) == 0
5055 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5056 || int_size_in_bytes (type) >
5057 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5058}
5059\f
5060/* Create the built-in types and functions */
5061
5062struct spu_builtin_description spu_builtins[] = {
5063#define DEF_BUILTIN(fcode, icode, name, type, params) \
5064 {fcode, icode, name, type, params, NULL_TREE},
5065#include "spu-builtins.def"
5066#undef DEF_BUILTIN
5067};
5068
5069static void
5070spu_init_builtins (void)
5071{
5072 struct spu_builtin_description *d;
5073 unsigned int i;
5074
5075 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5076 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5077 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5078 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5079 V4SF_type_node = build_vector_type (float_type_node, 4);
5080 V2DF_type_node = build_vector_type (double_type_node, 2);
5081
5082 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5083 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5084 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5085 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5086
c4ecce0c 5087 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5088
5089 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5090 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5091 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5092 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5093 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5094 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5095 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5096 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5097 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5098 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5101
5102 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5103 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5104 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5105 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5106 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5107 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5108 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5109 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5110
5111 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5112 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5113
5114 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5115
5116 spu_builtin_types[SPU_BTI_PTR] =
5117 build_pointer_type (build_qualified_type
5118 (void_type_node,
5119 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5120
5121 /* For each builtin we build a new prototype. The tree code will make
5122 sure nodes are shared. */
5123 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5124 {
5125 tree p;
5126 char name[64]; /* build_function will make a copy. */
5127 int parm;
5128
5129 if (d->name == 0)
5130 continue;
5131
5dfbd18f 5132 /* Find last parm. */
644459d0 5133 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5134 ;
644459d0 5135
5136 p = void_list_node;
5137 while (parm > 1)
5138 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5139
5140 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5141
5142 sprintf (name, "__builtin_%s", d->name);
5143 d->fndecl =
5144 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5145 NULL, NULL_TREE);
a76866d3 5146 if (d->fcode == SPU_MASK_FOR_LOAD)
5147 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5148
5149 /* These builtins don't throw. */
5150 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5151 }
5152}
5153
cf31d486 5154void
5155spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5156{
5157 static unsigned char arr[16] =
5158 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5159
5160 rtx temp = gen_reg_rtx (Pmode);
5161 rtx temp2 = gen_reg_rtx (V4SImode);
5162 rtx temp3 = gen_reg_rtx (V4SImode);
5163 rtx pat = gen_reg_rtx (TImode);
5164 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5165
5166 emit_move_insn (pat, array_to_constant (TImode, arr));
5167
5168 /* Restore the sp. */
5169 emit_move_insn (temp, op1);
5170 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5171
5172 /* Compute available stack size for sp. */
5173 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5174 emit_insn (gen_shufb (temp3, temp, temp, pat));
5175
5176 emit_insn (gen_addv4si3 (sp, sp, temp3));
5177 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5178}
5179
644459d0 5180int
5181spu_safe_dma (HOST_WIDE_INT channel)
5182{
006e4b96 5183 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5184}
5185
5186void
5187spu_builtin_splats (rtx ops[])
5188{
5189 enum machine_mode mode = GET_MODE (ops[0]);
5190 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5191 {
5192 unsigned char arr[16];
5193 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5194 emit_move_insn (ops[0], array_to_constant (mode, arr));
5195 }
644459d0 5196 else
5197 {
5198 rtx reg = gen_reg_rtx (TImode);
5199 rtx shuf;
5200 if (GET_CODE (ops[1]) != REG
5201 && GET_CODE (ops[1]) != SUBREG)
5202 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5203 switch (mode)
5204 {
5205 case V2DImode:
5206 case V2DFmode:
5207 shuf =
5208 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5209 TImode);
5210 break;
5211 case V4SImode:
5212 case V4SFmode:
5213 shuf =
5214 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5215 TImode);
5216 break;
5217 case V8HImode:
5218 shuf =
5219 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5220 TImode);
5221 break;
5222 case V16QImode:
5223 shuf =
5224 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5225 TImode);
5226 break;
5227 default:
5228 abort ();
5229 }
5230 emit_move_insn (reg, shuf);
5231 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5232 }
5233}
5234
5235void
5236spu_builtin_extract (rtx ops[])
5237{
5238 enum machine_mode mode;
5239 rtx rot, from, tmp;
5240
5241 mode = GET_MODE (ops[1]);
5242
5243 if (GET_CODE (ops[2]) == CONST_INT)
5244 {
5245 switch (mode)
5246 {
5247 case V16QImode:
5248 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5249 break;
5250 case V8HImode:
5251 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5252 break;
5253 case V4SFmode:
5254 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5255 break;
5256 case V4SImode:
5257 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5258 break;
5259 case V2DImode:
5260 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5261 break;
5262 case V2DFmode:
5263 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5264 break;
5265 default:
5266 abort ();
5267 }
5268 return;
5269 }
5270
5271 from = spu_gen_subreg (TImode, ops[1]);
5272 rot = gen_reg_rtx (TImode);
5273 tmp = gen_reg_rtx (SImode);
5274
5275 switch (mode)
5276 {
5277 case V16QImode:
5278 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5279 break;
5280 case V8HImode:
5281 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5282 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5283 break;
5284 case V4SFmode:
5285 case V4SImode:
5286 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5287 break;
5288 case V2DImode:
5289 case V2DFmode:
5290 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5291 break;
5292 default:
5293 abort ();
5294 }
5295 emit_insn (gen_rotqby_ti (rot, from, tmp));
5296
5297 emit_insn (gen_spu_convert (ops[0], rot));
5298}
5299
5300void
5301spu_builtin_insert (rtx ops[])
5302{
5303 enum machine_mode mode = GET_MODE (ops[0]);
5304 enum machine_mode imode = GET_MODE_INNER (mode);
5305 rtx mask = gen_reg_rtx (TImode);
5306 rtx offset;
5307
5308 if (GET_CODE (ops[3]) == CONST_INT)
5309 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5310 else
5311 {
5312 offset = gen_reg_rtx (SImode);
5313 emit_insn (gen_mulsi3
5314 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5315 }
5316 emit_insn (gen_cpat
5317 (mask, stack_pointer_rtx, offset,
5318 GEN_INT (GET_MODE_SIZE (imode))));
5319 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5320}
5321
5322void
5323spu_builtin_promote (rtx ops[])
5324{
5325 enum machine_mode mode, imode;
5326 rtx rot, from, offset;
5327 HOST_WIDE_INT pos;
5328
5329 mode = GET_MODE (ops[0]);
5330 imode = GET_MODE_INNER (mode);
5331
5332 from = gen_reg_rtx (TImode);
5333 rot = spu_gen_subreg (TImode, ops[0]);
5334
5335 emit_insn (gen_spu_convert (from, ops[1]));
5336
5337 if (GET_CODE (ops[2]) == CONST_INT)
5338 {
5339 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5340 if (GET_MODE_SIZE (imode) < 4)
5341 pos += 4 - GET_MODE_SIZE (imode);
5342 offset = GEN_INT (pos & 15);
5343 }
5344 else
5345 {
5346 offset = gen_reg_rtx (SImode);
5347 switch (mode)
5348 {
5349 case V16QImode:
5350 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5351 break;
5352 case V8HImode:
5353 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5354 emit_insn (gen_addsi3 (offset, offset, offset));
5355 break;
5356 case V4SFmode:
5357 case V4SImode:
5358 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5359 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5360 break;
5361 case V2DImode:
5362 case V2DFmode:
5363 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5364 break;
5365 default:
5366 abort ();
5367 }
5368 }
5369 emit_insn (gen_rotqby_ti (rot, from, offset));
5370}
5371
5372void
5373spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5374{
5375 rtx shuf = gen_reg_rtx (V4SImode);
5376 rtx insn = gen_reg_rtx (V4SImode);
5377 rtx shufc;
5378 rtx insnc;
5379 rtx mem;
5380
5381 fnaddr = force_reg (SImode, fnaddr);
5382 cxt = force_reg (SImode, cxt);
5383
5384 if (TARGET_LARGE_MEM)
5385 {
5386 rtx rotl = gen_reg_rtx (V4SImode);
5387 rtx mask = gen_reg_rtx (V4SImode);
5388 rtx bi = gen_reg_rtx (SImode);
5389 unsigned char shufa[16] = {
5390 2, 3, 0, 1, 18, 19, 16, 17,
5391 0, 1, 2, 3, 16, 17, 18, 19
5392 };
5393 unsigned char insna[16] = {
5394 0x41, 0, 0, 79,
5395 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5396 0x60, 0x80, 0, 79,
5397 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5398 };
5399
5400 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5401 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5402
5403 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5404 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5405 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5406 emit_insn (gen_selb (insn, insnc, rotl, mask));
5407
5408 mem = memory_address (Pmode, tramp);
5409 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5410
5411 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5412 mem = memory_address (Pmode, plus_constant (tramp, 16));
5413 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5414 }
5415 else
5416 {
5417 rtx scxt = gen_reg_rtx (SImode);
5418 rtx sfnaddr = gen_reg_rtx (SImode);
5419 unsigned char insna[16] = {
5420 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5421 0x30, 0, 0, 0,
5422 0, 0, 0, 0,
5423 0, 0, 0, 0
5424 };
5425
5426 shufc = gen_reg_rtx (TImode);
5427 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5428
5429 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5430 fits 18 bits and the last 4 are zeros. This will be true if
5431 the stack pointer is initialized to 0x3fff0 at program start,
5432 otherwise the ila instruction will be garbage. */
5433
5434 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5435 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5436 emit_insn (gen_cpat
5437 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5438 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5439 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5440
5441 mem = memory_address (Pmode, tramp);
5442 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5443
5444 }
5445 emit_insn (gen_sync ());
5446}
5447
5448void
5449spu_expand_sign_extend (rtx ops[])
5450{
5451 unsigned char arr[16];
5452 rtx pat = gen_reg_rtx (TImode);
5453 rtx sign, c;
5454 int i, last;
5455 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5456 if (GET_MODE (ops[1]) == QImode)
5457 {
5458 sign = gen_reg_rtx (HImode);
5459 emit_insn (gen_extendqihi2 (sign, ops[1]));
5460 for (i = 0; i < 16; i++)
5461 arr[i] = 0x12;
5462 arr[last] = 0x13;
5463 }
5464 else
5465 {
5466 for (i = 0; i < 16; i++)
5467 arr[i] = 0x10;
5468 switch (GET_MODE (ops[1]))
5469 {
5470 case HImode:
5471 sign = gen_reg_rtx (SImode);
5472 emit_insn (gen_extendhisi2 (sign, ops[1]));
5473 arr[last] = 0x03;
5474 arr[last - 1] = 0x02;
5475 break;
5476 case SImode:
5477 sign = gen_reg_rtx (SImode);
5478 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5479 for (i = 0; i < 4; i++)
5480 arr[last - i] = 3 - i;
5481 break;
5482 case DImode:
5483 sign = gen_reg_rtx (SImode);
5484 c = gen_reg_rtx (SImode);
5485 emit_insn (gen_spu_convert (c, ops[1]));
5486 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5487 for (i = 0; i < 8; i++)
5488 arr[last - i] = 7 - i;
5489 break;
5490 default:
5491 abort ();
5492 }
5493 }
5494 emit_move_insn (pat, array_to_constant (TImode, arr));
5495 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5496}
5497
5498/* expand vector initialization. If there are any constant parts,
5499 load constant parts first. Then load any non-constant parts. */
5500void
5501spu_expand_vector_init (rtx target, rtx vals)
5502{
5503 enum machine_mode mode = GET_MODE (target);
5504 int n_elts = GET_MODE_NUNITS (mode);
5505 int n_var = 0;
5506 bool all_same = true;
790c536c 5507 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5508 int i;
5509
5510 first = XVECEXP (vals, 0, 0);
5511 for (i = 0; i < n_elts; ++i)
5512 {
5513 x = XVECEXP (vals, 0, i);
e442af0b 5514 if (!(CONST_INT_P (x)
5515 || GET_CODE (x) == CONST_DOUBLE
5516 || GET_CODE (x) == CONST_FIXED))
644459d0 5517 ++n_var;
5518 else
5519 {
5520 if (first_constant == NULL_RTX)
5521 first_constant = x;
5522 }
5523 if (i > 0 && !rtx_equal_p (x, first))
5524 all_same = false;
5525 }
5526
5527 /* if all elements are the same, use splats to repeat elements */
5528 if (all_same)
5529 {
5530 if (!CONSTANT_P (first)
5531 && !register_operand (first, GET_MODE (x)))
5532 first = force_reg (GET_MODE (first), first);
5533 emit_insn (gen_spu_splats (target, first));
5534 return;
5535 }
5536
5537 /* load constant parts */
5538 if (n_var != n_elts)
5539 {
5540 if (n_var == 0)
5541 {
5542 emit_move_insn (target,
5543 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5544 }
5545 else
5546 {
5547 rtx constant_parts_rtx = copy_rtx (vals);
5548
5549 gcc_assert (first_constant != NULL_RTX);
5550 /* fill empty slots with the first constant, this increases
5551 our chance of using splats in the recursive call below. */
5552 for (i = 0; i < n_elts; ++i)
e442af0b 5553 {
5554 x = XVECEXP (constant_parts_rtx, 0, i);
5555 if (!(CONST_INT_P (x)
5556 || GET_CODE (x) == CONST_DOUBLE
5557 || GET_CODE (x) == CONST_FIXED))
5558 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5559 }
644459d0 5560
5561 spu_expand_vector_init (target, constant_parts_rtx);
5562 }
5563 }
5564
5565 /* load variable parts */
5566 if (n_var != 0)
5567 {
5568 rtx insert_operands[4];
5569
5570 insert_operands[0] = target;
5571 insert_operands[2] = target;
5572 for (i = 0; i < n_elts; ++i)
5573 {
5574 x = XVECEXP (vals, 0, i);
e442af0b 5575 if (!(CONST_INT_P (x)
5576 || GET_CODE (x) == CONST_DOUBLE
5577 || GET_CODE (x) == CONST_FIXED))
644459d0 5578 {
5579 if (!register_operand (x, GET_MODE (x)))
5580 x = force_reg (GET_MODE (x), x);
5581 insert_operands[1] = x;
5582 insert_operands[3] = GEN_INT (i);
5583 spu_builtin_insert (insert_operands);
5584 }
5585 }
5586 }
5587}
6352eedf 5588
5474166e 5589/* Return insn index for the vector compare instruction for given CODE,
5590 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5591
5592static int
5593get_vec_cmp_insn (enum rtx_code code,
5594 enum machine_mode dest_mode,
5595 enum machine_mode op_mode)
5596
5597{
5598 switch (code)
5599 {
5600 case EQ:
5601 if (dest_mode == V16QImode && op_mode == V16QImode)
5602 return CODE_FOR_ceq_v16qi;
5603 if (dest_mode == V8HImode && op_mode == V8HImode)
5604 return CODE_FOR_ceq_v8hi;
5605 if (dest_mode == V4SImode && op_mode == V4SImode)
5606 return CODE_FOR_ceq_v4si;
5607 if (dest_mode == V4SImode && op_mode == V4SFmode)
5608 return CODE_FOR_ceq_v4sf;
5609 if (dest_mode == V2DImode && op_mode == V2DFmode)
5610 return CODE_FOR_ceq_v2df;
5611 break;
5612 case GT:
5613 if (dest_mode == V16QImode && op_mode == V16QImode)
5614 return CODE_FOR_cgt_v16qi;
5615 if (dest_mode == V8HImode && op_mode == V8HImode)
5616 return CODE_FOR_cgt_v8hi;
5617 if (dest_mode == V4SImode && op_mode == V4SImode)
5618 return CODE_FOR_cgt_v4si;
5619 if (dest_mode == V4SImode && op_mode == V4SFmode)
5620 return CODE_FOR_cgt_v4sf;
5621 if (dest_mode == V2DImode && op_mode == V2DFmode)
5622 return CODE_FOR_cgt_v2df;
5623 break;
5624 case GTU:
5625 if (dest_mode == V16QImode && op_mode == V16QImode)
5626 return CODE_FOR_clgt_v16qi;
5627 if (dest_mode == V8HImode && op_mode == V8HImode)
5628 return CODE_FOR_clgt_v8hi;
5629 if (dest_mode == V4SImode && op_mode == V4SImode)
5630 return CODE_FOR_clgt_v4si;
5631 break;
5632 default:
5633 break;
5634 }
5635 return -1;
5636}
5637
5638/* Emit vector compare for operands OP0 and OP1 using code RCODE.
5639 DMODE is expected destination mode. This is a recursive function. */
5640
5641static rtx
5642spu_emit_vector_compare (enum rtx_code rcode,
5643 rtx op0, rtx op1,
5644 enum machine_mode dmode)
5645{
5646 int vec_cmp_insn;
5647 rtx mask;
5648 enum machine_mode dest_mode;
5649 enum machine_mode op_mode = GET_MODE (op1);
5650
5651 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5652
5653 /* Floating point vector compare instructions uses destination V4SImode.
5654 Double floating point vector compare instructions uses destination V2DImode.
5655 Move destination to appropriate mode later. */
5656 if (dmode == V4SFmode)
5657 dest_mode = V4SImode;
5658 else if (dmode == V2DFmode)
5659 dest_mode = V2DImode;
5660 else
5661 dest_mode = dmode;
5662
5663 mask = gen_reg_rtx (dest_mode);
5664 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5665
5666 if (vec_cmp_insn == -1)
5667 {
5668 bool swap_operands = false;
5669 bool try_again = false;
5670 switch (rcode)
5671 {
5672 case LT:
5673 rcode = GT;
5674 swap_operands = true;
5675 try_again = true;
5676 break;
5677 case LTU:
5678 rcode = GTU;
5679 swap_operands = true;
5680 try_again = true;
5681 break;
5682 case NE:
5683 /* Treat A != B as ~(A==B). */
5684 {
5685 enum insn_code nor_code;
5686 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5687 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5688 gcc_assert (nor_code != CODE_FOR_nothing);
5689 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5690 if (dmode != dest_mode)
5691 {
5692 rtx temp = gen_reg_rtx (dest_mode);
5693 convert_move (temp, mask, 0);
5694 return temp;
5695 }
5696 return mask;
5697 }
5698 break;
5699 case GE:
5700 case GEU:
5701 case LE:
5702 case LEU:
5703 /* Try GT/GTU/LT/LTU OR EQ */
5704 {
5705 rtx c_rtx, eq_rtx;
5706 enum insn_code ior_code;
5707 enum rtx_code new_code;
5708
5709 switch (rcode)
5710 {
5711 case GE: new_code = GT; break;
5712 case GEU: new_code = GTU; break;
5713 case LE: new_code = LT; break;
5714 case LEU: new_code = LTU; break;
5715 default:
5716 gcc_unreachable ();
5717 }
5718
5719 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5720 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5721
99bdde56 5722 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5723 gcc_assert (ior_code != CODE_FOR_nothing);
5724 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5725 if (dmode != dest_mode)
5726 {
5727 rtx temp = gen_reg_rtx (dest_mode);
5728 convert_move (temp, mask, 0);
5729 return temp;
5730 }
5731 return mask;
5732 }
5733 break;
5734 default:
5735 gcc_unreachable ();
5736 }
5737
5738 /* You only get two chances. */
5739 if (try_again)
5740 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5741
5742 gcc_assert (vec_cmp_insn != -1);
5743
5744 if (swap_operands)
5745 {
5746 rtx tmp;
5747 tmp = op0;
5748 op0 = op1;
5749 op1 = tmp;
5750 }
5751 }
5752
5753 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5754 if (dmode != dest_mode)
5755 {
5756 rtx temp = gen_reg_rtx (dest_mode);
5757 convert_move (temp, mask, 0);
5758 return temp;
5759 }
5760 return mask;
5761}
5762
5763
5764/* Emit vector conditional expression.
5765 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5766 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5767
5768int
5769spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5770 rtx cond, rtx cc_op0, rtx cc_op1)
5771{
5772 enum machine_mode dest_mode = GET_MODE (dest);
5773 enum rtx_code rcode = GET_CODE (cond);
5774 rtx mask;
5775
5776 /* Get the vector mask for the given relational operations. */
5777 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5778
5779 emit_insn(gen_selb (dest, op2, op1, mask));
5780
5781 return 1;
5782}
5783
6352eedf 5784static rtx
5785spu_force_reg (enum machine_mode mode, rtx op)
5786{
5787 rtx x, r;
5788 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5789 {
5790 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5791 || GET_MODE (op) == BLKmode)
5792 return force_reg (mode, convert_to_mode (mode, op, 0));
5793 abort ();
5794 }
5795
5796 r = force_reg (GET_MODE (op), op);
5797 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5798 {
5799 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5800 if (x)
5801 return x;
5802 }
5803
5804 x = gen_reg_rtx (mode);
5805 emit_insn (gen_spu_convert (x, r));
5806 return x;
5807}
5808
5809static void
5810spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5811{
5812 HOST_WIDE_INT v = 0;
5813 int lsbits;
5814 /* Check the range of immediate operands. */
5815 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5816 {
5817 int range = p - SPU_BTI_7;
5df189be 5818
5819 if (!CONSTANT_P (op))
6352eedf 5820 error ("%s expects an integer literal in the range [%d, %d].",
5821 d->name,
5822 spu_builtin_range[range].low, spu_builtin_range[range].high);
5823
5824 if (GET_CODE (op) == CONST
5825 && (GET_CODE (XEXP (op, 0)) == PLUS
5826 || GET_CODE (XEXP (op, 0)) == MINUS))
5827 {
5828 v = INTVAL (XEXP (XEXP (op, 0), 1));
5829 op = XEXP (XEXP (op, 0), 0);
5830 }
5831 else if (GET_CODE (op) == CONST_INT)
5832 v = INTVAL (op);
5df189be 5833 else if (GET_CODE (op) == CONST_VECTOR
5834 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5835 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5836
5837 /* The default for v is 0 which is valid in every range. */
5838 if (v < spu_builtin_range[range].low
5839 || v > spu_builtin_range[range].high)
5840 error ("%s expects an integer literal in the range [%d, %d]. ("
5841 HOST_WIDE_INT_PRINT_DEC ")",
5842 d->name,
5843 spu_builtin_range[range].low, spu_builtin_range[range].high,
5844 v);
6352eedf 5845
5846 switch (p)
5847 {
5848 case SPU_BTI_S10_4:
5849 lsbits = 4;
5850 break;
5851 case SPU_BTI_U16_2:
5852 /* This is only used in lqa, and stqa. Even though the insns
5853 encode 16 bits of the address (all but the 2 least
5854 significant), only 14 bits are used because it is masked to
5855 be 16 byte aligned. */
5856 lsbits = 4;
5857 break;
5858 case SPU_BTI_S16_2:
5859 /* This is used for lqr and stqr. */
5860 lsbits = 2;
5861 break;
5862 default:
5863 lsbits = 0;
5864 }
5865
5866 if (GET_CODE (op) == LABEL_REF
5867 || (GET_CODE (op) == SYMBOL_REF
5868 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5869 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5870 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5871 d->name);
5872 }
5873}
5874
5875
70ca06f8 5876static int
5df189be 5877expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5878 rtx target, rtx ops[])
5879{
5880 enum insn_code icode = d->icode;
5df189be 5881 int i = 0, a;
6352eedf 5882
5883 /* Expand the arguments into rtl. */
5884
5885 if (d->parm[0] != SPU_BTI_VOID)
5886 ops[i++] = target;
5887
70ca06f8 5888 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 5889 {
5df189be 5890 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5891 if (arg == 0)
5892 abort ();
b9c74b4d 5893 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6352eedf 5894 }
70ca06f8 5895
5896 /* The insn pattern may have additional operands (SCRATCH).
5897 Return the number of actual non-SCRATCH operands. */
5898 gcc_assert (i <= insn_data[icode].n_operands);
5899 return i;
6352eedf 5900}
5901
5902static rtx
5903spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5904 tree exp, rtx target)
6352eedf 5905{
5906 rtx pat;
5907 rtx ops[8];
5908 enum insn_code icode = d->icode;
5909 enum machine_mode mode, tmode;
5910 int i, p;
70ca06f8 5911 int n_operands;
6352eedf 5912 tree return_type;
5913
5914 /* Set up ops[] with values from arglist. */
70ca06f8 5915 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 5916
5917 /* Handle the target operand which must be operand 0. */
5918 i = 0;
5919 if (d->parm[0] != SPU_BTI_VOID)
5920 {
5921
5922 /* We prefer the mode specified for the match_operand otherwise
5923 use the mode from the builtin function prototype. */
5924 tmode = insn_data[d->icode].operand[0].mode;
5925 if (tmode == VOIDmode)
5926 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5927
5928 /* Try to use target because not using it can lead to extra copies
5929 and when we are using all of the registers extra copies leads
5930 to extra spills. */
5931 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5932 ops[0] = target;
5933 else
5934 target = ops[0] = gen_reg_rtx (tmode);
5935
5936 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5937 abort ();
5938
5939 i++;
5940 }
5941
a76866d3 5942 if (d->fcode == SPU_MASK_FOR_LOAD)
5943 {
5944 enum machine_mode mode = insn_data[icode].operand[1].mode;
5945 tree arg;
5946 rtx addr, op, pat;
5947
5948 /* get addr */
5df189be 5949 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5950 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5951 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5952 addr = memory_address (mode, op);
5953
5954 /* negate addr */
5955 op = gen_reg_rtx (GET_MODE (addr));
5956 emit_insn (gen_rtx_SET (VOIDmode, op,
5957 gen_rtx_NEG (GET_MODE (addr), addr)));
5958 op = gen_rtx_MEM (mode, op);
5959
5960 pat = GEN_FCN (icode) (target, op);
5961 if (!pat)
5962 return 0;
5963 emit_insn (pat);
5964 return target;
5965 }
5966
6352eedf 5967 /* Ignore align_hint, but still expand it's args in case they have
5968 side effects. */
5969 if (icode == CODE_FOR_spu_align_hint)
5970 return 0;
5971
5972 /* Handle the rest of the operands. */
70ca06f8 5973 for (p = 1; i < n_operands; i++, p++)
6352eedf 5974 {
5975 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5976 mode = insn_data[d->icode].operand[i].mode;
5977 else
5978 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5979
5980 /* mode can be VOIDmode here for labels */
5981
5982 /* For specific intrinsics with an immediate operand, e.g.,
5983 si_ai(), we sometimes need to convert the scalar argument to a
5984 vector argument by splatting the scalar. */
5985 if (VECTOR_MODE_P (mode)
5986 && (GET_CODE (ops[i]) == CONST_INT
5987 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5988 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5989 {
5990 if (GET_CODE (ops[i]) == CONST_INT)
5991 ops[i] = spu_const (mode, INTVAL (ops[i]));
5992 else
5993 {
5994 rtx reg = gen_reg_rtx (mode);
5995 enum machine_mode imode = GET_MODE_INNER (mode);
5996 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5997 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5998 if (imode != GET_MODE (ops[i]))
5999 ops[i] = convert_to_mode (imode, ops[i],
6000 TYPE_UNSIGNED (spu_builtin_types
6001 [d->parm[i]]));
6002 emit_insn (gen_spu_splats (reg, ops[i]));
6003 ops[i] = reg;
6004 }
6005 }
6006
5df189be 6007 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6008
6352eedf 6009 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6010 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6011 }
6012
70ca06f8 6013 switch (n_operands)
6352eedf 6014 {
6015 case 0:
6016 pat = GEN_FCN (icode) (0);
6017 break;
6018 case 1:
6019 pat = GEN_FCN (icode) (ops[0]);
6020 break;
6021 case 2:
6022 pat = GEN_FCN (icode) (ops[0], ops[1]);
6023 break;
6024 case 3:
6025 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6026 break;
6027 case 4:
6028 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6029 break;
6030 case 5:
6031 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6032 break;
6033 case 6:
6034 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6035 break;
6036 default:
6037 abort ();
6038 }
6039
6040 if (!pat)
6041 abort ();
6042
6043 if (d->type == B_CALL || d->type == B_BISLED)
6044 emit_call_insn (pat);
6045 else if (d->type == B_JUMP)
6046 {
6047 emit_jump_insn (pat);
6048 emit_barrier ();
6049 }
6050 else
6051 emit_insn (pat);
6052
6053 return_type = spu_builtin_types[d->parm[0]];
6054 if (d->parm[0] != SPU_BTI_VOID
6055 && GET_MODE (target) != TYPE_MODE (return_type))
6056 {
6057 /* target is the return value. It should always be the mode of
6058 the builtin function prototype. */
6059 target = spu_force_reg (TYPE_MODE (return_type), target);
6060 }
6061
6062 return target;
6063}
6064
6065rtx
6066spu_expand_builtin (tree exp,
6067 rtx target,
6068 rtx subtarget ATTRIBUTE_UNUSED,
6069 enum machine_mode mode ATTRIBUTE_UNUSED,
6070 int ignore ATTRIBUTE_UNUSED)
6071{
5df189be 6072 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6073 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6074 struct spu_builtin_description *d;
6075
6076 if (fcode < NUM_SPU_BUILTINS)
6077 {
6078 d = &spu_builtins[fcode];
6079
5df189be 6080 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6081 }
6082 abort ();
6083}
6084
e99f512d 6085/* Implement targetm.vectorize.builtin_mul_widen_even. */
6086static tree
6087spu_builtin_mul_widen_even (tree type)
6088{
e99f512d 6089 switch (TYPE_MODE (type))
6090 {
6091 case V8HImode:
6092 if (TYPE_UNSIGNED (type))
6093 return spu_builtins[SPU_MULE_0].fndecl;
6094 else
6095 return spu_builtins[SPU_MULE_1].fndecl;
6096 break;
6097 default:
6098 return NULL_TREE;
6099 }
6100}
6101
6102/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6103static tree
6104spu_builtin_mul_widen_odd (tree type)
6105{
6106 switch (TYPE_MODE (type))
6107 {
6108 case V8HImode:
6109 if (TYPE_UNSIGNED (type))
6110 return spu_builtins[SPU_MULO_1].fndecl;
6111 else
6112 return spu_builtins[SPU_MULO_0].fndecl;
6113 break;
6114 default:
6115 return NULL_TREE;
6116 }
6117}
6118
a76866d3 6119/* Implement targetm.vectorize.builtin_mask_for_load. */
6120static tree
6121spu_builtin_mask_for_load (void)
6122{
6123 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6124 gcc_assert (d);
6125 return d->fndecl;
6126}
5df189be 6127
a28df51d 6128/* Implement targetm.vectorize.builtin_vectorization_cost. */
6129static int
6130spu_builtin_vectorization_cost (bool runtime_test)
6131{
6132 /* If the branch of the runtime test is taken - i.e. - the vectorized
6133 version is skipped - this incurs a misprediction cost (because the
6134 vectorized version is expected to be the fall-through). So we subtract
becfaa62 6135 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6136 when the vectorized version is executed. */
6137 if (runtime_test)
6138 return -19;
6139 else
6140 return 0;
6141}
6142
0e87db76 6143/* Return true iff, data reference of TYPE can reach vector alignment (16)
6144 after applying N number of iterations. This routine does not determine
6145 how may iterations are required to reach desired alignment. */
6146
6147static bool
a9f1838b 6148spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6149{
6150 if (is_packed)
6151 return false;
6152
6153 /* All other types are naturally aligned. */
6154 return true;
6155}
6156
a0515226 6157/* Implement targetm.vectorize.builtin_vec_perm. */
6158tree
6159spu_builtin_vec_perm (tree type, tree *mask_element_type)
6160{
6161 struct spu_builtin_description *d;
6162
6163 *mask_element_type = unsigned_char_type_node;
6164
6165 switch (TYPE_MODE (type))
6166 {
6167 case V16QImode:
6168 if (TYPE_UNSIGNED (type))
6169 d = &spu_builtins[SPU_SHUFFLE_0];
6170 else
6171 d = &spu_builtins[SPU_SHUFFLE_1];
6172 break;
6173
6174 case V8HImode:
6175 if (TYPE_UNSIGNED (type))
6176 d = &spu_builtins[SPU_SHUFFLE_2];
6177 else
6178 d = &spu_builtins[SPU_SHUFFLE_3];
6179 break;
6180
6181 case V4SImode:
6182 if (TYPE_UNSIGNED (type))
6183 d = &spu_builtins[SPU_SHUFFLE_4];
6184 else
6185 d = &spu_builtins[SPU_SHUFFLE_5];
6186 break;
6187
6188 case V2DImode:
6189 if (TYPE_UNSIGNED (type))
6190 d = &spu_builtins[SPU_SHUFFLE_6];
6191 else
6192 d = &spu_builtins[SPU_SHUFFLE_7];
6193 break;
6194
6195 case V4SFmode:
6196 d = &spu_builtins[SPU_SHUFFLE_8];
6197 break;
6198
6199 case V2DFmode:
6200 d = &spu_builtins[SPU_SHUFFLE_9];
6201 break;
6202
6203 default:
6204 return NULL_TREE;
6205 }
6206
6207 gcc_assert (d);
6208 return d->fndecl;
6209}
6210
d52fd16a 6211/* Count the total number of instructions in each pipe and return the
6212 maximum, which is used as the Minimum Iteration Interval (MII)
6213 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6214 -2 are instructions that can go in pipe0 or pipe1. */
6215static int
6216spu_sms_res_mii (struct ddg *g)
6217{
6218 int i;
6219 unsigned t[4] = {0, 0, 0, 0};
6220
6221 for (i = 0; i < g->num_nodes; i++)
6222 {
6223 rtx insn = g->nodes[i].insn;
6224 int p = get_pipe (insn) + 2;
6225
6226 assert (p >= 0);
6227 assert (p < 4);
6228
6229 t[p]++;
6230 if (dump_file && INSN_P (insn))
6231 fprintf (dump_file, "i%d %s %d %d\n",
6232 INSN_UID (insn),
6233 insn_data[INSN_CODE(insn)].name,
6234 p, t[p]);
6235 }
6236 if (dump_file)
6237 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6238
6239 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6240}
6241
6242
5df189be 6243void
6244spu_init_expanders (void)
6245{
6246 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6247 * frame_pointer_needed is true. We don't know that until we're
6248 * expanding the prologue. */
6249 if (cfun)
6250 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6251}
6252
6253static enum machine_mode
6254spu_libgcc_cmp_return_mode (void)
6255{
6256
6257/* For SPU word mode is TI mode so it is better to use SImode
6258 for compare returns. */
6259 return SImode;
6260}
6261
6262static enum machine_mode
6263spu_libgcc_shift_count_mode (void)
6264{
6265/* For SPU word mode is TI mode so it is better to use SImode
6266 for shift counts. */
6267 return SImode;
6268}
5a976006 6269
6270/* An early place to adjust some flags after GCC has finished processing
6271 * them. */
6272static void
6273asm_file_start (void)
6274{
6275 /* Variable tracking should be run after all optimizations which
6276 change order of insns. It also needs a valid CFG. */
6277 spu_flag_var_tracking = flag_var_tracking;
6278 flag_var_tracking = 0;
6279
6280 default_file_start ();
6281}
6282
a08dfd55 6283/* Implement targetm.section_type_flags. */
6284static unsigned int
6285spu_section_type_flags (tree decl, const char *name, int reloc)
6286{
6287 /* .toe needs to have type @nobits. */
6288 if (strcmp (name, ".toe") == 0)
6289 return SECTION_BSS;
6290 return default_section_type_flags (decl, name, reloc);
6291}