gcc/config/spu/spu.c
cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
5a976006 57#include "sbitmap.h"
58#include "timevar.h"
59#include "df.h"
6352eedf 60
61/* Builtin types, data and prototypes. */
62struct spu_builtin_range
63{
64 int low, high;
65};
66
67static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80};
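/* (Reference note: each entry above gives the inclusive range of values an
   immediate operand of the corresponding SPU_BTI_* type may take; the
   builtin expansion code presumably checks constant arguments against this
   table and rejects out-of-range values.)  */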
81
644459d0 82\f
83/* Target specific attribute specifications. */
84char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
85
86/* Prototypes and external defs. */
87static void spu_init_builtins (void);
88static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91static rtx get_pic_reg (void);
92static int need_to_save_reg (int regno, int saving);
93static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97static void emit_nop_for_insn (rtx insn);
98static bool insn_clobbers_hbr (rtx insn);
99static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 100 int distance, sbitmap blocks);
5474166e 101static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
644459d0 103static rtx get_branch_target (rtx branch);
644459d0 104static void spu_machine_dependent_reorg (void);
105static int spu_sched_issue_rate (void);
106static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108static int get_pipe (rtx insn);
644459d0 109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 110static void spu_sched_init_global (FILE *, int, int);
111static void spu_sched_init (FILE *, int, int);
112static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 113static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119static int spu_naked_function_p (tree func);
fb80456a 120static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
644459d0 122static tree spu_build_builtin_va_list (void);
8a58ed0a 123static void spu_va_start (tree, rtx);
75a70cf9 124static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 126static int regno_aligned_for_load (int regno);
127static int store_with_one_insn_p (rtx mem);
644459d0 128static int mem_is_padded_component_ref (rtx x);
129static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130static void spu_asm_globalize_label (FILE * file, const char *name);
131static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 132 int *total, bool speed);
644459d0 133static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134static void spu_init_libfuncs (void);
fb80456a 135static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 136static void fix_range (const char *);
69ced2d6 137static void spu_encode_section_info (tree, rtx, int);
e99f512d 138static tree spu_builtin_mul_widen_even (tree);
139static tree spu_builtin_mul_widen_odd (tree);
a76866d3 140static tree spu_builtin_mask_for_load (void);
a28df51d 141static int spu_builtin_vectorization_cost (bool);
a9f1838b 142static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 143static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 144static int spu_sms_res_mii (struct ddg *g);
5a976006 145static void asm_file_start (void);
a08dfd55 146static unsigned int spu_section_type_flags (tree, const char *, int);
644459d0 147
148extern const char *reg_names[];
149rtx spu_compare_op0, spu_compare_op1;
150
5474166e 151/* Which instruction set architecture to use. */
152int spu_arch;
153/* Which cpu are we tuning for. */
154int spu_tune;
155
5a976006 156/* The hardware requires 8 insns between a hint and the branch it
157   affects. This variable describes how many rtl instructions the
158 compiler needs to see before inserting a hint, and then the compiler
159 will insert enough nops to make it at least 8 insns. The default is
160   for the compiler to allow up to 2 nops to be emitted. The nops are
161 inserted in pairs, so we round down. */
162int spu_hint_dist = (8*4) - (2*4);
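/* (Worked out: SPU instructions are 4 bytes, so the 8-insn requirement is
   32 bytes; allowing the default 2 nops (8 bytes) means at least
   32 - 8 = 24 bytes of real insns must separate the hint from the branch,
   which is what the initializer above computes.)  */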
163
164/* Determines whether we run variable tracking in machine dependent
165 reorganization. */
166static int spu_flag_var_tracking;
167
644459d0 168enum spu_immediate {
169 SPU_NONE,
170 SPU_IL,
171 SPU_ILA,
172 SPU_ILH,
173 SPU_ILHU,
174 SPU_ORI,
175 SPU_ORHI,
176 SPU_ORBI,
99369027 177 SPU_IOHL
644459d0 178};
dea01258 179enum immediate_class
180{
181 IC_POOL, /* constant pool */
182 IC_IL1, /* one il* instruction */
183 IC_IL2, /* both ilhu and iohl instructions */
184 IC_IL1s, /* one il* instruction */
185 IC_IL2s, /* both ilhu and iohl instructions */
186 IC_FSMBI, /* the fsmbi instruction */
187 IC_CPAT, /* one of the c*d instructions */
5df189be 188 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 189};
644459d0 190
191static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
192static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 193static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
194static enum immediate_class classify_immediate (rtx op,
195 enum machine_mode mode);
644459d0 196
1bd43494 197static enum machine_mode spu_unwind_word_mode (void);
198
ea32e033 199static enum machine_mode
200spu_libgcc_cmp_return_mode (void);
201
202static enum machine_mode
203spu_libgcc_shift_count_mode (void);
204
644459d0 205/* Built in types. */
206tree spu_builtin_types[SPU_BTI_MAX];
207\f
208/* TARGET overrides. */
209
210#undef TARGET_INIT_BUILTINS
211#define TARGET_INIT_BUILTINS spu_init_builtins
212
644459d0 213#undef TARGET_EXPAND_BUILTIN
214#define TARGET_EXPAND_BUILTIN spu_expand_builtin
215
1bd43494 216#undef TARGET_UNWIND_WORD_MODE
217#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 218
219/* The .8byte directive doesn't seem to work well for a 32 bit
220 architecture. */
221#undef TARGET_ASM_UNALIGNED_DI_OP
222#define TARGET_ASM_UNALIGNED_DI_OP NULL
223
224#undef TARGET_RTX_COSTS
225#define TARGET_RTX_COSTS spu_rtx_costs
226
227#undef TARGET_ADDRESS_COST
f529eb25 228#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 229
230#undef TARGET_SCHED_ISSUE_RATE
231#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
232
5a976006 233#undef TARGET_SCHED_INIT_GLOBAL
234#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
235
236#undef TARGET_SCHED_INIT
237#define TARGET_SCHED_INIT spu_sched_init
238
644459d0 239#undef TARGET_SCHED_VARIABLE_ISSUE
240#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
241
5a976006 242#undef TARGET_SCHED_REORDER
243#define TARGET_SCHED_REORDER spu_sched_reorder
244
245#undef TARGET_SCHED_REORDER2
246#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 247
248#undef TARGET_SCHED_ADJUST_COST
249#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
250
251const struct attribute_spec spu_attribute_table[];
252#undef TARGET_ATTRIBUTE_TABLE
253#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
254
255#undef TARGET_ASM_INTEGER
256#define TARGET_ASM_INTEGER spu_assemble_integer
257
258#undef TARGET_SCALAR_MODE_SUPPORTED_P
259#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
260
261#undef TARGET_VECTOR_MODE_SUPPORTED_P
262#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
263
264#undef TARGET_FUNCTION_OK_FOR_SIBCALL
265#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
266
267#undef TARGET_ASM_GLOBALIZE_LABEL
268#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
269
270#undef TARGET_PASS_BY_REFERENCE
271#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
272
273#undef TARGET_MUST_PASS_IN_STACK
274#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
275
276#undef TARGET_BUILD_BUILTIN_VA_LIST
277#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
278
8a58ed0a 279#undef TARGET_EXPAND_BUILTIN_VA_START
280#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
281
644459d0 282#undef TARGET_SETUP_INCOMING_VARARGS
283#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
284
285#undef TARGET_MACHINE_DEPENDENT_REORG
286#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
287
288#undef TARGET_GIMPLIFY_VA_ARG_EXPR
289#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
290
291#undef TARGET_DEFAULT_TARGET_FLAGS
292#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
293
294#undef TARGET_INIT_LIBFUNCS
295#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
296
297#undef TARGET_RETURN_IN_MEMORY
298#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
299
69ced2d6 300#undef TARGET_ENCODE_SECTION_INFO
301#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
302
e99f512d 303#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
304#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
305
306#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
307#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
308
a76866d3 309#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
310#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
311
a28df51d 312#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
313#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
314
0e87db76 315#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
316#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
317
a0515226 318#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
319#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
320
ea32e033 321#undef TARGET_LIBGCC_CMP_RETURN_MODE
322#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
323
324#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
325#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
326
d52fd16a 327#undef TARGET_SCHED_SMS_RES_MII
328#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
329
5a976006 330#undef TARGET_ASM_FILE_START
331#define TARGET_ASM_FILE_START asm_file_start
332
a08dfd55 333#undef TARGET_SECTION_TYPE_FLAGS
334#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
335
644459d0 336struct gcc_target targetm = TARGET_INITIALIZER;
337
5df189be 338void
339spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
340{
5df189be 341 /* Override some of the default param values. With so many registers
342 larger values are better for these params. */
343 MAX_PENDING_LIST_LENGTH = 128;
344
345 /* With so many registers this is better on by default. */
346 flag_rename_registers = 1;
347}
348
644459d0 349/* Sometimes certain combinations of command options do not make sense
350 on a particular target machine. You can define a macro
351 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
352 executed once just after all the command options have been parsed. */
353void
354spu_override_options (void)
355{
14d408d9 356 /* Small loops will be unpeeled at -O3. For SPU it is more important
357 to keep code small by default. */
358 if (!flag_unroll_loops && !flag_peel_loops
359 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
360 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
361
644459d0 362 flag_omit_frame_pointer = 1;
363
5a976006 364 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 365 if (align_functions < 8)
366 align_functions = 8;
c7b91b14 367
5a976006 368 spu_hint_dist = 8*4 - spu_max_nops*4;
369 if (spu_hint_dist < 0)
370 spu_hint_dist = 0;
371
c7b91b14 372 if (spu_fixed_range_string)
373 fix_range (spu_fixed_range_string);
5474166e 374
375 /* Determine processor architectural level. */
376 if (spu_arch_string)
377 {
378 if (strcmp (&spu_arch_string[0], "cell") == 0)
379 spu_arch = PROCESSOR_CELL;
380 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
381 spu_arch = PROCESSOR_CELLEDP;
382 else
383 error ("Unknown architecture '%s'", &spu_arch_string[0]);
384 }
385
386 /* Determine processor to tune for. */
387 if (spu_tune_string)
388 {
389 if (strcmp (&spu_tune_string[0], "cell") == 0)
390 spu_tune = PROCESSOR_CELL;
391 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
392 spu_tune = PROCESSOR_CELLEDP;
393 else
394 error ("Unknown architecture '%s'", &spu_tune_string[0]);
395 }
98bbec1e 396
13684256 397 /* Change defaults according to the processor architecture. */
398 if (spu_arch == PROCESSOR_CELLEDP)
399 {
400 /* If no command line option has been otherwise specified, change
401 the default to -mno-safe-hints on celledp -- only the original
402 Cell/B.E. processors require this workaround. */
403 if (!(target_flags_explicit & MASK_SAFE_HINTS))
404 target_flags &= ~MASK_SAFE_HINTS;
405 }
406
98bbec1e 407 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 408}
409\f
410/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
411 struct attribute_spec.handler. */
412
413/* Table of machine attributes. */
414const struct attribute_spec spu_attribute_table[] =
415{
416 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
417 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
418 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
419 { NULL, 0, 0, false, false, false, NULL }
420};
421
422/* True if MODE is valid for the target. By "valid", we mean able to
423 be manipulated in non-trivial ways. In particular, this means all
424 the arithmetic is supported. */
425static bool
426spu_scalar_mode_supported_p (enum machine_mode mode)
427{
428 switch (mode)
429 {
430 case QImode:
431 case HImode:
432 case SImode:
433 case SFmode:
434 case DImode:
435 case TImode:
436 case DFmode:
437 return true;
438
439 default:
440 return false;
441 }
442}
443
444/* Similarly for vector modes. "Supported" here is less strict. At
445 least some operations are supported; need to check optabs or builtins
446 for further details. */
447static bool
448spu_vector_mode_supported_p (enum machine_mode mode)
449{
450 switch (mode)
451 {
452 case V16QImode:
453 case V8HImode:
454 case V4SImode:
455 case V2DImode:
456 case V4SFmode:
457 case V2DFmode:
458 return true;
459
460 default:
461 return false;
462 }
463}
464
465/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
466 least significant bytes of the outer mode. This function returns
467   TRUE for the SUBREGs where this is correct. */
468int
469valid_subreg (rtx op)
470{
471 enum machine_mode om = GET_MODE (op);
472 enum machine_mode im = GET_MODE (SUBREG_REG (op));
473 return om != VOIDmode && im != VOIDmode
474 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 475 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
476 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 477}
478
479/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 480 and adjust the start offset. */
644459d0 481static rtx
482adjust_operand (rtx op, HOST_WIDE_INT * start)
483{
484 enum machine_mode mode;
485 int op_size;
38aca5eb 486 /* Strip any paradoxical SUBREG. */
487 if (GET_CODE (op) == SUBREG
488 && (GET_MODE_BITSIZE (GET_MODE (op))
489 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 490 {
491 if (start)
492 *start -=
493 GET_MODE_BITSIZE (GET_MODE (op)) -
494 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
495 op = SUBREG_REG (op);
496 }
497   /* If it is smaller than SI, ensure a SUBREG */
498 op_size = GET_MODE_BITSIZE (GET_MODE (op));
499 if (op_size < 32)
500 {
501 if (start)
502 *start += 32 - op_size;
503 op_size = 32;
504 }
505 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
506 mode = mode_for_size (op_size, MODE_INT, 0);
507 if (mode != GET_MODE (op))
508 op = gen_rtx_SUBREG (mode, op, 0);
509 return op;
510}
511
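/* (Summary, from the code below: expand an extv/extzv bit-field extract.
   ops[0] is the destination, ops[1] the source, ops[2] the field width in
   bits and ops[3] the start bit.  The field is moved to the top of the
   source register with a left shift, shifted back down with a logical or
   arithmetic right shift depending on UNSIGNEDP, and converted to the
   destination mode.)  */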
512void
513spu_expand_extv (rtx ops[], int unsignedp)
514{
515 HOST_WIDE_INT width = INTVAL (ops[2]);
516 HOST_WIDE_INT start = INTVAL (ops[3]);
517 HOST_WIDE_INT src_size, dst_size;
518 enum machine_mode src_mode, dst_mode;
519 rtx dst = ops[0], src = ops[1];
520 rtx s;
521
522 dst = adjust_operand (ops[0], 0);
523 dst_mode = GET_MODE (dst);
524 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
525
644459d0 526 src = adjust_operand (src, &start);
527 src_mode = GET_MODE (src);
528 src_size = GET_MODE_BITSIZE (GET_MODE (src));
529
530 if (start > 0)
531 {
532 s = gen_reg_rtx (src_mode);
533 switch (src_mode)
534 {
535 case SImode:
536 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
537 break;
538 case DImode:
539 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
540 break;
541 case TImode:
542 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
543 break;
544 default:
545 abort ();
546 }
547 src = s;
548 }
549
550 if (width < src_size)
551 {
552 rtx pat;
553 int icode;
554 switch (src_mode)
555 {
556 case SImode:
557 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
558 break;
559 case DImode:
560 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
561 break;
562 case TImode:
563 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
564 break;
565 default:
566 abort ();
567 }
568 s = gen_reg_rtx (src_mode);
569 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
570 emit_insn (pat);
571 src = s;
572 }
573
574 convert_move (dst, src, unsignedp);
575}
576
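/* (Summary, from the code below: expand an insv bit-field insert.  ops[0]
   is the destination, ops[1] the field width in bits, ops[2] the start bit
   and ops[3] the value to insert.  A mask covering the field and a shifted
   copy of the source are built and combined into the destination with
   selb; for a MEM destination the aligned quadword -- or two, when the
   field crosses a 16-byte boundary -- is loaded, merged and stored back.)  */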
577void
578spu_expand_insv (rtx ops[])
579{
580 HOST_WIDE_INT width = INTVAL (ops[1]);
581 HOST_WIDE_INT start = INTVAL (ops[2]);
582 HOST_WIDE_INT maskbits;
583 enum machine_mode dst_mode, src_mode;
584 rtx dst = ops[0], src = ops[3];
585 int dst_size, src_size;
586 rtx mask;
587 rtx shift_reg;
588 int shift;
589
590
591 if (GET_CODE (ops[0]) == MEM)
592 dst = gen_reg_rtx (TImode);
593 else
594 dst = adjust_operand (dst, &start);
595 dst_mode = GET_MODE (dst);
596 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
597
598 if (CONSTANT_P (src))
599 {
600 enum machine_mode m =
601 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
602 src = force_reg (m, convert_to_mode (m, src, 0));
603 }
604 src = adjust_operand (src, 0);
605 src_mode = GET_MODE (src);
606 src_size = GET_MODE_BITSIZE (GET_MODE (src));
607
608 mask = gen_reg_rtx (dst_mode);
609 shift_reg = gen_reg_rtx (dst_mode);
610 shift = dst_size - start - width;
611
612 /* It's not safe to use subreg here because the compiler assumes
613 that the SUBREG_REG is right justified in the SUBREG. */
614 convert_move (shift_reg, src, 1);
615
616 if (shift > 0)
617 {
618 switch (dst_mode)
619 {
620 case SImode:
621 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
622 break;
623 case DImode:
624 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
625 break;
626 case TImode:
627 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
628 break;
629 default:
630 abort ();
631 }
632 }
633 else if (shift < 0)
634 abort ();
635
636 switch (dst_size)
637 {
638 case 32:
639 maskbits = (-1ll << (32 - width - start));
640 if (start)
641 maskbits += (1ll << (32 - start));
642 emit_move_insn (mask, GEN_INT (maskbits));
643 break;
644 case 64:
645 maskbits = (-1ll << (64 - width - start));
646 if (start)
647 maskbits += (1ll << (64 - start));
648 emit_move_insn (mask, GEN_INT (maskbits));
649 break;
650 case 128:
651 {
652 unsigned char arr[16];
653 int i = start / 8;
654 memset (arr, 0, sizeof (arr));
655 arr[i] = 0xff >> (start & 7);
656 for (i++; i <= (start + width - 1) / 8; i++)
657 arr[i] = 0xff;
658 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
659 emit_move_insn (mask, array_to_constant (TImode, arr));
660 }
661 break;
662 default:
663 abort ();
664 }
665 if (GET_CODE (ops[0]) == MEM)
666 {
667 rtx aligned = gen_reg_rtx (SImode);
668 rtx low = gen_reg_rtx (SImode);
669 rtx addr = gen_reg_rtx (SImode);
670 rtx rotl = gen_reg_rtx (SImode);
671 rtx mask0 = gen_reg_rtx (TImode);
672 rtx mem;
673
674 emit_move_insn (addr, XEXP (ops[0], 0));
675 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
676 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
677 emit_insn (gen_negsi2 (rotl, low));
678 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
679 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
680 mem = change_address (ops[0], TImode, aligned);
681 set_mem_alias_set (mem, 0);
682 emit_move_insn (dst, mem);
683 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
684 emit_move_insn (mem, dst);
685 if (start + width > MEM_ALIGN (ops[0]))
686 {
687 rtx shl = gen_reg_rtx (SImode);
688 rtx mask1 = gen_reg_rtx (TImode);
689 rtx dst1 = gen_reg_rtx (TImode);
690 rtx mem1;
691 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
692 emit_insn (gen_shlqby_ti (mask1, mask, shl));
693 mem1 = adjust_address (mem, TImode, 16);
694 set_mem_alias_set (mem1, 0);
695 emit_move_insn (dst1, mem1);
696 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
697 emit_move_insn (mem1, dst1);
698 }
699 }
700 else
71cd778d 701 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 702}
703
704
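/* (Summary, from the code below: expand a block move whose length (ops[2])
   and alignment (ops[3]) are compile-time constants.  Only the
   16-byte-aligned case is handled: whole quadwords are copied with vector
   moves and any remaining tail is merged into the destination with selb
   under a byte mask.  Returns 1 when the move was expanded here, 0 to fall
   back to the generic expander.)  */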
705int
706spu_expand_block_move (rtx ops[])
707{
708 HOST_WIDE_INT bytes, align, offset;
709 rtx src, dst, sreg, dreg, target;
710 int i;
711 if (GET_CODE (ops[2]) != CONST_INT
712 || GET_CODE (ops[3]) != CONST_INT
48eb4342 713 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 714 return 0;
715
716 bytes = INTVAL (ops[2]);
717 align = INTVAL (ops[3]);
718
719 if (bytes <= 0)
720 return 1;
721
722 dst = ops[0];
723 src = ops[1];
724
725 if (align == 16)
726 {
727 for (offset = 0; offset + 16 <= bytes; offset += 16)
728 {
729 dst = adjust_address (ops[0], V16QImode, offset);
730 src = adjust_address (ops[1], V16QImode, offset);
731 emit_move_insn (dst, src);
732 }
733 if (offset < bytes)
734 {
735 rtx mask;
736 unsigned char arr[16] = { 0 };
737 for (i = 0; i < bytes - offset; i++)
738 arr[i] = 0xff;
739 dst = adjust_address (ops[0], V16QImode, offset);
740 src = adjust_address (ops[1], V16QImode, offset);
741 mask = gen_reg_rtx (V16QImode);
742 sreg = gen_reg_rtx (V16QImode);
743 dreg = gen_reg_rtx (V16QImode);
744 target = gen_reg_rtx (V16QImode);
745 emit_move_insn (mask, array_to_constant (V16QImode, arr));
746 emit_move_insn (dreg, dst);
747 emit_move_insn (sreg, src);
748 emit_insn (gen_selb (target, dreg, sreg, mask));
749 emit_move_insn (dst, target);
750 }
751 return 1;
752 }
753 return 0;
754}
755
756enum spu_comp_code
757{ SPU_EQ, SPU_GT, SPU_GTU };
758
5474166e 759int spu_comp_icode[12][3] = {
760 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
761 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
762 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
763 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
764 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
765 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
766 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
767 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
768 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
769 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
770 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
771 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 772};
773
774/* Generate a compare for CODE. Return a brand-new rtx that represents
775 the result of the compare. GCC can figure this out too if we don't
776   provide all variations of compares, but since GCC always wants to use
777 WORD_MODE, we can generate better code in most cases if we do it
778 ourselves. */
779void
780spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
781{
782 int reverse_compare = 0;
783 int reverse_test = 0;
5d70b918 784 rtx compare_result, eq_result;
785 rtx comp_rtx, eq_rtx;
644459d0 786 rtx target = operands[0];
787 enum machine_mode comp_mode;
788 enum machine_mode op_mode;
5d70b918 789 enum spu_comp_code scode, eq_code, ior_code;
644459d0 790 int index;
5d70b918 791 int eq_test = 0;
644459d0 792
793 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
794 and so on, to keep the constant in operand 1. */
795 if (GET_CODE (spu_compare_op1) == CONST_INT)
796 {
797 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
798 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
799 switch (code)
800 {
801 case GE:
802 spu_compare_op1 = GEN_INT (val);
803 code = GT;
804 break;
805 case LT:
806 spu_compare_op1 = GEN_INT (val);
807 code = LE;
808 break;
809 case GEU:
810 spu_compare_op1 = GEN_INT (val);
811 code = GTU;
812 break;
813 case LTU:
814 spu_compare_op1 = GEN_INT (val);
815 code = LEU;
816 break;
817 default:
818 break;
819 }
820 }
821
5d70b918 822 comp_mode = SImode;
823 op_mode = GET_MODE (spu_compare_op0);
824
644459d0 825 switch (code)
826 {
827 case GE:
644459d0 828 scode = SPU_GT;
07027691 829 if (HONOR_NANS (op_mode))
5d70b918 830 {
831 reverse_compare = 0;
832 reverse_test = 0;
833 eq_test = 1;
834 eq_code = SPU_EQ;
835 }
836 else
837 {
838 reverse_compare = 1;
839 reverse_test = 1;
840 }
644459d0 841 break;
842 case LE:
644459d0 843 scode = SPU_GT;
07027691 844 if (HONOR_NANS (op_mode))
5d70b918 845 {
846 reverse_compare = 1;
847 reverse_test = 0;
848 eq_test = 1;
849 eq_code = SPU_EQ;
850 }
851 else
852 {
853 reverse_compare = 0;
854 reverse_test = 1;
855 }
644459d0 856 break;
857 case LT:
858 reverse_compare = 1;
859 reverse_test = 0;
860 scode = SPU_GT;
861 break;
862 case GEU:
863 reverse_compare = 1;
864 reverse_test = 1;
865 scode = SPU_GTU;
866 break;
867 case LEU:
868 reverse_compare = 0;
869 reverse_test = 1;
870 scode = SPU_GTU;
871 break;
872 case LTU:
873 reverse_compare = 1;
874 reverse_test = 0;
875 scode = SPU_GTU;
876 break;
877 case NE:
878 reverse_compare = 0;
879 reverse_test = 1;
880 scode = SPU_EQ;
881 break;
882
883 case EQ:
884 scode = SPU_EQ;
885 break;
886 case GT:
887 scode = SPU_GT;
888 break;
889 case GTU:
890 scode = SPU_GTU;
891 break;
892 default:
893 scode = SPU_EQ;
894 break;
895 }
896
644459d0 897 switch (op_mode)
898 {
899 case QImode:
900 index = 0;
901 comp_mode = QImode;
902 break;
903 case HImode:
904 index = 1;
905 comp_mode = HImode;
906 break;
907 case SImode:
908 index = 2;
909 break;
910 case DImode:
911 index = 3;
912 break;
913 case TImode:
914 index = 4;
915 break;
916 case SFmode:
917 index = 5;
918 break;
919 case DFmode:
920 index = 6;
921 break;
922 case V16QImode:
5474166e 923 index = 7;
924 comp_mode = op_mode;
925 break;
644459d0 926 case V8HImode:
5474166e 927 index = 8;
928 comp_mode = op_mode;
929 break;
644459d0 930 case V4SImode:
5474166e 931 index = 9;
932 comp_mode = op_mode;
933 break;
644459d0 934 case V4SFmode:
5474166e 935 index = 10;
936 comp_mode = V4SImode;
937 break;
644459d0 938 case V2DFmode:
5474166e 939 index = 11;
940 comp_mode = V2DImode;
644459d0 941 break;
5474166e 942 case V2DImode:
644459d0 943 default:
944 abort ();
945 }
946
07027691 947 if (GET_MODE (spu_compare_op1) == DFmode
948 && (scode != SPU_GT && scode != SPU_EQ))
949 abort ();
644459d0 950
951 if (is_set == 0 && spu_compare_op1 == const0_rtx
952 && (GET_MODE (spu_compare_op0) == SImode
953 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
954 {
955 /* Don't need to set a register with the result when we are
956 comparing against zero and branching. */
957 reverse_test = !reverse_test;
958 compare_result = spu_compare_op0;
959 }
960 else
961 {
962 compare_result = gen_reg_rtx (comp_mode);
963
964 if (reverse_compare)
965 {
966 rtx t = spu_compare_op1;
967 spu_compare_op1 = spu_compare_op0;
968 spu_compare_op0 = t;
969 }
970
971 if (spu_comp_icode[index][scode] == 0)
972 abort ();
973
974 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
975 (spu_compare_op0, op_mode))
976 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
977 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
978 (spu_compare_op1, op_mode))
979 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
980 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
981 spu_compare_op0,
982 spu_compare_op1);
983 if (comp_rtx == 0)
984 abort ();
985 emit_insn (comp_rtx);
986
5d70b918 987 if (eq_test)
988 {
989 eq_result = gen_reg_rtx (comp_mode);
990 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
991 spu_compare_op0,
992 spu_compare_op1);
993 if (eq_rtx == 0)
994 abort ();
995 emit_insn (eq_rtx);
996 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
997 gcc_assert (ior_code != CODE_FOR_nothing);
998 emit_insn (GEN_FCN (ior_code)
999 (compare_result, compare_result, eq_result));
1000 }
644459d0 1001 }
1002
1003 if (is_set == 0)
1004 {
1005 rtx bcomp;
1006 rtx loc_ref;
1007
1008 /* We don't have branch on QI compare insns, so we convert the
1009 QI compare result to a HI result. */
1010 if (comp_mode == QImode)
1011 {
1012 rtx old_res = compare_result;
1013 compare_result = gen_reg_rtx (HImode);
1014 comp_mode = HImode;
1015 emit_insn (gen_extendqihi2 (compare_result, old_res));
1016 }
1017
1018 if (reverse_test)
1019 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1020 else
1021 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1022
1023 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1024 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1025 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1026 loc_ref, pc_rtx)));
1027 }
1028 else if (is_set == 2)
1029 {
1030 int compare_size = GET_MODE_BITSIZE (comp_mode);
1031 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1032 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1033 rtx select_mask;
1034 rtx op_t = operands[2];
1035 rtx op_f = operands[3];
1036
1037 /* The result of the comparison can be SI, HI or QI mode. Create a
1038 mask based on that result. */
1039 if (target_size > compare_size)
1040 {
1041 select_mask = gen_reg_rtx (mode);
1042 emit_insn (gen_extend_compare (select_mask, compare_result));
1043 }
1044 else if (target_size < compare_size)
1045 select_mask =
1046 gen_rtx_SUBREG (mode, compare_result,
1047 (compare_size - target_size) / BITS_PER_UNIT);
1048 else if (comp_mode != mode)
1049 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1050 else
1051 select_mask = compare_result;
1052
1053 if (GET_MODE (target) != GET_MODE (op_t)
1054 || GET_MODE (target) != GET_MODE (op_f))
1055 abort ();
1056
1057 if (reverse_test)
1058 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1059 else
1060 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1061 }
1062 else
1063 {
1064 if (reverse_test)
1065 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1066 gen_rtx_NOT (comp_mode, compare_result)));
1067 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1068 emit_insn (gen_extendhisi2 (target, compare_result));
1069 else if (GET_MODE (target) == SImode
1070 && GET_MODE (compare_result) == QImode)
1071 emit_insn (gen_extend_compare (target, compare_result));
1072 else
1073 emit_move_insn (target, compare_result);
1074 }
1075}
1076
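/* (Reference note: return the target-format bit pattern of the SFmode or
   DFmode CONST_DOUBLE X packed into a HOST_WIDE_INT; other modes abort.)  */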
1077HOST_WIDE_INT
1078const_double_to_hwint (rtx x)
1079{
1080 HOST_WIDE_INT val;
1081 REAL_VALUE_TYPE rv;
1082 if (GET_MODE (x) == SFmode)
1083 {
1084 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1085 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1086 }
1087 else if (GET_MODE (x) == DFmode)
1088 {
1089 long l[2];
1090 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1091 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1092 val = l[0];
1093 val = (val << 32) | (l[1] & 0xffffffff);
1094 }
1095 else
1096 abort ();
1097 return val;
1098}
1099
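/* (Reference note: the inverse of const_double_to_hwint -- rebuild an
   SFmode or DFmode CONST_DOUBLE from the bit pattern V.)  */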
1100rtx
1101hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1102{
1103 long tv[2];
1104 REAL_VALUE_TYPE rv;
1105 gcc_assert (mode == SFmode || mode == DFmode);
1106
1107 if (mode == SFmode)
1108 tv[0] = (v << 32) >> 32;
1109 else if (mode == DFmode)
1110 {
1111 tv[1] = (v << 32) >> 32;
1112 tv[0] = v >> 32;
1113 }
1114 real_from_target (&rv, tv, mode);
1115 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1116}
1117
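/* (Summary, from the code below: print a memory address in SPU assembler
   syntax -- "offset(reg)" for REG and REG+CONST addresses, "reg,reg" for
   REG+REG, or a symbolic/absolute address otherwise.  An outer AND with
   -16, left over from forcing 16-byte alignment, is stripped first.)  */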
1118void
1119print_operand_address (FILE * file, register rtx addr)
1120{
1121 rtx reg;
1122 rtx offset;
1123
e04cf423 1124 if (GET_CODE (addr) == AND
1125 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1126 && INTVAL (XEXP (addr, 1)) == -16)
1127 addr = XEXP (addr, 0);
1128
644459d0 1129 switch (GET_CODE (addr))
1130 {
1131 case REG:
1132 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1133 break;
1134
1135 case PLUS:
1136 reg = XEXP (addr, 0);
1137 offset = XEXP (addr, 1);
1138 if (GET_CODE (offset) == REG)
1139 {
1140 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1141 reg_names[REGNO (offset)]);
1142 }
1143 else if (GET_CODE (offset) == CONST_INT)
1144 {
1145 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1146 INTVAL (offset), reg_names[REGNO (reg)]);
1147 }
1148 else
1149 abort ();
1150 break;
1151
1152 case CONST:
1153 case LABEL_REF:
1154 case SYMBOL_REF:
1155 case CONST_INT:
1156 output_addr_const (file, addr);
1157 break;
1158
1159 default:
1160 debug_rtx (addr);
1161 abort ();
1162 }
1163}
1164
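/* (Summary, from the code below: print operand X for an asm template.
   CODE selects an SPU-specific modifier -- for example 'j'/'k'/'m' print
   the suffix of the matching or-immediate instruction and 'J'/'K'/'L' its
   immediate value, 's'/'d'/'t' print the suffix of an immediate-load or
   c*d form and 'S'/'D'/'T' its operand, 'N' prints a negated constant,
   'C' a control-word nibble, 'M' the c*d size letter, 'b' branch
   modifiers, 'i' an indirect-call operand, 'p' the addressing-mode letter,
   and code 0 prints the operand itself.)  */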
1165void
1166print_operand (FILE * file, rtx x, int code)
1167{
1168 enum machine_mode mode = GET_MODE (x);
1169 HOST_WIDE_INT val;
1170 unsigned char arr[16];
1171 int xcode = GET_CODE (x);
dea01258 1172 int i, info;
644459d0 1173 if (GET_MODE (x) == VOIDmode)
1174 switch (code)
1175 {
644459d0 1176 case 'L': /* 128 bits, signed */
1177 case 'm': /* 128 bits, signed */
1178 case 'T': /* 128 bits, signed */
1179 case 't': /* 128 bits, signed */
1180 mode = TImode;
1181 break;
644459d0 1182 case 'K': /* 64 bits, signed */
1183 case 'k': /* 64 bits, signed */
1184 case 'D': /* 64 bits, signed */
1185 case 'd': /* 64 bits, signed */
1186 mode = DImode;
1187 break;
644459d0 1188 case 'J': /* 32 bits, signed */
1189 case 'j': /* 32 bits, signed */
1190 case 's': /* 32 bits, signed */
1191 case 'S': /* 32 bits, signed */
1192 mode = SImode;
1193 break;
1194 }
1195 switch (code)
1196 {
1197
1198 case 'j': /* 32 bits, signed */
1199 case 'k': /* 64 bits, signed */
1200 case 'm': /* 128 bits, signed */
1201 if (xcode == CONST_INT
1202 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1203 {
1204 gcc_assert (logical_immediate_p (x, mode));
1205 constant_to_array (mode, x, arr);
1206 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1207 val = trunc_int_for_mode (val, SImode);
1208 switch (which_logical_immediate (val))
1209 {
1210 case SPU_ORI:
1211 break;
1212 case SPU_ORHI:
1213 fprintf (file, "h");
1214 break;
1215 case SPU_ORBI:
1216 fprintf (file, "b");
1217 break;
1218 default:
1219 gcc_unreachable();
1220 }
1221 }
1222 else
1223 gcc_unreachable();
1224 return;
1225
1226 case 'J': /* 32 bits, signed */
1227 case 'K': /* 64 bits, signed */
1228 case 'L': /* 128 bits, signed */
1229 if (xcode == CONST_INT
1230 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1231 {
1232 gcc_assert (logical_immediate_p (x, mode)
1233 || iohl_immediate_p (x, mode));
1234 constant_to_array (mode, x, arr);
1235 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1236 val = trunc_int_for_mode (val, SImode);
1237 switch (which_logical_immediate (val))
1238 {
1239 case SPU_ORI:
1240 case SPU_IOHL:
1241 break;
1242 case SPU_ORHI:
1243 val = trunc_int_for_mode (val, HImode);
1244 break;
1245 case SPU_ORBI:
1246 val = trunc_int_for_mode (val, QImode);
1247 break;
1248 default:
1249 gcc_unreachable();
1250 }
1251 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1252 }
1253 else
1254 gcc_unreachable();
1255 return;
1256
1257 case 't': /* 128 bits, signed */
1258 case 'd': /* 64 bits, signed */
1259 case 's': /* 32 bits, signed */
dea01258 1260 if (CONSTANT_P (x))
644459d0 1261 {
dea01258 1262 enum immediate_class c = classify_immediate (x, mode);
1263 switch (c)
1264 {
1265 case IC_IL1:
1266 constant_to_array (mode, x, arr);
1267 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1268 val = trunc_int_for_mode (val, SImode);
1269 switch (which_immediate_load (val))
1270 {
1271 case SPU_IL:
1272 break;
1273 case SPU_ILA:
1274 fprintf (file, "a");
1275 break;
1276 case SPU_ILH:
1277 fprintf (file, "h");
1278 break;
1279 case SPU_ILHU:
1280 fprintf (file, "hu");
1281 break;
1282 default:
1283 gcc_unreachable ();
1284 }
1285 break;
1286 case IC_CPAT:
1287 constant_to_array (mode, x, arr);
1288 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1289 if (info == 1)
1290 fprintf (file, "b");
1291 else if (info == 2)
1292 fprintf (file, "h");
1293 else if (info == 4)
1294 fprintf (file, "w");
1295 else if (info == 8)
1296 fprintf (file, "d");
1297 break;
1298 case IC_IL1s:
1299 if (xcode == CONST_VECTOR)
1300 {
1301 x = CONST_VECTOR_ELT (x, 0);
1302 xcode = GET_CODE (x);
1303 }
1304 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1305 fprintf (file, "a");
1306 else if (xcode == HIGH)
1307 fprintf (file, "hu");
1308 break;
1309 case IC_FSMBI:
5df189be 1310 case IC_FSMBI2:
dea01258 1311 case IC_IL2:
1312 case IC_IL2s:
1313 case IC_POOL:
1314 abort ();
1315 }
644459d0 1316 }
644459d0 1317 else
1318 gcc_unreachable ();
1319 return;
1320
1321 case 'T': /* 128 bits, signed */
1322 case 'D': /* 64 bits, signed */
1323 case 'S': /* 32 bits, signed */
dea01258 1324 if (CONSTANT_P (x))
644459d0 1325 {
dea01258 1326 enum immediate_class c = classify_immediate (x, mode);
1327 switch (c)
644459d0 1328 {
dea01258 1329 case IC_IL1:
1330 constant_to_array (mode, x, arr);
1331 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1332 val = trunc_int_for_mode (val, SImode);
1333 switch (which_immediate_load (val))
1334 {
1335 case SPU_IL:
1336 case SPU_ILA:
1337 break;
1338 case SPU_ILH:
1339 case SPU_ILHU:
1340 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1341 break;
1342 default:
1343 gcc_unreachable ();
1344 }
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1346 break;
1347 case IC_FSMBI:
1348 constant_to_array (mode, x, arr);
1349 val = 0;
1350 for (i = 0; i < 16; i++)
1351 {
1352 val <<= 1;
1353 val |= arr[i] & 1;
1354 }
1355 print_operand (file, GEN_INT (val), 0);
1356 break;
1357 case IC_CPAT:
1358 constant_to_array (mode, x, arr);
1359 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1361 break;
dea01258 1362 case IC_IL1s:
dea01258 1363 if (xcode == HIGH)
5df189be 1364 x = XEXP (x, 0);
1365 if (GET_CODE (x) == CONST_VECTOR)
1366 x = CONST_VECTOR_ELT (x, 0);
1367 output_addr_const (file, x);
1368 if (xcode == HIGH)
1369 fprintf (file, "@h");
644459d0 1370 break;
dea01258 1371 case IC_IL2:
1372 case IC_IL2s:
5df189be 1373 case IC_FSMBI2:
dea01258 1374 case IC_POOL:
1375 abort ();
644459d0 1376 }
c8befdb9 1377 }
644459d0 1378 else
1379 gcc_unreachable ();
1380 return;
1381
644459d0 1382 case 'C':
1383 if (xcode == CONST_INT)
1384 {
1385	  /* Only the 4 least significant bits are relevant for generating
1386 control word instructions. */
1387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1388 return;
1389 }
1390 break;
1391
1392 case 'M': /* print code for c*d */
1393 if (GET_CODE (x) == CONST_INT)
1394 switch (INTVAL (x))
1395 {
1396 case 1:
1397 fprintf (file, "b");
1398 break;
1399 case 2:
1400 fprintf (file, "h");
1401 break;
1402 case 4:
1403 fprintf (file, "w");
1404 break;
1405 case 8:
1406 fprintf (file, "d");
1407 break;
1408 default:
1409 gcc_unreachable();
1410 }
1411 else
1412 gcc_unreachable();
1413 return;
1414
1415 case 'N': /* Negate the operand */
1416 if (xcode == CONST_INT)
1417 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1418 else if (xcode == CONST_VECTOR)
1419 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1420 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1421 return;
1422
1423 case 'I': /* enable/disable interrupts */
1424 if (xcode == CONST_INT)
1425 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1426 return;
1427
1428 case 'b': /* branch modifiers */
1429 if (xcode == REG)
1430 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1431 else if (COMPARISON_P (x))
1432 fprintf (file, "%s", xcode == NE ? "n" : "");
1433 return;
1434
1435 case 'i': /* indirect call */
1436 if (xcode == MEM)
1437 {
1438 if (GET_CODE (XEXP (x, 0)) == REG)
1439 /* Used in indirect function calls. */
1440 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1441 else
1442 output_address (XEXP (x, 0));
1443 }
1444 return;
1445
1446 case 'p': /* load/store */
1447 if (xcode == MEM)
1448 {
1449 x = XEXP (x, 0);
1450 xcode = GET_CODE (x);
1451 }
e04cf423 1452 if (xcode == AND)
1453 {
1454 x = XEXP (x, 0);
1455 xcode = GET_CODE (x);
1456 }
644459d0 1457 if (xcode == REG)
1458 fprintf (file, "d");
1459 else if (xcode == CONST_INT)
1460 fprintf (file, "a");
1461 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1462 fprintf (file, "r");
1463 else if (xcode == PLUS || xcode == LO_SUM)
1464 {
1465 if (GET_CODE (XEXP (x, 1)) == REG)
1466 fprintf (file, "x");
1467 else
1468 fprintf (file, "d");
1469 }
1470 return;
1471
5df189be 1472 case 'e':
1473 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1474 val &= 0x7;
1475 output_addr_const (file, GEN_INT (val));
1476 return;
1477
1478 case 'f':
1479 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1480 val &= 0x1f;
1481 output_addr_const (file, GEN_INT (val));
1482 return;
1483
1484 case 'g':
1485 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1486 val &= 0x3f;
1487 output_addr_const (file, GEN_INT (val));
1488 return;
1489
1490 case 'h':
1491 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1492 val = (val >> 3) & 0x1f;
1493 output_addr_const (file, GEN_INT (val));
1494 return;
1495
1496 case 'E':
1497 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1498 val = -val;
1499 val &= 0x7;
1500 output_addr_const (file, GEN_INT (val));
1501 return;
1502
1503 case 'F':
1504 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1505 val = -val;
1506 val &= 0x1f;
1507 output_addr_const (file, GEN_INT (val));
1508 return;
1509
1510 case 'G':
1511 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1512 val = -val;
1513 val &= 0x3f;
1514 output_addr_const (file, GEN_INT (val));
1515 return;
1516
1517 case 'H':
1518 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1519 val = -(val & -8ll);
1520 val = (val >> 3) & 0x1f;
1521 output_addr_const (file, GEN_INT (val));
1522 return;
1523
644459d0 1524 case 0:
1525 if (xcode == REG)
1526 fprintf (file, "%s", reg_names[REGNO (x)]);
1527 else if (xcode == MEM)
1528 output_address (XEXP (x, 0));
1529 else if (xcode == CONST_VECTOR)
dea01258 1530 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1531 else
1532 output_addr_const (file, x);
1533 return;
1534
f6a0d06f 1535 /* unused letters
5df189be 1536 o qr uvw yz
1537 AB OPQR UVWXYZ */
644459d0 1538 default:
1539 output_operand_lossage ("invalid %%xn code");
1540 }
1541 gcc_unreachable ();
1542}
1543
1544extern char call_used_regs[];
644459d0 1545
1546/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1547 caller saved register. For leaf functions it is more efficient to
1548 use a volatile register because we won't need to save and restore the
1549 pic register. This routine is only valid after register allocation
1550 is completed, so we can pick an unused register. */
1551static rtx
1552get_pic_reg (void)
1553{
1554 rtx pic_reg = pic_offset_table_rtx;
1555 if (!reload_completed && !reload_in_progress)
1556 abort ();
1557 return pic_reg;
1558}
1559
5df189be 1560/* Split constant addresses to handle cases that are too large.
1561 Add in the pic register when in PIC mode.
1562 Split immediates that require more than 1 instruction. */
dea01258 1563int
1564spu_split_immediate (rtx * ops)
c8befdb9 1565{
dea01258 1566 enum machine_mode mode = GET_MODE (ops[0]);
1567 enum immediate_class c = classify_immediate (ops[1], mode);
1568
1569 switch (c)
c8befdb9 1570 {
dea01258 1571 case IC_IL2:
1572 {
1573 unsigned char arrhi[16];
1574 unsigned char arrlo[16];
98bbec1e 1575 rtx to, temp, hi, lo;
dea01258 1576 int i;
98bbec1e 1577 enum machine_mode imode = mode;
1578 /* We need to do reals as ints because the constant used in the
1579 IOR might not be a legitimate real constant. */
1580 imode = int_mode_for_mode (mode);
dea01258 1581 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1582 if (imode != mode)
1583 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1584 else
1585 to = ops[0];
1586 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1587 for (i = 0; i < 16; i += 4)
1588 {
1589 arrlo[i + 2] = arrhi[i + 2];
1590 arrlo[i + 3] = arrhi[i + 3];
1591 arrlo[i + 0] = arrlo[i + 1] = 0;
1592 arrhi[i + 2] = arrhi[i + 3] = 0;
1593 }
98bbec1e 1594 hi = array_to_constant (imode, arrhi);
1595 lo = array_to_constant (imode, arrlo);
1596 emit_move_insn (temp, hi);
dea01258 1597 emit_insn (gen_rtx_SET
98bbec1e 1598 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1599 return 1;
1600 }
5df189be 1601 case IC_FSMBI2:
1602 {
1603 unsigned char arr_fsmbi[16];
1604 unsigned char arr_andbi[16];
1605 rtx to, reg_fsmbi, reg_and;
1606 int i;
1607 enum machine_mode imode = mode;
1608 /* We need to do reals as ints because the constant used in the
1609 * AND might not be a legitimate real constant. */
1610 imode = int_mode_for_mode (mode);
1611 constant_to_array (mode, ops[1], arr_fsmbi);
1612 if (imode != mode)
1613 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1614 else
1615 to = ops[0];
1616 for (i = 0; i < 16; i++)
1617 if (arr_fsmbi[i] != 0)
1618 {
1619 arr_andbi[0] = arr_fsmbi[i];
1620 arr_fsmbi[i] = 0xff;
1621 }
1622 for (i = 1; i < 16; i++)
1623 arr_andbi[i] = arr_andbi[0];
1624 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1625 reg_and = array_to_constant (imode, arr_andbi);
1626 emit_move_insn (to, reg_fsmbi);
1627 emit_insn (gen_rtx_SET
1628 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1629 return 1;
1630 }
dea01258 1631 case IC_POOL:
1632 if (reload_in_progress || reload_completed)
1633 {
1634 rtx mem = force_const_mem (mode, ops[1]);
1635 if (TARGET_LARGE_MEM)
1636 {
1637 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1638 emit_move_insn (addr, XEXP (mem, 0));
1639 mem = replace_equiv_address (mem, addr);
1640 }
1641 emit_move_insn (ops[0], mem);
1642 return 1;
1643 }
1644 break;
1645 case IC_IL1s:
1646 case IC_IL2s:
1647 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1648 {
1649 if (c == IC_IL2s)
1650 {
5df189be 1651 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1652 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1653 }
1654 else if (flag_pic)
1655 emit_insn (gen_pic (ops[0], ops[1]));
1656 if (flag_pic)
1657 {
1658 rtx pic_reg = get_pic_reg ();
1659 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1660 crtl->uses_pic_offset_table = 1;
dea01258 1661 }
1662 return flag_pic || c == IC_IL2s;
1663 }
1664 break;
1665 case IC_IL1:
1666 case IC_FSMBI:
1667 case IC_CPAT:
1668 break;
c8befdb9 1669 }
dea01258 1670 return 0;
c8befdb9 1671}
1672
644459d0 1673/* SAVING is TRUE when we are generating the actual load and store
1674 instructions for REGNO. When determining the size of the stack
1675 needed for saving register we must allocate enough space for the
1676 worst case, because we don't always have the information early enough
1677 to not allocate it. But we can at least eliminate the actual loads
1678 and stores during the prologue/epilogue. */
1679static int
1680need_to_save_reg (int regno, int saving)
1681{
3072d30e 1682 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1683 return 1;
1684 if (flag_pic
1685 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1686 && (!saving || crtl->uses_pic_offset_table)
644459d0 1687 && (!saving
3072d30e 1688 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1689 return 1;
1690 return 0;
1691}
1692
1693/* This function is only correct starting with local register
1694 allocation */
1695int
1696spu_saved_regs_size (void)
1697{
1698 int reg_save_size = 0;
1699 int regno;
1700
1701 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1702 if (need_to_save_reg (regno, 0))
1703 reg_save_size += 0x10;
1704 return reg_save_size;
1705}
1706
1707static rtx
1708frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1709{
1710 rtx reg = gen_rtx_REG (V4SImode, regno);
1711 rtx mem =
1712 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1713 return emit_insn (gen_movv4si (mem, reg));
1714}
1715
1716static rtx
1717frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1718{
1719 rtx reg = gen_rtx_REG (V4SImode, regno);
1720 rtx mem =
1721 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1722 return emit_insn (gen_movv4si (reg, mem));
1723}
1724
1725/* This happens after reload, so we need to expand it. */
1726static rtx
1727frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1728{
1729 rtx insn;
1730 if (satisfies_constraint_K (GEN_INT (imm)))
1731 {
1732 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1733 }
1734 else
1735 {
3072d30e 1736 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1737 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1738 if (REGNO (src) == REGNO (scratch))
1739 abort ();
1740 }
644459d0 1741 return insn;
1742}
1743
1744/* Return nonzero if this function is known to have a null epilogue. */
1745
1746int
1747direct_return (void)
1748{
1749 if (reload_completed)
1750 {
1751 if (cfun->static_chain_decl == 0
1752 && (spu_saved_regs_size ()
1753 + get_frame_size ()
abe32cce 1754 + crtl->outgoing_args_size
1755 + crtl->args.pretend_args_size == 0)
644459d0 1756 && current_function_is_leaf)
1757 return 1;
1758 }
1759 return 0;
1760}
1761
1762/*
1763 The stack frame looks like this:
1764 +-------------+
1765 | incoming |
a8e019fa 1766 | args |
1767 AP -> +-------------+
644459d0 1768 | $lr save |
1769 +-------------+
1770 prev SP | back chain |
1771 +-------------+
1772 | var args |
abe32cce 1773 | reg save | crtl->args.pretend_args_size bytes
644459d0 1774 +-------------+
1775 | ... |
1776 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1777 FP -> +-------------+
644459d0 1778 | ... |
a8e019fa 1779 | vars | get_frame_size() bytes
1780 HFP -> +-------------+
644459d0 1781 | ... |
1782 | outgoing |
abe32cce 1783 | args | crtl->outgoing_args_size bytes
644459d0 1784 +-------------+
1785 | $lr of next |
1786 | frame |
1787 +-------------+
a8e019fa 1788 | back chain |
1789 SP -> +-------------+
644459d0 1790
1791*/
1792void
1793spu_expand_prologue (void)
1794{
1795 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1796 HOST_WIDE_INT total_size;
1797 HOST_WIDE_INT saved_regs_size;
1798 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1799 rtx scratch_reg_0, scratch_reg_1;
1800 rtx insn, real;
1801
1802 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1803 the "toplevel" insn chain. */
1804 emit_note (NOTE_INSN_DELETED);
1805
1806 if (flag_pic && optimize == 0)
18d50ae6 1807 crtl->uses_pic_offset_table = 1;
644459d0 1808
1809 if (spu_naked_function_p (current_function_decl))
1810 return;
1811
1812 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1813 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1814
1815 saved_regs_size = spu_saved_regs_size ();
1816 total_size = size + saved_regs_size
abe32cce 1817 + crtl->outgoing_args_size
1818 + crtl->args.pretend_args_size;
644459d0 1819
1820 if (!current_function_is_leaf
18d50ae6 1821 || cfun->calls_alloca || total_size > 0)
644459d0 1822 total_size += STACK_POINTER_OFFSET;
1823
1824 /* Save this first because code after this might use the link
1825 register as a scratch register. */
1826 if (!current_function_is_leaf)
1827 {
1828 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1829 RTX_FRAME_RELATED_P (insn) = 1;
1830 }
1831
1832 if (total_size > 0)
1833 {
abe32cce 1834 offset = -crtl->args.pretend_args_size;
644459d0 1835 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1836 if (need_to_save_reg (regno, 1))
1837 {
1838 offset -= 16;
1839 insn = frame_emit_store (regno, sp_reg, offset);
1840 RTX_FRAME_RELATED_P (insn) = 1;
1841 }
1842 }
1843
18d50ae6 1844 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1845 {
1846 rtx pic_reg = get_pic_reg ();
1847 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1848 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1849 }
1850
1851 if (total_size > 0)
1852 {
1853 if (flag_stack_check)
1854 {
d819917f 1855 /* We compare against total_size-1 because
644459d0 1856 ($sp >= total_size) <=> ($sp > total_size-1) */
1857 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1858 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1859 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1860 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1861 {
1862 emit_move_insn (scratch_v4si, size_v4si);
1863 size_v4si = scratch_v4si;
1864 }
1865 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1866 emit_insn (gen_vec_extractv4si
1867 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1868 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1869 }
1870
1871 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1872 the value of the previous $sp because we save it as the back
1873 chain. */
1874 if (total_size <= 2000)
1875 {
1876 /* In this case we save the back chain first. */
1877 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1878 insn =
1879 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1880 }
1881 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1882 {
1883 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1884 insn =
1885 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1886 }
1887 else
1888 {
1889 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1890 insn =
1891 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1892 }
1893 RTX_FRAME_RELATED_P (insn) = 1;
1894 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1895 REG_NOTES (insn) =
1896 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1897
1898 if (total_size > 2000)
1899 {
1900 /* Save the back chain ptr */
1901 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1902 }
1903
1904 if (frame_pointer_needed)
1905 {
1906 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1907 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1908 + crtl->outgoing_args_size;
644459d0 1909 /* Set the new frame_pointer */
d8dfeb55 1910 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1911 RTX_FRAME_RELATED_P (insn) = 1;
1912 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1913 REG_NOTES (insn) =
1914 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1915 real, REG_NOTES (insn));
5df189be 1916 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1917 }
1918 }
1919
1920 emit_note (NOTE_INSN_DELETED);
1921}
1922
1923void
1924spu_expand_epilogue (bool sibcall_p)
1925{
1926 int size = get_frame_size (), offset, regno;
1927 HOST_WIDE_INT saved_regs_size, total_size;
1928 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1929 rtx jump, scratch_reg_0;
1930
1931 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1932 the "toplevel" insn chain. */
1933 emit_note (NOTE_INSN_DELETED);
1934
1935 if (spu_naked_function_p (current_function_decl))
1936 return;
1937
1938 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1939
1940 saved_regs_size = spu_saved_regs_size ();
1941 total_size = size + saved_regs_size
abe32cce 1942 + crtl->outgoing_args_size
1943 + crtl->args.pretend_args_size;
644459d0 1944
1945 if (!current_function_is_leaf
18d50ae6 1946 || cfun->calls_alloca || total_size > 0)
644459d0 1947 total_size += STACK_POINTER_OFFSET;
1948
1949 if (total_size > 0)
1950 {
18d50ae6 1951 if (cfun->calls_alloca)
644459d0 1952 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1953 else
1954 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1955
1956
1957 if (saved_regs_size > 0)
1958 {
abe32cce 1959 offset = -crtl->args.pretend_args_size;
644459d0 1960 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1961 if (need_to_save_reg (regno, 1))
1962 {
1963 offset -= 0x10;
1964 frame_emit_load (regno, sp_reg, offset);
1965 }
1966 }
1967 }
1968
1969 if (!current_function_is_leaf)
1970 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1971
1972 if (!sibcall_p)
1973 {
18b42941 1974 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1975 jump = emit_jump_insn (gen__return ());
1976 emit_barrier_after (jump);
1977 }
1978
1979 emit_note (NOTE_INSN_DELETED);
1980}
1981
1982rtx
1983spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1984{
1985 if (count != 0)
1986 return 0;
1987 /* This is inefficient because it ends up copying to a save-register
1988 which then gets saved even though $lr has already been saved. But
1989 it does generate better code for leaf functions and we don't need
1990 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1991 used for __builtin_return_address anyway, so maybe we don't care if
1992 it's inefficient. */
1993 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1994}
1995\f
1996
1997/* Given VAL, generate a constant appropriate for MODE.
1998 If MODE is a vector mode, every element will be VAL.
1999 For TImode, VAL will be zero extended to 128 bits. */
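/* A sketch of typical uses (derived from the cases below): spu_const
   (V4SImode, 1) returns a CONST_VECTOR of four SImode 1s, and
   spu_const (SFmode, 0x3f800000) returns the CONST_DOUBLE whose bit
   pattern is 1.0f.  */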
2000rtx
2001spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2002{
2003 rtx inner;
2004 rtvec v;
2005 int units, i;
2006
2007 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2008 || GET_MODE_CLASS (mode) == MODE_FLOAT
2009 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2010 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2011
2012 if (GET_MODE_CLASS (mode) == MODE_INT)
2013 return immed_double_const (val, 0, mode);
2014
2015 /* val is the bit representation of the float */
2016 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2017 return hwint_to_const_double (mode, val);
2018
2019 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2020 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2021 else
2022 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2023
2024 units = GET_MODE_NUNITS (mode);
2025
2026 v = rtvec_alloc (units);
2027
2028 for (i = 0; i < units; ++i)
2029 RTVEC_ELT (v, i) = inner;
2030
2031 return gen_rtx_CONST_VECTOR (mode, v);
2032}
644459d0 2033
5474166e 2034/* Create a MODE vector constant from 4 ints. */
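/* The ints are packed big-endian: A fills bytes 0-3 with its most
   significant byte first, B fills bytes 4-7, C bytes 8-11 and D bytes
   12-15.  */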
2035rtx
2036spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2037{
2038 unsigned char arr[16];
2039 arr[0] = (a >> 24) & 0xff;
2040 arr[1] = (a >> 16) & 0xff;
2041 arr[2] = (a >> 8) & 0xff;
2042 arr[3] = (a >> 0) & 0xff;
2043 arr[4] = (b >> 24) & 0xff;
2044 arr[5] = (b >> 16) & 0xff;
2045 arr[6] = (b >> 8) & 0xff;
2046 arr[7] = (b >> 0) & 0xff;
2047 arr[8] = (c >> 24) & 0xff;
2048 arr[9] = (c >> 16) & 0xff;
2049 arr[10] = (c >> 8) & 0xff;
2050 arr[11] = (c >> 0) & 0xff;
2051 arr[12] = (d >> 24) & 0xff;
2052 arr[13] = (d >> 16) & 0xff;
2053 arr[14] = (d >> 8) & 0xff;
2054 arr[15] = (d >> 0) & 0xff;
2055 return array_to_constant(mode, arr);
2056}
5a976006 2057\f
       2058/* Branch hint support.  */
5474166e 2059
644459d0 2060/* An array of these is used to propagate hints to predecessor blocks. */
2061struct spu_bb_info
2062{
5a976006 2063 rtx prop_jump; /* propagated from another block */
2064 int bb_index; /* the original block. */
644459d0 2065};
5a976006 2066static struct spu_bb_info *spu_bb_info;
644459d0 2067
5a976006 2068#define STOP_HINT_P(INSN) \
2069 (GET_CODE(INSN) == CALL_INSN \
2070 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2071 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2072
2073/* 1 when RTX is a hinted branch or its target. We keep track of
2074 what has been hinted so the safe-hint code can test it easily. */
2075#define HINTED_P(RTX) \
2076 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2077
2078/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2079#define SCHED_ON_EVEN_P(RTX) \
2080 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2081
       2082/* Emit a nop for INSN such that INSN and the nop will dual issue.  This assumes
2083 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2084 We check for TImode to handle a MULTI1 insn which has dual issued its
2085 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2086 ADDR_VEC insns. */
2087static void
2088emit_nop_for_insn (rtx insn)
644459d0 2089{
5a976006 2090 int p;
2091 rtx new_insn;
2092 p = get_pipe (insn);
2093 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2094 new_insn = emit_insn_after (gen_lnop (), insn);
2095 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2096 {
5a976006 2097 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2098 PUT_MODE (new_insn, TImode);
2099 PUT_MODE (insn, VOIDmode);
2100 }
2101 else
2102 new_insn = emit_insn_after (gen_lnop (), insn);
2103 recog_memoized (new_insn);
2104}
2105
2106/* Insert nops in basic blocks to meet dual issue alignment
2107 requirements. Also make sure hbrp and hint instructions are at least
2108 one cycle apart, possibly inserting a nop. */
2109static void
2110pad_bb(void)
2111{
2112 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2113 int length;
2114 int addr;
2115
2116 /* This sets up INSN_ADDRESSES. */
2117 shorten_branches (get_insns ());
2118
2119 /* Keep track of length added by nops. */
2120 length = 0;
2121
2122 prev_insn = 0;
2123 insn = get_insns ();
2124 if (!active_insn_p (insn))
2125 insn = next_active_insn (insn);
2126 for (; insn; insn = next_insn)
2127 {
2128 next_insn = next_active_insn (insn);
2129 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2130 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2131 {
5a976006 2132 if (hbr_insn)
2133 {
2134 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2135 int a1 = INSN_ADDRESSES (INSN_UID (insn));
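	      /* Enforce the one-cycle separation described above: when this
		 hint insn would issue too close to the previous one, put an
		 lnop between them.  */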
2136 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2137 || (a1 - a0 == 4))
2138 {
2139 prev_insn = emit_insn_before (gen_lnop (), insn);
2140 PUT_MODE (prev_insn, GET_MODE (insn));
2141 PUT_MODE (insn, TImode);
2142 length += 4;
2143 }
2144 }
2145 hbr_insn = insn;
2146 }
2147 if (INSN_CODE (insn) == CODE_FOR_blockage)
2148 {
2149 if (GET_MODE (insn) == TImode)
2150 PUT_MODE (next_insn, TImode);
2151 insn = next_insn;
2152 next_insn = next_active_insn (insn);
2153 }
2154 addr = INSN_ADDRESSES (INSN_UID (insn));
2155 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2156 {
2157 if (((addr + length) & 7) != 0)
2158 {
2159 emit_nop_for_insn (prev_insn);
2160 length += 4;
2161 }
644459d0 2162 }
5a976006 2163 else if (GET_MODE (insn) == TImode
2164 && ((next_insn && GET_MODE (next_insn) != TImode)
2165 || get_attr_type (insn) == TYPE_MULTI0)
2166 && ((addr + length) & 7) != 0)
2167 {
2168 /* prev_insn will always be set because the first insn is
2169 always 8-byte aligned. */
2170 emit_nop_for_insn (prev_insn);
2171 length += 4;
2172 }
2173 prev_insn = insn;
644459d0 2174 }
644459d0 2175}
2176
5a976006 2177\f
2178/* Routines for branch hints. */
2179
644459d0 2180static void
5a976006 2181spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2182 int distance, sbitmap blocks)
644459d0 2183{
5a976006 2184 rtx branch_label = 0;
2185 rtx hint;
2186 rtx insn;
2187 rtx table;
644459d0 2188
2189 if (before == 0 || branch == 0 || target == 0)
2190 return;
2191
5a976006 2192	  /* While scheduling we require hints to be no further than 600 bytes
        2193	     from the branch, so we need to enforce that here too.  */
644459d0 2194 if (distance > 600)
2195 return;
2196
5a976006 2197	  /* If BEFORE is a basic block note, emit the hint after the note.  */
2198 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2199 before = NEXT_INSN (before);
644459d0 2200
2201 branch_label = gen_label_rtx ();
2202 LABEL_NUSES (branch_label)++;
2203 LABEL_PRESERVE_P (branch_label) = 1;
2204 insn = emit_label_before (branch_label, branch);
2205 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2206 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2207
2208 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2209 recog_memoized (hint);
2210 HINTED_P (branch) = 1;
644459d0 2211
5a976006 2212 if (GET_CODE (target) == LABEL_REF)
2213 HINTED_P (XEXP (target, 0)) = 1;
2214 else if (tablejump_p (branch, 0, &table))
644459d0 2215 {
5a976006 2216 rtvec vec;
2217 int j;
2218 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2219 vec = XVEC (PATTERN (table), 0);
2220 else
2221 vec = XVEC (PATTERN (table), 1);
2222 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2223 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2224 }
5a976006 2225
2226 if (distance >= 588)
644459d0 2227 {
5a976006 2228 /* Make sure the hint isn't scheduled any earlier than this point,
        2229	 which could make it too far for the branch offset to fit.  */
2230 recog_memoized (emit_insn_before (gen_blockage (), hint));
2231 }
2232 else if (distance <= 8 * 4)
2233 {
2234 /* To guarantee at least 8 insns between the hint and branch we
2235 insert nops. */
2236 int d;
2237 for (d = distance; d < 8 * 4; d += 4)
2238 {
2239 insn =
2240 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2241 recog_memoized (insn);
2242 }
2243
2244 /* Make sure any nops inserted aren't scheduled before the hint. */
2245 recog_memoized (emit_insn_after (gen_blockage (), hint));
2246
2247 /* Make sure any nops inserted aren't scheduled after the call. */
2248 if (CALL_P (branch) && distance < 8 * 4)
2249 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2250 }
644459d0 2251}
2252
2253/* Returns 0 if we don't want a hint for this branch. Otherwise return
2254 the rtx for the branch target. */
2255static rtx
2256get_branch_target (rtx branch)
2257{
2258 if (GET_CODE (branch) == JUMP_INSN)
2259 {
2260 rtx set, src;
2261
2262 /* Return statements */
2263 if (GET_CODE (PATTERN (branch)) == RETURN)
2264 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2265
2266 /* jump table */
2267 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2268 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2269 return 0;
2270
2271 set = single_set (branch);
2272 src = SET_SRC (set);
2273 if (GET_CODE (SET_DEST (set)) != PC)
2274 abort ();
2275
2276 if (GET_CODE (src) == IF_THEN_ELSE)
2277 {
2278 rtx lab = 0;
2279 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2280 if (note)
2281 {
2282 /* If the more probable case is not a fall through, then
2283 try a branch hint. */
2284 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2285 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2286 && GET_CODE (XEXP (src, 1)) != PC)
2287 lab = XEXP (src, 1);
2288 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2289 && GET_CODE (XEXP (src, 2)) != PC)
2290 lab = XEXP (src, 2);
2291 }
2292 if (lab)
2293 {
2294 if (GET_CODE (lab) == RETURN)
2295 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2296 return lab;
2297 }
2298 return 0;
2299 }
2300
2301 return src;
2302 }
2303 else if (GET_CODE (branch) == CALL_INSN)
2304 {
2305 rtx call;
2306 /* All of our call patterns are in a PARALLEL and the CALL is
2307 the first pattern in the PARALLEL. */
2308 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2309 abort ();
2310 call = XVECEXP (PATTERN (branch), 0, 0);
2311 if (GET_CODE (call) == SET)
2312 call = SET_SRC (call);
2313 if (GET_CODE (call) != CALL)
2314 abort ();
2315 return XEXP (XEXP (call, 0), 0);
2316 }
2317 return 0;
2318}
2319
5a976006 2320/* The special $hbr register is used to prevent the insn scheduler from
2321 moving hbr insns across instructions which invalidate them. It
2322 should only be used in a clobber, and this function searches for
2323 insns which clobber it. */
2324static bool
2325insn_clobbers_hbr (rtx insn)
2326{
2327 if (INSN_P (insn)
2328 && GET_CODE (PATTERN (insn)) == PARALLEL)
2329 {
2330 rtx parallel = PATTERN (insn);
2331 rtx clobber;
2332 int j;
2333 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2334 {
2335 clobber = XVECEXP (parallel, 0, j);
2336 if (GET_CODE (clobber) == CLOBBER
2337 && GET_CODE (XEXP (clobber, 0)) == REG
2338 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2339 return 1;
2340 }
2341 }
2342 return 0;
2343}
2344
2345/* Search up to 32 insns starting at FIRST:
2346 - at any kind of hinted branch, just return
2347 - at any unconditional branch in the first 15 insns, just return
2348 - at a call or indirect branch, after the first 15 insns, force it to
2349 an even address and return
2350 - at any unconditional branch, after the first 15 insns, force it to
2351 an even address.
        2352	 At the end of the search, insert an hbrp within 4 insns of FIRST,
2353 and an hbrp within 16 instructions of FIRST.
2354 */
644459d0 2355static void
5a976006 2356insert_hbrp_for_ilb_runout (rtx first)
644459d0 2357{
5a976006 2358 rtx insn, before_4 = 0, before_16 = 0;
2359 int addr = 0, length, first_addr = -1;
2360 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2361 int insert_lnop_after = 0;
2362 for (insn = first; insn; insn = NEXT_INSN (insn))
2363 if (INSN_P (insn))
2364 {
2365 if (first_addr == -1)
2366 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2367 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2368 length = get_attr_length (insn);
2369
2370 if (before_4 == 0 && addr + length >= 4 * 4)
2371 before_4 = insn;
2372 /* We test for 14 instructions because the first hbrp will add
2373 up to 2 instructions. */
2374 if (before_16 == 0 && addr + length >= 14 * 4)
2375 before_16 = insn;
2376
2377 if (INSN_CODE (insn) == CODE_FOR_hbr)
2378 {
2379 /* Make sure an hbrp is at least 2 cycles away from a hint.
2380 Insert an lnop after the hbrp when necessary. */
2381 if (before_4 == 0 && addr > 0)
2382 {
2383 before_4 = insn;
2384 insert_lnop_after |= 1;
2385 }
2386 else if (before_4 && addr <= 4 * 4)
2387 insert_lnop_after |= 1;
2388 if (before_16 == 0 && addr > 10 * 4)
2389 {
2390 before_16 = insn;
2391 insert_lnop_after |= 2;
2392 }
2393 else if (before_16 && addr <= 14 * 4)
2394 insert_lnop_after |= 2;
2395 }
644459d0 2396
5a976006 2397 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2398 {
2399 if (addr < hbrp_addr0)
2400 hbrp_addr0 = addr;
2401 else if (addr < hbrp_addr1)
2402 hbrp_addr1 = addr;
2403 }
644459d0 2404
5a976006 2405 if (CALL_P (insn) || JUMP_P (insn))
2406 {
2407 if (HINTED_P (insn))
2408 return;
2409
2410 /* Any branch after the first 15 insns should be on an even
2411 address to avoid a special case branch. There might be
2412 some nops and/or hbrps inserted, so we test after 10
2413 insns. */
2414 if (addr > 10 * 4)
2415 SCHED_ON_EVEN_P (insn) = 1;
2416 }
644459d0 2417
5a976006 2418 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2419 return;
2420
2421
2422 if (addr + length >= 32 * 4)
644459d0 2423 {
5a976006 2424 gcc_assert (before_4 && before_16);
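	      /* hbrp_addr0 > 4*4 means no hbrp was seen within the first 4
		 insns, so emit one there; the second test below does the same
		 for the 16-insn window.  */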
2425 if (hbrp_addr0 > 4 * 4)
644459d0 2426 {
5a976006 2427 insn =
2428 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2429 recog_memoized (insn);
2430 INSN_ADDRESSES_NEW (insn,
2431 INSN_ADDRESSES (INSN_UID (before_4)));
2432 PUT_MODE (insn, GET_MODE (before_4));
2433 PUT_MODE (before_4, TImode);
2434 if (insert_lnop_after & 1)
644459d0 2435 {
5a976006 2436 insn = emit_insn_before (gen_lnop (), before_4);
2437 recog_memoized (insn);
2438 INSN_ADDRESSES_NEW (insn,
2439 INSN_ADDRESSES (INSN_UID (before_4)));
2440 PUT_MODE (insn, TImode);
644459d0 2441 }
644459d0 2442 }
5a976006 2443 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2444 && hbrp_addr1 > 16 * 4)
644459d0 2445 {
5a976006 2446 insn =
2447 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2448 recog_memoized (insn);
2449 INSN_ADDRESSES_NEW (insn,
2450 INSN_ADDRESSES (INSN_UID (before_16)));
2451 PUT_MODE (insn, GET_MODE (before_16));
2452 PUT_MODE (before_16, TImode);
2453 if (insert_lnop_after & 2)
644459d0 2454 {
5a976006 2455 insn = emit_insn_before (gen_lnop (), before_16);
2456 recog_memoized (insn);
2457 INSN_ADDRESSES_NEW (insn,
2458 INSN_ADDRESSES (INSN_UID
2459 (before_16)));
2460 PUT_MODE (insn, TImode);
644459d0 2461 }
2462 }
5a976006 2463 return;
644459d0 2464 }
644459d0 2465 }
5a976006 2466 else if (BARRIER_P (insn))
2467 return;
644459d0 2468
644459d0 2469}
5a976006 2470
2471/* The SPU might hang when it executes 48 inline instructions after a
2472 hinted branch jumps to its hinted target. The beginning of a
2473 function and the return from a call might have been hinted, and must
2474 be handled as well. To prevent a hang we insert 2 hbrps. The first
2475 should be within 6 insns of the branch target. The second should be
2476 within 22 insns of the branch target. When determining if hbrps are
        2477	 necessary, we look for only 32 inline instructions, because up to
        2478	 12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2479 new hbrps, we insert them within 4 and 16 insns of the target. */
644459d0 2480static void
5a976006 2481insert_hbrp (void)
644459d0 2482{
5a976006 2483 rtx insn;
2484 if (TARGET_SAFE_HINTS)
644459d0 2485 {
5a976006 2486 shorten_branches (get_insns ());
2487 /* Insert hbrp at beginning of function */
2488 insn = next_active_insn (get_insns ());
2489 if (insn)
2490 insert_hbrp_for_ilb_runout (insn);
2491 /* Insert hbrp after hinted targets. */
2492 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2493 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2494 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2495 }
644459d0 2496}
2497
5a976006 2498static int in_spu_reorg;
2499
2500/* Insert branch hints. There are no branch optimizations after this
2501 pass, so it's safe to set our branch hints now. */
644459d0 2502static void
5a976006 2503spu_machine_dependent_reorg (void)
644459d0 2504{
5a976006 2505 sbitmap blocks;
2506 basic_block bb;
2507 rtx branch, insn;
2508 rtx branch_target = 0;
2509 int branch_addr = 0, insn_addr, required_dist = 0;
2510 int i;
2511 unsigned int j;
644459d0 2512
5a976006 2513 if (!TARGET_BRANCH_HINTS || optimize == 0)
2514 {
2515 /* We still do it for unoptimized code because an external
2516 function might have hinted a call or return. */
2517 insert_hbrp ();
2518 pad_bb ();
2519 return;
2520 }
644459d0 2521
5a976006 2522 blocks = sbitmap_alloc (last_basic_block);
2523 sbitmap_zero (blocks);
644459d0 2524
5a976006 2525 in_spu_reorg = 1;
2526 compute_bb_for_insn ();
2527
2528 compact_blocks ();
2529
2530 spu_bb_info =
2531 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2532 sizeof (struct spu_bb_info));
2533
2534 /* We need exact insn addresses and lengths. */
2535 shorten_branches (get_insns ());
2536
2537 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2538 {
5a976006 2539 bb = BASIC_BLOCK (i);
2540 branch = 0;
2541 if (spu_bb_info[i].prop_jump)
644459d0 2542 {
5a976006 2543 branch = spu_bb_info[i].prop_jump;
2544 branch_target = get_branch_target (branch);
2545 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2546 required_dist = spu_hint_dist;
2547 }
        2548      /* Search from the end of a block to the beginning.  In this loop, find
        2549         jumps which need a branch hint and emit the hint only when:
2550 - it's an indirect branch and we're at the insn which sets
2551 the register
2552 - we're at an insn that will invalidate the hint. e.g., a
2553 call, another hint insn, inline asm that clobbers $hbr, and
2554 some inlined operations (divmodsi4). Don't consider jumps
2555 because they are only at the end of a block and are
2556 considered when we are deciding whether to propagate
2557 - we're getting too far away from the branch. The hbr insns
2558 only have a signed 10 bit offset
2559 We go back as far as possible so the branch will be considered
2560 for propagation when we get to the beginning of the block. */
2561 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2562 {
2563 if (INSN_P (insn))
2564 {
2565 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2566 if (branch
2567 && ((GET_CODE (branch_target) == REG
2568 && set_of (branch_target, insn) != NULL_RTX)
2569 || insn_clobbers_hbr (insn)
2570 || branch_addr - insn_addr > 600))
2571 {
2572 rtx next = NEXT_INSN (insn);
2573 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2574 if (insn != BB_END (bb)
2575 && branch_addr - next_addr >= required_dist)
2576 {
2577 if (dump_file)
2578 fprintf (dump_file,
2579 "hint for %i in block %i before %i\n",
2580 INSN_UID (branch), bb->index,
2581 INSN_UID (next));
2582 spu_emit_branch_hint (next, branch, branch_target,
2583 branch_addr - next_addr, blocks);
2584 }
2585 branch = 0;
2586 }
2587
2588 /* JUMP_P will only be true at the end of a block. When
2589 branch is already set it means we've previously decided
2590 to propagate a hint for that branch into this block. */
2591 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2592 {
2593 branch = 0;
2594 if ((branch_target = get_branch_target (insn)))
2595 {
2596 branch = insn;
2597 branch_addr = insn_addr;
2598 required_dist = spu_hint_dist;
2599 }
2600 }
2601 }
2602 if (insn == BB_HEAD (bb))
2603 break;
2604 }
2605
2606 if (branch)
2607 {
2608 /* If we haven't emitted a hint for this branch yet, it might
2609 be profitable to emit it in one of the predecessor blocks,
2610 especially for loops. */
2611 rtx bbend;
2612 basic_block prev = 0, prop = 0, prev2 = 0;
2613 int loop_exit = 0, simple_loop = 0;
2614 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2615
2616 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2617 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2618 prev = EDGE_PRED (bb, j)->src;
2619 else
2620 prev2 = EDGE_PRED (bb, j)->src;
2621
2622 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2623 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2624 loop_exit = 1;
2625 else if (EDGE_SUCC (bb, j)->dest == bb)
2626 simple_loop = 1;
2627
2628 /* If this branch is a loop exit then propagate to previous
2629 fallthru block. This catches the cases when it is a simple
2630 loop or when there is an initial branch into the loop. */
2631 if (prev && (loop_exit || simple_loop)
2632 && prev->loop_depth <= bb->loop_depth)
2633 prop = prev;
2634
        2635	  /* If there is only one adjacent predecessor, don't propagate
        2636	     outside this loop.  This loop_depth test isn't perfect, but
2637 I'm not sure the loop_father member is valid at this point. */
2638 else if (prev && single_pred_p (bb)
2639 && prev->loop_depth == bb->loop_depth)
2640 prop = prev;
2641
2642 /* If this is the JOIN block of a simple IF-THEN then
        2643	     propagate the hint to the HEADER block.  */
2644 else if (prev && prev2
2645 && EDGE_COUNT (bb->preds) == 2
2646 && EDGE_COUNT (prev->preds) == 1
2647 && EDGE_PRED (prev, 0)->src == prev2
2648 && prev2->loop_depth == bb->loop_depth
2649 && GET_CODE (branch_target) != REG)
2650 prop = prev;
2651
2652 /* Don't propagate when:
2653 - this is a simple loop and the hint would be too far
2654 - this is not a simple loop and there are 16 insns in
2655 this block already
2656 - the predecessor block ends in a branch that will be
2657 hinted
2658 - the predecessor block ends in an insn that invalidates
2659 the hint */
2660 if (prop
2661 && prop->index >= 0
2662 && (bbend = BB_END (prop))
2663 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2664 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2665 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2666 {
2667 if (dump_file)
2668 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2669 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2670 bb->index, prop->index, bb->loop_depth,
2671 INSN_UID (branch), loop_exit, simple_loop,
2672 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2673
2674 spu_bb_info[prop->index].prop_jump = branch;
2675 spu_bb_info[prop->index].bb_index = i;
2676 }
2677 else if (branch_addr - next_addr >= required_dist)
2678 {
2679 if (dump_file)
2680 fprintf (dump_file, "hint for %i in block %i before %i\n",
2681 INSN_UID (branch), bb->index,
2682 INSN_UID (NEXT_INSN (insn)));
2683 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2684 branch_addr - next_addr, blocks);
2685 }
2686 branch = 0;
644459d0 2687 }
644459d0 2688 }
5a976006 2689 free (spu_bb_info);
644459d0 2690
5a976006 2691 if (!sbitmap_empty_p (blocks))
2692 find_many_sub_basic_blocks (blocks);
2693
2694 /* We have to schedule to make sure alignment is ok. */
2695 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2696
        2697  /* The hints need to be scheduled, so run the scheduler again.  */
2698 schedule_insns ();
2699
2700 insert_hbrp ();
2701
2702 pad_bb ();
2703
2704
2705 if (spu_flag_var_tracking)
644459d0 2706 {
5a976006 2707 df_analyze ();
2708 timevar_push (TV_VAR_TRACKING);
2709 variable_tracking_main ();
2710 timevar_pop (TV_VAR_TRACKING);
2711 df_finish_pass (false);
644459d0 2712 }
5a976006 2713
2714 free_bb_for_insn ();
2715
2716 in_spu_reorg = 0;
644459d0 2717}
2718\f
2719
2720/* Insn scheduling routines, primarily for dual issue. */
2721static int
2722spu_sched_issue_rate (void)
2723{
2724 return 2;
2725}
2726
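/* Return 1 when INSN reads or writes memory and so occupies the
   load/store unit.  */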
2727static int
5a976006 2728uses_ls_unit(rtx insn)
644459d0 2729{
5a976006 2730 rtx set = single_set (insn);
2731 if (set != 0
2732 && (GET_CODE (SET_DEST (set)) == MEM
2733 || GET_CODE (SET_SRC (set)) == MEM))
2734 return 1;
2735 return 0;
644459d0 2736}
2737
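/* Return which pipe INSN issues on, judging by its type attribute:
   0 for pipe 0 (arithmetic types), 1 for pipe 1 (loads/stores,
   shuffles, branches, hints), -1 for inline asm and MULTI0, and -2
   for TYPE_CONVERT.  */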
2738static int
2739get_pipe (rtx insn)
2740{
2741 enum attr_type t;
2742 /* Handle inline asm */
2743 if (INSN_CODE (insn) == -1)
2744 return -1;
2745 t = get_attr_type (insn);
2746 switch (t)
2747 {
2748 case TYPE_CONVERT:
2749 return -2;
2750 case TYPE_MULTI0:
2751 return -1;
2752
2753 case TYPE_FX2:
2754 case TYPE_FX3:
2755 case TYPE_SPR:
2756 case TYPE_NOP:
2757 case TYPE_FXB:
2758 case TYPE_FPD:
2759 case TYPE_FP6:
2760 case TYPE_FP7:
644459d0 2761 return 0;
2762
2763 case TYPE_LNOP:
2764 case TYPE_SHUF:
2765 case TYPE_LOAD:
2766 case TYPE_STORE:
2767 case TYPE_BR:
2768 case TYPE_MULTI1:
2769 case TYPE_HBR:
5a976006 2770 case TYPE_IPREFETCH:
644459d0 2771 return 1;
2772 default:
2773 abort ();
2774 }
2775}
2776
5a976006 2777
2778/* haifa-sched.c has a static variable that keeps track of the current
2779 cycle. It is passed to spu_sched_reorder, and we record it here for
2780 use by spu_sched_variable_issue. It won't be accurate if the
        2781   scheduler updates its clock_var between the two calls.  */
2782static int clock_var;
2783
2784/* This is used to keep track of insn alignment. Set to 0 at the
2785 beginning of each block and increased by the "length" attr of each
2786 insn scheduled. */
2787static int spu_sched_length;
2788
2789/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2790 ready list appropriately in spu_sched_reorder(). */
2791static int pipe0_clock;
2792static int pipe1_clock;
2793
2794static int prev_clock_var;
2795
2796static int prev_priority;
2797
2798/* The SPU needs to load the next ilb sometime during the execution of
2799 the previous ilb. There is a potential conflict if every cycle has a
2800 load or store. To avoid the conflict we make sure the load/store
2801 unit is free for at least one cycle during the execution of insns in
2802 the previous ilb. */
2803static int spu_ls_first;
2804static int prev_ls_clock;
2805
2806static void
2807spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2808 int max_ready ATTRIBUTE_UNUSED)
2809{
2810 spu_sched_length = 0;
2811}
2812
2813static void
2814spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2815 int max_ready ATTRIBUTE_UNUSED)
2816{
2817 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2818 {
2819 /* When any block might be at least 8-byte aligned, assume they
2820 will all be at least 8-byte aligned to make sure dual issue
2821 works out correctly. */
2822 spu_sched_length = 0;
2823 }
2824 spu_ls_first = INT_MAX;
2825 clock_var = -1;
2826 prev_ls_clock = -1;
2827 pipe0_clock = -1;
2828 pipe1_clock = -1;
2829 prev_clock_var = -1;
2830 prev_priority = -1;
2831}
2832
644459d0 2833static int
5a976006 2834spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2835 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2836{
5a976006 2837 int len;
2838 int p;
644459d0 2839 if (GET_CODE (PATTERN (insn)) == USE
2840 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2841 || (len = get_attr_length (insn)) == 0)
2842 return more;
2843
2844 spu_sched_length += len;
2845
2846 /* Reset on inline asm */
2847 if (INSN_CODE (insn) == -1)
2848 {
2849 spu_ls_first = INT_MAX;
2850 pipe0_clock = -1;
2851 pipe1_clock = -1;
2852 return 0;
2853 }
2854 p = get_pipe (insn);
2855 if (p == 0)
2856 pipe0_clock = clock_var;
2857 else
2858 pipe1_clock = clock_var;
2859
2860 if (in_spu_reorg)
2861 {
2862 if (clock_var - prev_ls_clock > 1
2863 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2864 spu_ls_first = INT_MAX;
2865 if (uses_ls_unit (insn))
2866 {
2867 if (spu_ls_first == INT_MAX)
2868 spu_ls_first = spu_sched_length;
2869 prev_ls_clock = clock_var;
2870 }
2871
2872 /* The scheduler hasn't inserted the nop, but we will later on.
2873 Include those nops in spu_sched_length. */
2874 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2875 spu_sched_length += 4;
2876 prev_clock_var = clock_var;
2877
2878 /* more is -1 when called from spu_sched_reorder for new insns
2879 that don't have INSN_PRIORITY */
2880 if (more >= 0)
2881 prev_priority = INSN_PRIORITY (insn);
2882 }
2883
        2884  /* Always try issuing more insns.  spu_sched_reorder will decide
2885 when the cycle should be advanced. */
2886 return 1;
2887}
2888
2889/* This function is called for both TARGET_SCHED_REORDER and
2890 TARGET_SCHED_REORDER2. */
2891static int
2892spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2893 rtx *ready, int *nreadyp, int clock)
2894{
2895 int i, nready = *nreadyp;
2896 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2897 rtx insn;
2898
2899 clock_var = clock;
2900
2901 if (nready <= 0 || pipe1_clock >= clock)
2902 return 0;
2903
2904 /* Find any rtl insns that don't generate assembly insns and schedule
2905 them first. */
2906 for (i = nready - 1; i >= 0; i--)
2907 {
2908 insn = ready[i];
2909 if (INSN_CODE (insn) == -1
2910 || INSN_CODE (insn) == CODE_FOR_blockage
2911 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2912 {
2913 ready[i] = ready[nready - 1];
2914 ready[nready - 1] = insn;
2915 return 1;
2916 }
2917 }
2918
2919 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2920 for (i = 0; i < nready; i++)
2921 if (INSN_CODE (ready[i]) != -1)
2922 {
2923 insn = ready[i];
2924 switch (get_attr_type (insn))
2925 {
2926 default:
2927 case TYPE_MULTI0:
2928 case TYPE_CONVERT:
2929 case TYPE_FX2:
2930 case TYPE_FX3:
2931 case TYPE_SPR:
2932 case TYPE_NOP:
2933 case TYPE_FXB:
2934 case TYPE_FPD:
2935 case TYPE_FP6:
2936 case TYPE_FP7:
2937 pipe_0 = i;
2938 break;
2939 case TYPE_LOAD:
2940 case TYPE_STORE:
2941 pipe_ls = i;
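	  /* Fall through: loads and stores also issue on pipe 1.  */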
2942 case TYPE_LNOP:
2943 case TYPE_SHUF:
2944 case TYPE_BR:
2945 case TYPE_MULTI1:
2946 case TYPE_HBR:
2947 pipe_1 = i;
2948 break;
2949 case TYPE_IPREFETCH:
2950 pipe_hbrp = i;
2951 break;
2952 }
2953 }
2954
2955 /* In the first scheduling phase, schedule loads and stores together
2956 to increase the chance they will get merged during postreload CSE. */
2957 if (!reload_completed && pipe_ls >= 0)
2958 {
2959 insn = ready[pipe_ls];
2960 ready[pipe_ls] = ready[nready - 1];
2961 ready[nready - 1] = insn;
2962 return 1;
2963 }
2964
2965 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2966 if (pipe_hbrp >= 0)
2967 pipe_1 = pipe_hbrp;
2968
2969 /* When we have loads/stores in every cycle of the last 15 insns and
2970 we are about to schedule another load/store, emit an hbrp insn
2971 instead. */
2972 if (in_spu_reorg
2973 && spu_sched_length - spu_ls_first >= 4 * 15
2974 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2975 {
2976 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2977 recog_memoized (insn);
2978 if (pipe0_clock < clock)
2979 PUT_MODE (insn, TImode);
2980 spu_sched_variable_issue (file, verbose, insn, -1);
2981 return 0;
2982 }
2983
2984 /* In general, we want to emit nops to increase dual issue, but dual
2985 issue isn't faster when one of the insns could be scheduled later
        2986     without affecting the critical path.  We look at INSN_PRIORITY to
        2987     make a good guess, but it isn't perfect, so -mdual-nops=n can be
        2988     used to adjust it.  */
2989 if (in_spu_reorg && spu_dual_nops < 10)
2990 {
        2991      /* When we are at an even address and we are not issuing nops to
2992 improve scheduling then we need to advance the cycle. */
2993 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2994 && (spu_dual_nops == 0
2995 || (pipe_1 != -1
2996 && prev_priority >
2997 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2998 return 0;
2999
3000 /* When at an odd address, schedule the highest priority insn
3001 without considering pipeline. */
3002 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3003 && (spu_dual_nops == 0
3004 || (prev_priority >
3005 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3006 return 1;
3007 }
3008
3009
        3010  /* If we haven't issued a pipe0 insn yet this cycle and there is a
        3011     pipe0 insn in the ready list, schedule it.  */
3012 if (pipe0_clock < clock && pipe_0 >= 0)
3013 schedule_i = pipe_0;
3014
3015 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3016 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3017 else
3018 schedule_i = pipe_1;
3019
3020 if (schedule_i > -1)
3021 {
3022 insn = ready[schedule_i];
3023 ready[schedule_i] = ready[nready - 1];
3024 ready[nready - 1] = insn;
3025 return 1;
3026 }
3027 return 0;
644459d0 3028}
3029
3030/* INSN is dependent on DEP_INSN. */
3031static int
5a976006 3032spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3033{
5a976006 3034 rtx set;
3035
3036 /* The blockage pattern is used to prevent instructions from being
3037 moved across it and has no cost. */
3038 if (INSN_CODE (insn) == CODE_FOR_blockage
3039 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3040 return 0;
3041
3042 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3043 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3044 return 0;
3045
3046 /* Make sure hbrps are spread out. */
3047 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3048 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3049 return 8;
3050
3051 /* Make sure hints and hbrps are 2 cycles apart. */
3052 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3053 || INSN_CODE (insn) == CODE_FOR_hbr)
3054 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3055 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3056 return 2;
3057
3058 /* An hbrp has no real dependency on other insns. */
3059 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3060 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3061 return 0;
3062
3063 /* Assuming that it is unlikely an argument register will be used in
3064 the first cycle of the called function, we reduce the cost for
3065 slightly better scheduling of dep_insn. When not hinted, the
3066 mispredicted branch would hide the cost as well. */
3067 if (CALL_P (insn))
3068 {
3069 rtx target = get_branch_target (insn);
3070 if (GET_CODE (target) != REG || !set_of (target, insn))
3071 return cost - 2;
3072 return cost;
3073 }
3074
3075 /* And when returning from a function, let's assume the return values
3076 are completed sooner too. */
3077 if (CALL_P (dep_insn))
644459d0 3078 return cost - 2;
5a976006 3079
        3080  /* Make sure an instruction that loads from the back chain is scheduled
3081 away from the return instruction so a hint is more likely to get
3082 issued. */
3083 if (INSN_CODE (insn) == CODE_FOR__return
3084 && (set = single_set (dep_insn))
3085 && GET_CODE (SET_DEST (set)) == REG
3086 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3087 return 20;
3088
644459d0 3089 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3090 scheduler makes every insn in a block anti-dependent on the final
3091 jump_insn. We adjust here so higher cost insns will get scheduled
3092 earlier. */
5a976006 3093 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3094 return insn_cost (dep_insn) - 3;
5a976006 3095
644459d0 3096 return cost;
3097}
3098\f
3099/* Create a CONST_DOUBLE from a string. */
3100struct rtx_def *
3101spu_float_const (const char *string, enum machine_mode mode)
3102{
3103 REAL_VALUE_TYPE value;
3104 value = REAL_VALUE_ATOF (string, mode);
3105 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3106}
3107
644459d0 3108int
3109spu_constant_address_p (rtx x)
3110{
3111 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3112 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3113 || GET_CODE (x) == HIGH);
3114}
3115
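/* Classify which il-class instruction can load VAL in one instruction,
   per the range checks below: il takes a sign-extended 16-bit
   immediate, ila an 18-bit unsigned immediate, ilh replicates a 16-bit
   pattern into both halfwords, and ilhu sets the upper halfword with
   the lower 16 bits zero.  Returns SPU_NONE otherwise.  */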
3116static enum spu_immediate
3117which_immediate_load (HOST_WIDE_INT val)
3118{
3119 gcc_assert (val == trunc_int_for_mode (val, SImode));
3120
3121 if (val >= -0x8000 && val <= 0x7fff)
3122 return SPU_IL;
3123 if (val >= 0 && val <= 0x3ffff)
3124 return SPU_ILA;
3125 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3126 return SPU_ILH;
3127 if ((val & 0xffff) == 0)
3128 return SPU_ILHU;
3129
3130 return SPU_NONE;
3131}
3132
dea01258 3133/* Return true when OP can be loaded by one of the il instructions, or
3134 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3135int
3136immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3137{
3138 if (CONSTANT_P (op))
3139 {
3140 enum immediate_class c = classify_immediate (op, mode);
5df189be 3141 return c == IC_IL1 || c == IC_IL1s
3072d30e 3142 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3143 }
3144 return 0;
3145}
3146
        3147/* Return true if the first SIZE bytes of ARR form a constant that can be
        3148   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3149 represent the size and offset of the instruction to use. */
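/* For example, { 0x10,0x11,0x12,0x13, 2,3, 0x16,...,0x1f } is accepted
   with *PRUN = 2 and *PSTART = 4, i.e. a halfword slot at offset 4.  */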
3150static int
3151cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3152{
3153 int cpat, run, i, start;
3154 cpat = 1;
3155 run = 0;
3156 start = -1;
3157 for (i = 0; i < size && cpat; i++)
3158 if (arr[i] != i+16)
3159 {
3160 if (!run)
3161 {
3162 start = i;
3163 if (arr[i] == 3)
3164 run = 1;
3165 else if (arr[i] == 2 && arr[i+1] == 3)
3166 run = 2;
3167 else if (arr[i] == 0)
3168 {
3169 while (arr[i+run] == run && i+run < 16)
3170 run++;
3171 if (run != 4 && run != 8)
3172 cpat = 0;
3173 }
3174 else
3175 cpat = 0;
3176 if ((i & (run-1)) != 0)
3177 cpat = 0;
3178 i += run;
3179 }
3180 else
3181 cpat = 0;
3182 }
b01a6dc3 3183 if (cpat && (run || size < 16))
dea01258 3184 {
3185 if (run == 0)
3186 run = 1;
3187 if (prun)
3188 *prun = run;
3189 if (pstart)
3190 *pstart = start == -1 ? 16-run : start;
3191 return 1;
3192 }
3193 return 0;
3194}
3195
3196/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3197 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3198static enum immediate_class
3199classify_immediate (rtx op, enum machine_mode mode)
644459d0 3200{
3201 HOST_WIDE_INT val;
3202 unsigned char arr[16];
5df189be 3203 int i, j, repeated, fsmbi, repeat;
dea01258 3204
3205 gcc_assert (CONSTANT_P (op));
3206
644459d0 3207 if (GET_MODE (op) != VOIDmode)
3208 mode = GET_MODE (op);
3209
dea01258 3210 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3211 if (!flag_pic
3212 && mode == V4SImode
dea01258 3213 && GET_CODE (op) == CONST_VECTOR
3214 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3215 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3216 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3217 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3218 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3219 op = CONST_VECTOR_ELT (op, 0);
644459d0 3220
dea01258 3221 switch (GET_CODE (op))
3222 {
3223 case SYMBOL_REF:
3224 case LABEL_REF:
3225 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3226
dea01258 3227 case CONST:
0cfc65d4 3228 /* We can never know if the resulting address fits in 18 bits and can be
3229 loaded with ila. For now, assume the address will not overflow if
3230 the displacement is "small" (fits 'K' constraint). */
3231 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3232 {
3233 rtx sym = XEXP (XEXP (op, 0), 0);
3234 rtx cst = XEXP (XEXP (op, 0), 1);
3235
3236 if (GET_CODE (sym) == SYMBOL_REF
3237 && GET_CODE (cst) == CONST_INT
3238 && satisfies_constraint_K (cst))
3239 return IC_IL1s;
3240 }
3241 return IC_IL2s;
644459d0 3242
dea01258 3243 case HIGH:
3244 return IC_IL1s;
3245
3246 case CONST_VECTOR:
3247 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3248 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3249 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3250 return IC_POOL;
3251 /* Fall through. */
3252
3253 case CONST_INT:
3254 case CONST_DOUBLE:
3255 constant_to_array (mode, op, arr);
644459d0 3256
dea01258 3257 /* Check that each 4-byte slot is identical. */
3258 repeated = 1;
3259 for (i = 4; i < 16; i += 4)
3260 for (j = 0; j < 4; j++)
3261 if (arr[j] != arr[i + j])
3262 repeated = 0;
3263
3264 if (repeated)
3265 {
3266 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3267 val = trunc_int_for_mode (val, SImode);
3268
3269 if (which_immediate_load (val) != SPU_NONE)
3270 return IC_IL1;
3271 }
3272
3273 /* Any mode of 2 bytes or smaller can be loaded with an il
3274 instruction. */
3275 gcc_assert (GET_MODE_SIZE (mode) > 2);
3276
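      /* Check for constants fsmbi can handle: every byte is either zero
	 or one repeated value.  When that value is 0xff a single fsmbi
	 suffices (IC_FSMBI); otherwise classify as IC_FSMBI2.  */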
3277 fsmbi = 1;
5df189be 3278 repeat = 0;
dea01258 3279 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3280 if (arr[i] != 0 && repeat == 0)
3281 repeat = arr[i];
3282 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3283 fsmbi = 0;
3284 if (fsmbi)
5df189be 3285 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3286
3287 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3288 return IC_CPAT;
3289
3290 if (repeated)
3291 return IC_IL2;
3292
3293 return IC_POOL;
3294 default:
3295 break;
3296 }
3297 gcc_unreachable ();
644459d0 3298}
3299
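/* Like which_immediate_load, but for the logical-immediate forms: ori,
   orhi and orbi take a 10-bit signed immediate repeated over words,
   halfwords or bytes, and iohl ORs a 16-bit immediate into the low
   halfword.  */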
3300static enum spu_immediate
3301which_logical_immediate (HOST_WIDE_INT val)
3302{
3303 gcc_assert (val == trunc_int_for_mode (val, SImode));
3304
3305 if (val >= -0x200 && val <= 0x1ff)
3306 return SPU_ORI;
3307 if (val >= 0 && val <= 0xffff)
3308 return SPU_IOHL;
3309 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3310 {
3311 val = trunc_int_for_mode (val, HImode);
3312 if (val >= -0x200 && val <= 0x1ff)
3313 return SPU_ORHI;
3314 if ((val & 0xff) == ((val >> 8) & 0xff))
3315 {
3316 val = trunc_int_for_mode (val, QImode);
3317 if (val >= -0x200 && val <= 0x1ff)
3318 return SPU_ORBI;
3319 }
3320 }
3321 return SPU_NONE;
3322}
3323
5df189be 3324/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3325 CONST_DOUBLEs. */
3326static int
3327const_vector_immediate_p (rtx x)
3328{
3329 int i;
3330 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3331 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3332 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3333 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3334 return 0;
3335 return 1;
3336}
3337
644459d0 3338int
3339logical_immediate_p (rtx op, enum machine_mode mode)
3340{
3341 HOST_WIDE_INT val;
3342 unsigned char arr[16];
3343 int i, j;
3344
3345 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3346 || GET_CODE (op) == CONST_VECTOR);
3347
5df189be 3348 if (GET_CODE (op) == CONST_VECTOR
3349 && !const_vector_immediate_p (op))
3350 return 0;
3351
644459d0 3352 if (GET_MODE (op) != VOIDmode)
3353 mode = GET_MODE (op);
3354
3355 constant_to_array (mode, op, arr);
3356
3357 /* Check that bytes are repeated. */
3358 for (i = 4; i < 16; i += 4)
3359 for (j = 0; j < 4; j++)
3360 if (arr[j] != arr[i + j])
3361 return 0;
3362
3363 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3364 val = trunc_int_for_mode (val, SImode);
3365
3366 i = which_logical_immediate (val);
3367 return i != SPU_NONE && i != SPU_IOHL;
3368}
3369
3370int
3371iohl_immediate_p (rtx op, enum machine_mode mode)
3372{
3373 HOST_WIDE_INT val;
3374 unsigned char arr[16];
3375 int i, j;
3376
3377 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3378 || GET_CODE (op) == CONST_VECTOR);
3379
5df189be 3380 if (GET_CODE (op) == CONST_VECTOR
3381 && !const_vector_immediate_p (op))
3382 return 0;
3383
644459d0 3384 if (GET_MODE (op) != VOIDmode)
3385 mode = GET_MODE (op);
3386
3387 constant_to_array (mode, op, arr);
3388
3389 /* Check that bytes are repeated. */
3390 for (i = 4; i < 16; i += 4)
3391 for (j = 0; j < 4; j++)
3392 if (arr[j] != arr[i + j])
3393 return 0;
3394
3395 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3396 val = trunc_int_for_mode (val, SImode);
3397
3398 return val >= 0 && val <= 0xffff;
3399}
3400
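/* Return true when every element of OP has the same value and that
   value, truncated to the element's integer mode, lies in the range
   [LOW, HIGH].  */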
3401int
3402arith_immediate_p (rtx op, enum machine_mode mode,
3403 HOST_WIDE_INT low, HOST_WIDE_INT high)
3404{
3405 HOST_WIDE_INT val;
3406 unsigned char arr[16];
3407 int bytes, i, j;
3408
3409 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3410 || GET_CODE (op) == CONST_VECTOR);
3411
5df189be 3412 if (GET_CODE (op) == CONST_VECTOR
3413 && !const_vector_immediate_p (op))
3414 return 0;
3415
644459d0 3416 if (GET_MODE (op) != VOIDmode)
3417 mode = GET_MODE (op);
3418
3419 constant_to_array (mode, op, arr);
3420
3421 if (VECTOR_MODE_P (mode))
3422 mode = GET_MODE_INNER (mode);
3423
3424 bytes = GET_MODE_SIZE (mode);
3425 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3426
3427 /* Check that bytes are repeated. */
3428 for (i = bytes; i < 16; i += bytes)
3429 for (j = 0; j < bytes; j++)
3430 if (arr[j] != arr[i + j])
3431 return 0;
3432
3433 val = arr[0];
3434 for (j = 1; j < bytes; j++)
3435 val = (val << 8) | arr[j];
3436
3437 val = trunc_int_for_mode (val, mode);
3438
3439 return val >= low && val <= high;
3440}
3441
3442/* We accept:
5b865faf 3443 - any 32-bit constant (SImode, SFmode)
644459d0 3444 - any constant that can be generated with fsmbi (any mode)
5b865faf 3445   - a 64-bit constant where the high and low 32-bit halves are identical
644459d0 3446 (DImode, DFmode)
5b865faf 3447 - a 128-bit constant where the four 32-bit words match. */
644459d0 3448int
3449spu_legitimate_constant_p (rtx x)
3450{
5df189be 3451 if (GET_CODE (x) == HIGH)
3452 x = XEXP (x, 0);
644459d0 3453 /* V4SI with all identical symbols is valid. */
5df189be 3454 if (!flag_pic
3455 && GET_MODE (x) == V4SImode
644459d0 3456 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3457 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3458 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3459 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3460 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3461 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3462
5df189be 3463 if (GET_CODE (x) == CONST_VECTOR
3464 && !const_vector_immediate_p (x))
3465 return 0;
644459d0 3466 return 1;
3467}
3468
        3469/* Valid addresses are:
3470 - symbol_ref, label_ref, const
3471 - reg
3472 - reg + const, where either reg or const is 16 byte aligned
3473 - reg + reg, alignment doesn't matter
3474 The alignment matters in the reg+const case because lqd and stqd
3475 ignore the 4 least significant bits of the const. (TODO: It might be
3476 preferable to allow any alignment and fix it up when splitting.) */
3477int
3478spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3479 rtx x, int reg_ok_strict)
3480{
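  /* A TImode address may be wrapped in (and ... -16) because quadword
     accesses ignore the low four address bits; look through the AND.  */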
3481 if (mode == TImode && GET_CODE (x) == AND
3482 && GET_CODE (XEXP (x, 1)) == CONST_INT
3483 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3484 x = XEXP (x, 0);
3485 switch (GET_CODE (x))
3486 {
3487 case SYMBOL_REF:
3488 case LABEL_REF:
3489 return !TARGET_LARGE_MEM;
3490
3491 case CONST:
0cfc65d4 3492 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3493 {
3494 rtx sym = XEXP (XEXP (x, 0), 0);
3495 rtx cst = XEXP (XEXP (x, 0), 1);
3496
3497 /* Accept any symbol_ref + constant, assuming it does not
3498 wrap around the local store addressability limit. */
3499 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3500 return 1;
3501 }
3502 return 0;
644459d0 3503
3504 case CONST_INT:
3505 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3506
3507 case SUBREG:
3508 x = XEXP (x, 0);
3509 gcc_assert (GET_CODE (x) == REG);
3510
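      /* Fall through.  */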
3511 case REG:
3512 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3513
3514 case PLUS:
3515 case LO_SUM:
3516 {
3517 rtx op0 = XEXP (x, 0);
3518 rtx op1 = XEXP (x, 1);
3519 if (GET_CODE (op0) == SUBREG)
3520 op0 = XEXP (op0, 0);
3521 if (GET_CODE (op1) == SUBREG)
3522 op1 = XEXP (op1, 0);
3523 /* We can't just accept any aligned register because CSE can
3524 change it to a register that is not marked aligned and then
3525 recog will fail. So we only accept frame registers because
3526 they will only be changed to other frame registers. */
3527 if (GET_CODE (op0) == REG
3528 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3529 && GET_CODE (op1) == CONST_INT
3530 && INTVAL (op1) >= -0x2000
3531 && INTVAL (op1) <= 0x1fff
5df189be 3532 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3533 return 1;
3534 if (GET_CODE (op0) == REG
3535 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3536 && GET_CODE (op1) == REG
3537 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3538 return 1;
3539 }
3540 break;
3541
3542 default:
3543 break;
3544 }
3545 return 0;
3546}
3547
3548/* When the address is reg + const_int, force the const_int into a
fa7637bd 3549 register. */
644459d0 3550rtx
3551spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3552 enum machine_mode mode)
3553{
3554 rtx op0, op1;
3555 /* Make sure both operands are registers. */
3556 if (GET_CODE (x) == PLUS)
3557 {
3558 op0 = XEXP (x, 0);
3559 op1 = XEXP (x, 1);
3560 if (ALIGNED_SYMBOL_REF_P (op0))
3561 {
3562 op0 = force_reg (Pmode, op0);
3563 mark_reg_pointer (op0, 128);
3564 }
3565 else if (GET_CODE (op0) != REG)
3566 op0 = force_reg (Pmode, op0);
3567 if (ALIGNED_SYMBOL_REF_P (op1))
3568 {
3569 op1 = force_reg (Pmode, op1);
3570 mark_reg_pointer (op1, 128);
3571 }
3572 else if (GET_CODE (op1) != REG)
3573 op1 = force_reg (Pmode, op1);
3574 x = gen_rtx_PLUS (Pmode, op0, op1);
3575 if (spu_legitimate_address (mode, x, 0))
3576 return x;
3577 }
3578 return NULL_RTX;
3579}
3580
3581/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3582 struct attribute_spec.handler. */
3583static tree
3584spu_handle_fndecl_attribute (tree * node,
3585 tree name,
3586 tree args ATTRIBUTE_UNUSED,
3587 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3588{
3589 if (TREE_CODE (*node) != FUNCTION_DECL)
3590 {
3591 warning (0, "`%s' attribute only applies to functions",
3592 IDENTIFIER_POINTER (name));
3593 *no_add_attrs = true;
3594 }
3595
3596 return NULL_TREE;
3597}
3598
3599/* Handle the "vector" attribute. */
3600static tree
3601spu_handle_vector_attribute (tree * node, tree name,
3602 tree args ATTRIBUTE_UNUSED,
3603 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3604{
3605 tree type = *node, result = NULL_TREE;
3606 enum machine_mode mode;
3607 int unsigned_p;
3608
3609 while (POINTER_TYPE_P (type)
3610 || TREE_CODE (type) == FUNCTION_TYPE
3611 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3612 type = TREE_TYPE (type);
3613
3614 mode = TYPE_MODE (type);
3615
3616 unsigned_p = TYPE_UNSIGNED (type);
3617 switch (mode)
3618 {
3619 case DImode:
3620 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3621 break;
3622 case SImode:
3623 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3624 break;
3625 case HImode:
3626 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3627 break;
3628 case QImode:
3629 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3630 break;
3631 case SFmode:
3632 result = V4SF_type_node;
3633 break;
3634 case DFmode:
3635 result = V2DF_type_node;
3636 break;
3637 default:
3638 break;
3639 }
3640
3641 /* Propagate qualifiers attached to the element type
3642 onto the vector type. */
3643 if (result && result != type && TYPE_QUALS (type))
3644 result = build_qualified_type (result, TYPE_QUALS (type));
3645
3646 *no_add_attrs = true; /* No need to hang on to the attribute. */
3647
3648 if (!result)
3649 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3650 else
d991e6e8 3651 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3652
3653 return NULL_TREE;
3654}
3655
f2b32076 3656/* Return nonzero if FUNC is a naked function. */
644459d0 3657static int
3658spu_naked_function_p (tree func)
3659{
3660 tree a;
3661
3662 if (TREE_CODE (func) != FUNCTION_DECL)
3663 abort ();
3664
3665 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3666 return a != NULL_TREE;
3667}
3668
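/* Return the offset to add when replacing register FROM with register
   TO during register elimination, based on the frame layout set up by
   the prologue code above.  */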
3669int
3670spu_initial_elimination_offset (int from, int to)
3671{
3672 int saved_regs_size = spu_saved_regs_size ();
3673 int sp_offset = 0;
abe32cce 3674 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3675 || get_frame_size () || saved_regs_size)
3676 sp_offset = STACK_POINTER_OFFSET;
3677 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3678 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3679 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3680 return get_frame_size ();
644459d0 3681 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3682 return sp_offset + crtl->outgoing_args_size
644459d0 3683 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3684 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3685 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3686 else
3687 gcc_unreachable ();
644459d0 3688}
3689
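/* Return an rtx describing where a value of TYPE is returned: a single
   register starting at FIRST_RETURN_REGNUM, or a PARALLEL spreading a
   small aggregate across consecutive registers.  */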
3690rtx
fb80456a 3691spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3692{
3693 enum machine_mode mode = TYPE_MODE (type);
3694 int byte_size = ((mode == BLKmode)
3695 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3696
3697 /* Make sure small structs are left justified in a register. */
3698 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3699 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3700 {
3701 enum machine_mode smode;
3702 rtvec v;
3703 int i;
3704 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3705 int n = byte_size / UNITS_PER_WORD;
3706 v = rtvec_alloc (nregs);
3707 for (i = 0; i < n; i++)
3708 {
3709 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3710 gen_rtx_REG (TImode,
3711 FIRST_RETURN_REGNUM
3712 + i),
3713 GEN_INT (UNITS_PER_WORD * i));
3714 byte_size -= UNITS_PER_WORD;
3715 }
3716
3717 if (n < nregs)
3718 {
3719 if (byte_size < 4)
3720 byte_size = 4;
3721 smode =
3722 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3723 RTVEC_ELT (v, n) =
3724 gen_rtx_EXPR_LIST (VOIDmode,
3725 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3726 GEN_INT (UNITS_PER_WORD * n));
3727 }
3728 return gen_rtx_PARALLEL (mode, v);
3729 }
3730 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3731}
3732
3733rtx
3734spu_function_arg (CUMULATIVE_ARGS cum,
3735 enum machine_mode mode,
3736 tree type, int named ATTRIBUTE_UNUSED)
3737{
3738 int byte_size;
3739
3740 if (cum >= MAX_REGISTER_ARGS)
3741 return 0;
3742
3743 byte_size = ((mode == BLKmode)
3744 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3745
3746 /* The ABI does not allow parameters to be passed partially in
 3747 a register and partially on the stack. */
3748 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3749 return 0;
3750
3751 /* Make sure small structs are left justified in a register. */
3752 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3753 && byte_size < UNITS_PER_WORD && byte_size > 0)
3754 {
3755 enum machine_mode smode;
3756 rtx gr_reg;
3757 if (byte_size < 4)
3758 byte_size = 4;
3759 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3760 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3761 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3762 const0_rtx);
3763 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3764 }
3765 else
3766 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3767}
3768
3769/* Variable sized types are passed by reference. */
3770static bool
3771spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3772 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3773 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3774{
3775 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3776}
3777\f
3778
3779/* Var args. */
3780
3781/* Create and return the va_list datatype.
3782
3783 On SPU, va_list is an array type equivalent to
3784
3785 typedef struct __va_list_tag
3786 {
3787 void *__args __attribute__((__aligned(16)));
3788 void *__skip __attribute__((__aligned(16)));
3789
3790 } va_list[1];
3791
fa7637bd 3792 where __args points to the arg that will be returned by the next
644459d0 3793 va_arg(), and __skip points to the previous stack frame such that
3794 when __args == __skip we should advance __args by 32 bytes. */
3795static tree
3796spu_build_builtin_va_list (void)
3797{
3798 tree f_args, f_skip, record, type_decl;
3799 bool owp;
3800
3801 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3802
3803 type_decl =
3804 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3805
3806 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3807 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3808
3809 DECL_FIELD_CONTEXT (f_args) = record;
3810 DECL_ALIGN (f_args) = 128;
3811 DECL_USER_ALIGN (f_args) = 1;
3812
3813 DECL_FIELD_CONTEXT (f_skip) = record;
3814 DECL_ALIGN (f_skip) = 128;
3815 DECL_USER_ALIGN (f_skip) = 1;
3816
3817 TREE_CHAIN (record) = type_decl;
3818 TYPE_NAME (record) = type_decl;
3819 TYPE_FIELDS (record) = f_args;
3820 TREE_CHAIN (f_args) = f_skip;
3821
 3822 /* We know this is being padded and we want it to be. It is an internal
 3823 type, so hide the warnings from the user. */
3824 owp = warn_padded;
3825 warn_padded = false;
3826
3827 layout_type (record);
3828
3829 warn_padded = owp;
3830
3831 /* The correct type is an array type of one element. */
3832 return build_array_type (record, build_index_type (size_zero_node));
3833}
3834
3835/* Implement va_start by filling the va_list structure VALIST.
3836 NEXTARG points to the first anonymous stack argument.
3837
3838 The following global variables are used to initialize
3839 the va_list structure:
3840
abe32cce 3841 crtl->args.info;
644459d0 3842 the CUMULATIVE_ARGS for this function
3843
abe32cce 3844 crtl->args.arg_offset_rtx:
644459d0 3845 holds the offset of the first anonymous stack argument
3846 (relative to the virtual arg pointer). */
3847
8a58ed0a 3848static void
644459d0 3849spu_va_start (tree valist, rtx nextarg)
3850{
3851 tree f_args, f_skip;
3852 tree args, skip, t;
3853
3854 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3855 f_skip = TREE_CHAIN (f_args);
3856
3857 valist = build_va_arg_indirect_ref (valist);
3858 args =
3859 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3860 skip =
3861 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3862
3863 /* Find the __args area. */
3864 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3865 if (crtl->args.pretend_args_size > 0)
0de36bdb 3866 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3867 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3868 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3869 TREE_SIDE_EFFECTS (t) = 1;
3870 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3871
3872 /* Find the __skip area. */
3873 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3874 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3875 size_int (crtl->args.pretend_args_size
0de36bdb 3876 - STACK_POINTER_OFFSET));
75a70cf9 3877 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3878 TREE_SIDE_EFFECTS (t) = 1;
3879 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3880}
3881
3882/* Gimplify va_arg by updating the va_list structure
3883 VALIST as required to retrieve an argument of type
3884 TYPE, and returning that argument.
3885
3886 ret = va_arg(VALIST, TYPE);
3887
3888 generates code equivalent to:
3889
3890 paddedsize = (sizeof(TYPE) + 15) & -16;
3891 if (VALIST.__args + paddedsize > VALIST.__skip
3892 && VALIST.__args <= VALIST.__skip)
3893 addr = VALIST.__skip + 32;
3894 else
3895 addr = VALIST.__args;
3896 VALIST.__args = addr + paddedsize;
3897 ret = *(TYPE *)addr;
3898 */
3899static tree
75a70cf9 3900spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3901 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3902{
3903 tree f_args, f_skip;
3904 tree args, skip;
3905 HOST_WIDE_INT size, rsize;
3906 tree paddedsize, addr, tmp;
3907 bool pass_by_reference_p;
3908
3909 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3910 f_skip = TREE_CHAIN (f_args);
3911
3912 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3913 args =
3914 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3915 skip =
3916 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3917
3918 addr = create_tmp_var (ptr_type_node, "va_arg");
3919 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3920
3921 /* if an object is dynamically sized, a pointer to it is passed
3922 instead of the object itself. */
3923 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3924 false);
3925 if (pass_by_reference_p)
3926 type = build_pointer_type (type);
3927 size = int_size_in_bytes (type);
3928 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3929
3930 /* build conditional expression to calculate addr. The expression
3931 will be gimplified later. */
0de36bdb 3932 paddedsize = size_int (rsize);
75a70cf9 3933 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3934 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3935 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3936 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3937 unshare_expr (skip)));
644459d0 3938
3939 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3940 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3941 size_int (32)), unshare_expr (args));
644459d0 3942
75a70cf9 3943 gimplify_assign (addr, tmp, pre_p);
644459d0 3944
3945 /* update VALIST.__args */
0de36bdb 3946 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3947 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3948
3949 addr = fold_convert (build_pointer_type (type), addr);
3950
3951 if (pass_by_reference_p)
3952 addr = build_va_arg_indirect_ref (addr);
3953
3954 return build_va_arg_indirect_ref (addr);
3955}
3956
3957/* Save parameter registers starting with the register that corresponds
 3958 to the first unnamed parameter. If the first unnamed parameter is
 3959 on the stack then save no registers. Set pretend_args_size to the
3960 amount of space needed to save the registers. */
3961void
3962spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3963 tree type, int *pretend_size, int no_rtl)
3964{
3965 if (!no_rtl)
3966 {
3967 rtx tmp;
3968 int regno;
3969 int offset;
3970 int ncum = *cum;
3971
 3972 /* cum currently points to the last named argument; we want to
3973 start at the next argument. */
3974 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3975
3976 offset = -STACK_POINTER_OFFSET;
3977 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3978 {
3979 tmp = gen_frame_mem (V4SImode,
3980 plus_constant (virtual_incoming_args_rtx,
3981 offset));
3982 emit_move_insn (tmp,
3983 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3984 offset += 16;
3985 }
3986 *pretend_size = offset + STACK_POINTER_OFFSET;
3987 }
3988}
3989\f
3990void
3991spu_conditional_register_usage (void)
3992{
3993 if (flag_pic)
3994 {
3995 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3996 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3997 }
644459d0 3998}
3999
4000/* This is called to decide when we can simplify a load instruction. We
4001 must only return true for registers which we know will always be
 4002 aligned, taking into account that CSE might replace this reg with
4003 another one that has not been marked aligned.
4004 So this is really only true for frame, stack and virtual registers,
fa7637bd 4005 which we know are always aligned and should not be adversely affected
4006 by CSE. */
644459d0 4007static int
4008regno_aligned_for_load (int regno)
4009{
4010 return regno == FRAME_POINTER_REGNUM
5df189be 4011 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4012 || regno == ARG_POINTER_REGNUM
644459d0 4013 || regno == STACK_POINTER_REGNUM
5df189be 4014 || (regno >= FIRST_VIRTUAL_REGISTER
4015 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4016}
4017
4018/* Return TRUE when mem is known to be 16-byte aligned. */
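/* For example, a MEM whose address is (plus (reg sp) (const_int 32))
   is known to be aligned: the stack pointer register is always 16-byte
   aligned and the constant offset is a multiple of 16. */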
4019int
4020aligned_mem_p (rtx mem)
4021{
4022 if (MEM_ALIGN (mem) >= 128)
4023 return 1;
4024 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4025 return 1;
4026 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4027 {
4028 rtx p0 = XEXP (XEXP (mem, 0), 0);
4029 rtx p1 = XEXP (XEXP (mem, 0), 1);
4030 if (regno_aligned_for_load (REGNO (p0)))
4031 {
4032 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4033 return 1;
4034 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4035 return 1;
4036 }
4037 }
4038 else if (GET_CODE (XEXP (mem, 0)) == REG)
4039 {
4040 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4041 return 1;
4042 }
4043 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4044 return 1;
4045 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4046 {
4047 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4048 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4049 if (GET_CODE (p0) == SYMBOL_REF
4050 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4051 return 1;
4052 }
4053 return 0;
4054}
4055
69ced2d6 4056/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4057 into its SYMBOL_REF_FLAGS. */
4058static void
4059spu_encode_section_info (tree decl, rtx rtl, int first)
4060{
4061 default_encode_section_info (decl, rtl, first);
4062
4063 /* If a variable has a forced alignment to < 16 bytes, mark it with
4064 SYMBOL_FLAG_ALIGN1. */
4065 if (TREE_CODE (decl) == VAR_DECL
4066 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4067 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4068}
4069
644459d0 4070/* Return TRUE if we are certain the mem refers to a complete object
4071 which is both 16-byte aligned and padded to a 16-byte boundary. This
4072 would make it safe to store with a single instruction.
4073 We guarantee the alignment and padding for static objects by aligning
 4074 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4075 FIXME: We currently cannot guarantee this for objects on the stack
4076 because assign_parm_setup_stack calls assign_stack_local with the
4077 alignment of the parameter mode and in that case the alignment never
4078 gets adjusted by LOCAL_ALIGNMENT. */
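/* For example, a file-scope "int g;" is aligned and padded out to 16
   bytes by DATA_ALIGNMENT, so "g = 1;" can write the whole enclosing
   quadword with one store without clobbering a neighboring object; a
   4-byte object on the stack gets no such guarantee. */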
4079static int
4080store_with_one_insn_p (rtx mem)
4081{
4082 rtx addr = XEXP (mem, 0);
4083 if (GET_MODE (mem) == BLKmode)
4084 return 0;
4085 /* Only static objects. */
4086 if (GET_CODE (addr) == SYMBOL_REF)
4087 {
4088 /* We use the associated declaration to make sure the access is
fa7637bd 4089 referring to the whole object.
644459d0 4090 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4091 if it is necessary. Will there be cases where one exists, and
4092 the other does not? Will there be cases where both exist, but
4093 have different types? */
4094 tree decl = MEM_EXPR (mem);
4095 if (decl
4096 && TREE_CODE (decl) == VAR_DECL
4097 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4098 return 1;
4099 decl = SYMBOL_REF_DECL (addr);
4100 if (decl
4101 && TREE_CODE (decl) == VAR_DECL
4102 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4103 return 1;
4104 }
4105 return 0;
4106}
4107
4108int
4109spu_expand_mov (rtx * ops, enum machine_mode mode)
4110{
4111 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4112 abort ();
4113
4114 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4115 {
4116 rtx from = SUBREG_REG (ops[1]);
4117 enum machine_mode imode = GET_MODE (from);
4118
4119 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4120 && GET_MODE_CLASS (imode) == MODE_INT
4121 && subreg_lowpart_p (ops[1]));
4122
4123 if (GET_MODE_SIZE (imode) < 4)
4124 {
4125 from = gen_rtx_SUBREG (SImode, from, 0);
4126 imode = SImode;
4127 }
4128
4129 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4130 {
99bdde56 4131 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4132 emit_insn (GEN_FCN (icode) (ops[0], from));
4133 }
4134 else
4135 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4136 return 1;
4137 }
4138
4139 /* At least one of the operands needs to be a register. */
4140 if ((reload_in_progress | reload_completed) == 0
4141 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4142 {
4143 rtx temp = force_reg (mode, ops[1]);
4144 emit_move_insn (ops[0], temp);
4145 return 1;
4146 }
4147 if (reload_in_progress || reload_completed)
4148 {
dea01258 4149 if (CONSTANT_P (ops[1]))
4150 return spu_split_immediate (ops);
644459d0 4151 return 0;
4152 }
4153 else
4154 {
4155 if (GET_CODE (ops[0]) == MEM)
4156 {
4157 if (!spu_valid_move (ops))
4158 {
4159 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4160 gen_reg_rtx (TImode)));
4161 return 1;
4162 }
4163 }
4164 else if (GET_CODE (ops[1]) == MEM)
4165 {
4166 if (!spu_valid_move (ops))
4167 {
4168 emit_insn (gen_load
4169 (ops[0], ops[1], gen_reg_rtx (TImode),
4170 gen_reg_rtx (SImode)));
4171 return 1;
4172 }
4173 }
4174 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4175 extend them. */
4176 if (GET_CODE (ops[1]) == CONST_INT)
4177 {
4178 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4179 if (val != INTVAL (ops[1]))
4180 {
4181 emit_move_insn (ops[0], GEN_INT (val));
4182 return 1;
4183 }
4184 }
4185 }
4186 return 0;
4187}
4188
644459d0 4189void
4190spu_split_load (rtx * ops)
4191{
4192 enum machine_mode mode = GET_MODE (ops[0]);
4193 rtx addr, load, rot, mem, p0, p1;
4194 int rot_amt;
4195
4196 addr = XEXP (ops[1], 0);
4197
4198 rot = 0;
4199 rot_amt = 0;
4200 if (GET_CODE (addr) == PLUS)
4201 {
4202 /* 8 cases:
4203 aligned reg + aligned reg => lqx
4204 aligned reg + unaligned reg => lqx, rotqby
4205 aligned reg + aligned const => lqd
4206 aligned reg + unaligned const => lqd, rotqbyi
4207 unaligned reg + aligned reg => lqx, rotqby
4208 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4209 unaligned reg + aligned const => lqd, rotqby
4210 unaligned reg + unaligned const -> not allowed by legitimate address
4211 */
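      /* For example, an SImode load from (plus (reg sp) (const_int 6))
         falls into the "aligned reg + unaligned const" case: we emit an
         lqd from the 16-byte aligned base and then rotate the quadword
         left by 6 bytes to bring the value into the preferred slot. */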
4212 p0 = XEXP (addr, 0);
4213 p1 = XEXP (addr, 1);
aa71ecd4 4214 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4215 {
aa71ecd4 4216 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4217 {
4218 emit_insn (gen_addsi3 (ops[3], p0, p1));
4219 rot = ops[3];
4220 }
4221 else
4222 rot = p0;
4223 }
4224 else
4225 {
4226 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4227 {
4228 rot_amt = INTVAL (p1) & 15;
4229 p1 = GEN_INT (INTVAL (p1) & -16);
4230 addr = gen_rtx_PLUS (SImode, p0, p1);
4231 }
aa71ecd4 4232 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4233 rot = p1;
4234 }
4235 }
4236 else if (GET_CODE (addr) == REG)
4237 {
aa71ecd4 4238 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4239 rot = addr;
4240 }
4241 else if (GET_CODE (addr) == CONST)
4242 {
4243 if (GET_CODE (XEXP (addr, 0)) == PLUS
4244 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4245 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4246 {
4247 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4248 if (rot_amt & -16)
4249 addr = gen_rtx_CONST (Pmode,
4250 gen_rtx_PLUS (Pmode,
4251 XEXP (XEXP (addr, 0), 0),
4252 GEN_INT (rot_amt & -16)));
4253 else
4254 addr = XEXP (XEXP (addr, 0), 0);
4255 }
4256 else
4257 rot = addr;
4258 }
4259 else if (GET_CODE (addr) == CONST_INT)
4260 {
4261 rot_amt = INTVAL (addr);
4262 addr = GEN_INT (rot_amt & -16);
4263 }
4264 else if (!ALIGNED_SYMBOL_REF_P (addr))
4265 rot = addr;
4266
4267 if (GET_MODE_SIZE (mode) < 4)
4268 rot_amt += GET_MODE_SIZE (mode) - 4;
4269
4270 rot_amt &= 15;
4271
4272 if (rot && rot_amt)
4273 {
4274 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4275 rot = ops[3];
4276 rot_amt = 0;
4277 }
4278
4279 load = ops[2];
4280
4281 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4282 mem = change_address (ops[1], TImode, addr);
4283
e04cf423 4284 emit_insn (gen_movti (load, mem));
644459d0 4285
4286 if (rot)
4287 emit_insn (gen_rotqby_ti (load, load, rot));
4288 else if (rot_amt)
4289 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4290
4291 if (reload_completed)
4292 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4293 else
4294 emit_insn (gen_spu_convert (ops[0], load));
4295}
4296
4297void
4298spu_split_store (rtx * ops)
4299{
4300 enum machine_mode mode = GET_MODE (ops[0]);
4301 rtx pat = ops[2];
4302 rtx reg = ops[3];
4303 rtx addr, p0, p1, p1_lo, smem;
4304 int aform;
4305 int scalar;
4306
4307 addr = XEXP (ops[0], 0);
4308
4309 if (GET_CODE (addr) == PLUS)
4310 {
4311 /* 8 cases:
4312 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4313 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4314 aligned reg + aligned const => lqd, c?d, shuf, stqx
4315 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4316 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4317 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4318 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4319 unaligned reg + unaligned const -> not allowed by legitimate address
4320 */
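      /* For example, an SImode store to (plus (reg sp) (const_int 6))
         loads the enclosing quadword with lqd, builds an insertion mask
         with cwd for byte offset 6, shuffles the new word into place and
         writes the quadword back with stqx. */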
4321 aform = 0;
4322 p0 = XEXP (addr, 0);
4323 p1 = p1_lo = XEXP (addr, 1);
4324 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4325 {
4326 p1_lo = GEN_INT (INTVAL (p1) & 15);
4327 p1 = GEN_INT (INTVAL (p1) & -16);
4328 addr = gen_rtx_PLUS (SImode, p0, p1);
4329 }
4330 }
4331 else if (GET_CODE (addr) == REG)
4332 {
4333 aform = 0;
4334 p0 = addr;
4335 p1 = p1_lo = const0_rtx;
4336 }
4337 else
4338 {
4339 aform = 1;
4340 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4341 p1 = 0; /* aform doesn't use p1 */
4342 p1_lo = addr;
4343 if (ALIGNED_SYMBOL_REF_P (addr))
4344 p1_lo = const0_rtx;
4345 else if (GET_CODE (addr) == CONST)
4346 {
4347 if (GET_CODE (XEXP (addr, 0)) == PLUS
4348 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4349 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4350 {
4351 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4352 if ((v & -16) != 0)
4353 addr = gen_rtx_CONST (Pmode,
4354 gen_rtx_PLUS (Pmode,
4355 XEXP (XEXP (addr, 0), 0),
4356 GEN_INT (v & -16)));
4357 else
4358 addr = XEXP (XEXP (addr, 0), 0);
4359 p1_lo = GEN_INT (v & 15);
4360 }
4361 }
4362 else if (GET_CODE (addr) == CONST_INT)
4363 {
4364 p1_lo = GEN_INT (INTVAL (addr) & 15);
4365 addr = GEN_INT (INTVAL (addr) & -16);
4366 }
4367 }
4368
e04cf423 4369 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4370
644459d0 4371 scalar = store_with_one_insn_p (ops[0]);
4372 if (!scalar)
4373 {
 4374 /* We could copy the flags from the ops[0] MEM to mem here, but we
 4375 don't, because we want this load to be optimized away if
4376 possible, and copying the flags will prevent that in certain
4377 cases, e.g. consider the volatile flag. */
4378
e04cf423 4379 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4380 set_mem_alias_set (lmem, 0);
4381 emit_insn (gen_movti (reg, lmem));
644459d0 4382
aa71ecd4 4383 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4384 p0 = stack_pointer_rtx;
4385 if (!p1_lo)
4386 p1_lo = const0_rtx;
4387
4388 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4389 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4390 }
4391 else if (reload_completed)
4392 {
4393 if (GET_CODE (ops[1]) == REG)
4394 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4395 else if (GET_CODE (ops[1]) == SUBREG)
4396 emit_move_insn (reg,
4397 gen_rtx_REG (GET_MODE (reg),
4398 REGNO (SUBREG_REG (ops[1]))));
4399 else
4400 abort ();
4401 }
4402 else
4403 {
4404 if (GET_CODE (ops[1]) == REG)
4405 emit_insn (gen_spu_convert (reg, ops[1]));
4406 else if (GET_CODE (ops[1]) == SUBREG)
4407 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4408 else
4409 abort ();
4410 }
4411
4412 if (GET_MODE_SIZE (mode) < 4 && scalar)
4413 emit_insn (gen_shlqby_ti
4414 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4415
644459d0 4416 smem = change_address (ops[0], TImode, addr);
4417 /* We can't use the previous alias set because the memory has changed
4418 size and can potentially overlap objects of other types. */
4419 set_mem_alias_set (smem, 0);
4420
e04cf423 4421 emit_insn (gen_movti (smem, reg));
644459d0 4422}
4423
4424/* Return TRUE if X is MEM which is a struct member reference
4425 and the member can safely be loaded and stored with a single
4426 instruction because it is padded. */
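/* For example, given
     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };
   a reference to s.a qualifies: the following field is also 16-byte
   aligned, so the 12 bytes after "a" are padding and a single 16-byte
   access cannot clobber another member. */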
4427static int
4428mem_is_padded_component_ref (rtx x)
4429{
4430 tree t = MEM_EXPR (x);
4431 tree r;
4432 if (!t || TREE_CODE (t) != COMPONENT_REF)
4433 return 0;
4434 t = TREE_OPERAND (t, 1);
4435 if (!t || TREE_CODE (t) != FIELD_DECL
4436 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4437 return 0;
4438 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4439 r = DECL_FIELD_CONTEXT (t);
4440 if (!r || TREE_CODE (r) != RECORD_TYPE)
4441 return 0;
4442 /* Make sure they are the same mode */
4443 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4444 return 0;
 4445 /* If there are no following fields then the field alignment ensures
fa7637bd 4446 the structure is padded to that alignment, which means this field is
4447 padded too. */
644459d0 4448 if (TREE_CHAIN (t) == 0)
4449 return 1;
4450 /* If the following field is also aligned then this field will be
4451 padded. */
4452 t = TREE_CHAIN (t);
4453 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4454 return 1;
4455 return 0;
4456}
4457
c7b91b14 4458/* Parse the -mfixed-range= option string. */
4459static void
4460fix_range (const char *const_str)
4461{
4462 int i, first, last;
4463 char *str, *dash, *comma;
4464
 4465 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4466 REG2 are either register names or register numbers. The effect
4467 of this option is to mark the registers in the range from REG1 to
4468 REG2 as ``fixed'' so they won't be used by the compiler. */
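  /* For example, "-mfixed-range=80-85,90-95" (register numbers chosen
     only for illustration) marks registers 80..85 and 90..95 as fixed
     and call-used, so the compiler will not allocate them. */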
4469
4470 i = strlen (const_str);
4471 str = (char *) alloca (i + 1);
4472 memcpy (str, const_str, i + 1);
4473
4474 while (1)
4475 {
4476 dash = strchr (str, '-');
4477 if (!dash)
4478 {
4479 warning (0, "value of -mfixed-range must have form REG1-REG2");
4480 return;
4481 }
4482 *dash = '\0';
4483 comma = strchr (dash + 1, ',');
4484 if (comma)
4485 *comma = '\0';
4486
4487 first = decode_reg_name (str);
4488 if (first < 0)
4489 {
4490 warning (0, "unknown register name: %s", str);
4491 return;
4492 }
4493
4494 last = decode_reg_name (dash + 1);
4495 if (last < 0)
4496 {
4497 warning (0, "unknown register name: %s", dash + 1);
4498 return;
4499 }
4500
4501 *dash = '-';
4502
4503 if (first > last)
4504 {
4505 warning (0, "%s-%s is an empty range", str, dash + 1);
4506 return;
4507 }
4508
4509 for (i = first; i <= last; ++i)
4510 fixed_regs[i] = call_used_regs[i] = 1;
4511
4512 if (!comma)
4513 break;
4514
4515 *comma = ',';
4516 str = comma + 1;
4517 }
4518}
4519
644459d0 4520int
4521spu_valid_move (rtx * ops)
4522{
4523 enum machine_mode mode = GET_MODE (ops[0]);
4524 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4525 return 0;
4526
4527 /* init_expr_once tries to recog against load and store insns to set
4528 the direct_load[] and direct_store[] arrays. We always want to
4529 consider those loads and stores valid. init_expr_once is called in
4530 the context of a dummy function which does not have a decl. */
4531 if (cfun->decl == 0)
4532 return 1;
4533
 4534 /* Don't allow loads/stores which would require more than 1 insn.
4535 During and after reload we assume loads and stores only take 1
4536 insn. */
4537 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4538 {
4539 if (GET_CODE (ops[0]) == MEM
4540 && (GET_MODE_SIZE (mode) < 4
4541 || !(store_with_one_insn_p (ops[0])
4542 || mem_is_padded_component_ref (ops[0]))))
4543 return 0;
4544 if (GET_CODE (ops[1]) == MEM
4545 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4546 return 0;
4547 }
4548 return 1;
4549}
4550
4551/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4552 can be generated using the fsmbi instruction. */
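/* fsmbi expands each bit of a 16-bit immediate into a full byte of 0x00
   or 0xff, so any constant all of whose bytes are 0x00 or 0xff can be
   generated this way, e.g. (const_int -1) or a V8HImode vector of
   { 0, -1, 0, -1, 0, -1, 0, -1 }. */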
4553int
4554fsmbi_const_p (rtx x)
4555{
dea01258 4556 if (CONSTANT_P (x))
4557 {
5df189be 4558 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4559 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4560 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4561 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4562 }
4563 return 0;
4564}
4565
4566/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4567 can be generated using the cbd, chd, cwd or cdd instruction. */
4568int
4569cpat_const_p (rtx x, enum machine_mode mode)
4570{
4571 if (CONSTANT_P (x))
4572 {
4573 enum immediate_class c = classify_immediate (x, mode);
4574 return c == IC_CPAT;
4575 }
4576 return 0;
4577}
644459d0 4578
dea01258 4579rtx
4580gen_cpat_const (rtx * ops)
4581{
4582 unsigned char dst[16];
4583 int i, offset, shift, isize;
4584 if (GET_CODE (ops[3]) != CONST_INT
4585 || GET_CODE (ops[2]) != CONST_INT
4586 || (GET_CODE (ops[1]) != CONST_INT
4587 && GET_CODE (ops[1]) != REG))
4588 return 0;
4589 if (GET_CODE (ops[1]) == REG
4590 && (!REG_POINTER (ops[1])
4591 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4592 return 0;
644459d0 4593
4594 for (i = 0; i < 16; i++)
dea01258 4595 dst[i] = i + 16;
4596 isize = INTVAL (ops[3]);
4597 if (isize == 1)
4598 shift = 3;
4599 else if (isize == 2)
4600 shift = 2;
4601 else
4602 shift = 0;
4603 offset = (INTVAL (ops[2]) +
4604 (GET_CODE (ops[1]) ==
4605 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4606 for (i = 0; i < isize; i++)
4607 dst[offset + i] = i + shift;
4608 return array_to_constant (TImode, dst);
644459d0 4609}
4610
4611/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4612 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4613 than 16 bytes, the value is repeated across the rest of the array. */
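/* For example, a HImode (const_int 0x1234) yields the array
   { 0x12, 0x34, 0x12, 0x34, ... }, i.e. the two bytes repeated eight
   times to fill all 16 entries. */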
4614void
4615constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4616{
4617 HOST_WIDE_INT val;
4618 int i, j, first;
4619
4620 memset (arr, 0, 16);
4621 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4622 if (GET_CODE (x) == CONST_INT
4623 || (GET_CODE (x) == CONST_DOUBLE
4624 && (mode == SFmode || mode == DFmode)))
4625 {
4626 gcc_assert (mode != VOIDmode && mode != BLKmode);
4627
4628 if (GET_CODE (x) == CONST_DOUBLE)
4629 val = const_double_to_hwint (x);
4630 else
4631 val = INTVAL (x);
4632 first = GET_MODE_SIZE (mode) - 1;
4633 for (i = first; i >= 0; i--)
4634 {
4635 arr[i] = val & 0xff;
4636 val >>= 8;
4637 }
4638 /* Splat the constant across the whole array. */
4639 for (j = 0, i = first + 1; i < 16; i++)
4640 {
4641 arr[i] = arr[j];
4642 j = (j == first) ? 0 : j + 1;
4643 }
4644 }
4645 else if (GET_CODE (x) == CONST_DOUBLE)
4646 {
4647 val = CONST_DOUBLE_LOW (x);
4648 for (i = 15; i >= 8; i--)
4649 {
4650 arr[i] = val & 0xff;
4651 val >>= 8;
4652 }
4653 val = CONST_DOUBLE_HIGH (x);
4654 for (i = 7; i >= 0; i--)
4655 {
4656 arr[i] = val & 0xff;
4657 val >>= 8;
4658 }
4659 }
4660 else if (GET_CODE (x) == CONST_VECTOR)
4661 {
4662 int units;
4663 rtx elt;
4664 mode = GET_MODE_INNER (mode);
4665 units = CONST_VECTOR_NUNITS (x);
4666 for (i = 0; i < units; i++)
4667 {
4668 elt = CONST_VECTOR_ELT (x, i);
4669 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4670 {
4671 if (GET_CODE (elt) == CONST_DOUBLE)
4672 val = const_double_to_hwint (elt);
4673 else
4674 val = INTVAL (elt);
4675 first = GET_MODE_SIZE (mode) - 1;
4676 if (first + i * GET_MODE_SIZE (mode) > 16)
4677 abort ();
4678 for (j = first; j >= 0; j--)
4679 {
4680 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4681 val >>= 8;
4682 }
4683 }
4684 }
4685 }
4686 else
4687 gcc_unreachable();
4688}
4689
4690/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4691 smaller than 16 bytes, use the bytes that would represent that value
4692 in a register, e.g., for QImode return the value of arr[3]. */
4693rtx
4694array_to_constant (enum machine_mode mode, unsigned char arr[16])
4695{
4696 enum machine_mode inner_mode;
4697 rtvec v;
4698 int units, size, i, j, k;
4699 HOST_WIDE_INT val;
4700
4701 if (GET_MODE_CLASS (mode) == MODE_INT
4702 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4703 {
4704 j = GET_MODE_SIZE (mode);
4705 i = j < 4 ? 4 - j : 0;
4706 for (val = 0; i < j; i++)
4707 val = (val << 8) | arr[i];
4708 val = trunc_int_for_mode (val, mode);
4709 return GEN_INT (val);
4710 }
4711
4712 if (mode == TImode)
4713 {
4714 HOST_WIDE_INT high;
4715 for (i = high = 0; i < 8; i++)
4716 high = (high << 8) | arr[i];
4717 for (i = 8, val = 0; i < 16; i++)
4718 val = (val << 8) | arr[i];
4719 return immed_double_const (val, high, TImode);
4720 }
4721 if (mode == SFmode)
4722 {
4723 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4724 val = trunc_int_for_mode (val, SImode);
171b6d22 4725 return hwint_to_const_double (SFmode, val);
644459d0 4726 }
4727 if (mode == DFmode)
4728 {
1f915911 4729 for (i = 0, val = 0; i < 8; i++)
4730 val = (val << 8) | arr[i];
171b6d22 4731 return hwint_to_const_double (DFmode, val);
644459d0 4732 }
4733
4734 if (!VECTOR_MODE_P (mode))
4735 abort ();
4736
4737 units = GET_MODE_NUNITS (mode);
4738 size = GET_MODE_UNIT_SIZE (mode);
4739 inner_mode = GET_MODE_INNER (mode);
4740 v = rtvec_alloc (units);
4741
4742 for (k = i = 0; i < units; ++i)
4743 {
4744 val = 0;
4745 for (j = 0; j < size; j++, k++)
4746 val = (val << 8) | arr[k];
4747
4748 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4749 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4750 else
4751 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4752 }
4753 if (k > 16)
4754 abort ();
4755
4756 return gen_rtx_CONST_VECTOR (mode, v);
4757}
4758
4759static void
4760reloc_diagnostic (rtx x)
4761{
4762 tree loc_decl, decl = 0;
4763 const char *msg;
4764 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4765 return;
4766
4767 if (GET_CODE (x) == SYMBOL_REF)
4768 decl = SYMBOL_REF_DECL (x);
4769 else if (GET_CODE (x) == CONST
4770 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4771 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4772
4773 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4774 if (decl && !DECL_P (decl))
4775 decl = 0;
4776
4777 /* We use last_assemble_variable_decl to get line information. It's
4778 not always going to be right and might not even be close, but will
4779 be right for the more common cases. */
5df189be 4780 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4781 loc_decl = decl;
4782 else
4783 loc_decl = last_assemble_variable_decl;
4784
4785 /* The decl could be a string constant. */
4786 if (decl && DECL_P (decl))
4787 msg = "%Jcreating run-time relocation for %qD";
4788 else
4789 msg = "creating run-time relocation";
4790
99369027 4791 if (TARGET_WARN_RELOC)
644459d0 4792 warning (0, msg, loc_decl, decl);
99369027 4793 else
4794 error (msg, loc_decl, decl);
644459d0 4795}
4796
4797/* Hook into assemble_integer so we can generate an error for run-time
4798 relocations. The SPU ABI disallows them. */
4799static bool
4800spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4801{
4802 /* By default run-time relocations aren't supported, but we allow them
 4803 in case users support them in their own run-time loader. And we provide
4804 a warning for those users that don't. */
4805 if ((GET_CODE (x) == SYMBOL_REF)
4806 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4807 reloc_diagnostic (x);
4808
4809 return default_assemble_integer (x, size, aligned_p);
4810}
4811
4812static void
4813spu_asm_globalize_label (FILE * file, const char *name)
4814{
4815 fputs ("\t.global\t", file);
4816 assemble_name (file, name);
4817 fputs ("\n", file);
4818}
4819
4820static bool
f529eb25 4821spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4822 bool speed ATTRIBUTE_UNUSED)
644459d0 4823{
4824 enum machine_mode mode = GET_MODE (x);
4825 int cost = COSTS_N_INSNS (2);
4826
4827 /* Folding to a CONST_VECTOR will use extra space but there might
4828 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4829 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4830 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4831 because this cost will only be compared against a single insn.
4832 if (code == CONST_VECTOR)
4833 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4834 */
4835
4836 /* Use defaults for float operations. Not accurate but good enough. */
4837 if (mode == DFmode)
4838 {
4839 *total = COSTS_N_INSNS (13);
4840 return true;
4841 }
4842 if (mode == SFmode)
4843 {
4844 *total = COSTS_N_INSNS (6);
4845 return true;
4846 }
4847 switch (code)
4848 {
4849 case CONST_INT:
4850 if (satisfies_constraint_K (x))
4851 *total = 0;
4852 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4853 *total = COSTS_N_INSNS (1);
4854 else
4855 *total = COSTS_N_INSNS (3);
4856 return true;
4857
4858 case CONST:
4859 *total = COSTS_N_INSNS (3);
4860 return true;
4861
4862 case LABEL_REF:
4863 case SYMBOL_REF:
4864 *total = COSTS_N_INSNS (0);
4865 return true;
4866
4867 case CONST_DOUBLE:
4868 *total = COSTS_N_INSNS (5);
4869 return true;
4870
4871 case FLOAT_EXTEND:
4872 case FLOAT_TRUNCATE:
4873 case FLOAT:
4874 case UNSIGNED_FLOAT:
4875 case FIX:
4876 case UNSIGNED_FIX:
4877 *total = COSTS_N_INSNS (7);
4878 return true;
4879
4880 case PLUS:
4881 if (mode == TImode)
4882 {
4883 *total = COSTS_N_INSNS (9);
4884 return true;
4885 }
4886 break;
4887
4888 case MULT:
4889 cost =
4890 GET_CODE (XEXP (x, 0)) ==
4891 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4892 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4893 {
4894 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4895 {
4896 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4897 cost = COSTS_N_INSNS (14);
4898 if ((val & 0xffff) == 0)
4899 cost = COSTS_N_INSNS (9);
4900 else if (val > 0 && val < 0x10000)
4901 cost = COSTS_N_INSNS (11);
4902 }
4903 }
4904 *total = cost;
4905 return true;
4906 case DIV:
4907 case UDIV:
4908 case MOD:
4909 case UMOD:
4910 *total = COSTS_N_INSNS (20);
4911 return true;
4912 case ROTATE:
4913 case ROTATERT:
4914 case ASHIFT:
4915 case ASHIFTRT:
4916 case LSHIFTRT:
4917 *total = COSTS_N_INSNS (4);
4918 return true;
4919 case UNSPEC:
4920 if (XINT (x, 1) == UNSPEC_CONVERT)
4921 *total = COSTS_N_INSNS (0);
4922 else
4923 *total = COSTS_N_INSNS (4);
4924 return true;
4925 }
4926 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4927 if (GET_MODE_CLASS (mode) == MODE_INT
4928 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4929 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4930 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4931 *total = cost;
4932 return true;
4933}
4934
1bd43494 4935static enum machine_mode
4936spu_unwind_word_mode (void)
644459d0 4937{
1bd43494 4938 return SImode;
644459d0 4939}
4940
4941/* Decide whether we can make a sibling call to a function. DECL is the
4942 declaration of the function being targeted by the call and EXP is the
4943 CALL_EXPR representing the call. */
4944static bool
4945spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4946{
4947 return decl && !TARGET_LARGE_MEM;
4948}
4949
4950/* We need to correctly update the back chain pointer and the Available
 4951 Stack Size (which is in the second slot of the sp register). */
4952void
4953spu_allocate_stack (rtx op0, rtx op1)
4954{
4955 HOST_WIDE_INT v;
4956 rtx chain = gen_reg_rtx (V4SImode);
4957 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4958 rtx sp = gen_reg_rtx (V4SImode);
4959 rtx splatted = gen_reg_rtx (V4SImode);
4960 rtx pat = gen_reg_rtx (TImode);
4961
4962 /* copy the back chain so we can save it back again. */
4963 emit_move_insn (chain, stack_bot);
4964
4965 op1 = force_reg (SImode, op1);
4966
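  /* The shuffle pattern below is the byte sequence 00 01 02 03 repeated
     four times, so the shufb copies op1's preferred slot (word 0) into
     every word of splatted; the vector subtract then adjusts the stack
     pointer (word 0) and the Available Stack Size (word 1) together. */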
4967 v = 0x1020300010203ll;
4968 emit_move_insn (pat, immed_double_const (v, v, TImode));
4969 emit_insn (gen_shufb (splatted, op1, op1, pat));
4970
4971 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4972 emit_insn (gen_subv4si3 (sp, sp, splatted));
4973
4974 if (flag_stack_check)
4975 {
4976 rtx avail = gen_reg_rtx(SImode);
4977 rtx result = gen_reg_rtx(SImode);
4978 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4979 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4980 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4981 }
4982
4983 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4984
4985 emit_move_insn (stack_bot, chain);
4986
4987 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4988}
4989
4990void
4991spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4992{
4993 static unsigned char arr[16] =
4994 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
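  /* arr is the byte pattern 0,1,2,3 repeated: used as a shufb control it
     replicates word 0 of its source across all four words, i.e. it splats
     an SImode value into a V4SImode register. */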
4995 rtx temp = gen_reg_rtx (SImode);
4996 rtx temp2 = gen_reg_rtx (SImode);
4997 rtx temp3 = gen_reg_rtx (V4SImode);
4998 rtx temp4 = gen_reg_rtx (V4SImode);
4999 rtx pat = gen_reg_rtx (TImode);
5000 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5001
5002 /* Restore the backchain from the first word, sp from the second. */
5003 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5004 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5005
5006 emit_move_insn (pat, array_to_constant (TImode, arr));
5007
5008 /* Compute Available Stack Size for sp */
5009 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5010 emit_insn (gen_shufb (temp3, temp, temp, pat));
5011
5012 /* Compute Available Stack Size for back chain */
5013 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5014 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5015 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5016
5017 emit_insn (gen_addv4si3 (sp, sp, temp3));
5018 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5019}
5020
5021static void
5022spu_init_libfuncs (void)
5023{
5024 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5025 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5026 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5027 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5028 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5029 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5030 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5031 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5032 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5033 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5034 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5035
5036 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5037 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5038
5039 set_optab_libfunc (smul_optab, TImode, "__multi3");
5040 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5041 set_optab_libfunc (smod_optab, TImode, "__modti3");
5042 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5043 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5044 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5045}
5046
5047/* Make a subreg, stripping any existing subreg. We could possibly just
5048 call simplify_subreg, but in this case we know what we want. */
5049rtx
5050spu_gen_subreg (enum machine_mode mode, rtx x)
5051{
5052 if (GET_CODE (x) == SUBREG)
5053 x = SUBREG_REG (x);
5054 if (GET_MODE (x) == mode)
5055 return x;
5056 return gen_rtx_SUBREG (mode, x, 0);
5057}
5058
5059static bool
fb80456a 5060spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5061{
5062 return (TYPE_MODE (type) == BLKmode
5063 && ((type) == 0
5064 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5065 || int_size_in_bytes (type) >
5066 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5067}
5068\f
5069/* Create the built-in types and functions */
5070
5071struct spu_builtin_description spu_builtins[] = {
5072#define DEF_BUILTIN(fcode, icode, name, type, params) \
5073 {fcode, icode, name, type, params, NULL_TREE},
5074#include "spu-builtins.def"
5075#undef DEF_BUILTIN
5076};
5077
5078static void
5079spu_init_builtins (void)
5080{
5081 struct spu_builtin_description *d;
5082 unsigned int i;
5083
5084 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5085 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5086 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5087 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5088 V4SF_type_node = build_vector_type (float_type_node, 4);
5089 V2DF_type_node = build_vector_type (double_type_node, 2);
5090
5091 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5092 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5093 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5094 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5095
c4ecce0c 5096 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5097
5098 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5101 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5102 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5103 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5104 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5105 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5106 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5107 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5108 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5109 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5110
5111 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5112 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5113 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5114 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5115 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5116 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5117 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5118 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5119
5120 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5121 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5122
5123 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5124
5125 spu_builtin_types[SPU_BTI_PTR] =
5126 build_pointer_type (build_qualified_type
5127 (void_type_node,
5128 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5129
5130 /* For each builtin we build a new prototype. The tree code will make
5131 sure nodes are shared. */
5132 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5133 {
5134 tree p;
5135 char name[64]; /* build_function will make a copy. */
5136 int parm;
5137
5138 if (d->name == 0)
5139 continue;
5140
5dfbd18f 5141 /* Find last parm. */
644459d0 5142 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5143 ;
644459d0 5144
5145 p = void_list_node;
5146 while (parm > 1)
5147 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5148
5149 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5150
5151 sprintf (name, "__builtin_%s", d->name);
5152 d->fndecl =
5153 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5154 NULL, NULL_TREE);
a76866d3 5155 if (d->fcode == SPU_MASK_FOR_LOAD)
5156 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5157
5158 /* These builtins don't throw. */
5159 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5160 }
5161}
5162
cf31d486 5163void
5164spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5165{
5166 static unsigned char arr[16] =
5167 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5168
5169 rtx temp = gen_reg_rtx (Pmode);
5170 rtx temp2 = gen_reg_rtx (V4SImode);
5171 rtx temp3 = gen_reg_rtx (V4SImode);
5172 rtx pat = gen_reg_rtx (TImode);
5173 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5174
5175 emit_move_insn (pat, array_to_constant (TImode, arr));
5176
5177 /* Restore the sp. */
5178 emit_move_insn (temp, op1);
5179 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5180
5181 /* Compute available stack size for sp. */
5182 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5183 emit_insn (gen_shufb (temp3, temp, temp, pat));
5184
5185 emit_insn (gen_addv4si3 (sp, sp, temp3));
5186 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5187}
5188
644459d0 5189int
5190spu_safe_dma (HOST_WIDE_INT channel)
5191{
006e4b96 5192 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5193}
5194
5195void
5196spu_builtin_splats (rtx ops[])
5197{
5198 enum machine_mode mode = GET_MODE (ops[0]);
5199 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5200 {
5201 unsigned char arr[16];
5202 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5203 emit_move_insn (ops[0], array_to_constant (mode, arr));
5204 }
644459d0 5205 else
5206 {
5207 rtx reg = gen_reg_rtx (TImode);
5208 rtx shuf;
5209 if (GET_CODE (ops[1]) != REG
5210 && GET_CODE (ops[1]) != SUBREG)
5211 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5212 switch (mode)
5213 {
5214 case V2DImode:
5215 case V2DFmode:
5216 shuf =
5217 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5218 TImode);
5219 break;
5220 case V4SImode:
5221 case V4SFmode:
5222 shuf =
5223 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5224 TImode);
5225 break;
5226 case V8HImode:
5227 shuf =
5228 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5229 TImode);
5230 break;
5231 case V16QImode:
5232 shuf =
5233 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5234 TImode);
5235 break;
5236 default:
5237 abort ();
5238 }
5239 emit_move_insn (reg, shuf);
5240 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5241 }
5242}
5243
5244void
5245spu_builtin_extract (rtx ops[])
5246{
5247 enum machine_mode mode;
5248 rtx rot, from, tmp;
5249
5250 mode = GET_MODE (ops[1]);
5251
5252 if (GET_CODE (ops[2]) == CONST_INT)
5253 {
5254 switch (mode)
5255 {
5256 case V16QImode:
5257 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5258 break;
5259 case V8HImode:
5260 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5261 break;
5262 case V4SFmode:
5263 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5264 break;
5265 case V4SImode:
5266 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5267 break;
5268 case V2DImode:
5269 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5270 break;
5271 case V2DFmode:
5272 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5273 break;
5274 default:
5275 abort ();
5276 }
5277 return;
5278 }
5279
5280 from = spu_gen_subreg (TImode, ops[1]);
5281 rot = gen_reg_rtx (TImode);
5282 tmp = gen_reg_rtx (SImode);
5283
5284 switch (mode)
5285 {
5286 case V16QImode:
5287 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5288 break;
5289 case V8HImode:
5290 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5291 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5292 break;
5293 case V4SFmode:
5294 case V4SImode:
5295 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5296 break;
5297 case V2DImode:
5298 case V2DFmode:
5299 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5300 break;
5301 default:
5302 abort ();
5303 }
5304 emit_insn (gen_rotqby_ti (rot, from, tmp));
5305
5306 emit_insn (gen_spu_convert (ops[0], rot));
5307}
5308
5309void
5310spu_builtin_insert (rtx ops[])
5311{
5312 enum machine_mode mode = GET_MODE (ops[0]);
5313 enum machine_mode imode = GET_MODE_INNER (mode);
5314 rtx mask = gen_reg_rtx (TImode);
5315 rtx offset;
5316
5317 if (GET_CODE (ops[3]) == CONST_INT)
5318 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5319 else
5320 {
5321 offset = gen_reg_rtx (SImode);
5322 emit_insn (gen_mulsi3
5323 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5324 }
5325 emit_insn (gen_cpat
5326 (mask, stack_pointer_rtx, offset,
5327 GEN_INT (GET_MODE_SIZE (imode))));
5328 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5329}
5330
5331void
5332spu_builtin_promote (rtx ops[])
5333{
5334 enum machine_mode mode, imode;
5335 rtx rot, from, offset;
5336 HOST_WIDE_INT pos;
5337
5338 mode = GET_MODE (ops[0]);
5339 imode = GET_MODE_INNER (mode);
5340
5341 from = gen_reg_rtx (TImode);
5342 rot = spu_gen_subreg (TImode, ops[0]);
5343
5344 emit_insn (gen_spu_convert (from, ops[1]));
5345
5346 if (GET_CODE (ops[2]) == CONST_INT)
5347 {
5348 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5349 if (GET_MODE_SIZE (imode) < 4)
5350 pos += 4 - GET_MODE_SIZE (imode);
5351 offset = GEN_INT (pos & 15);
5352 }
5353 else
5354 {
5355 offset = gen_reg_rtx (SImode);
5356 switch (mode)
5357 {
5358 case V16QImode:
5359 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5360 break;
5361 case V8HImode:
5362 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5363 emit_insn (gen_addsi3 (offset, offset, offset));
5364 break;
5365 case V4SFmode:
5366 case V4SImode:
5367 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5368 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5369 break;
5370 case V2DImode:
5371 case V2DFmode:
5372 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5373 break;
5374 default:
5375 abort ();
5376 }
5377 }
5378 emit_insn (gen_rotqby_ti (rot, from, offset));
5379}
5380
5381void
5382spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5383{
5384 rtx shuf = gen_reg_rtx (V4SImode);
5385 rtx insn = gen_reg_rtx (V4SImode);
5386 rtx shufc;
5387 rtx insnc;
5388 rtx mem;
5389
5390 fnaddr = force_reg (SImode, fnaddr);
5391 cxt = force_reg (SImode, cxt);
5392
5393 if (TARGET_LARGE_MEM)
5394 {
5395 rtx rotl = gen_reg_rtx (V4SImode);
5396 rtx mask = gen_reg_rtx (V4SImode);
5397 rtx bi = gen_reg_rtx (SImode);
5398 unsigned char shufa[16] = {
5399 2, 3, 0, 1, 18, 19, 16, 17,
5400 0, 1, 2, 3, 16, 17, 18, 19
5401 };
5402 unsigned char insna[16] = {
5403 0x41, 0, 0, 79,
5404 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5405 0x60, 0x80, 0, 79,
5406 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5407 };
5408
5409 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5410 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5411
5412 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5413 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5414 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5415 emit_insn (gen_selb (insn, insnc, rotl, mask));
5416
5417 mem = memory_address (Pmode, tramp);
5418 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5419
5420 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5421 mem = memory_address (Pmode, plus_constant (tramp, 16));
5422 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5423 }
5424 else
5425 {
5426 rtx scxt = gen_reg_rtx (SImode);
5427 rtx sfnaddr = gen_reg_rtx (SImode);
5428 unsigned char insna[16] = {
5429 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5430 0x30, 0, 0, 0,
5431 0, 0, 0, 0,
5432 0, 0, 0, 0
5433 };
5434
5435 shufc = gen_reg_rtx (TImode);
5436 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5437
5438 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5439 fits 18 bits and the last 4 are zeros. This will be true if
5440 the stack pointer is initialized to 0x3fff0 at program start,
5441 otherwise the ila instruction will be garbage. */
5442
5443 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5444 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5445 emit_insn (gen_cpat
5446 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5447 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5448 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5449
5450 mem = memory_address (Pmode, tramp);
5451 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5452
5453 }
5454 emit_insn (gen_sync ());
5455}
5456
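/* Sign-extend ops[1] into the wider mode of ops[0]: compute a register
   holding the sign bits, then use a shufb pattern that fills the
   high-order bytes of the result with copies of the sign byte and places
   the (extended) source value in the low-order bytes. */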
5457void
5458spu_expand_sign_extend (rtx ops[])
5459{
5460 unsigned char arr[16];
5461 rtx pat = gen_reg_rtx (TImode);
5462 rtx sign, c;
5463 int i, last;
5464 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5465 if (GET_MODE (ops[1]) == QImode)
5466 {
5467 sign = gen_reg_rtx (HImode);
5468 emit_insn (gen_extendqihi2 (sign, ops[1]));
5469 for (i = 0; i < 16; i++)
5470 arr[i] = 0x12;
5471 arr[last] = 0x13;
5472 }
5473 else
5474 {
5475 for (i = 0; i < 16; i++)
5476 arr[i] = 0x10;
5477 switch (GET_MODE (ops[1]))
5478 {
5479 case HImode:
5480 sign = gen_reg_rtx (SImode);
5481 emit_insn (gen_extendhisi2 (sign, ops[1]));
5482 arr[last] = 0x03;
5483 arr[last - 1] = 0x02;
5484 break;
5485 case SImode:
5486 sign = gen_reg_rtx (SImode);
5487 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5488 for (i = 0; i < 4; i++)
5489 arr[last - i] = 3 - i;
5490 break;
5491 case DImode:
5492 sign = gen_reg_rtx (SImode);
5493 c = gen_reg_rtx (SImode);
5494 emit_insn (gen_spu_convert (c, ops[1]));
5495 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5496 for (i = 0; i < 8; i++)
5497 arr[last - i] = 7 - i;
5498 break;
5499 default:
5500 abort ();
5501 }
5502 }
5503 emit_move_insn (pat, array_to_constant (TImode, arr));
5504 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5505}
5506
5507/* expand vector initialization. If there are any constant parts,
5508 load constant parts first. Then load any non-constant parts. */
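/* For example, initializing a V4SImode vector with { 1, x, 2, y } first
   emits a move of the constant vector { 1, 1, 2, 1 } (the variable slots
   are filled with the first constant, which increases the chance that the
   recursive call can use a splat), and then inserts x into element 1 and
   y into element 3. */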
5509void
5510spu_expand_vector_init (rtx target, rtx vals)
5511{
5512 enum machine_mode mode = GET_MODE (target);
5513 int n_elts = GET_MODE_NUNITS (mode);
5514 int n_var = 0;
5515 bool all_same = true;
790c536c 5516 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5517 int i;
5518
5519 first = XVECEXP (vals, 0, 0);
5520 for (i = 0; i < n_elts; ++i)
5521 {
5522 x = XVECEXP (vals, 0, i);
e442af0b 5523 if (!(CONST_INT_P (x)
5524 || GET_CODE (x) == CONST_DOUBLE
5525 || GET_CODE (x) == CONST_FIXED))
644459d0 5526 ++n_var;
5527 else
5528 {
5529 if (first_constant == NULL_RTX)
5530 first_constant = x;
5531 }
5532 if (i > 0 && !rtx_equal_p (x, first))
5533 all_same = false;
5534 }
5535
5536 /* if all elements are the same, use splats to repeat elements */
5537 if (all_same)
5538 {
5539 if (!CONSTANT_P (first)
5540 && !register_operand (first, GET_MODE (x)))
5541 first = force_reg (GET_MODE (first), first);
5542 emit_insn (gen_spu_splats (target, first));
5543 return;
5544 }
5545
5546 /* load constant parts */
5547 if (n_var != n_elts)
5548 {
5549 if (n_var == 0)
5550 {
5551 emit_move_insn (target,
5552 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5553 }
5554 else
5555 {
5556 rtx constant_parts_rtx = copy_rtx (vals);
5557
5558 gcc_assert (first_constant != NULL_RTX);
 5559 /* fill empty slots with the first constant; this increases
5560 our chance of using splats in the recursive call below. */
5561 for (i = 0; i < n_elts; ++i)
e442af0b 5562 {
5563 x = XVECEXP (constant_parts_rtx, 0, i);
5564 if (!(CONST_INT_P (x)
5565 || GET_CODE (x) == CONST_DOUBLE
5566 || GET_CODE (x) == CONST_FIXED))
5567 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5568 }
644459d0 5569
5570 spu_expand_vector_init (target, constant_parts_rtx);
5571 }
5572 }
5573
5574 /* load variable parts */
5575 if (n_var != 0)
5576 {
5577 rtx insert_operands[4];
5578
5579 insert_operands[0] = target;
5580 insert_operands[2] = target;
5581 for (i = 0; i < n_elts; ++i)
5582 {
5583 x = XVECEXP (vals, 0, i);
e442af0b 5584 if (!(CONST_INT_P (x)
5585 || GET_CODE (x) == CONST_DOUBLE
5586 || GET_CODE (x) == CONST_FIXED))
644459d0 5587 {
5588 if (!register_operand (x, GET_MODE (x)))
5589 x = force_reg (GET_MODE (x), x);
5590 insert_operands[1] = x;
5591 insert_operands[3] = GEN_INT (i);
5592 spu_builtin_insert (insert_operands);
5593 }
5594 }
5595 }
5596}
6352eedf 5597
5474166e 5598/* Return the insn index of the vector compare instruction for the given
 5599   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
5600
5601static int
5602get_vec_cmp_insn (enum rtx_code code,
5603 enum machine_mode dest_mode,
5604 enum machine_mode op_mode)
5605
5606{
5607 switch (code)
5608 {
5609 case EQ:
5610 if (dest_mode == V16QImode && op_mode == V16QImode)
5611 return CODE_FOR_ceq_v16qi;
5612 if (dest_mode == V8HImode && op_mode == V8HImode)
5613 return CODE_FOR_ceq_v8hi;
5614 if (dest_mode == V4SImode && op_mode == V4SImode)
5615 return CODE_FOR_ceq_v4si;
5616 if (dest_mode == V4SImode && op_mode == V4SFmode)
5617 return CODE_FOR_ceq_v4sf;
5618 if (dest_mode == V2DImode && op_mode == V2DFmode)
5619 return CODE_FOR_ceq_v2df;
5620 break;
5621 case GT:
5622 if (dest_mode == V16QImode && op_mode == V16QImode)
5623 return CODE_FOR_cgt_v16qi;
5624 if (dest_mode == V8HImode && op_mode == V8HImode)
5625 return CODE_FOR_cgt_v8hi;
5626 if (dest_mode == V4SImode && op_mode == V4SImode)
5627 return CODE_FOR_cgt_v4si;
5628 if (dest_mode == V4SImode && op_mode == V4SFmode)
5629 return CODE_FOR_cgt_v4sf;
5630 if (dest_mode == V2DImode && op_mode == V2DFmode)
5631 return CODE_FOR_cgt_v2df;
5632 break;
5633 case GTU:
5634 if (dest_mode == V16QImode && op_mode == V16QImode)
5635 return CODE_FOR_clgt_v16qi;
5636 if (dest_mode == V8HImode && op_mode == V8HImode)
5637 return CODE_FOR_clgt_v8hi;
5638 if (dest_mode == V4SImode && op_mode == V4SImode)
5639 return CODE_FOR_clgt_v4si;
5640 break;
5641 default:
5642 break;
5643 }
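  /* No single instruction matches; the caller synthesizes the compare.  */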
5644 return -1;
5645}
5646
5647/* Emit a vector compare for operands OP0 and OP1 using code RCODE.
5648   DMODE is the expected destination mode.  This is a recursive function.  */
5649
5650static rtx
5651spu_emit_vector_compare (enum rtx_code rcode,
5652 rtx op0, rtx op1,
5653 enum machine_mode dmode)
5654{
5655 int vec_cmp_insn;
5656 rtx mask;
5657 enum machine_mode dest_mode;
5658 enum machine_mode op_mode = GET_MODE (op1);
5659
5660 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5661
 5662  /* Floating-point vector compare instructions use destination mode V4SImode.
 5663     Double-precision vector compare instructions use destination mode V2DImode.
 5664     Move the destination to the appropriate mode later.  */
5665 if (dmode == V4SFmode)
5666 dest_mode = V4SImode;
5667 else if (dmode == V2DFmode)
5668 dest_mode = V2DImode;
5669 else
5670 dest_mode = dmode;
5671
5672 mask = gen_reg_rtx (dest_mode);
5673 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5674
5675 if (vec_cmp_insn == -1)
5676 {
5677 bool swap_operands = false;
5678 bool try_again = false;
5679 switch (rcode)
5680 {
5681 case LT:
5682 rcode = GT;
5683 swap_operands = true;
5684 try_again = true;
5685 break;
5686 case LTU:
5687 rcode = GTU;
5688 swap_operands = true;
5689 try_again = true;
5690 break;
5691 case NE:
5692 /* Treat A != B as ~(A==B). */
5693 {
5694 enum insn_code nor_code;
5695 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5696 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5697 gcc_assert (nor_code != CODE_FOR_nothing);
5698 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5699 if (dmode != dest_mode)
5700 {
5701 rtx temp = gen_reg_rtx (dest_mode);
5702 convert_move (temp, mask, 0);
5703 return temp;
5704 }
5705 return mask;
5706 }
5707 break;
5708 case GE:
5709 case GEU:
5710 case LE:
5711 case LEU:
5712 /* Try GT/GTU/LT/LTU OR EQ */
5713 {
5714 rtx c_rtx, eq_rtx;
5715 enum insn_code ior_code;
5716 enum rtx_code new_code;
5717
5718 switch (rcode)
5719 {
5720 case GE: new_code = GT; break;
5721 case GEU: new_code = GTU; break;
5722 case LE: new_code = LT; break;
5723 case LEU: new_code = LTU; break;
5724 default:
5725 gcc_unreachable ();
5726 }
5727
5728 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5729 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5730
99bdde56 5731 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5732 gcc_assert (ior_code != CODE_FOR_nothing);
5733 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5734 if (dmode != dest_mode)
5735 {
5736 rtx temp = gen_reg_rtx (dest_mode);
5737 convert_move (temp, mask, 0);
5738 return temp;
5739 }
5740 return mask;
5741 }
5742 break;
5743 default:
5744 gcc_unreachable ();
5745 }
5746
5747 /* You only get two chances. */
5748 if (try_again)
5749 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5750
5751 gcc_assert (vec_cmp_insn != -1);
5752
5753 if (swap_operands)
5754 {
5755 rtx tmp;
5756 tmp = op0;
5757 op0 = op1;
5758 op1 = tmp;
5759 }
5760 }
5761
5762 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5763 if (dmode != dest_mode)
5764 {
5765 rtx temp = gen_reg_rtx (dest_mode);
5766 convert_move (temp, mask, 0);
5767 return temp;
5768 }
5769 return mask;
5770}
5771
5772
5773/* Emit vector conditional expression.
5774 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5775 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5776
5777int
5778spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5779 rtx cond, rtx cc_op0, rtx cc_op1)
5780{
5781 enum machine_mode dest_mode = GET_MODE (dest);
5782 enum rtx_code rcode = GET_CODE (cond);
5783 rtx mask;
5784
5785 /* Get the vector mask for the given relational operations. */
5786 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5787
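  /* selb is a bit-wise select: dest gets a bit from OP1 where the
     corresponding MASK bit is set and from OP2 where it is clear, which
     is why OP2 and OP1 appear in this order in the call below.  */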
5788 emit_insn(gen_selb (dest, op2, op1, mask));
5789
5790 return 1;
5791}
5792
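/* Force OP into a register of mode MODE.  Constant integers and BLKmode
   values are converted with convert_to_mode; a same-size mode change is
   done with a subreg; otherwise an spu_convert is emitted.  */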
6352eedf 5793static rtx
5794spu_force_reg (enum machine_mode mode, rtx op)
5795{
5796 rtx x, r;
5797 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5798 {
5799 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5800 || GET_MODE (op) == BLKmode)
5801 return force_reg (mode, convert_to_mode (mode, op, 0));
5802 abort ();
5803 }
5804
5805 r = force_reg (GET_MODE (op), op);
5806 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5807 {
5808 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5809 if (x)
5810 return x;
5811 }
5812
5813 x = gen_reg_rtx (mode);
5814 emit_insn (gen_spu_convert (x, r));
5815 return x;
5816}
5817
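/* Check operand OP of builtin D against parameter type P: for immediate
   parameter types, report an error if the value is outside the allowed
   range, and warn when low-order bits that the instruction ignores are
   set.  */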
5818static void
5819spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5820{
5821 HOST_WIDE_INT v = 0;
5822 int lsbits;
5823 /* Check the range of immediate operands. */
5824 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5825 {
5826 int range = p - SPU_BTI_7;
5df189be 5827
5828 if (!CONSTANT_P (op))
6352eedf 5829 error ("%s expects an integer literal in the range [%d, %d].",
5830 d->name,
5831 spu_builtin_range[range].low, spu_builtin_range[range].high);
5832
5833 if (GET_CODE (op) == CONST
5834 && (GET_CODE (XEXP (op, 0)) == PLUS
5835 || GET_CODE (XEXP (op, 0)) == MINUS))
5836 {
5837 v = INTVAL (XEXP (XEXP (op, 0), 1));
5838 op = XEXP (XEXP (op, 0), 0);
5839 }
5840 else if (GET_CODE (op) == CONST_INT)
5841 v = INTVAL (op);
5df189be 5842 else if (GET_CODE (op) == CONST_VECTOR
5843 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5844 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5845
5846 /* The default for v is 0 which is valid in every range. */
5847 if (v < spu_builtin_range[range].low
5848 || v > spu_builtin_range[range].high)
5849 error ("%s expects an integer literal in the range [%d, %d]. ("
5850 HOST_WIDE_INT_PRINT_DEC ")",
5851 d->name,
5852 spu_builtin_range[range].low, spu_builtin_range[range].high,
5853 v);
6352eedf 5854
5855 switch (p)
5856 {
5857 case SPU_BTI_S10_4:
5858 lsbits = 4;
5859 break;
5860 case SPU_BTI_U16_2:
 5861	  /* This is only used in lqa and stqa.  Even though the insns
 5862	     encode 16 bits of the address (all but the 2 least
 5863	     significant), only 14 bits are used because the address is
 5864	     masked to be 16-byte aligned.  */
5865 lsbits = 4;
5866 break;
5867 case SPU_BTI_S16_2:
5868 /* This is used for lqr and stqr. */
5869 lsbits = 2;
5870 break;
5871 default:
5872 lsbits = 0;
5873 }
5874
5875 if (GET_CODE (op) == LABEL_REF
5876 || (GET_CODE (op) == SYMBOL_REF
5877 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5878 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5879 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5880 d->name);
5881 }
5882}
5883
5884
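/* Expand the arguments of the builtin call EXP into OPS, placing TARGET
   in OPS[0] when builtin D returns a value.  Returns the number of
   operands actually filled in.  */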
70ca06f8 5885static int
5df189be 5886expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5887 rtx target, rtx ops[])
5888{
5889 enum insn_code icode = d->icode;
5df189be 5890 int i = 0, a;
6352eedf 5891
5892 /* Expand the arguments into rtl. */
5893
5894 if (d->parm[0] != SPU_BTI_VOID)
5895 ops[i++] = target;
5896
70ca06f8 5897 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 5898 {
5df189be 5899 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5900 if (arg == 0)
5901 abort ();
5902 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5903 }
70ca06f8 5904
5905 /* The insn pattern may have additional operands (SCRATCH).
5906 Return the number of actual non-SCRATCH operands. */
5907 gcc_assert (i <= insn_data[icode].n_operands);
5908 return i;
6352eedf 5909}
5910
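/* Expand builtin D for call EXP into rtl: set up the operands (creating
   a register for the result if TARGET is unsuitable), check immediate
   operands, splat scalars into vector operands where needed, and emit
   the matching insn pattern.  */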
5911static rtx
5912spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5913 tree exp, rtx target)
6352eedf 5914{
5915 rtx pat;
5916 rtx ops[8];
5917 enum insn_code icode = d->icode;
5918 enum machine_mode mode, tmode;
5919 int i, p;
70ca06f8 5920 int n_operands;
6352eedf 5921 tree return_type;
5922
5923 /* Set up ops[] with values from arglist. */
70ca06f8 5924 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 5925
5926 /* Handle the target operand which must be operand 0. */
5927 i = 0;
5928 if (d->parm[0] != SPU_BTI_VOID)
5929 {
5930
 5931      /* We prefer the mode specified for the match_operand; otherwise
 5932         use the mode from the builtin function prototype.  */
5933 tmode = insn_data[d->icode].operand[0].mode;
5934 if (tmode == VOIDmode)
5935 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5936
 5937      /* Try to use TARGET, because not using it can lead to extra copies,
 5938         and when all of the registers are in use, extra copies lead
 5939         to extra spills.  */
5940 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5941 ops[0] = target;
5942 else
5943 target = ops[0] = gen_reg_rtx (tmode);
5944
5945 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5946 abort ();
5947
5948 i++;
5949 }
5950
a76866d3 5951 if (d->fcode == SPU_MASK_FOR_LOAD)
5952 {
5953 enum machine_mode mode = insn_data[icode].operand[1].mode;
5954 tree arg;
5955 rtx addr, op, pat;
5956
5957 /* get addr */
5df189be 5958 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5959 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5960 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5961 addr = memory_address (mode, op);
5962
5963 /* negate addr */
5964 op = gen_reg_rtx (GET_MODE (addr));
5965 emit_insn (gen_rtx_SET (VOIDmode, op,
5966 gen_rtx_NEG (GET_MODE (addr), addr)));
5967 op = gen_rtx_MEM (mode, op);
5968
5969 pat = GEN_FCN (icode) (target, op);
5970 if (!pat)
5971 return 0;
5972 emit_insn (pat);
5973 return target;
5974 }
5975
6352eedf 5976  /* Ignore align_hint, but still expand its args in case they have
 5977     side effects.  */
5978 if (icode == CODE_FOR_spu_align_hint)
5979 return 0;
5980
5981 /* Handle the rest of the operands. */
70ca06f8 5982 for (p = 1; i < n_operands; i++, p++)
6352eedf 5983 {
5984 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5985 mode = insn_data[d->icode].operand[i].mode;
5986 else
5987 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5988
5989 /* mode can be VOIDmode here for labels */
5990
5991 /* For specific intrinsics with an immediate operand, e.g.,
5992 si_ai(), we sometimes need to convert the scalar argument to a
5993 vector argument by splatting the scalar. */
5994 if (VECTOR_MODE_P (mode)
5995 && (GET_CODE (ops[i]) == CONST_INT
5996 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5997 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5998 {
5999 if (GET_CODE (ops[i]) == CONST_INT)
6000 ops[i] = spu_const (mode, INTVAL (ops[i]));
6001 else
6002 {
6003 rtx reg = gen_reg_rtx (mode);
6004 enum machine_mode imode = GET_MODE_INNER (mode);
6005 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6006 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6007 if (imode != GET_MODE (ops[i]))
6008 ops[i] = convert_to_mode (imode, ops[i],
6009 TYPE_UNSIGNED (spu_builtin_types
6010 [d->parm[i]]));
6011 emit_insn (gen_spu_splats (reg, ops[i]));
6012 ops[i] = reg;
6013 }
6014 }
6015
5df189be 6016 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6017
6352eedf 6018 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6019 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6020 }
6021
70ca06f8 6022 switch (n_operands)
6352eedf 6023 {
6024 case 0:
6025 pat = GEN_FCN (icode) (0);
6026 break;
6027 case 1:
6028 pat = GEN_FCN (icode) (ops[0]);
6029 break;
6030 case 2:
6031 pat = GEN_FCN (icode) (ops[0], ops[1]);
6032 break;
6033 case 3:
6034 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6035 break;
6036 case 4:
6037 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6038 break;
6039 case 5:
6040 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6041 break;
6042 case 6:
6043 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6044 break;
6045 default:
6046 abort ();
6047 }
6048
6049 if (!pat)
6050 abort ();
6051
6052 if (d->type == B_CALL || d->type == B_BISLED)
6053 emit_call_insn (pat);
6054 else if (d->type == B_JUMP)
6055 {
6056 emit_jump_insn (pat);
6057 emit_barrier ();
6058 }
6059 else
6060 emit_insn (pat);
6061
6062 return_type = spu_builtin_types[d->parm[0]];
6063 if (d->parm[0] != SPU_BTI_VOID
6064 && GET_MODE (target) != TYPE_MODE (return_type))
6065 {
 6066      /* TARGET is the return value.  It should always have the mode of
 6067         the builtin function prototype.  */
6068 target = spu_force_reg (TYPE_MODE (return_type), target);
6069 }
6070
6071 return target;
6072}
6073
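/* Expand a call EXP to an SPU builtin; presumably the target's
   TARGET_EXPAND_BUILTIN entry point, judging from its argument list.  */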
6074rtx
6075spu_expand_builtin (tree exp,
6076 rtx target,
6077 rtx subtarget ATTRIBUTE_UNUSED,
6078 enum machine_mode mode ATTRIBUTE_UNUSED,
6079 int ignore ATTRIBUTE_UNUSED)
6080{
5df189be 6081 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6082 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6083 struct spu_builtin_description *d;
6084
6085 if (fcode < NUM_SPU_BUILTINS)
6086 {
6087 d = &spu_builtins[fcode];
6088
5df189be 6089 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6090 }
6091 abort ();
6092}
6093
e99f512d 6094/* Implement targetm.vectorize.builtin_mul_widen_even. */
6095static tree
6096spu_builtin_mul_widen_even (tree type)
6097{
e99f512d 6098 switch (TYPE_MODE (type))
6099 {
6100 case V8HImode:
6101 if (TYPE_UNSIGNED (type))
6102 return spu_builtins[SPU_MULE_0].fndecl;
6103 else
6104 return spu_builtins[SPU_MULE_1].fndecl;
6105 break;
6106 default:
6107 return NULL_TREE;
6108 }
6109}
6110
6111/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6112static tree
6113spu_builtin_mul_widen_odd (tree type)
6114{
6115 switch (TYPE_MODE (type))
6116 {
6117 case V8HImode:
6118 if (TYPE_UNSIGNED (type))
6119 return spu_builtins[SPU_MULO_1].fndecl;
6120 else
6121 return spu_builtins[SPU_MULO_0].fndecl;
6122 break;
6123 default:
6124 return NULL_TREE;
6125 }
6126}
6127
a76866d3 6128/* Implement targetm.vectorize.builtin_mask_for_load. */
6129static tree
6130spu_builtin_mask_for_load (void)
6131{
6132 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6133 gcc_assert (d);
6134 return d->fndecl;
6135}
5df189be 6136
a28df51d 6137/* Implement targetm.vectorize.builtin_vectorization_cost. */
6138static int
6139spu_builtin_vectorization_cost (bool runtime_test)
6140{
 6141  /* If the branch of the runtime test is taken, i.e. the vectorized
 6142     version is skipped, this incurs a misprediction cost (because the
 6143     vectorized version is expected to be the fall-through).  So we subtract
becfaa62 6144 the latency of a mispredicted branch from the costs that are incurred
a28df51d 6145 when the vectorized version is executed. */
6146 if (runtime_test)
6147 return -19;
6148 else
6149 return 0;
6150}
6151
0e87db76 6152/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6153   after applying N iterations.  This routine does not determine
 6154   how many iterations are required to reach the desired alignment.  */
6155
6156static bool
a9f1838b 6157spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6158{
6159 if (is_packed)
6160 return false;
6161
6162 /* All other types are naturally aligned. */
6163 return true;
6164}
6165
a0515226 6166/* Implement targetm.vectorize.builtin_vec_perm. */
6167tree
6168spu_builtin_vec_perm (tree type, tree *mask_element_type)
6169{
6170 struct spu_builtin_description *d;
6171
6172 *mask_element_type = unsigned_char_type_node;
6173
6174 switch (TYPE_MODE (type))
6175 {
6176 case V16QImode:
6177 if (TYPE_UNSIGNED (type))
6178 d = &spu_builtins[SPU_SHUFFLE_0];
6179 else
6180 d = &spu_builtins[SPU_SHUFFLE_1];
6181 break;
6182
6183 case V8HImode:
6184 if (TYPE_UNSIGNED (type))
6185 d = &spu_builtins[SPU_SHUFFLE_2];
6186 else
6187 d = &spu_builtins[SPU_SHUFFLE_3];
6188 break;
6189
6190 case V4SImode:
6191 if (TYPE_UNSIGNED (type))
6192 d = &spu_builtins[SPU_SHUFFLE_4];
6193 else
6194 d = &spu_builtins[SPU_SHUFFLE_5];
6195 break;
6196
6197 case V2DImode:
6198 if (TYPE_UNSIGNED (type))
6199 d = &spu_builtins[SPU_SHUFFLE_6];
6200 else
6201 d = &spu_builtins[SPU_SHUFFLE_7];
6202 break;
6203
6204 case V4SFmode:
6205 d = &spu_builtins[SPU_SHUFFLE_8];
6206 break;
6207
6208 case V2DFmode:
6209 d = &spu_builtins[SPU_SHUFFLE_9];
6210 break;
6211
6212 default:
6213 return NULL_TREE;
6214 }
6215
6216 gcc_assert (d);
6217 return d->fndecl;
6218}
6219
d52fd16a 6220/* Count the total number of instructions in each pipe and return the
6221 maximum, which is used as the Minimum Iteration Interval (MII)
6222 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
 6223   -2 means the instruction can go in either pipe0 or pipe1.  */
6224static int
6225spu_sms_res_mii (struct ddg *g)
6226{
6227 int i;
6228 unsigned t[4] = {0, 0, 0, 0};
6229
6230 for (i = 0; i < g->num_nodes; i++)
6231 {
6232 rtx insn = g->nodes[i].insn;
6233 int p = get_pipe (insn) + 2;
6234
6235 assert (p >= 0);
6236 assert (p < 4);
6237
6238 t[p]++;
6239 if (dump_file && INSN_P (insn))
6240 fprintf (dump_file, "i%d %s %d %d\n",
6241 INSN_UID (insn),
6242 insn_data[INSN_CODE(insn)].name,
6243 p, t[p]);
6244 }
6245 if (dump_file)
6246 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6247
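  /* Reading the formula below: t[2] and t[3] count insns tied to pipe0
     and pipe1 respectively, while t[0] counts insns that can use either
     pipe, so the MII is at least the larger per-pipe count and at least
     half (rounded up) of all dual-issuable insns.  */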
6248 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6249}
6250
6251
5df189be 6252void
6253spu_init_expanders (void)
6254{
 6255  /* HARD_FRAME_POINTER_REGNUM is only 128-bit aligned when
 6256     frame_pointer_needed is true.  We don't know that until we're
 6257     expanding the prologue.  */
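  /* REGNO_POINTER_ALIGN is measured in bits, so the 8 below assumes only
     byte alignment until the prologue is expanded.  */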
6258 if (cfun)
6259 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6260}
6261
6262static enum machine_mode
6263spu_libgcc_cmp_return_mode (void)
6264{
6265
6266/* For SPU, word mode is TImode, so it is better to use SImode
6267   for compare returns.  */
6268 return SImode;
6269}
6270
6271static enum machine_mode
6272spu_libgcc_shift_count_mode (void)
6273{
6274/* For SPU, word mode is TImode, so it is better to use SImode
6275   for shift counts.  */
6276 return SImode;
6277}
5a976006 6278
6279/* An early place to adjust some flags after GCC has finished processing
6280 * them. */
6281static void
6282asm_file_start (void)
6283{
6284 /* Variable tracking should be run after all optimizations which
6285 change order of insns. It also needs a valid CFG. */
6286 spu_flag_var_tracking = flag_var_tracking;
6287 flag_var_tracking = 0;
6288
6289 default_file_start ();
6290}
6291
a08dfd55 6292/* Implement targetm.section_type_flags. */
6293static unsigned int
6294spu_section_type_flags (tree decl, const char *name, int reloc)
6295{
6296 /* .toe needs to have type @nobits. */
6297 if (strcmp (name, ".toe") == 0)
6298 return SECTION_BSS;
6299 return default_section_type_flags (decl, name, reloc);
6300}
6301