cfaf579d 1/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
644459d0 2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
038d1e19 5 Software Foundation; either version 3 of the License, or (at your option)
644459d0 6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
038d1e19 14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
644459d0 16
17#include "config.h"
18#include "system.h"
19#include "coretypes.h"
20#include "tm.h"
21#include "rtl.h"
22#include "regs.h"
23#include "hard-reg-set.h"
24#include "real.h"
25#include "insn-config.h"
26#include "conditions.h"
27#include "insn-attr.h"
28#include "flags.h"
29#include "recog.h"
30#include "obstack.h"
31#include "tree.h"
32#include "expr.h"
33#include "optabs.h"
34#include "except.h"
35#include "function.h"
36#include "output.h"
37#include "basic-block.h"
38#include "integrate.h"
39#include "toplev.h"
40#include "ggc.h"
41#include "hashtab.h"
42#include "tm_p.h"
43#include "target.h"
44#include "target-def.h"
45#include "langhooks.h"
46#include "reload.h"
47#include "cfglayout.h"
48#include "sched-int.h"
49#include "params.h"
50#include "assert.h"
51#include "c-common.h"
52#include "machmode.h"
75a70cf9 53#include "gimple.h"
644459d0 54#include "tm-constrs.h"
55#include "spu-builtins.h"
d52fd16a 56#include "ddg.h"
5a976006 57#include "sbitmap.h"
58#include "timevar.h"
59#include "df.h"
6352eedf 60
61/* Builtin types, data and prototypes. */
62struct spu_builtin_range
63{
64 int low, high;
65};
66
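/* Ranges of immediate operands accepted by the builtin argument types
   named in the comments below. */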
67static struct spu_builtin_range spu_builtin_range[] = {
68 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
69 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
70 {0ll, 0x7fll}, /* SPU_BTI_U7 */
71 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
72 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
73 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
74 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
75 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
76 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
77 {0ll, 0xffffll}, /* SPU_BTI_U16 */
78 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
79 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
80};
81
644459d0 82\f
83/* Target specific attribute specifications. */
84char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
85
86/* Prototypes and external defs. */
87static void spu_init_builtins (void);
88static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
89static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
90static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
91static rtx get_pic_reg (void);
92static int need_to_save_reg (int regno, int saving);
93static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
94static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
95static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
96 rtx scratch);
97static void emit_nop_for_insn (rtx insn);
98static bool insn_clobbers_hbr (rtx insn);
99static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
5a976006 100 int distance, sbitmap blocks);
5474166e 101static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
102 enum machine_mode dmode);
644459d0 103static rtx get_branch_target (rtx branch);
644459d0 104static void spu_machine_dependent_reorg (void);
105static int spu_sched_issue_rate (void);
106static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
107 int can_issue_more);
108static int get_pipe (rtx insn);
644459d0 109static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
5a976006 110static void spu_sched_init_global (FILE *, int, int);
111static void spu_sched_init (FILE *, int, int);
112static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
644459d0 113static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
117 int flags,
118 unsigned char *no_add_attrs);
119static int spu_naked_function_p (tree func);
fb80456a 120static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
121 const_tree type, unsigned char named);
644459d0 122static tree spu_build_builtin_va_list (void);
8a58ed0a 123static void spu_va_start (tree, rtx);
75a70cf9 124static tree spu_gimplify_va_arg_expr (tree valist, tree type,
125 gimple_seq * pre_p, gimple_seq * post_p);
644459d0 126static int regno_aligned_for_load (int regno);
127static int store_with_one_insn_p (rtx mem);
644459d0 128static int mem_is_padded_component_ref (rtx x);
129static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
130static void spu_asm_globalize_label (FILE * file, const char *name);
131static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
db65aa2c 132 int *total, bool speed);
644459d0 133static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
134static void spu_init_libfuncs (void);
fb80456a 135static bool spu_return_in_memory (const_tree type, const_tree fntype);
c7b91b14 136static void fix_range (const char *);
69ced2d6 137static void spu_encode_section_info (tree, rtx, int);
e99f512d 138static tree spu_builtin_mul_widen_even (tree);
139static tree spu_builtin_mul_widen_odd (tree);
a76866d3 140static tree spu_builtin_mask_for_load (void);
a28df51d 141static int spu_builtin_vectorization_cost (bool);
a9f1838b 142static bool spu_vector_alignment_reachable (const_tree, bool);
a0515226 143static tree spu_builtin_vec_perm (tree, tree *);
d52fd16a 144static int spu_sms_res_mii (struct ddg *g);
5a976006 145static void asm_file_start (void);
a08dfd55 146static unsigned int spu_section_type_flags (tree, const char *, int);
644459d0 147
148extern const char *reg_names[];
149rtx spu_compare_op0, spu_compare_op1;
150
5474166e 151/* Which instruction set architecture to use. */
152int spu_arch;
153/* Which cpu are we tuning for. */
154int spu_tune;
155
5a976006 156/* The hardware requires 8 insns between a hint and the branch it
 157 affects. This variable describes how many rtl instructions the
158 compiler needs to see before inserting a hint, and then the compiler
159 will insert enough nops to make it at least 8 insns. The default is
 160 for the compiler to allow up to 2 nops to be emitted. The nops are
161 inserted in pairs, so we round down. */
162int spu_hint_dist = (8*4) - (2*4);
163
164/* Determines whether we run variable tracking in machine dependent
165 reorganization. */
166static int spu_flag_var_tracking;
167
644459d0 168enum spu_immediate {
169 SPU_NONE,
170 SPU_IL,
171 SPU_ILA,
172 SPU_ILH,
173 SPU_ILHU,
174 SPU_ORI,
175 SPU_ORHI,
176 SPU_ORBI,
99369027 177 SPU_IOHL
644459d0 178};
dea01258 179enum immediate_class
180{
181 IC_POOL, /* constant pool */
182 IC_IL1, /* one il* instruction */
183 IC_IL2, /* both ilhu and iohl instructions */
184 IC_IL1s, /* one il* instruction */
185 IC_IL2s, /* both ilhu and iohl instructions */
186 IC_FSMBI, /* the fsmbi instruction */
187 IC_CPAT, /* one of the c*d instructions */
5df189be 188 IC_FSMBI2 /* fsmbi plus 1 other instruction */
dea01258 189};
644459d0 190
191static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
192static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
dea01258 193static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
194static enum immediate_class classify_immediate (rtx op,
195 enum machine_mode mode);
644459d0 196
1bd43494 197static enum machine_mode spu_unwind_word_mode (void);
198
ea32e033 199static enum machine_mode
200spu_libgcc_cmp_return_mode (void);
201
202static enum machine_mode
203spu_libgcc_shift_count_mode (void);
204
644459d0 205/* Built in types. */
206tree spu_builtin_types[SPU_BTI_MAX];
207\f
208/* TARGET overrides. */
209
210#undef TARGET_INIT_BUILTINS
211#define TARGET_INIT_BUILTINS spu_init_builtins
212
644459d0 213#undef TARGET_EXPAND_BUILTIN
214#define TARGET_EXPAND_BUILTIN spu_expand_builtin
215
1bd43494 216#undef TARGET_UNWIND_WORD_MODE
217#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
644459d0 218
219/* The .8byte directive doesn't seem to work well for a 32 bit
220 architecture. */
221#undef TARGET_ASM_UNALIGNED_DI_OP
222#define TARGET_ASM_UNALIGNED_DI_OP NULL
223
224#undef TARGET_RTX_COSTS
225#define TARGET_RTX_COSTS spu_rtx_costs
226
227#undef TARGET_ADDRESS_COST
f529eb25 228#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
644459d0 229
230#undef TARGET_SCHED_ISSUE_RATE
231#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
232
5a976006 233#undef TARGET_SCHED_INIT_GLOBAL
234#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
235
236#undef TARGET_SCHED_INIT
237#define TARGET_SCHED_INIT spu_sched_init
238
644459d0 239#undef TARGET_SCHED_VARIABLE_ISSUE
240#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
241
5a976006 242#undef TARGET_SCHED_REORDER
243#define TARGET_SCHED_REORDER spu_sched_reorder
244
245#undef TARGET_SCHED_REORDER2
246#define TARGET_SCHED_REORDER2 spu_sched_reorder
644459d0 247
248#undef TARGET_SCHED_ADJUST_COST
249#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
250
251const struct attribute_spec spu_attribute_table[];
252#undef TARGET_ATTRIBUTE_TABLE
253#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
254
255#undef TARGET_ASM_INTEGER
256#define TARGET_ASM_INTEGER spu_assemble_integer
257
258#undef TARGET_SCALAR_MODE_SUPPORTED_P
259#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
260
261#undef TARGET_VECTOR_MODE_SUPPORTED_P
262#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
263
264#undef TARGET_FUNCTION_OK_FOR_SIBCALL
265#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
266
267#undef TARGET_ASM_GLOBALIZE_LABEL
268#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
269
270#undef TARGET_PASS_BY_REFERENCE
271#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
272
273#undef TARGET_MUST_PASS_IN_STACK
274#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
275
276#undef TARGET_BUILD_BUILTIN_VA_LIST
277#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
278
8a58ed0a 279#undef TARGET_EXPAND_BUILTIN_VA_START
280#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
281
644459d0 282#undef TARGET_SETUP_INCOMING_VARARGS
283#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
284
285#undef TARGET_MACHINE_DEPENDENT_REORG
286#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
287
288#undef TARGET_GIMPLIFY_VA_ARG_EXPR
289#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
290
291#undef TARGET_DEFAULT_TARGET_FLAGS
292#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
293
294#undef TARGET_INIT_LIBFUNCS
295#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
296
297#undef TARGET_RETURN_IN_MEMORY
298#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
299
69ced2d6 300#undef TARGET_ENCODE_SECTION_INFO
301#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
302
e99f512d 303#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
304#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
305
306#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
307#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
308
a76866d3 309#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
310#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
311
a28df51d 312#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
313#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
314
0e87db76 315#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
316#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
317
a0515226 318#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
319#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
320
ea32e033 321#undef TARGET_LIBGCC_CMP_RETURN_MODE
322#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
323
324#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
325#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
326
d52fd16a 327#undef TARGET_SCHED_SMS_RES_MII
328#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
329
5a976006 330#undef TARGET_ASM_FILE_START
331#define TARGET_ASM_FILE_START asm_file_start
332
a08dfd55 333#undef TARGET_SECTION_TYPE_FLAGS
334#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
335
644459d0 336struct gcc_target targetm = TARGET_INITIALIZER;
337
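/* Adjust default optimization parameters for the SPU's large register file. */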
5df189be 338void
339spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
340{
5df189be 341 /* Override some of the default param values. With so many registers
342 larger values are better for these params. */
343 MAX_PENDING_LIST_LENGTH = 128;
344
345 /* With so many registers this is better on by default. */
346 flag_rename_registers = 1;
347}
348
644459d0 349/* Sometimes certain combinations of command options do not make sense
350 on a particular target machine. You can define a macro
351 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
352 executed once just after all the command options have been parsed. */
353void
354spu_override_options (void)
355{
14d408d9 356 /* Small loops will be completely peeled at -O3. For SPU it is more important
357 to keep code small by default. */
358 if (!flag_unroll_loops && !flag_peel_loops
359 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
360 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
361
644459d0 362 flag_omit_frame_pointer = 1;
363
5a976006 364 /* Functions must be 8 byte aligned so we correctly handle dual issue */
644459d0 365 if (align_functions < 8)
366 align_functions = 8;
c7b91b14 367
5a976006 368 spu_hint_dist = 8*4 - spu_max_nops*4;
369 if (spu_hint_dist < 0)
370 spu_hint_dist = 0;
371
c7b91b14 372 if (spu_fixed_range_string)
373 fix_range (spu_fixed_range_string);
5474166e 374
375 /* Determine processor architectural level. */
376 if (spu_arch_string)
377 {
378 if (strcmp (&spu_arch_string[0], "cell") == 0)
379 spu_arch = PROCESSOR_CELL;
380 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
381 spu_arch = PROCESSOR_CELLEDP;
382 else
383 error ("Unknown architecture '%s'", &spu_arch_string[0]);
384 }
385
386 /* Determine processor to tune for. */
387 if (spu_tune_string)
388 {
389 if (strcmp (&spu_tune_string[0], "cell") == 0)
390 spu_tune = PROCESSOR_CELL;
391 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
392 spu_tune = PROCESSOR_CELLEDP;
393 else
 394 error ("Unknown cpu '%s'", &spu_tune_string[0]);
395 }
98bbec1e 396
13684256 397 /* Change defaults according to the processor architecture. */
398 if (spu_arch == PROCESSOR_CELLEDP)
399 {
400 /* If no command line option has been otherwise specified, change
401 the default to -mno-safe-hints on celledp -- only the original
402 Cell/B.E. processors require this workaround. */
403 if (!(target_flags_explicit & MASK_SAFE_HINTS))
404 target_flags &= ~MASK_SAFE_HINTS;
405 }
406
98bbec1e 407 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
644459d0 408}
409\f
410/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
411 struct attribute_spec.handler. */
412
413/* Table of machine attributes. */
414const struct attribute_spec spu_attribute_table[] =
415{
416 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
417 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
418 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
419 { NULL, 0, 0, false, false, false, NULL }
420};
421
422/* True if MODE is valid for the target. By "valid", we mean able to
423 be manipulated in non-trivial ways. In particular, this means all
424 the arithmetic is supported. */
425static bool
426spu_scalar_mode_supported_p (enum machine_mode mode)
427{
428 switch (mode)
429 {
430 case QImode:
431 case HImode:
432 case SImode:
433 case SFmode:
434 case DImode:
435 case TImode:
436 case DFmode:
437 return true;
438
439 default:
440 return false;
441 }
442}
443
444/* Similarly for vector modes. "Supported" here is less strict. At
445 least some operations are supported; need to check optabs or builtins
446 for further details. */
447static bool
448spu_vector_mode_supported_p (enum machine_mode mode)
449{
450 switch (mode)
451 {
452 case V16QImode:
453 case V8HImode:
454 case V4SImode:
455 case V2DImode:
456 case V4SFmode:
457 case V2DFmode:
458 return true;
459
460 default:
461 return false;
462 }
463}
464
465/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
466 least significant bytes of the outer mode. This function returns
 467 TRUE for the SUBREGs where this is correct. */
468int
469valid_subreg (rtx op)
470{
471 enum machine_mode om = GET_MODE (op);
472 enum machine_mode im = GET_MODE (SUBREG_REG (op));
473 return om != VOIDmode && im != VOIDmode
474 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
38aca5eb 475 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
476 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
644459d0 477}
478
 479/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
fa7637bd 480 and adjust the start offset. */
644459d0 481static rtx
482adjust_operand (rtx op, HOST_WIDE_INT * start)
483{
484 enum machine_mode mode;
485 int op_size;
38aca5eb 486 /* Strip any paradoxical SUBREG. */
487 if (GET_CODE (op) == SUBREG
488 && (GET_MODE_BITSIZE (GET_MODE (op))
489 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
644459d0 490 {
491 if (start)
492 *start -=
493 GET_MODE_BITSIZE (GET_MODE (op)) -
494 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
495 op = SUBREG_REG (op);
496 }
 497 /* If it is smaller than SI, adjust the start offset and widen to SI (a SUBREG is added below). */
498 op_size = GET_MODE_BITSIZE (GET_MODE (op));
499 if (op_size < 32)
500 {
501 if (start)
502 *start += 32 - op_size;
503 op_size = 32;
504 }
505 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
506 mode = mode_for_size (op_size, MODE_INT, 0);
507 if (mode != GET_MODE (op))
508 op = gen_rtx_SUBREG (mode, op, 0);
509 return op;
510}
511
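/* Expand the extv/extzv patterns: extract a WIDTH-bit field starting at
   bit START from ops[1] into ops[0], sign- or zero-extending it
   according to UNSIGNEDP. */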
512void
513spu_expand_extv (rtx ops[], int unsignedp)
514{
515 HOST_WIDE_INT width = INTVAL (ops[2]);
516 HOST_WIDE_INT start = INTVAL (ops[3]);
517 HOST_WIDE_INT src_size, dst_size;
518 enum machine_mode src_mode, dst_mode;
519 rtx dst = ops[0], src = ops[1];
520 rtx s;
521
522 dst = adjust_operand (ops[0], 0);
523 dst_mode = GET_MODE (dst);
524 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
525
644459d0 526 src = adjust_operand (src, &start);
527 src_mode = GET_MODE (src);
528 src_size = GET_MODE_BITSIZE (GET_MODE (src));
529
530 if (start > 0)
531 {
532 s = gen_reg_rtx (src_mode);
533 switch (src_mode)
534 {
535 case SImode:
536 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
537 break;
538 case DImode:
539 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
540 break;
541 case TImode:
542 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
543 break;
544 default:
545 abort ();
546 }
547 src = s;
548 }
549
550 if (width < src_size)
551 {
552 rtx pat;
553 int icode;
554 switch (src_mode)
555 {
556 case SImode:
557 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
558 break;
559 case DImode:
560 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
561 break;
562 case TImode:
563 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
564 break;
565 default:
566 abort ();
567 }
568 s = gen_reg_rtx (src_mode);
569 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
570 emit_insn (pat);
571 src = s;
572 }
573
574 convert_move (dst, src, unsignedp);
575}
576
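/* Expand the insv pattern: insert ops[3] as a WIDTH-bit field starting
   at bit START of ops[0]. Handles both register and (possibly
   unaligned) memory destinations. */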
577void
578spu_expand_insv (rtx ops[])
579{
580 HOST_WIDE_INT width = INTVAL (ops[1]);
581 HOST_WIDE_INT start = INTVAL (ops[2]);
582 HOST_WIDE_INT maskbits;
583 enum machine_mode dst_mode, src_mode;
584 rtx dst = ops[0], src = ops[3];
585 int dst_size, src_size;
586 rtx mask;
587 rtx shift_reg;
588 int shift;
589
590
591 if (GET_CODE (ops[0]) == MEM)
592 dst = gen_reg_rtx (TImode);
593 else
594 dst = adjust_operand (dst, &start);
595 dst_mode = GET_MODE (dst);
596 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
597
598 if (CONSTANT_P (src))
599 {
600 enum machine_mode m =
601 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
602 src = force_reg (m, convert_to_mode (m, src, 0));
603 }
604 src = adjust_operand (src, 0);
605 src_mode = GET_MODE (src);
606 src_size = GET_MODE_BITSIZE (GET_MODE (src));
607
608 mask = gen_reg_rtx (dst_mode);
609 shift_reg = gen_reg_rtx (dst_mode);
610 shift = dst_size - start - width;
611
612 /* It's not safe to use subreg here because the compiler assumes
613 that the SUBREG_REG is right justified in the SUBREG. */
614 convert_move (shift_reg, src, 1);
615
616 if (shift > 0)
617 {
618 switch (dst_mode)
619 {
620 case SImode:
621 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
622 break;
623 case DImode:
624 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
625 break;
626 case TImode:
627 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
628 break;
629 default:
630 abort ();
631 }
632 }
633 else if (shift < 0)
634 abort ();
635
636 switch (dst_size)
637 {
638 case 32:
639 maskbits = (-1ll << (32 - width - start));
640 if (start)
641 maskbits += (1ll << (32 - start));
642 emit_move_insn (mask, GEN_INT (maskbits));
643 break;
644 case 64:
645 maskbits = (-1ll << (64 - width - start));
646 if (start)
647 maskbits += (1ll << (64 - start));
648 emit_move_insn (mask, GEN_INT (maskbits));
649 break;
650 case 128:
651 {
652 unsigned char arr[16];
653 int i = start / 8;
654 memset (arr, 0, sizeof (arr));
655 arr[i] = 0xff >> (start & 7);
656 for (i++; i <= (start + width - 1) / 8; i++)
657 arr[i] = 0xff;
658 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
659 emit_move_insn (mask, array_to_constant (TImode, arr));
660 }
661 break;
662 default:
663 abort ();
664 }
665 if (GET_CODE (ops[0]) == MEM)
666 {
667 rtx aligned = gen_reg_rtx (SImode);
668 rtx low = gen_reg_rtx (SImode);
669 rtx addr = gen_reg_rtx (SImode);
670 rtx rotl = gen_reg_rtx (SImode);
671 rtx mask0 = gen_reg_rtx (TImode);
672 rtx mem;
673
674 emit_move_insn (addr, XEXP (ops[0], 0));
675 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
676 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
677 emit_insn (gen_negsi2 (rotl, low));
678 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
679 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
680 mem = change_address (ops[0], TImode, aligned);
681 set_mem_alias_set (mem, 0);
682 emit_move_insn (dst, mem);
683 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
684 emit_move_insn (mem, dst);
685 if (start + width > MEM_ALIGN (ops[0]))
686 {
687 rtx shl = gen_reg_rtx (SImode);
688 rtx mask1 = gen_reg_rtx (TImode);
689 rtx dst1 = gen_reg_rtx (TImode);
690 rtx mem1;
691 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
692 emit_insn (gen_shlqby_ti (mask1, mask, shl));
693 mem1 = adjust_address (mem, TImode, 16);
694 set_mem_alias_set (mem1, 0);
695 emit_move_insn (dst1, mem1);
696 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
697 emit_move_insn (mem1, dst1);
698 }
699 }
700 else
71cd778d 701 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
644459d0 702}
703
704
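/* Expand a block move. Only constant lengths are handled, and only the
   16-byte-aligned case is expanded inline; returns nonzero when the
   move has been emitted, zero to fall back to a library call. */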
705int
706spu_expand_block_move (rtx ops[])
707{
708 HOST_WIDE_INT bytes, align, offset;
709 rtx src, dst, sreg, dreg, target;
710 int i;
711 if (GET_CODE (ops[2]) != CONST_INT
712 || GET_CODE (ops[3]) != CONST_INT
48eb4342 713 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
644459d0 714 return 0;
715
716 bytes = INTVAL (ops[2]);
717 align = INTVAL (ops[3]);
718
719 if (bytes <= 0)
720 return 1;
721
722 dst = ops[0];
723 src = ops[1];
724
725 if (align == 16)
726 {
727 for (offset = 0; offset + 16 <= bytes; offset += 16)
728 {
729 dst = adjust_address (ops[0], V16QImode, offset);
730 src = adjust_address (ops[1], V16QImode, offset);
731 emit_move_insn (dst, src);
732 }
733 if (offset < bytes)
734 {
735 rtx mask;
736 unsigned char arr[16] = { 0 };
737 for (i = 0; i < bytes - offset; i++)
738 arr[i] = 0xff;
739 dst = adjust_address (ops[0], V16QImode, offset);
740 src = adjust_address (ops[1], V16QImode, offset);
741 mask = gen_reg_rtx (V16QImode);
742 sreg = gen_reg_rtx (V16QImode);
743 dreg = gen_reg_rtx (V16QImode);
744 target = gen_reg_rtx (V16QImode);
745 emit_move_insn (mask, array_to_constant (V16QImode, arr));
746 emit_move_insn (dreg, dst);
747 emit_move_insn (sreg, src);
748 emit_insn (gen_selb (target, dreg, sreg, mask));
749 emit_move_insn (dst, target);
750 }
751 return 1;
752 }
753 return 0;
754}
755
756enum spu_comp_code
757{ SPU_EQ, SPU_GT, SPU_GTU };
758
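/* Compare instruction codes, indexed first by operand mode (QI, HI, SI,
   DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF) and then by
   spu_comp_code. A zero entry means the combination is not available. */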
5474166e 759int spu_comp_icode[12][3] = {
760 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
761 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
762 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
763 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
764 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
765 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
766 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
767 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
768 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
769 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
770 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
771 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
644459d0 772};
773
774/* Generate a compare for CODE. Return a brand-new rtx that represents
775 the result of the compare. GCC can figure this out too if we don't
776 provide all variations of compares, but GCC always wants to use
777 WORD_MODE, we can generate better code in most cases if we do it
778 ourselves. */
779void
780spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
781{
782 int reverse_compare = 0;
783 int reverse_test = 0;
5d70b918 784 rtx compare_result, eq_result;
785 rtx comp_rtx, eq_rtx;
644459d0 786 rtx target = operands[0];
787 enum machine_mode comp_mode;
788 enum machine_mode op_mode;
5d70b918 789 enum spu_comp_code scode, eq_code, ior_code;
644459d0 790 int index;
5d70b918 791 int eq_test = 0;
644459d0 792
793 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
794 and so on, to keep the constant in operand 1. */
795 if (GET_CODE (spu_compare_op1) == CONST_INT)
796 {
797 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
798 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
799 switch (code)
800 {
801 case GE:
802 spu_compare_op1 = GEN_INT (val);
803 code = GT;
804 break;
805 case LT:
806 spu_compare_op1 = GEN_INT (val);
807 code = LE;
808 break;
809 case GEU:
810 spu_compare_op1 = GEN_INT (val);
811 code = GTU;
812 break;
813 case LTU:
814 spu_compare_op1 = GEN_INT (val);
815 code = LEU;
816 break;
817 default:
818 break;
819 }
820 }
821
5d70b918 822 comp_mode = SImode;
823 op_mode = GET_MODE (spu_compare_op0);
824
644459d0 825 switch (code)
826 {
827 case GE:
644459d0 828 scode = SPU_GT;
07027691 829 if (HONOR_NANS (op_mode))
5d70b918 830 {
831 reverse_compare = 0;
832 reverse_test = 0;
833 eq_test = 1;
834 eq_code = SPU_EQ;
835 }
836 else
837 {
838 reverse_compare = 1;
839 reverse_test = 1;
840 }
644459d0 841 break;
842 case LE:
644459d0 843 scode = SPU_GT;
07027691 844 if (HONOR_NANS (op_mode))
5d70b918 845 {
846 reverse_compare = 1;
847 reverse_test = 0;
848 eq_test = 1;
849 eq_code = SPU_EQ;
850 }
851 else
852 {
853 reverse_compare = 0;
854 reverse_test = 1;
855 }
644459d0 856 break;
857 case LT:
858 reverse_compare = 1;
859 reverse_test = 0;
860 scode = SPU_GT;
861 break;
862 case GEU:
863 reverse_compare = 1;
864 reverse_test = 1;
865 scode = SPU_GTU;
866 break;
867 case LEU:
868 reverse_compare = 0;
869 reverse_test = 1;
870 scode = SPU_GTU;
871 break;
872 case LTU:
873 reverse_compare = 1;
874 reverse_test = 0;
875 scode = SPU_GTU;
876 break;
877 case NE:
878 reverse_compare = 0;
879 reverse_test = 1;
880 scode = SPU_EQ;
881 break;
882
883 case EQ:
884 scode = SPU_EQ;
885 break;
886 case GT:
887 scode = SPU_GT;
888 break;
889 case GTU:
890 scode = SPU_GTU;
891 break;
892 default:
893 scode = SPU_EQ;
894 break;
895 }
896
644459d0 897 switch (op_mode)
898 {
899 case QImode:
900 index = 0;
901 comp_mode = QImode;
902 break;
903 case HImode:
904 index = 1;
905 comp_mode = HImode;
906 break;
907 case SImode:
908 index = 2;
909 break;
910 case DImode:
911 index = 3;
912 break;
913 case TImode:
914 index = 4;
915 break;
916 case SFmode:
917 index = 5;
918 break;
919 case DFmode:
920 index = 6;
921 break;
922 case V16QImode:
5474166e 923 index = 7;
924 comp_mode = op_mode;
925 break;
644459d0 926 case V8HImode:
5474166e 927 index = 8;
928 comp_mode = op_mode;
929 break;
644459d0 930 case V4SImode:
5474166e 931 index = 9;
932 comp_mode = op_mode;
933 break;
644459d0 934 case V4SFmode:
5474166e 935 index = 10;
936 comp_mode = V4SImode;
937 break;
644459d0 938 case V2DFmode:
5474166e 939 index = 11;
940 comp_mode = V2DImode;
644459d0 941 break;
5474166e 942 case V2DImode:
644459d0 943 default:
944 abort ();
945 }
946
07027691 947 if (GET_MODE (spu_compare_op1) == DFmode
948 && (scode != SPU_GT && scode != SPU_EQ))
949 abort ();
644459d0 950
951 if (is_set == 0 && spu_compare_op1 == const0_rtx
952 && (GET_MODE (spu_compare_op0) == SImode
953 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
954 {
955 /* Don't need to set a register with the result when we are
956 comparing against zero and branching. */
957 reverse_test = !reverse_test;
958 compare_result = spu_compare_op0;
959 }
960 else
961 {
962 compare_result = gen_reg_rtx (comp_mode);
963
964 if (reverse_compare)
965 {
966 rtx t = spu_compare_op1;
967 spu_compare_op1 = spu_compare_op0;
968 spu_compare_op0 = t;
969 }
970
971 if (spu_comp_icode[index][scode] == 0)
972 abort ();
973
974 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
975 (spu_compare_op0, op_mode))
976 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
977 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
978 (spu_compare_op1, op_mode))
979 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
980 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
981 spu_compare_op0,
982 spu_compare_op1);
983 if (comp_rtx == 0)
984 abort ();
985 emit_insn (comp_rtx);
986
5d70b918 987 if (eq_test)
988 {
989 eq_result = gen_reg_rtx (comp_mode);
990 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
991 spu_compare_op0,
992 spu_compare_op1);
993 if (eq_rtx == 0)
994 abort ();
995 emit_insn (eq_rtx);
996 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
997 gcc_assert (ior_code != CODE_FOR_nothing);
998 emit_insn (GEN_FCN (ior_code)
999 (compare_result, compare_result, eq_result));
1000 }
644459d0 1001 }
1002
1003 if (is_set == 0)
1004 {
1005 rtx bcomp;
1006 rtx loc_ref;
1007
1008 /* We don't have branch on QI compare insns, so we convert the
1009 QI compare result to a HI result. */
1010 if (comp_mode == QImode)
1011 {
1012 rtx old_res = compare_result;
1013 compare_result = gen_reg_rtx (HImode);
1014 comp_mode = HImode;
1015 emit_insn (gen_extendqihi2 (compare_result, old_res));
1016 }
1017
1018 if (reverse_test)
1019 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1020 else
1021 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1022
1023 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
1024 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1025 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1026 loc_ref, pc_rtx)));
1027 }
1028 else if (is_set == 2)
1029 {
1030 int compare_size = GET_MODE_BITSIZE (comp_mode);
1031 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1032 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1033 rtx select_mask;
1034 rtx op_t = operands[2];
1035 rtx op_f = operands[3];
1036
1037 /* The result of the comparison can be SI, HI or QI mode. Create a
1038 mask based on that result. */
1039 if (target_size > compare_size)
1040 {
1041 select_mask = gen_reg_rtx (mode);
1042 emit_insn (gen_extend_compare (select_mask, compare_result));
1043 }
1044 else if (target_size < compare_size)
1045 select_mask =
1046 gen_rtx_SUBREG (mode, compare_result,
1047 (compare_size - target_size) / BITS_PER_UNIT);
1048 else if (comp_mode != mode)
1049 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1050 else
1051 select_mask = compare_result;
1052
1053 if (GET_MODE (target) != GET_MODE (op_t)
1054 || GET_MODE (target) != GET_MODE (op_f))
1055 abort ();
1056
1057 if (reverse_test)
1058 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1059 else
1060 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1061 }
1062 else
1063 {
1064 if (reverse_test)
1065 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1066 gen_rtx_NOT (comp_mode, compare_result)));
1067 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1068 emit_insn (gen_extendhisi2 (target, compare_result));
1069 else if (GET_MODE (target) == SImode
1070 && GET_MODE (compare_result) == QImode)
1071 emit_insn (gen_extend_compare (target, compare_result));
1072 else
1073 emit_move_insn (target, compare_result);
1074 }
1075}
1076
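/* Return the target bit pattern of the CONST_DOUBLE X as a
   HOST_WIDE_INT: 32 bits for SFmode, all 64 bits for DFmode. */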
1077HOST_WIDE_INT
1078const_double_to_hwint (rtx x)
1079{
1080 HOST_WIDE_INT val;
1081 REAL_VALUE_TYPE rv;
1082 if (GET_MODE (x) == SFmode)
1083 {
1084 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1085 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1086 }
1087 else if (GET_MODE (x) == DFmode)
1088 {
1089 long l[2];
1090 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1091 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1092 val = l[0];
1093 val = (val << 32) | (l[1] & 0xffffffff);
1094 }
1095 else
1096 abort ();
1097 return val;
1098}
1099
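/* The inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE from the target bit pattern V. */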
1100rtx
1101hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1102{
1103 long tv[2];
1104 REAL_VALUE_TYPE rv;
1105 gcc_assert (mode == SFmode || mode == DFmode);
1106
1107 if (mode == SFmode)
1108 tv[0] = (v << 32) >> 32;
1109 else if (mode == DFmode)
1110 {
1111 tv[1] = (v << 32) >> 32;
1112 tv[0] = v >> 32;
1113 }
1114 real_from_target (&rv, tv, mode);
1115 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1116}
1117
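/* Output ADDR in assembler syntax: reg+offset (d-form), reg+reg
   (x-form), or an absolute/symbolic address. An AND with -16 used for
   aligned accesses is stripped first. */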
1118void
1119print_operand_address (FILE * file, register rtx addr)
1120{
1121 rtx reg;
1122 rtx offset;
1123
e04cf423 1124 if (GET_CODE (addr) == AND
1125 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1126 && INTVAL (XEXP (addr, 1)) == -16)
1127 addr = XEXP (addr, 0);
1128
644459d0 1129 switch (GET_CODE (addr))
1130 {
1131 case REG:
1132 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1133 break;
1134
1135 case PLUS:
1136 reg = XEXP (addr, 0);
1137 offset = XEXP (addr, 1);
1138 if (GET_CODE (offset) == REG)
1139 {
1140 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1141 reg_names[REGNO (offset)]);
1142 }
1143 else if (GET_CODE (offset) == CONST_INT)
1144 {
1145 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1146 INTVAL (offset), reg_names[REGNO (reg)]);
1147 }
1148 else
1149 abort ();
1150 break;
1151
1152 case CONST:
1153 case LABEL_REF:
1154 case SYMBOL_REF:
1155 case CONST_INT:
1156 output_addr_const (file, addr);
1157 break;
1158
1159 default:
1160 debug_rtx (addr);
1161 abort ();
1162 }
1163}
1164
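/* Output operand X for the assembler. CODE is a modifier letter that
   selects how immediates are printed (instruction suffixes, truncated
   or negated values, etc.); see the cases below. */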
1165void
1166print_operand (FILE * file, rtx x, int code)
1167{
1168 enum machine_mode mode = GET_MODE (x);
1169 HOST_WIDE_INT val;
1170 unsigned char arr[16];
1171 int xcode = GET_CODE (x);
dea01258 1172 int i, info;
644459d0 1173 if (GET_MODE (x) == VOIDmode)
1174 switch (code)
1175 {
644459d0 1176 case 'L': /* 128 bits, signed */
1177 case 'm': /* 128 bits, signed */
1178 case 'T': /* 128 bits, signed */
1179 case 't': /* 128 bits, signed */
1180 mode = TImode;
1181 break;
644459d0 1182 case 'K': /* 64 bits, signed */
1183 case 'k': /* 64 bits, signed */
1184 case 'D': /* 64 bits, signed */
1185 case 'd': /* 64 bits, signed */
1186 mode = DImode;
1187 break;
644459d0 1188 case 'J': /* 32 bits, signed */
1189 case 'j': /* 32 bits, signed */
1190 case 's': /* 32 bits, signed */
1191 case 'S': /* 32 bits, signed */
1192 mode = SImode;
1193 break;
1194 }
1195 switch (code)
1196 {
1197
1198 case 'j': /* 32 bits, signed */
1199 case 'k': /* 64 bits, signed */
1200 case 'm': /* 128 bits, signed */
1201 if (xcode == CONST_INT
1202 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1203 {
1204 gcc_assert (logical_immediate_p (x, mode));
1205 constant_to_array (mode, x, arr);
1206 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1207 val = trunc_int_for_mode (val, SImode);
1208 switch (which_logical_immediate (val))
1209 {
1210 case SPU_ORI:
1211 break;
1212 case SPU_ORHI:
1213 fprintf (file, "h");
1214 break;
1215 case SPU_ORBI:
1216 fprintf (file, "b");
1217 break;
1218 default:
1219 gcc_unreachable();
1220 }
1221 }
1222 else
1223 gcc_unreachable();
1224 return;
1225
1226 case 'J': /* 32 bits, signed */
1227 case 'K': /* 64 bits, signed */
1228 case 'L': /* 128 bits, signed */
1229 if (xcode == CONST_INT
1230 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1231 {
1232 gcc_assert (logical_immediate_p (x, mode)
1233 || iohl_immediate_p (x, mode));
1234 constant_to_array (mode, x, arr);
1235 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1236 val = trunc_int_for_mode (val, SImode);
1237 switch (which_logical_immediate (val))
1238 {
1239 case SPU_ORI:
1240 case SPU_IOHL:
1241 break;
1242 case SPU_ORHI:
1243 val = trunc_int_for_mode (val, HImode);
1244 break;
1245 case SPU_ORBI:
1246 val = trunc_int_for_mode (val, QImode);
1247 break;
1248 default:
1249 gcc_unreachable();
1250 }
1251 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1252 }
1253 else
1254 gcc_unreachable();
1255 return;
1256
1257 case 't': /* 128 bits, signed */
1258 case 'd': /* 64 bits, signed */
1259 case 's': /* 32 bits, signed */
dea01258 1260 if (CONSTANT_P (x))
644459d0 1261 {
dea01258 1262 enum immediate_class c = classify_immediate (x, mode);
1263 switch (c)
1264 {
1265 case IC_IL1:
1266 constant_to_array (mode, x, arr);
1267 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1268 val = trunc_int_for_mode (val, SImode);
1269 switch (which_immediate_load (val))
1270 {
1271 case SPU_IL:
1272 break;
1273 case SPU_ILA:
1274 fprintf (file, "a");
1275 break;
1276 case SPU_ILH:
1277 fprintf (file, "h");
1278 break;
1279 case SPU_ILHU:
1280 fprintf (file, "hu");
1281 break;
1282 default:
1283 gcc_unreachable ();
1284 }
1285 break;
1286 case IC_CPAT:
1287 constant_to_array (mode, x, arr);
1288 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1289 if (info == 1)
1290 fprintf (file, "b");
1291 else if (info == 2)
1292 fprintf (file, "h");
1293 else if (info == 4)
1294 fprintf (file, "w");
1295 else if (info == 8)
1296 fprintf (file, "d");
1297 break;
1298 case IC_IL1s:
1299 if (xcode == CONST_VECTOR)
1300 {
1301 x = CONST_VECTOR_ELT (x, 0);
1302 xcode = GET_CODE (x);
1303 }
1304 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1305 fprintf (file, "a");
1306 else if (xcode == HIGH)
1307 fprintf (file, "hu");
1308 break;
1309 case IC_FSMBI:
5df189be 1310 case IC_FSMBI2:
dea01258 1311 case IC_IL2:
1312 case IC_IL2s:
1313 case IC_POOL:
1314 abort ();
1315 }
644459d0 1316 }
644459d0 1317 else
1318 gcc_unreachable ();
1319 return;
1320
1321 case 'T': /* 128 bits, signed */
1322 case 'D': /* 64 bits, signed */
1323 case 'S': /* 32 bits, signed */
dea01258 1324 if (CONSTANT_P (x))
644459d0 1325 {
dea01258 1326 enum immediate_class c = classify_immediate (x, mode);
1327 switch (c)
644459d0 1328 {
dea01258 1329 case IC_IL1:
1330 constant_to_array (mode, x, arr);
1331 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1332 val = trunc_int_for_mode (val, SImode);
1333 switch (which_immediate_load (val))
1334 {
1335 case SPU_IL:
1336 case SPU_ILA:
1337 break;
1338 case SPU_ILH:
1339 case SPU_ILHU:
1340 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1341 break;
1342 default:
1343 gcc_unreachable ();
1344 }
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1346 break;
1347 case IC_FSMBI:
1348 constant_to_array (mode, x, arr);
1349 val = 0;
1350 for (i = 0; i < 16; i++)
1351 {
1352 val <<= 1;
1353 val |= arr[i] & 1;
1354 }
1355 print_operand (file, GEN_INT (val), 0);
1356 break;
1357 case IC_CPAT:
1358 constant_to_array (mode, x, arr);
1359 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
644459d0 1361 break;
dea01258 1362 case IC_IL1s:
dea01258 1363 if (xcode == HIGH)
5df189be 1364 x = XEXP (x, 0);
1365 if (GET_CODE (x) == CONST_VECTOR)
1366 x = CONST_VECTOR_ELT (x, 0);
1367 output_addr_const (file, x);
1368 if (xcode == HIGH)
1369 fprintf (file, "@h");
644459d0 1370 break;
dea01258 1371 case IC_IL2:
1372 case IC_IL2s:
5df189be 1373 case IC_FSMBI2:
dea01258 1374 case IC_POOL:
1375 abort ();
644459d0 1376 }
c8befdb9 1377 }
644459d0 1378 else
1379 gcc_unreachable ();
1380 return;
1381
644459d0 1382 case 'C':
1383 if (xcode == CONST_INT)
1384 {
 1385 /* Only the 4 least significant bits are relevant for generating
1386 control word instructions. */
1387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1388 return;
1389 }
1390 break;
1391
1392 case 'M': /* print code for c*d */
1393 if (GET_CODE (x) == CONST_INT)
1394 switch (INTVAL (x))
1395 {
1396 case 1:
1397 fprintf (file, "b");
1398 break;
1399 case 2:
1400 fprintf (file, "h");
1401 break;
1402 case 4:
1403 fprintf (file, "w");
1404 break;
1405 case 8:
1406 fprintf (file, "d");
1407 break;
1408 default:
1409 gcc_unreachable();
1410 }
1411 else
1412 gcc_unreachable();
1413 return;
1414
1415 case 'N': /* Negate the operand */
1416 if (xcode == CONST_INT)
1417 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1418 else if (xcode == CONST_VECTOR)
1419 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1420 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1421 return;
1422
1423 case 'I': /* enable/disable interrupts */
1424 if (xcode == CONST_INT)
1425 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1426 return;
1427
1428 case 'b': /* branch modifiers */
1429 if (xcode == REG)
1430 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1431 else if (COMPARISON_P (x))
1432 fprintf (file, "%s", xcode == NE ? "n" : "");
1433 return;
1434
1435 case 'i': /* indirect call */
1436 if (xcode == MEM)
1437 {
1438 if (GET_CODE (XEXP (x, 0)) == REG)
1439 /* Used in indirect function calls. */
1440 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1441 else
1442 output_address (XEXP (x, 0));
1443 }
1444 return;
1445
1446 case 'p': /* load/store */
1447 if (xcode == MEM)
1448 {
1449 x = XEXP (x, 0);
1450 xcode = GET_CODE (x);
1451 }
e04cf423 1452 if (xcode == AND)
1453 {
1454 x = XEXP (x, 0);
1455 xcode = GET_CODE (x);
1456 }
644459d0 1457 if (xcode == REG)
1458 fprintf (file, "d");
1459 else if (xcode == CONST_INT)
1460 fprintf (file, "a");
1461 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1462 fprintf (file, "r");
1463 else if (xcode == PLUS || xcode == LO_SUM)
1464 {
1465 if (GET_CODE (XEXP (x, 1)) == REG)
1466 fprintf (file, "x");
1467 else
1468 fprintf (file, "d");
1469 }
1470 return;
1471
5df189be 1472 case 'e':
1473 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1474 val &= 0x7;
1475 output_addr_const (file, GEN_INT (val));
1476 return;
1477
1478 case 'f':
1479 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1480 val &= 0x1f;
1481 output_addr_const (file, GEN_INT (val));
1482 return;
1483
1484 case 'g':
1485 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1486 val &= 0x3f;
1487 output_addr_const (file, GEN_INT (val));
1488 return;
1489
1490 case 'h':
1491 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1492 val = (val >> 3) & 0x1f;
1493 output_addr_const (file, GEN_INT (val));
1494 return;
1495
1496 case 'E':
1497 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1498 val = -val;
1499 val &= 0x7;
1500 output_addr_const (file, GEN_INT (val));
1501 return;
1502
1503 case 'F':
1504 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1505 val = -val;
1506 val &= 0x1f;
1507 output_addr_const (file, GEN_INT (val));
1508 return;
1509
1510 case 'G':
1511 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1512 val = -val;
1513 val &= 0x3f;
1514 output_addr_const (file, GEN_INT (val));
1515 return;
1516
1517 case 'H':
1518 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1519 val = -(val & -8ll);
1520 val = (val >> 3) & 0x1f;
1521 output_addr_const (file, GEN_INT (val));
1522 return;
1523
644459d0 1524 case 0:
1525 if (xcode == REG)
1526 fprintf (file, "%s", reg_names[REGNO (x)]);
1527 else if (xcode == MEM)
1528 output_address (XEXP (x, 0));
1529 else if (xcode == CONST_VECTOR)
dea01258 1530 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
644459d0 1531 else
1532 output_addr_const (file, x);
1533 return;
1534
f6a0d06f 1535 /* unused letters
5df189be 1536 o qr uvw yz
1537 AB OPQR UVWXYZ */
644459d0 1538 default:
1539 output_operand_lossage ("invalid %%xn code");
1540 }
1541 gcc_unreachable ();
1542}
1543
1544extern char call_used_regs[];
644459d0 1545
1546/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1547 caller saved register. For leaf functions it is more efficient to
1548 use a volatile register because we won't need to save and restore the
1549 pic register. This routine is only valid after register allocation
1550 is completed, so we can pick an unused register. */
1551static rtx
1552get_pic_reg (void)
1553{
1554 rtx pic_reg = pic_offset_table_rtx;
1555 if (!reload_completed && !reload_in_progress)
1556 abort ();
1557 return pic_reg;
1558}
1559
5df189be 1560/* Split constant addresses to handle cases that are too large.
1561 Add in the pic register when in PIC mode.
1562 Split immediates that require more than 1 instruction. */
dea01258 1563int
1564spu_split_immediate (rtx * ops)
c8befdb9 1565{
dea01258 1566 enum machine_mode mode = GET_MODE (ops[0]);
1567 enum immediate_class c = classify_immediate (ops[1], mode);
1568
1569 switch (c)
c8befdb9 1570 {
dea01258 1571 case IC_IL2:
1572 {
1573 unsigned char arrhi[16];
1574 unsigned char arrlo[16];
98bbec1e 1575 rtx to, temp, hi, lo;
dea01258 1576 int i;
98bbec1e 1577 enum machine_mode imode = mode;
1578 /* We need to do reals as ints because the constant used in the
1579 IOR might not be a legitimate real constant. */
1580 imode = int_mode_for_mode (mode);
dea01258 1581 constant_to_array (mode, ops[1], arrhi);
98bbec1e 1582 if (imode != mode)
1583 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1584 else
1585 to = ops[0];
1586 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
dea01258 1587 for (i = 0; i < 16; i += 4)
1588 {
1589 arrlo[i + 2] = arrhi[i + 2];
1590 arrlo[i + 3] = arrhi[i + 3];
1591 arrlo[i + 0] = arrlo[i + 1] = 0;
1592 arrhi[i + 2] = arrhi[i + 3] = 0;
1593 }
98bbec1e 1594 hi = array_to_constant (imode, arrhi);
1595 lo = array_to_constant (imode, arrlo);
1596 emit_move_insn (temp, hi);
dea01258 1597 emit_insn (gen_rtx_SET
98bbec1e 1598 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
dea01258 1599 return 1;
1600 }
5df189be 1601 case IC_FSMBI2:
1602 {
1603 unsigned char arr_fsmbi[16];
1604 unsigned char arr_andbi[16];
1605 rtx to, reg_fsmbi, reg_and;
1606 int i;
1607 enum machine_mode imode = mode;
1608 /* We need to do reals as ints because the constant used in the
 1609 AND might not be a legitimate real constant. */
1610 imode = int_mode_for_mode (mode);
1611 constant_to_array (mode, ops[1], arr_fsmbi);
1612 if (imode != mode)
1613 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1614 else
1615 to = ops[0];
1616 for (i = 0; i < 16; i++)
1617 if (arr_fsmbi[i] != 0)
1618 {
1619 arr_andbi[0] = arr_fsmbi[i];
1620 arr_fsmbi[i] = 0xff;
1621 }
1622 for (i = 1; i < 16; i++)
1623 arr_andbi[i] = arr_andbi[0];
1624 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1625 reg_and = array_to_constant (imode, arr_andbi);
1626 emit_move_insn (to, reg_fsmbi);
1627 emit_insn (gen_rtx_SET
1628 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1629 return 1;
1630 }
dea01258 1631 case IC_POOL:
1632 if (reload_in_progress || reload_completed)
1633 {
1634 rtx mem = force_const_mem (mode, ops[1]);
1635 if (TARGET_LARGE_MEM)
1636 {
1637 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1638 emit_move_insn (addr, XEXP (mem, 0));
1639 mem = replace_equiv_address (mem, addr);
1640 }
1641 emit_move_insn (ops[0], mem);
1642 return 1;
1643 }
1644 break;
1645 case IC_IL1s:
1646 case IC_IL2s:
1647 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1648 {
1649 if (c == IC_IL2s)
1650 {
5df189be 1651 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1652 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
dea01258 1653 }
1654 else if (flag_pic)
1655 emit_insn (gen_pic (ops[0], ops[1]));
1656 if (flag_pic)
1657 {
1658 rtx pic_reg = get_pic_reg ();
1659 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
18d50ae6 1660 crtl->uses_pic_offset_table = 1;
dea01258 1661 }
1662 return flag_pic || c == IC_IL2s;
1663 }
1664 break;
1665 case IC_IL1:
1666 case IC_FSMBI:
1667 case IC_CPAT:
1668 break;
c8befdb9 1669 }
dea01258 1670 return 0;
c8befdb9 1671}
1672
644459d0 1673/* SAVING is TRUE when we are generating the actual load and store
1674 instructions for REGNO. When determining the size of the stack
 1675 needed for saving registers, we must allocate enough space for the
1676 worst case, because we don't always have the information early enough
1677 to not allocate it. But we can at least eliminate the actual loads
1678 and stores during the prologue/epilogue. */
1679static int
1680need_to_save_reg (int regno, int saving)
1681{
3072d30e 1682 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
644459d0 1683 return 1;
1684 if (flag_pic
1685 && regno == PIC_OFFSET_TABLE_REGNUM
18d50ae6 1686 && (!saving || crtl->uses_pic_offset_table)
644459d0 1687 && (!saving
3072d30e 1688 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
644459d0 1689 return 1;
1690 return 0;
1691}
1692
1693/* This function is only correct starting with local register
1694 allocation */
1695int
1696spu_saved_regs_size (void)
1697{
1698 int reg_save_size = 0;
1699 int regno;
1700
1701 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1702 if (need_to_save_reg (regno, 0))
1703 reg_save_size += 0x10;
1704 return reg_save_size;
1705}
1706
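/* Emit a store of register REGNO, as a V4SImode quadword, at ADDR +
   OFFSET; frame_emit_load below is the matching restore. Used when
   saving and restoring registers in the prologue and epilogue. */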
1707static rtx
1708frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1709{
1710 rtx reg = gen_rtx_REG (V4SImode, regno);
1711 rtx mem =
1712 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1713 return emit_insn (gen_movv4si (mem, reg));
1714}
1715
1716static rtx
1717frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1718{
1719 rtx reg = gen_rtx_REG (V4SImode, regno);
1720 rtx mem =
1721 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1722 return emit_insn (gen_movv4si (reg, mem));
1723}
1724
1725/* This happens after reload, so we need to expand it. */
1726static rtx
1727frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1728{
1729 rtx insn;
1730 if (satisfies_constraint_K (GEN_INT (imm)))
1731 {
1732 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1733 }
1734 else
1735 {
3072d30e 1736 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
644459d0 1737 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1738 if (REGNO (src) == REGNO (scratch))
1739 abort ();
1740 }
644459d0 1741 return insn;
1742}
1743
1744/* Return nonzero if this function is known to have a null epilogue. */
1745
1746int
1747direct_return (void)
1748{
1749 if (reload_completed)
1750 {
1751 if (cfun->static_chain_decl == 0
1752 && (spu_saved_regs_size ()
1753 + get_frame_size ()
abe32cce 1754 + crtl->outgoing_args_size
1755 + crtl->args.pretend_args_size == 0)
644459d0 1756 && current_function_is_leaf)
1757 return 1;
1758 }
1759 return 0;
1760}
1761
1762/*
1763 The stack frame looks like this:
1764 +-------------+
1765 | incoming |
a8e019fa 1766 | args |
1767 AP -> +-------------+
644459d0 1768 | $lr save |
1769 +-------------+
1770 prev SP | back chain |
1771 +-------------+
1772 | var args |
abe32cce 1773 | reg save | crtl->args.pretend_args_size bytes
644459d0 1774 +-------------+
1775 | ... |
1776 | saved regs | spu_saved_regs_size() bytes
a8e019fa 1777 FP -> +-------------+
644459d0 1778 | ... |
a8e019fa 1779 | vars | get_frame_size() bytes
1780 HFP -> +-------------+
644459d0 1781 | ... |
1782 | outgoing |
abe32cce 1783 | args | crtl->outgoing_args_size bytes
644459d0 1784 +-------------+
1785 | $lr of next |
1786 | frame |
1787 +-------------+
a8e019fa 1788 | back chain |
1789 SP -> +-------------+
644459d0 1790
1791*/
1792void
1793spu_expand_prologue (void)
1794{
1795 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1796 HOST_WIDE_INT total_size;
1797 HOST_WIDE_INT saved_regs_size;
1798 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1799 rtx scratch_reg_0, scratch_reg_1;
1800 rtx insn, real;
1801
1802 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1803 the "toplevel" insn chain. */
1804 emit_note (NOTE_INSN_DELETED);
1805
1806 if (flag_pic && optimize == 0)
18d50ae6 1807 crtl->uses_pic_offset_table = 1;
644459d0 1808
1809 if (spu_naked_function_p (current_function_decl))
1810 return;
1811
1812 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1813 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1814
1815 saved_regs_size = spu_saved_regs_size ();
1816 total_size = size + saved_regs_size
abe32cce 1817 + crtl->outgoing_args_size
1818 + crtl->args.pretend_args_size;
644459d0 1819
1820 if (!current_function_is_leaf
18d50ae6 1821 || cfun->calls_alloca || total_size > 0)
644459d0 1822 total_size += STACK_POINTER_OFFSET;
1823
1824 /* Save this first because code after this might use the link
1825 register as a scratch register. */
1826 if (!current_function_is_leaf)
1827 {
1828 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1829 RTX_FRAME_RELATED_P (insn) = 1;
1830 }
1831
1832 if (total_size > 0)
1833 {
abe32cce 1834 offset = -crtl->args.pretend_args_size;
644459d0 1835 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1836 if (need_to_save_reg (regno, 1))
1837 {
1838 offset -= 16;
1839 insn = frame_emit_store (regno, sp_reg, offset);
1840 RTX_FRAME_RELATED_P (insn) = 1;
1841 }
1842 }
1843
18d50ae6 1844 if (flag_pic && crtl->uses_pic_offset_table)
644459d0 1845 {
1846 rtx pic_reg = get_pic_reg ();
1847 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
644459d0 1848 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
644459d0 1849 }
1850
1851 if (total_size > 0)
1852 {
1853 if (flag_stack_check)
1854 {
d819917f 1855 /* We compare against total_size-1 because
644459d0 1856 ($sp >= total_size) <=> ($sp > total_size-1) */
1857 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1858 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1859 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1860 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1861 {
1862 emit_move_insn (scratch_v4si, size_v4si);
1863 size_v4si = scratch_v4si;
1864 }
1865 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1866 emit_insn (gen_vec_extractv4si
1867 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1868 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1869 }
1870
1871 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1872 the value of the previous $sp because we save it as the back
1873 chain. */
1874 if (total_size <= 2000)
1875 {
1876 /* In this case we save the back chain first. */
1877 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
644459d0 1878 insn =
1879 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1880 }
644459d0 1881 else
1882 {
1883 insn = emit_move_insn (scratch_reg_0, sp_reg);
644459d0 1884 insn =
1885 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1886 }
1887 RTX_FRAME_RELATED_P (insn) = 1;
1888 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1889 REG_NOTES (insn) =
1890 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1891
1892 if (total_size > 2000)
1893 {
1894 /* Save the back chain ptr */
1895 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
644459d0 1896 }
1897
1898 if (frame_pointer_needed)
1899 {
1900 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1901 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
abe32cce 1902 + crtl->outgoing_args_size;
644459d0 1903 /* Set the new frame_pointer */
d8dfeb55 1904 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1905 RTX_FRAME_RELATED_P (insn) = 1;
1906 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1907 REG_NOTES (insn) =
1908 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1909 real, REG_NOTES (insn));
5df189be 1910 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
644459d0 1911 }
1912 }
1913
1914 emit_note (NOTE_INSN_DELETED);
1915}
1916
1917void
1918spu_expand_epilogue (bool sibcall_p)
1919{
1920 int size = get_frame_size (), offset, regno;
1921 HOST_WIDE_INT saved_regs_size, total_size;
1922 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1923 rtx jump, scratch_reg_0;
1924
1925 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1926 the "toplevel" insn chain. */
1927 emit_note (NOTE_INSN_DELETED);
1928
1929 if (spu_naked_function_p (current_function_decl))
1930 return;
1931
1932 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1933
1934 saved_regs_size = spu_saved_regs_size ();
1935 total_size = size + saved_regs_size
abe32cce 1936 + crtl->outgoing_args_size
1937 + crtl->args.pretend_args_size;
644459d0 1938
1939 if (!current_function_is_leaf
18d50ae6 1940 || cfun->calls_alloca || total_size > 0)
644459d0 1941 total_size += STACK_POINTER_OFFSET;
1942
1943 if (total_size > 0)
1944 {
18d50ae6 1945 if (cfun->calls_alloca)
644459d0 1946 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1947 else
1948 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1949
1950
1951 if (saved_regs_size > 0)
1952 {
abe32cce 1953 offset = -crtl->args.pretend_args_size;
644459d0 1954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1955 if (need_to_save_reg (regno, 1))
1956 {
1957 offset -= 0x10;
1958 frame_emit_load (regno, sp_reg, offset);
1959 }
1960 }
1961 }
1962
1963 if (!current_function_is_leaf)
1964 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1965
1966 if (!sibcall_p)
1967 {
18b42941 1968 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
644459d0 1969 jump = emit_jump_insn (gen__return ());
1970 emit_barrier_after (jump);
1971 }
1972
1973 emit_note (NOTE_INSN_DELETED);
1974}
1975
1976rtx
1977spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1978{
1979 if (count != 0)
1980 return 0;
1981 /* This is inefficient because it ends up copying to a save-register
1982 which then gets saved even though $lr has already been saved. But
1983 it does generate better code for leaf functions and we don't need
1984 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1985 used for __builtin_return_address anyway, so maybe we don't care if
1986 it's inefficient. */
1987 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1988}
1989\f
1990
1991/* Given VAL, generate a constant appropriate for MODE.
1992 If MODE is a vector mode, every element will be VAL.
1993 For TImode, VAL will be zero extended to 128 bits. */
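/* For example, spu_const (V4SImode, 1) yields (const_vector:V4SI [1 1 1 1]),
   and spu_const (SFmode, 0x3f800000) yields the CONST_DOUBLE for 1.0f,
   since VAL is interpreted as the bit pattern for float modes.  */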
1994rtx
1995spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1996{
1997 rtx inner;
1998 rtvec v;
1999 int units, i;
2000
2001 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2002 || GET_MODE_CLASS (mode) == MODE_FLOAT
2003 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2004 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2005
2006 if (GET_MODE_CLASS (mode) == MODE_INT)
2007 return immed_double_const (val, 0, mode);
2008
2009 /* val is the bit representation of the float */
2010 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2011 return hwint_to_const_double (mode, val);
2012
2013 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2014 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2015 else
2016 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2017
2018 units = GET_MODE_NUNITS (mode);
2019
2020 v = rtvec_alloc (units);
2021
2022 for (i = 0; i < units; ++i)
2023 RTVEC_ELT (v, i) = inner;
2024
2025 return gen_rtx_CONST_VECTOR (mode, v);
2026}
644459d0 2027
5474166e 2028/* Create a MODE vector constant from 4 ints. */
2029rtx
2030spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2031{
2032 unsigned char arr[16];
2033 arr[0] = (a >> 24) & 0xff;
2034 arr[1] = (a >> 16) & 0xff;
2035 arr[2] = (a >> 8) & 0xff;
2036 arr[3] = (a >> 0) & 0xff;
2037 arr[4] = (b >> 24) & 0xff;
2038 arr[5] = (b >> 16) & 0xff;
2039 arr[6] = (b >> 8) & 0xff;
2040 arr[7] = (b >> 0) & 0xff;
2041 arr[8] = (c >> 24) & 0xff;
2042 arr[9] = (c >> 16) & 0xff;
2043 arr[10] = (c >> 8) & 0xff;
2044 arr[11] = (c >> 0) & 0xff;
2045 arr[12] = (d >> 24) & 0xff;
2046 arr[13] = (d >> 16) & 0xff;
2047 arr[14] = (d >> 8) & 0xff;
2048 arr[15] = (d >> 0) & 0xff;
2049 return array_to_constant(mode, arr);
2050}
5a976006 2051\f
2052/* branch hint stuff */
5474166e 2053
644459d0 2054/* An array of these is used to propagate hints to predecessor blocks. */
2055struct spu_bb_info
2056{
5a976006 2057 rtx prop_jump; /* propagated from another block */
2058 int bb_index; /* the original block. */
644459d0 2059};
5a976006 2060static struct spu_bb_info *spu_bb_info;
644459d0 2061
5a976006 2062#define STOP_HINT_P(INSN) \
2063 (GET_CODE(INSN) == CALL_INSN \
2064 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2065 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2066
2067/* 1 when RTX is a hinted branch or its target. We keep track of
2068 what has been hinted so the safe-hint code can test it easily. */
2069#define HINTED_P(RTX) \
2070 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2071
2072/* 1 when RTX is an insn that must be scheduled on an even boundary. */
2073#define SCHED_ON_EVEN_P(RTX) \
2074 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2075
2076/* Emit a nop for INSN such that the two will dual issue. This assumes
2077 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2078 We check for TImode to handle a MULTI1 insn which has dual issued its
2079 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2080 ADDR_VEC insns. */
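/* On the SPU, "nop" is a pipe 0 instruction and "lnop" a pipe 1 instruction,
   which is presumably why the MULTI1/TImode case below pads with nopn while
   the other cases pad with lnop.  */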
2081static void
2082emit_nop_for_insn (rtx insn)
644459d0 2083{
5a976006 2084 int p;
2085 rtx new_insn;
2086 p = get_pipe (insn);
2087 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2088 new_insn = emit_insn_after (gen_lnop (), insn);
2089 else if (p == 1 && GET_MODE (insn) == TImode)
644459d0 2090 {
5a976006 2091 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2092 PUT_MODE (new_insn, TImode);
2093 PUT_MODE (insn, VOIDmode);
2094 }
2095 else
2096 new_insn = emit_insn_after (gen_lnop (), insn);
2097 recog_memoized (new_insn);
2098}
2099
2100/* Insert nops in basic blocks to meet dual issue alignment
2101 requirements. Also make sure hbrp and hint instructions are at least
2102 one cycle apart, possibly inserting a nop. */
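/* Every SPU insn is 4 bytes and a dual-issued pair must start on an 8-byte
   boundary, so, for example, an insn marked SCHED_ON_EVEN_P that would
   otherwise land at an address that is 4 mod 8 gets a 4-byte nop inserted
   in front of it to push it to the next 8-byte boundary.  */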
2103static void
2104pad_bb(void)
2105{
2106 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2107 int length;
2108 int addr;
2109
2110 /* This sets up INSN_ADDRESSES. */
2111 shorten_branches (get_insns ());
2112
2113 /* Keep track of length added by nops. */
2114 length = 0;
2115
2116 prev_insn = 0;
2117 insn = get_insns ();
2118 if (!active_insn_p (insn))
2119 insn = next_active_insn (insn);
2120 for (; insn; insn = next_insn)
2121 {
2122 next_insn = next_active_insn (insn);
2123 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2124 || INSN_CODE (insn) == CODE_FOR_hbr)
644459d0 2125 {
5a976006 2126 if (hbr_insn)
2127 {
2128 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2129 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2130 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2131 || (a1 - a0 == 4))
2132 {
2133 prev_insn = emit_insn_before (gen_lnop (), insn);
2134 PUT_MODE (prev_insn, GET_MODE (insn));
2135 PUT_MODE (insn, TImode);
2136 length += 4;
2137 }
2138 }
2139 hbr_insn = insn;
2140 }
2141 if (INSN_CODE (insn) == CODE_FOR_blockage)
2142 {
2143 if (GET_MODE (insn) == TImode)
2144 PUT_MODE (next_insn, TImode);
2145 insn = next_insn;
2146 next_insn = next_active_insn (insn);
2147 }
2148 addr = INSN_ADDRESSES (INSN_UID (insn));
2149 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2150 {
2151 if (((addr + length) & 7) != 0)
2152 {
2153 emit_nop_for_insn (prev_insn);
2154 length += 4;
2155 }
644459d0 2156 }
5a976006 2157 else if (GET_MODE (insn) == TImode
2158 && ((next_insn && GET_MODE (next_insn) != TImode)
2159 || get_attr_type (insn) == TYPE_MULTI0)
2160 && ((addr + length) & 7) != 0)
2161 {
2162 /* prev_insn will always be set because the first insn is
2163 always 8-byte aligned. */
2164 emit_nop_for_insn (prev_insn);
2165 length += 4;
2166 }
2167 prev_insn = insn;
644459d0 2168 }
644459d0 2169}
2170
5a976006 2171\f
2172/* Routines for branch hints. */
2173
644459d0 2174static void
5a976006 2175spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2176 int distance, sbitmap blocks)
644459d0 2177{
5a976006 2178 rtx branch_label = 0;
2179 rtx hint;
2180 rtx insn;
2181 rtx table;
644459d0 2182
2183 if (before == 0 || branch == 0 || target == 0)
2184 return;
2185
5a976006 2186 /* While scheduling we require hints to be no further than 600 bytes
 2187 away from the branch, so we need to enforce that here too.  */
644459d0 2188 if (distance > 600)
2189 return;
2190
5a976006 2191 /* If BEFORE is a basic block note, emit the hint after the note. */
2192 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2193 before = NEXT_INSN (before);
644459d0 2194
2195 branch_label = gen_label_rtx ();
2196 LABEL_NUSES (branch_label)++;
2197 LABEL_PRESERVE_P (branch_label) = 1;
2198 insn = emit_label_before (branch_label, branch);
2199 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
5a976006 2200 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2201
2202 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2203 recog_memoized (hint);
2204 HINTED_P (branch) = 1;
644459d0 2205
5a976006 2206 if (GET_CODE (target) == LABEL_REF)
2207 HINTED_P (XEXP (target, 0)) = 1;
2208 else if (tablejump_p (branch, 0, &table))
644459d0 2209 {
5a976006 2210 rtvec vec;
2211 int j;
2212 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2213 vec = XVEC (PATTERN (table), 0);
2214 else
2215 vec = XVEC (PATTERN (table), 1);
2216 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2217 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
644459d0 2218 }
5a976006 2219
2220 if (distance >= 588)
644459d0 2221 {
5a976006 2222 /* Make sure the hint isn't scheduled any earlier than this point,
 2223 which could make it too far for the branch offset to fit.  */
2224 recog_memoized (emit_insn_before (gen_blockage (), hint));
2225 }
2226 else if (distance <= 8 * 4)
2227 {
2228 /* To guarantee at least 8 insns between the hint and branch we
2229 insert nops. */
2230 int d;
2231 for (d = distance; d < 8 * 4; d += 4)
2232 {
2233 insn =
2234 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2235 recog_memoized (insn);
2236 }
2237
2238 /* Make sure any nops inserted aren't scheduled before the hint. */
2239 recog_memoized (emit_insn_after (gen_blockage (), hint));
2240
2241 /* Make sure any nops inserted aren't scheduled after the call. */
2242 if (CALL_P (branch) && distance < 8 * 4)
2243 recog_memoized (emit_insn_before (gen_blockage (), branch));
644459d0 2244 }
644459d0 2245}
2246
2247/* Returns 0 if we don't want a hint for this branch. Otherwise return
2248 the rtx for the branch target. */
2249static rtx
2250get_branch_target (rtx branch)
2251{
2252 if (GET_CODE (branch) == JUMP_INSN)
2253 {
2254 rtx set, src;
2255
2256 /* Return statements */
2257 if (GET_CODE (PATTERN (branch)) == RETURN)
2258 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2259
2260 /* jump table */
2261 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2262 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2263 return 0;
2264
2265 set = single_set (branch);
2266 src = SET_SRC (set);
2267 if (GET_CODE (SET_DEST (set)) != PC)
2268 abort ();
2269
2270 if (GET_CODE (src) == IF_THEN_ELSE)
2271 {
2272 rtx lab = 0;
2273 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2274 if (note)
2275 {
2276 /* If the more probable case is not a fall through, then
2277 try a branch hint. */
2278 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2279 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2280 && GET_CODE (XEXP (src, 1)) != PC)
2281 lab = XEXP (src, 1);
2282 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2283 && GET_CODE (XEXP (src, 2)) != PC)
2284 lab = XEXP (src, 2);
2285 }
2286 if (lab)
2287 {
2288 if (GET_CODE (lab) == RETURN)
2289 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2290 return lab;
2291 }
2292 return 0;
2293 }
2294
2295 return src;
2296 }
2297 else if (GET_CODE (branch) == CALL_INSN)
2298 {
2299 rtx call;
2300 /* All of our call patterns are in a PARALLEL and the CALL is
2301 the first pattern in the PARALLEL. */
2302 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2303 abort ();
2304 call = XVECEXP (PATTERN (branch), 0, 0);
2305 if (GET_CODE (call) == SET)
2306 call = SET_SRC (call);
2307 if (GET_CODE (call) != CALL)
2308 abort ();
2309 return XEXP (XEXP (call, 0), 0);
2310 }
2311 return 0;
2312}
2313
5a976006 2314/* The special $hbr register is used to prevent the insn scheduler from
2315 moving hbr insns across instructions which invalidate them. It
2316 should only be used in a clobber, and this function searches for
2317 insns which clobber it. */
2318static bool
2319insn_clobbers_hbr (rtx insn)
2320{
2321 if (INSN_P (insn)
2322 && GET_CODE (PATTERN (insn)) == PARALLEL)
2323 {
2324 rtx parallel = PATTERN (insn);
2325 rtx clobber;
2326 int j;
2327 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2328 {
2329 clobber = XVECEXP (parallel, 0, j);
2330 if (GET_CODE (clobber) == CLOBBER
2331 && GET_CODE (XEXP (clobber, 0)) == REG
2332 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2333 return 1;
2334 }
2335 }
2336 return 0;
2337}
2338
2339/* Search up to 32 insns starting at FIRST:
2340 - at any kind of hinted branch, just return
2341 - at any unconditional branch in the first 15 insns, just return
2342 - at a call or indirect branch, after the first 15 insns, force it to
2343 an even address and return
2344 - at any unconditional branch, after the first 15 insns, force it to
2345 an even address.
 2346 At the end of the search, insert an hbrp within 4 insns of FIRST,
2347 and an hbrp within 16 instructions of FIRST.
2348 */
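/* Roughly, in bytes from FIRST (4 bytes per insn): BEFORE_4 is the first
   insn ending at or past 4 insns (addr + length >= 16), BEFORE_16 the first
   ending at or past 14 insns (addr + length >= 56), and the search stops
   after 32 insns (128 bytes), inserting the missing hbrp(s) before those
   two points.  */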
644459d0 2349static void
5a976006 2350insert_hbrp_for_ilb_runout (rtx first)
644459d0 2351{
5a976006 2352 rtx insn, before_4 = 0, before_16 = 0;
2353 int addr = 0, length, first_addr = -1;
2354 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2355 int insert_lnop_after = 0;
2356 for (insn = first; insn; insn = NEXT_INSN (insn))
2357 if (INSN_P (insn))
2358 {
2359 if (first_addr == -1)
2360 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2361 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2362 length = get_attr_length (insn);
2363
2364 if (before_4 == 0 && addr + length >= 4 * 4)
2365 before_4 = insn;
2366 /* We test for 14 instructions because the first hbrp will add
2367 up to 2 instructions. */
2368 if (before_16 == 0 && addr + length >= 14 * 4)
2369 before_16 = insn;
2370
2371 if (INSN_CODE (insn) == CODE_FOR_hbr)
2372 {
2373 /* Make sure an hbrp is at least 2 cycles away from a hint.
2374 Insert an lnop after the hbrp when necessary. */
2375 if (before_4 == 0 && addr > 0)
2376 {
2377 before_4 = insn;
2378 insert_lnop_after |= 1;
2379 }
2380 else if (before_4 && addr <= 4 * 4)
2381 insert_lnop_after |= 1;
2382 if (before_16 == 0 && addr > 10 * 4)
2383 {
2384 before_16 = insn;
2385 insert_lnop_after |= 2;
2386 }
2387 else if (before_16 && addr <= 14 * 4)
2388 insert_lnop_after |= 2;
2389 }
644459d0 2390
5a976006 2391 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2392 {
2393 if (addr < hbrp_addr0)
2394 hbrp_addr0 = addr;
2395 else if (addr < hbrp_addr1)
2396 hbrp_addr1 = addr;
2397 }
644459d0 2398
5a976006 2399 if (CALL_P (insn) || JUMP_P (insn))
2400 {
2401 if (HINTED_P (insn))
2402 return;
2403
2404 /* Any branch after the first 15 insns should be on an even
2405 address to avoid a special case branch. There might be
2406 some nops and/or hbrps inserted, so we test after 10
2407 insns. */
2408 if (addr > 10 * 4)
2409 SCHED_ON_EVEN_P (insn) = 1;
2410 }
644459d0 2411
5a976006 2412 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2413 return;
2414
2415
2416 if (addr + length >= 32 * 4)
644459d0 2417 {
5a976006 2418 gcc_assert (before_4 && before_16);
2419 if (hbrp_addr0 > 4 * 4)
644459d0 2420 {
5a976006 2421 insn =
2422 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2423 recog_memoized (insn);
2424 INSN_ADDRESSES_NEW (insn,
2425 INSN_ADDRESSES (INSN_UID (before_4)));
2426 PUT_MODE (insn, GET_MODE (before_4));
2427 PUT_MODE (before_4, TImode);
2428 if (insert_lnop_after & 1)
644459d0 2429 {
5a976006 2430 insn = emit_insn_before (gen_lnop (), before_4);
2431 recog_memoized (insn);
2432 INSN_ADDRESSES_NEW (insn,
2433 INSN_ADDRESSES (INSN_UID (before_4)));
2434 PUT_MODE (insn, TImode);
644459d0 2435 }
644459d0 2436 }
5a976006 2437 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2438 && hbrp_addr1 > 16 * 4)
644459d0 2439 {
5a976006 2440 insn =
2441 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2442 recog_memoized (insn);
2443 INSN_ADDRESSES_NEW (insn,
2444 INSN_ADDRESSES (INSN_UID (before_16)));
2445 PUT_MODE (insn, GET_MODE (before_16));
2446 PUT_MODE (before_16, TImode);
2447 if (insert_lnop_after & 2)
644459d0 2448 {
5a976006 2449 insn = emit_insn_before (gen_lnop (), before_16);
2450 recog_memoized (insn);
2451 INSN_ADDRESSES_NEW (insn,
2452 INSN_ADDRESSES (INSN_UID
2453 (before_16)));
2454 PUT_MODE (insn, TImode);
644459d0 2455 }
2456 }
5a976006 2457 return;
644459d0 2458 }
644459d0 2459 }
5a976006 2460 else if (BARRIER_P (insn))
2461 return;
644459d0 2462
644459d0 2463}
5a976006 2464
2465/* The SPU might hang when it executes 48 inline instructions after a
2466 hinted branch jumps to its hinted target. The beginning of a
2467 function and the return from a call might have been hinted, and must
2468 be handled as well. To prevent a hang we insert 2 hbrps. The first
2469 should be within 6 insns of the branch target. The second should be
2470 within 22 insns of the branch target. When determining if hbrps are
 2471 necessary, we look for only 32 inline instructions, because up to
 2472 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2473 new hbrps, we insert them within 4 and 16 insns of the target. */
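/* The arithmetic behind the window: 32 searched insns plus at most 12 nops
   and 4 hbrps inserted later adds up to the 48-instruction run-out limit
   mentioned above.  */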
644459d0 2474static void
5a976006 2475insert_hbrp (void)
644459d0 2476{
5a976006 2477 rtx insn;
2478 if (TARGET_SAFE_HINTS)
644459d0 2479 {
5a976006 2480 shorten_branches (get_insns ());
2481 /* Insert hbrp at beginning of function */
2482 insn = next_active_insn (get_insns ());
2483 if (insn)
2484 insert_hbrp_for_ilb_runout (insn);
2485 /* Insert hbrp after hinted targets. */
2486 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2487 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2488 insert_hbrp_for_ilb_runout (next_active_insn (insn));
644459d0 2489 }
644459d0 2490}
2491
5a976006 2492static int in_spu_reorg;
2493
2494/* Insert branch hints. There are no branch optimizations after this
2495 pass, so it's safe to set our branch hints now. */
644459d0 2496static void
5a976006 2497spu_machine_dependent_reorg (void)
644459d0 2498{
5a976006 2499 sbitmap blocks;
2500 basic_block bb;
2501 rtx branch, insn;
2502 rtx branch_target = 0;
2503 int branch_addr = 0, insn_addr, required_dist = 0;
2504 int i;
2505 unsigned int j;
644459d0 2506
5a976006 2507 if (!TARGET_BRANCH_HINTS || optimize == 0)
2508 {
2509 /* We still do it for unoptimized code because an external
2510 function might have hinted a call or return. */
2511 insert_hbrp ();
2512 pad_bb ();
2513 return;
2514 }
644459d0 2515
5a976006 2516 blocks = sbitmap_alloc (last_basic_block);
2517 sbitmap_zero (blocks);
644459d0 2518
5a976006 2519 in_spu_reorg = 1;
2520 compute_bb_for_insn ();
2521
2522 compact_blocks ();
2523
2524 spu_bb_info =
2525 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2526 sizeof (struct spu_bb_info));
2527
2528 /* We need exact insn addresses and lengths. */
2529 shorten_branches (get_insns ());
2530
2531 for (i = n_basic_blocks - 1; i >= 0; i--)
644459d0 2532 {
5a976006 2533 bb = BASIC_BLOCK (i);
2534 branch = 0;
2535 if (spu_bb_info[i].prop_jump)
644459d0 2536 {
5a976006 2537 branch = spu_bb_info[i].prop_jump;
2538 branch_target = get_branch_target (branch);
2539 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2540 required_dist = spu_hint_dist;
2541 }
2542 /* Search from end of a block to beginning. In this loop, find
 2543 jumps which need a branch hint and emit the hint only when:
2544 - it's an indirect branch and we're at the insn which sets
2545 the register
2546 - we're at an insn that will invalidate the hint. e.g., a
2547 call, another hint insn, inline asm that clobbers $hbr, and
2548 some inlined operations (divmodsi4). Don't consider jumps
2549 because they are only at the end of a block and are
2550 considered when we are deciding whether to propagate
2551 - we're getting too far away from the branch. The hbr insns
2552 only have a signed 10 bit offset
2553 We go back as far as possible so the branch will be considered
2554 for propagation when we get to the beginning of the block. */
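	  /* The 600 byte cutoff below mirrors the check in
	     spu_emit_branch_hint, which refuses to emit a hint for a branch
	     more than 600 bytes away, so a branch carried further back than
	     that could never be hinted anyway.  */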
2555 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2556 {
2557 if (INSN_P (insn))
2558 {
2559 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2560 if (branch
2561 && ((GET_CODE (branch_target) == REG
2562 && set_of (branch_target, insn) != NULL_RTX)
2563 || insn_clobbers_hbr (insn)
2564 || branch_addr - insn_addr > 600))
2565 {
2566 rtx next = NEXT_INSN (insn);
2567 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2568 if (insn != BB_END (bb)
2569 && branch_addr - next_addr >= required_dist)
2570 {
2571 if (dump_file)
2572 fprintf (dump_file,
2573 "hint for %i in block %i before %i\n",
2574 INSN_UID (branch), bb->index,
2575 INSN_UID (next));
2576 spu_emit_branch_hint (next, branch, branch_target,
2577 branch_addr - next_addr, blocks);
2578 }
2579 branch = 0;
2580 }
2581
2582 /* JUMP_P will only be true at the end of a block. When
2583 branch is already set it means we've previously decided
2584 to propagate a hint for that branch into this block. */
2585 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2586 {
2587 branch = 0;
2588 if ((branch_target = get_branch_target (insn)))
2589 {
2590 branch = insn;
2591 branch_addr = insn_addr;
2592 required_dist = spu_hint_dist;
2593 }
2594 }
2595 }
2596 if (insn == BB_HEAD (bb))
2597 break;
2598 }
2599
2600 if (branch)
2601 {
2602 /* If we haven't emitted a hint for this branch yet, it might
2603 be profitable to emit it in one of the predecessor blocks,
2604 especially for loops. */
2605 rtx bbend;
2606 basic_block prev = 0, prop = 0, prev2 = 0;
2607 int loop_exit = 0, simple_loop = 0;
2608 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2609
2610 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2611 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2612 prev = EDGE_PRED (bb, j)->src;
2613 else
2614 prev2 = EDGE_PRED (bb, j)->src;
2615
2616 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2617 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2618 loop_exit = 1;
2619 else if (EDGE_SUCC (bb, j)->dest == bb)
2620 simple_loop = 1;
2621
2622 /* If this branch is a loop exit then propagate to previous
2623 fallthru block. This catches the cases when it is a simple
2624 loop or when there is an initial branch into the loop. */
2625 if (prev && (loop_exit || simple_loop)
2626 && prev->loop_depth <= bb->loop_depth)
2627 prop = prev;
2628
 2629 /* If there is only one adjacent predecessor, don't propagate
 2630 outside this loop. This loop_depth test isn't perfect, but
2631 I'm not sure the loop_father member is valid at this point. */
2632 else if (prev && single_pred_p (bb)
2633 && prev->loop_depth == bb->loop_depth)
2634 prop = prev;
2635
2636 /* If this is the JOIN block of a simple IF-THEN then
 2637 propagate the hint to the HEADER block. */
2638 else if (prev && prev2
2639 && EDGE_COUNT (bb->preds) == 2
2640 && EDGE_COUNT (prev->preds) == 1
2641 && EDGE_PRED (prev, 0)->src == prev2
2642 && prev2->loop_depth == bb->loop_depth
2643 && GET_CODE (branch_target) != REG)
2644 prop = prev;
2645
2646 /* Don't propagate when:
2647 - this is a simple loop and the hint would be too far
2648 - this is not a simple loop and there are 16 insns in
2649 this block already
2650 - the predecessor block ends in a branch that will be
2651 hinted
2652 - the predecessor block ends in an insn that invalidates
2653 the hint */
2654 if (prop
2655 && prop->index >= 0
2656 && (bbend = BB_END (prop))
2657 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2658 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2659 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2660 {
2661 if (dump_file)
2662 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2663 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2664 bb->index, prop->index, bb->loop_depth,
2665 INSN_UID (branch), loop_exit, simple_loop,
2666 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2667
2668 spu_bb_info[prop->index].prop_jump = branch;
2669 spu_bb_info[prop->index].bb_index = i;
2670 }
2671 else if (branch_addr - next_addr >= required_dist)
2672 {
2673 if (dump_file)
2674 fprintf (dump_file, "hint for %i in block %i before %i\n",
2675 INSN_UID (branch), bb->index,
2676 INSN_UID (NEXT_INSN (insn)));
2677 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2678 branch_addr - next_addr, blocks);
2679 }
2680 branch = 0;
644459d0 2681 }
644459d0 2682 }
5a976006 2683 free (spu_bb_info);
644459d0 2684
5a976006 2685 if (!sbitmap_empty_p (blocks))
2686 find_many_sub_basic_blocks (blocks);
2687
2688 /* We have to schedule to make sure alignment is ok. */
2689 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2690
2691 /* The hints need to be scheduled, so call it again. */
2692 schedule_insns ();
2693
2694 insert_hbrp ();
2695
2696 pad_bb ();
2697
2698
2699 if (spu_flag_var_tracking)
644459d0 2700 {
5a976006 2701 df_analyze ();
2702 timevar_push (TV_VAR_TRACKING);
2703 variable_tracking_main ();
2704 timevar_pop (TV_VAR_TRACKING);
2705 df_finish_pass (false);
644459d0 2706 }
5a976006 2707
2708 free_bb_for_insn ();
2709
2710 in_spu_reorg = 0;
644459d0 2711}
2712\f
2713
2714/* Insn scheduling routines, primarily for dual issue. */
2715static int
2716spu_sched_issue_rate (void)
2717{
2718 return 2;
2719}
2720
2721static int
5a976006 2722uses_ls_unit(rtx insn)
644459d0 2723{
5a976006 2724 rtx set = single_set (insn);
2725 if (set != 0
2726 && (GET_CODE (SET_DEST (set)) == MEM
2727 || GET_CODE (SET_SRC (set)) == MEM))
2728 return 1;
2729 return 0;
644459d0 2730}
2731
2732static int
2733get_pipe (rtx insn)
2734{
2735 enum attr_type t;
2736 /* Handle inline asm */
2737 if (INSN_CODE (insn) == -1)
2738 return -1;
2739 t = get_attr_type (insn);
2740 switch (t)
2741 {
2742 case TYPE_CONVERT:
2743 return -2;
2744 case TYPE_MULTI0:
2745 return -1;
2746
2747 case TYPE_FX2:
2748 case TYPE_FX3:
2749 case TYPE_SPR:
2750 case TYPE_NOP:
2751 case TYPE_FXB:
2752 case TYPE_FPD:
2753 case TYPE_FP6:
2754 case TYPE_FP7:
644459d0 2755 return 0;
2756
2757 case TYPE_LNOP:
2758 case TYPE_SHUF:
2759 case TYPE_LOAD:
2760 case TYPE_STORE:
2761 case TYPE_BR:
2762 case TYPE_MULTI1:
2763 case TYPE_HBR:
5a976006 2764 case TYPE_IPREFETCH:
644459d0 2765 return 1;
2766 default:
2767 abort ();
2768 }
2769}
2770
5a976006 2771
2772/* haifa-sched.c has a static variable that keeps track of the current
2773 cycle. It is passed to spu_sched_reorder, and we record it here for
2774 use by spu_sched_variable_issue. It won't be accurate if the
 2775 scheduler updates its clock_var between the two calls. */
2776static int clock_var;
2777
2778/* This is used to keep track of insn alignment. Set to 0 at the
2779 beginning of each block and increased by the "length" attr of each
2780 insn scheduled. */
2781static int spu_sched_length;
2782
2783/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2784 ready list appropriately in spu_sched_reorder(). */
2785static int pipe0_clock;
2786static int pipe1_clock;
2787
2788static int prev_clock_var;
2789
2790static int prev_priority;
2791
2792/* The SPU needs to load the next ilb sometime during the execution of
2793 the previous ilb. There is a potential conflict if every cycle has a
2794 load or store. To avoid the conflict we make sure the load/store
2795 unit is free for at least one cycle during the execution of insns in
2796 the previous ilb. */
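/* spu_ls_first records where (in spu_sched_length bytes) the current run of
   back-to-back load/store cycles began; it is reset to INT_MAX whenever a
   cycle leaves the load/store unit idle.  spu_sched_reorder emits an hbrp
   instead of another load/store once such a run reaches 15 insns.  */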
2797static int spu_ls_first;
2798static int prev_ls_clock;
2799
2800static void
2801spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2802 int max_ready ATTRIBUTE_UNUSED)
2803{
2804 spu_sched_length = 0;
2805}
2806
2807static void
2808spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2809 int max_ready ATTRIBUTE_UNUSED)
2810{
2811 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2812 {
2813 /* When any block might be at least 8-byte aligned, assume they
2814 will all be at least 8-byte aligned to make sure dual issue
2815 works out correctly. */
2816 spu_sched_length = 0;
2817 }
2818 spu_ls_first = INT_MAX;
2819 clock_var = -1;
2820 prev_ls_clock = -1;
2821 pipe0_clock = -1;
2822 pipe1_clock = -1;
2823 prev_clock_var = -1;
2824 prev_priority = -1;
2825}
2826
644459d0 2827static int
5a976006 2828spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2829 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
644459d0 2830{
5a976006 2831 int len;
2832 int p;
644459d0 2833 if (GET_CODE (PATTERN (insn)) == USE
2834 || GET_CODE (PATTERN (insn)) == CLOBBER
5a976006 2835 || (len = get_attr_length (insn)) == 0)
2836 return more;
2837
2838 spu_sched_length += len;
2839
2840 /* Reset on inline asm */
2841 if (INSN_CODE (insn) == -1)
2842 {
2843 spu_ls_first = INT_MAX;
2844 pipe0_clock = -1;
2845 pipe1_clock = -1;
2846 return 0;
2847 }
2848 p = get_pipe (insn);
2849 if (p == 0)
2850 pipe0_clock = clock_var;
2851 else
2852 pipe1_clock = clock_var;
2853
2854 if (in_spu_reorg)
2855 {
2856 if (clock_var - prev_ls_clock > 1
2857 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2858 spu_ls_first = INT_MAX;
2859 if (uses_ls_unit (insn))
2860 {
2861 if (spu_ls_first == INT_MAX)
2862 spu_ls_first = spu_sched_length;
2863 prev_ls_clock = clock_var;
2864 }
2865
2866 /* The scheduler hasn't inserted the nop, but we will later on.
2867 Include those nops in spu_sched_length. */
2868 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2869 spu_sched_length += 4;
2870 prev_clock_var = clock_var;
2871
2872 /* more is -1 when called from spu_sched_reorder for new insns
2873 that don't have INSN_PRIORITY */
2874 if (more >= 0)
2875 prev_priority = INSN_PRIORITY (insn);
2876 }
2877
 2878 /* Always try issuing more insns. spu_sched_reorder will decide
2879 when the cycle should be advanced. */
2880 return 1;
2881}
2882
2883/* This function is called for both TARGET_SCHED_REORDER and
2884 TARGET_SCHED_REORDER2. */
2885static int
2886spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2887 rtx *ready, int *nreadyp, int clock)
2888{
2889 int i, nready = *nreadyp;
2890 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2891 rtx insn;
2892
2893 clock_var = clock;
2894
2895 if (nready <= 0 || pipe1_clock >= clock)
2896 return 0;
2897
2898 /* Find any rtl insns that don't generate assembly insns and schedule
2899 them first. */
2900 for (i = nready - 1; i >= 0; i--)
2901 {
2902 insn = ready[i];
2903 if (INSN_CODE (insn) == -1
2904 || INSN_CODE (insn) == CODE_FOR_blockage
2905 || INSN_CODE (insn) == CODE_FOR__spu_convert)
2906 {
2907 ready[i] = ready[nready - 1];
2908 ready[nready - 1] = insn;
2909 return 1;
2910 }
2911 }
2912
2913 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2914 for (i = 0; i < nready; i++)
2915 if (INSN_CODE (ready[i]) != -1)
2916 {
2917 insn = ready[i];
2918 switch (get_attr_type (insn))
2919 {
2920 default:
2921 case TYPE_MULTI0:
2922 case TYPE_CONVERT:
2923 case TYPE_FX2:
2924 case TYPE_FX3:
2925 case TYPE_SPR:
2926 case TYPE_NOP:
2927 case TYPE_FXB:
2928 case TYPE_FPD:
2929 case TYPE_FP6:
2930 case TYPE_FP7:
2931 pipe_0 = i;
2932 break;
2933 case TYPE_LOAD:
2934 case TYPE_STORE:
2935 pipe_ls = i;
2936 case TYPE_LNOP:
2937 case TYPE_SHUF:
2938 case TYPE_BR:
2939 case TYPE_MULTI1:
2940 case TYPE_HBR:
2941 pipe_1 = i;
2942 break;
2943 case TYPE_IPREFETCH:
2944 pipe_hbrp = i;
2945 break;
2946 }
2947 }
2948
2949 /* In the first scheduling phase, schedule loads and stores together
2950 to increase the chance they will get merged during postreload CSE. */
2951 if (!reload_completed && pipe_ls >= 0)
2952 {
2953 insn = ready[pipe_ls];
2954 ready[pipe_ls] = ready[nready - 1];
2955 ready[nready - 1] = insn;
2956 return 1;
2957 }
2958
2959 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2960 if (pipe_hbrp >= 0)
2961 pipe_1 = pipe_hbrp;
2962
2963 /* When we have loads/stores in every cycle of the last 15 insns and
2964 we are about to schedule another load/store, emit an hbrp insn
2965 instead. */
2966 if (in_spu_reorg
2967 && spu_sched_length - spu_ls_first >= 4 * 15
2968 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2969 {
2970 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2971 recog_memoized (insn);
2972 if (pipe0_clock < clock)
2973 PUT_MODE (insn, TImode);
2974 spu_sched_variable_issue (file, verbose, insn, -1);
2975 return 0;
2976 }
2977
2978 /* In general, we want to emit nops to increase dual issue, but dual
2979 issue isn't faster when one of the insns could be scheduled later
 2980 without affecting the critical path. We look at INSN_PRIORITY to
 2981 make a good guess, but it isn't perfect, so -mdual-nops=n can be
 2982 used to adjust it. */
2983 if (in_spu_reorg && spu_dual_nops < 10)
2984 {
 2985 /* When we are at an even address and we are not issuing nops to
2986 improve scheduling then we need to advance the cycle. */
2987 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2988 && (spu_dual_nops == 0
2989 || (pipe_1 != -1
2990 && prev_priority >
2991 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2992 return 0;
2993
2994 /* When at an odd address, schedule the highest priority insn
2995 without considering pipeline. */
2996 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2997 && (spu_dual_nops == 0
2998 || (prev_priority >
2999 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3000 return 1;
3001 }
3002
3003
3004 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3005 pipe0 insn in the ready list, schedule it. */
3006 if (pipe0_clock < clock && pipe_0 >= 0)
3007 schedule_i = pipe_0;
3008
3009 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3010 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3011 else
3012 schedule_i = pipe_1;
3013
3014 if (schedule_i > -1)
3015 {
3016 insn = ready[schedule_i];
3017 ready[schedule_i] = ready[nready - 1];
3018 ready[nready - 1] = insn;
3019 return 1;
3020 }
3021 return 0;
644459d0 3022}
3023
3024/* INSN is dependent on DEP_INSN. */
3025static int
5a976006 3026spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
644459d0 3027{
5a976006 3028 rtx set;
3029
3030 /* The blockage pattern is used to prevent instructions from being
3031 moved across it and has no cost. */
3032 if (INSN_CODE (insn) == CODE_FOR_blockage
3033 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3034 return 0;
3035
3036 if (INSN_CODE (insn) == CODE_FOR__spu_convert
3037 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
3038 return 0;
3039
3040 /* Make sure hbrps are spread out. */
3041 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3042 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3043 return 8;
3044
3045 /* Make sure hints and hbrps are 2 cycles apart. */
3046 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3047 || INSN_CODE (insn) == CODE_FOR_hbr)
3048 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3049 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3050 return 2;
3051
3052 /* An hbrp has no real dependency on other insns. */
3053 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3054 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3055 return 0;
3056
3057 /* Assuming that it is unlikely an argument register will be used in
3058 the first cycle of the called function, we reduce the cost for
3059 slightly better scheduling of dep_insn. When not hinted, the
3060 mispredicted branch would hide the cost as well. */
3061 if (CALL_P (insn))
3062 {
3063 rtx target = get_branch_target (insn);
3064 if (GET_CODE (target) != REG || !set_of (target, insn))
3065 return cost - 2;
3066 return cost;
3067 }
3068
3069 /* And when returning from a function, let's assume the return values
3070 are completed sooner too. */
3071 if (CALL_P (dep_insn))
644459d0 3072 return cost - 2;
5a976006 3073
 3074 /* Make sure an instruction that loads from the back chain is scheduled
3075 away from the return instruction so a hint is more likely to get
3076 issued. */
3077 if (INSN_CODE (insn) == CODE_FOR__return
3078 && (set = single_set (dep_insn))
3079 && GET_CODE (SET_DEST (set)) == REG
3080 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3081 return 20;
3082
644459d0 3083 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3084 scheduler makes every insn in a block anti-dependent on the final
3085 jump_insn. We adjust here so higher cost insns will get scheduled
3086 earlier. */
5a976006 3087 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
9997bd27 3088 return insn_cost (dep_insn) - 3;
5a976006 3089
644459d0 3090 return cost;
3091}
3092\f
3093/* Create a CONST_DOUBLE from a string. */
3094struct rtx_def *
3095spu_float_const (const char *string, enum machine_mode mode)
3096{
3097 REAL_VALUE_TYPE value;
3098 value = REAL_VALUE_ATOF (string, mode);
3099 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3100}
3101
644459d0 3102int
3103spu_constant_address_p (rtx x)
3104{
3105 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3106 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3107 || GET_CODE (x) == HIGH);
3108}
3109
3110static enum spu_immediate
3111which_immediate_load (HOST_WIDE_INT val)
3112{
3113 gcc_assert (val == trunc_int_for_mode (val, SImode));
3114
3115 if (val >= -0x8000 && val <= 0x7fff)
3116 return SPU_IL;
3117 if (val >= 0 && val <= 0x3ffff)
3118 return SPU_ILA;
3119 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3120 return SPU_ILH;
3121 if ((val & 0xffff) == 0)
3122 return SPU_ILHU;
3123
3124 return SPU_NONE;
3125}
3126
dea01258 3127/* Return true when OP can be loaded by one of the il instructions, or
3128 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
644459d0 3129int
3130immediate_load_p (rtx op, enum machine_mode mode)
dea01258 3131{
3132 if (CONSTANT_P (op))
3133 {
3134 enum immediate_class c = classify_immediate (op, mode);
5df189be 3135 return c == IC_IL1 || c == IC_IL1s
3072d30e 3136 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
dea01258 3137 }
3138 return 0;
3139}
3140
 3141 /* Return true if the first SIZE bytes of ARR form a constant that can be
3142 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3143 represent the size and offset of the instruction to use. */
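/* For example, the loop below accepts
     { 16,17,18,19, 0,1,2,3, 24,25,26,27, 28,29,30,31 }
   as a cwd pattern with start 4 and run 4: the identity bytes i+16
   everywhere except a run of 0..3 at the insertion point (runs of length
   1 and 2 use the values 3 and 2,3 instead).  */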
3144static int
3145cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3146{
3147 int cpat, run, i, start;
3148 cpat = 1;
3149 run = 0;
3150 start = -1;
3151 for (i = 0; i < size && cpat; i++)
3152 if (arr[i] != i+16)
3153 {
3154 if (!run)
3155 {
3156 start = i;
3157 if (arr[i] == 3)
3158 run = 1;
3159 else if (arr[i] == 2 && arr[i+1] == 3)
3160 run = 2;
3161 else if (arr[i] == 0)
3162 {
3163 while (arr[i+run] == run && i+run < 16)
3164 run++;
3165 if (run != 4 && run != 8)
3166 cpat = 0;
3167 }
3168 else
3169 cpat = 0;
3170 if ((i & (run-1)) != 0)
3171 cpat = 0;
3172 i += run;
3173 }
3174 else
3175 cpat = 0;
3176 }
b01a6dc3 3177 if (cpat && (run || size < 16))
dea01258 3178 {
3179 if (run == 0)
3180 run = 1;
3181 if (prun)
3182 *prun = run;
3183 if (pstart)
3184 *pstart = start == -1 ? 16-run : start;
3185 return 1;
3186 }
3187 return 0;
3188}
3189
3190/* OP is a CONSTANT_P. Determine what instructions can be used to load
d819917f 3191 it into a register. MODE is only valid when OP is a CONST_INT. */
dea01258 3192static enum immediate_class
3193classify_immediate (rtx op, enum machine_mode mode)
644459d0 3194{
3195 HOST_WIDE_INT val;
3196 unsigned char arr[16];
5df189be 3197 int i, j, repeated, fsmbi, repeat;
dea01258 3198
3199 gcc_assert (CONSTANT_P (op));
3200
644459d0 3201 if (GET_MODE (op) != VOIDmode)
3202 mode = GET_MODE (op);
3203
dea01258 3204 /* A V4SI const_vector with all identical symbols is ok. */
5df189be 3205 if (!flag_pic
3206 && mode == V4SImode
dea01258 3207 && GET_CODE (op) == CONST_VECTOR
3208 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3209 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3210 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3211 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3212 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3213 op = CONST_VECTOR_ELT (op, 0);
644459d0 3214
dea01258 3215 switch (GET_CODE (op))
3216 {
3217 case SYMBOL_REF:
3218 case LABEL_REF:
3219 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
644459d0 3220
dea01258 3221 case CONST:
0cfc65d4 3222 /* We can never know if the resulting address fits in 18 bits and can be
3223 loaded with ila. For now, assume the address will not overflow if
3224 the displacement is "small" (fits 'K' constraint). */
3225 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3226 {
3227 rtx sym = XEXP (XEXP (op, 0), 0);
3228 rtx cst = XEXP (XEXP (op, 0), 1);
3229
3230 if (GET_CODE (sym) == SYMBOL_REF
3231 && GET_CODE (cst) == CONST_INT
3232 && satisfies_constraint_K (cst))
3233 return IC_IL1s;
3234 }
3235 return IC_IL2s;
644459d0 3236
dea01258 3237 case HIGH:
3238 return IC_IL1s;
3239
3240 case CONST_VECTOR:
3241 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3242 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3243 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3244 return IC_POOL;
3245 /* Fall through. */
3246
3247 case CONST_INT:
3248 case CONST_DOUBLE:
3249 constant_to_array (mode, op, arr);
644459d0 3250
dea01258 3251 /* Check that each 4-byte slot is identical. */
3252 repeated = 1;
3253 for (i = 4; i < 16; i += 4)
3254 for (j = 0; j < 4; j++)
3255 if (arr[j] != arr[i + j])
3256 repeated = 0;
3257
3258 if (repeated)
3259 {
3260 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3261 val = trunc_int_for_mode (val, SImode);
3262
3263 if (which_immediate_load (val) != SPU_NONE)
3264 return IC_IL1;
3265 }
3266
3267 /* Any mode of 2 bytes or smaller can be loaded with an il
3268 instruction. */
3269 gcc_assert (GET_MODE_SIZE (mode) > 2);
3270
3271 fsmbi = 1;
5df189be 3272 repeat = 0;
dea01258 3273 for (i = 0; i < 16 && fsmbi; i++)
5df189be 3274 if (arr[i] != 0 && repeat == 0)
3275 repeat = arr[i];
3276 else if (arr[i] != 0 && arr[i] != repeat)
dea01258 3277 fsmbi = 0;
3278 if (fsmbi)
5df189be 3279 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
dea01258 3280
3281 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3282 return IC_CPAT;
3283
3284 if (repeated)
3285 return IC_IL2;
3286
3287 return IC_POOL;
3288 default:
3289 break;
3290 }
3291 gcc_unreachable ();
644459d0 3292}
3293
3294static enum spu_immediate
3295which_logical_immediate (HOST_WIDE_INT val)
3296{
3297 gcc_assert (val == trunc_int_for_mode (val, SImode));
3298
3299 if (val >= -0x200 && val <= 0x1ff)
3300 return SPU_ORI;
3301 if (val >= 0 && val <= 0xffff)
3302 return SPU_IOHL;
3303 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3304 {
3305 val = trunc_int_for_mode (val, HImode);
3306 if (val >= -0x200 && val <= 0x1ff)
3307 return SPU_ORHI;
3308 if ((val & 0xff) == ((val >> 8) & 0xff))
3309 {
3310 val = trunc_int_for_mode (val, QImode);
3311 if (val >= -0x200 && val <= 0x1ff)
3312 return SPU_ORBI;
3313 }
3314 }
3315 return SPU_NONE;
3316}
3317
5df189be 3318/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3319 CONST_DOUBLEs. */
3320static int
3321const_vector_immediate_p (rtx x)
3322{
3323 int i;
3324 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3325 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3326 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3327 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3328 return 0;
3329 return 1;
3330}
3331
644459d0 3332int
3333logical_immediate_p (rtx op, enum machine_mode mode)
3334{
3335 HOST_WIDE_INT val;
3336 unsigned char arr[16];
3337 int i, j;
3338
3339 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3340 || GET_CODE (op) == CONST_VECTOR);
3341
5df189be 3342 if (GET_CODE (op) == CONST_VECTOR
3343 && !const_vector_immediate_p (op))
3344 return 0;
3345
644459d0 3346 if (GET_MODE (op) != VOIDmode)
3347 mode = GET_MODE (op);
3348
3349 constant_to_array (mode, op, arr);
3350
3351 /* Check that bytes are repeated. */
3352 for (i = 4; i < 16; i += 4)
3353 for (j = 0; j < 4; j++)
3354 if (arr[j] != arr[i + j])
3355 return 0;
3356
3357 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3358 val = trunc_int_for_mode (val, SImode);
3359
3360 i = which_logical_immediate (val);
3361 return i != SPU_NONE && i != SPU_IOHL;
3362}
3363
3364int
3365iohl_immediate_p (rtx op, enum machine_mode mode)
3366{
3367 HOST_WIDE_INT val;
3368 unsigned char arr[16];
3369 int i, j;
3370
3371 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3372 || GET_CODE (op) == CONST_VECTOR);
3373
5df189be 3374 if (GET_CODE (op) == CONST_VECTOR
3375 && !const_vector_immediate_p (op))
3376 return 0;
3377
644459d0 3378 if (GET_MODE (op) != VOIDmode)
3379 mode = GET_MODE (op);
3380
3381 constant_to_array (mode, op, arr);
3382
3383 /* Check that bytes are repeated. */
3384 for (i = 4; i < 16; i += 4)
3385 for (j = 0; j < 4; j++)
3386 if (arr[j] != arr[i + j])
3387 return 0;
3388
3389 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3390 val = trunc_int_for_mode (val, SImode);
3391
3392 return val >= 0 && val <= 0xffff;
3393}
3394
3395int
3396arith_immediate_p (rtx op, enum machine_mode mode,
3397 HOST_WIDE_INT low, HOST_WIDE_INT high)
3398{
3399 HOST_WIDE_INT val;
3400 unsigned char arr[16];
3401 int bytes, i, j;
3402
3403 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3404 || GET_CODE (op) == CONST_VECTOR);
3405
5df189be 3406 if (GET_CODE (op) == CONST_VECTOR
3407 && !const_vector_immediate_p (op))
3408 return 0;
3409
644459d0 3410 if (GET_MODE (op) != VOIDmode)
3411 mode = GET_MODE (op);
3412
3413 constant_to_array (mode, op, arr);
3414
3415 if (VECTOR_MODE_P (mode))
3416 mode = GET_MODE_INNER (mode);
3417
3418 bytes = GET_MODE_SIZE (mode);
3419 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3420
3421 /* Check that bytes are repeated. */
3422 for (i = bytes; i < 16; i += bytes)
3423 for (j = 0; j < bytes; j++)
3424 if (arr[j] != arr[i + j])
3425 return 0;
3426
3427 val = arr[0];
3428 for (j = 1; j < bytes; j++)
3429 val = (val << 8) | arr[j];
3430
3431 val = trunc_int_for_mode (val, mode);
3432
3433 return val >= low && val <= high;
3434}
3435
3436/* We accept:
5b865faf 3437 - any 32-bit constant (SImode, SFmode)
644459d0 3438 - any constant that can be generated with fsmbi (any mode)
5b865faf 3439 - a 64-bit constant where the high and low bits are identical
644459d0 3440 (DImode, DFmode)
5b865faf 3441 - a 128-bit constant where the four 32-bit words match. */
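/* For example, the DImode constant 0x1234567812345678 falls in the third
   category; its identical 32-bit halves can be built with the immediate
   loads, which replicate a 32-bit pattern across the whole register.  */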
644459d0 3442int
3443spu_legitimate_constant_p (rtx x)
3444{
5df189be 3445 if (GET_CODE (x) == HIGH)
3446 x = XEXP (x, 0);
644459d0 3447 /* V4SI with all identical symbols is valid. */
5df189be 3448 if (!flag_pic
3449 && GET_MODE (x) == V4SImode
644459d0 3450 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3451 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
5df189be 3452 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
644459d0 3453 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3454 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3455 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3456
5df189be 3457 if (GET_CODE (x) == CONST_VECTOR
3458 && !const_vector_immediate_p (x))
3459 return 0;
644459d0 3460 return 1;
3461}
3462
 3463 /* Valid addresses are:
3464 - symbol_ref, label_ref, const
3465 - reg
3466 - reg + const, where either reg or const is 16 byte aligned
3467 - reg + reg, alignment doesn't matter
3468 The alignment matters in the reg+const case because lqd and stqd
3469 ignore the 4 least significant bits of the const. (TODO: It might be
3470 preferable to allow any alignment and fix it up when splitting.) */
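/* Examples of the reg + const case: an offset of 48 from any base register
   is accepted because it is a multiple of 16, while an offset of 4 is only
   accepted when the base is a frame register known to stay 16-byte aligned,
   since lqd/stqd would otherwise silently drop the low 4 bits of the
   address.  */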
3471int
3472spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
3473 rtx x, int reg_ok_strict)
3474{
3475 if (mode == TImode && GET_CODE (x) == AND
3476 && GET_CODE (XEXP (x, 1)) == CONST_INT
3477 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
3478 x = XEXP (x, 0);
3479 switch (GET_CODE (x))
3480 {
3481 case SYMBOL_REF:
3482 case LABEL_REF:
3483 return !TARGET_LARGE_MEM;
3484
3485 case CONST:
0cfc65d4 3486 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
3487 {
3488 rtx sym = XEXP (XEXP (x, 0), 0);
3489 rtx cst = XEXP (XEXP (x, 0), 1);
3490
3491 /* Accept any symbol_ref + constant, assuming it does not
3492 wrap around the local store addressability limit. */
3493 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
3494 return 1;
3495 }
3496 return 0;
644459d0 3497
3498 case CONST_INT:
3499 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3500
3501 case SUBREG:
3502 x = XEXP (x, 0);
3503 gcc_assert (GET_CODE (x) == REG);
3504
3505 case REG:
3506 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3507
3508 case PLUS:
3509 case LO_SUM:
3510 {
3511 rtx op0 = XEXP (x, 0);
3512 rtx op1 = XEXP (x, 1);
3513 if (GET_CODE (op0) == SUBREG)
3514 op0 = XEXP (op0, 0);
3515 if (GET_CODE (op1) == SUBREG)
3516 op1 = XEXP (op1, 0);
3517 /* We can't just accept any aligned register because CSE can
3518 change it to a register that is not marked aligned and then
3519 recog will fail. So we only accept frame registers because
3520 they will only be changed to other frame registers. */
3521 if (GET_CODE (op0) == REG
3522 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3523 && GET_CODE (op1) == CONST_INT
3524 && INTVAL (op1) >= -0x2000
3525 && INTVAL (op1) <= 0x1fff
5df189be 3526 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
644459d0 3527 return 1;
3528 if (GET_CODE (op0) == REG
3529 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3530 && GET_CODE (op1) == REG
3531 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3532 return 1;
3533 }
3534 break;
3535
3536 default:
3537 break;
3538 }
3539 return 0;
3540}
3541
3542/* When the address is reg + const_int, force the const_int into a
fa7637bd 3543 register. */
644459d0 3544rtx
3545spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3546 enum machine_mode mode)
3547{
3548 rtx op0, op1;
3549 /* Make sure both operands are registers. */
3550 if (GET_CODE (x) == PLUS)
3551 {
3552 op0 = XEXP (x, 0);
3553 op1 = XEXP (x, 1);
3554 if (ALIGNED_SYMBOL_REF_P (op0))
3555 {
3556 op0 = force_reg (Pmode, op0);
3557 mark_reg_pointer (op0, 128);
3558 }
3559 else if (GET_CODE (op0) != REG)
3560 op0 = force_reg (Pmode, op0);
3561 if (ALIGNED_SYMBOL_REF_P (op1))
3562 {
3563 op1 = force_reg (Pmode, op1);
3564 mark_reg_pointer (op1, 128);
3565 }
3566 else if (GET_CODE (op1) != REG)
3567 op1 = force_reg (Pmode, op1);
3568 x = gen_rtx_PLUS (Pmode, op0, op1);
3569 if (spu_legitimate_address (mode, x, 0))
3570 return x;
3571 }
3572 return NULL_RTX;
3573}
3574
3575/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3576 struct attribute_spec.handler. */
3577static tree
3578spu_handle_fndecl_attribute (tree * node,
3579 tree name,
3580 tree args ATTRIBUTE_UNUSED,
3581 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3582{
3583 if (TREE_CODE (*node) != FUNCTION_DECL)
3584 {
3585 warning (0, "`%s' attribute only applies to functions",
3586 IDENTIFIER_POINTER (name));
3587 *no_add_attrs = true;
3588 }
3589
3590 return NULL_TREE;
3591}
3592
3593/* Handle the "vector" attribute. */
3594static tree
3595spu_handle_vector_attribute (tree * node, tree name,
3596 tree args ATTRIBUTE_UNUSED,
3597 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3598{
3599 tree type = *node, result = NULL_TREE;
3600 enum machine_mode mode;
3601 int unsigned_p;
3602
3603 while (POINTER_TYPE_P (type)
3604 || TREE_CODE (type) == FUNCTION_TYPE
3605 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3606 type = TREE_TYPE (type);
3607
3608 mode = TYPE_MODE (type);
3609
3610 unsigned_p = TYPE_UNSIGNED (type);
3611 switch (mode)
3612 {
3613 case DImode:
3614 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3615 break;
3616 case SImode:
3617 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3618 break;
3619 case HImode:
3620 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3621 break;
3622 case QImode:
3623 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3624 break;
3625 case SFmode:
3626 result = V4SF_type_node;
3627 break;
3628 case DFmode:
3629 result = V2DF_type_node;
3630 break;
3631 default:
3632 break;
3633 }
3634
3635 /* Propagate qualifiers attached to the element type
3636 onto the vector type. */
3637 if (result && result != type && TYPE_QUALS (type))
3638 result = build_qualified_type (result, TYPE_QUALS (type));
3639
3640 *no_add_attrs = true; /* No need to hang on to the attribute. */
3641
3642 if (!result)
3643 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3644 else
d991e6e8 3645 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
644459d0 3646
3647 return NULL_TREE;
3648}
3649
f2b32076 3650/* Return nonzero if FUNC is a naked function. */
644459d0 3651static int
3652spu_naked_function_p (tree func)
3653{
3654 tree a;
3655
3656 if (TREE_CODE (func) != FUNCTION_DECL)
3657 abort ();
3658
3659 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3660 return a != NULL_TREE;
3661}
3662
3663int
3664spu_initial_elimination_offset (int from, int to)
3665{
3666 int saved_regs_size = spu_saved_regs_size ();
3667 int sp_offset = 0;
abe32cce 3668 if (!current_function_is_leaf || crtl->outgoing_args_size
644459d0 3669 || get_frame_size () || saved_regs_size)
3670 sp_offset = STACK_POINTER_OFFSET;
3671 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
a8e019fa 3672 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
644459d0 3673 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
a8e019fa 3674 return get_frame_size ();
644459d0 3675 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
abe32cce 3676 return sp_offset + crtl->outgoing_args_size
644459d0 3677 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3678 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3679 return get_frame_size () + saved_regs_size + sp_offset;
a8e019fa 3680 else
3681 gcc_unreachable ();
644459d0 3682}
3683
3684rtx
fb80456a 3685spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
644459d0 3686{
3687 enum machine_mode mode = TYPE_MODE (type);
3688 int byte_size = ((mode == BLKmode)
3689 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3690
3691 /* Make sure small structs are left justified in a register. */
3692 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3693 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3694 {
3695 enum machine_mode smode;
3696 rtvec v;
3697 int i;
3698 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3699 int n = byte_size / UNITS_PER_WORD;
3700 v = rtvec_alloc (nregs);
3701 for (i = 0; i < n; i++)
3702 {
3703 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3704 gen_rtx_REG (TImode,
3705 FIRST_RETURN_REGNUM
3706 + i),
3707 GEN_INT (UNITS_PER_WORD * i));
3708 byte_size -= UNITS_PER_WORD;
3709 }
3710
3711 if (n < nregs)
3712 {
3713 if (byte_size < 4)
3714 byte_size = 4;
3715 smode =
3716 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3717 RTVEC_ELT (v, n) =
3718 gen_rtx_EXPR_LIST (VOIDmode,
3719 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3720 GEN_INT (UNITS_PER_WORD * n));
3721 }
3722 return gen_rtx_PARALLEL (mode, v);
3723 }
3724 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3725}
3726
3727rtx
3728spu_function_arg (CUMULATIVE_ARGS cum,
3729 enum machine_mode mode,
3730 tree type, int named ATTRIBUTE_UNUSED)
3731{
3732 int byte_size;
3733
3734 if (cum >= MAX_REGISTER_ARGS)
3735 return 0;
3736
3737 byte_size = ((mode == BLKmode)
3738 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3739
3740 /* The ABI does not allow parameters to be passed partially in
 3741 registers and partially on the stack. */
3742 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3743 return 0;
3744
3745 /* Make sure small structs are left justified in a register. */
3746 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3747 && byte_size < UNITS_PER_WORD && byte_size > 0)
3748 {
3749 enum machine_mode smode;
3750 rtx gr_reg;
3751 if (byte_size < 4)
3752 byte_size = 4;
3753 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3754 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3755 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3756 const0_rtx);
3757 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3758 }
3759 else
3760 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3761}
3762
3763/* Variable sized types are passed by reference. */
3764static bool
3765spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3766 enum machine_mode mode ATTRIBUTE_UNUSED,
fb80456a 3767 const_tree type, bool named ATTRIBUTE_UNUSED)
644459d0 3768{
3769 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3770}
3771\f
3772
3773/* Var args. */
3774
3775/* Create and return the va_list datatype.
3776
3777 On SPU, va_list is an array type equivalent to
3778
3779 typedef struct __va_list_tag
3780 {
3781 void *__args __attribute__((__aligned(16)));
3782 void *__skip __attribute__((__aligned(16)));
3783
3784 } va_list[1];
3785
fa7637bd 3786 where __args points to the arg that will be returned by the next
644459d0 3787 va_arg(), and __skip points to the previous stack frame such that
3788 when __args == __skip we should advance __args by 32 bytes. */
3789static tree
3790spu_build_builtin_va_list (void)
3791{
3792 tree f_args, f_skip, record, type_decl;
3793 bool owp;
3794
3795 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3796
3797 type_decl =
3798 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3799
3800 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3801 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3802
3803 DECL_FIELD_CONTEXT (f_args) = record;
3804 DECL_ALIGN (f_args) = 128;
3805 DECL_USER_ALIGN (f_args) = 1;
3806
3807 DECL_FIELD_CONTEXT (f_skip) = record;
3808 DECL_ALIGN (f_skip) = 128;
3809 DECL_USER_ALIGN (f_skip) = 1;
3810
3811 TREE_CHAIN (record) = type_decl;
3812 TYPE_NAME (record) = type_decl;
3813 TYPE_FIELDS (record) = f_args;
3814 TREE_CHAIN (f_args) = f_skip;
3815
 3816 /* We know this is being padded and we want it that way. It is an
 3817 internal type, so hide the warnings from the user. */
3818 owp = warn_padded;
3819 warn_padded = false;
3820
3821 layout_type (record);
3822
3823 warn_padded = owp;
3824
3825 /* The correct type is an array type of one element. */
3826 return build_array_type (record, build_index_type (size_zero_node));
3827}
3828
3829/* Implement va_start by filling the va_list structure VALIST.
3830 NEXTARG points to the first anonymous stack argument.
3831
3832 The following global variables are used to initialize
3833 the va_list structure:
3834
abe32cce 3835 crtl->args.info:
644459d0 3836 the CUMULATIVE_ARGS for this function
3837
abe32cce 3838 crtl->args.arg_offset_rtx:
644459d0 3839 holds the offset of the first anonymous stack argument
3840 (relative to the virtual arg pointer). */
3841
8a58ed0a 3842static void
644459d0 3843spu_va_start (tree valist, rtx nextarg)
3844{
3845 tree f_args, f_skip;
3846 tree args, skip, t;
3847
3848 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3849 f_skip = TREE_CHAIN (f_args);
3850
3851 valist = build_va_arg_indirect_ref (valist);
3852 args =
3853 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3854 skip =
3855 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3856
3857 /* Find the __args area. */
3858 t = make_tree (TREE_TYPE (args), nextarg);
abe32cce 3859 if (crtl->args.pretend_args_size > 0)
0de36bdb 3860 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3861 size_int (-STACK_POINTER_OFFSET));
75a70cf9 3862 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
644459d0 3863 TREE_SIDE_EFFECTS (t) = 1;
3864 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3865
3866 /* Find the __skip area. */
3867 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
0de36bdb 3868 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
abe32cce 3869 size_int (crtl->args.pretend_args_size
0de36bdb 3870 - STACK_POINTER_OFFSET));
75a70cf9 3871 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
644459d0 3872 TREE_SIDE_EFFECTS (t) = 1;
3873 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3874}
3875
3876/* Gimplify va_arg by updating the va_list structure
3877 VALIST as required to retrieve an argument of type
3878 TYPE, and returning that argument.
3879
3880 ret = va_arg(VALIST, TYPE);
3881
3882 generates code equivalent to:
3883
3884 paddedsize = (sizeof(TYPE) + 15) & -16;
3885 if (VALIST.__args + paddedsize > VALIST.__skip
3886 && VALIST.__args <= VALIST.__skip)
3887 addr = VALIST.__skip + 32;
3888 else
3889 addr = VALIST.__args;
3890 VALIST.__args = addr + paddedsize;
3891 ret = *(TYPE *)addr;
3892 */
3893static tree
75a70cf9 3894spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
3895 gimple_seq * post_p ATTRIBUTE_UNUSED)
644459d0 3896{
3897 tree f_args, f_skip;
3898 tree args, skip;
3899 HOST_WIDE_INT size, rsize;
3900 tree paddedsize, addr, tmp;
3901 bool pass_by_reference_p;
3902
3903 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3904 f_skip = TREE_CHAIN (f_args);
3905
3906 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3907 args =
3908 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3909 skip =
3910 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3911
3912 addr = create_tmp_var (ptr_type_node, "va_arg");
3913 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3914
3915 /* if an object is dynamically sized, a pointer to it is passed
3916 instead of the object itself. */
3917 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3918 false);
3919 if (pass_by_reference_p)
3920 type = build_pointer_type (type);
3921 size = int_size_in_bytes (type);
3922 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3923
3924 /* build conditional expression to calculate addr. The expression
3925 will be gimplified later. */
0de36bdb 3926 paddedsize = size_int (rsize);
75a70cf9 3927 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
644459d0 3928 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
75a70cf9 3929 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
3930 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
3931 unshare_expr (skip)));
644459d0 3932
3933 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
75a70cf9 3934 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
3935 size_int (32)), unshare_expr (args));
644459d0 3936
75a70cf9 3937 gimplify_assign (addr, tmp, pre_p);
644459d0 3938
3939 /* update VALIST.__args */
0de36bdb 3940 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
75a70cf9 3941 gimplify_assign (unshare_expr (args), tmp, pre_p);
644459d0 3942
3943 addr = fold_convert (build_pointer_type (type), addr);
3944
3945 if (pass_by_reference_p)
3946 addr = build_va_arg_indirect_ref (addr);
3947
3948 return build_va_arg_indirect_ref (addr);
3949}
3950
3951/* Save parameter registers starting with the register that corresponds
 3952 to the first unnamed parameter. If the first unnamed parameter is
 3953 on the stack then save no registers. Set pretend_args_size to the
3954 amount of space needed to save the registers. */
3955void
3956spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3957 tree type, int *pretend_size, int no_rtl)
3958{
3959 if (!no_rtl)
3960 {
3961 rtx tmp;
3962 int regno;
3963 int offset;
3964 int ncum = *cum;
3965
 3966 /* cum currently points to the last named argument; we want to
 3967 start at the next argument. */
3968 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3969
3970 offset = -STACK_POINTER_OFFSET;
3971 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3972 {
3973 tmp = gen_frame_mem (V4SImode,
3974 plus_constant (virtual_incoming_args_rtx,
3975 offset));
3976 emit_move_insn (tmp,
3977 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3978 offset += 16;
3979 }
3980 *pretend_size = offset + STACK_POINTER_OFFSET;
3981 }
3982}
3983\f
3984void
3985spu_conditional_register_usage (void)
3986{
3987 if (flag_pic)
3988 {
3989 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3990 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3991 }
644459d0 3992}
3993
 3994/* This is called to decide when we can simplify a load instruction. We
 3995 must only return true for registers which we know will always be
 3996 aligned, taking into account that CSE might replace this reg with
 3997 another one that has not been marked aligned.
 3998 So this is really only true for frame, stack and virtual registers,
fa7637bd 3999 which we know are always aligned and should not be adversely affected
 4000 by CSE. */
644459d0 4001static int
4002regno_aligned_for_load (int regno)
4003{
4004 return regno == FRAME_POINTER_REGNUM
5df189be 4005 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
aa71ecd4 4006 || regno == ARG_POINTER_REGNUM
644459d0 4007 || regno == STACK_POINTER_REGNUM
5df189be 4008 || (regno >= FIRST_VIRTUAL_REGISTER
4009 && regno <= LAST_VIRTUAL_REGISTER);
644459d0 4010}
4011
4012/* Return TRUE when mem is known to be 16-byte aligned. */
4013int
4014aligned_mem_p (rtx mem)
4015{
4016 if (MEM_ALIGN (mem) >= 128)
4017 return 1;
4018 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
4019 return 1;
4020 if (GET_CODE (XEXP (mem, 0)) == PLUS)
4021 {
4022 rtx p0 = XEXP (XEXP (mem, 0), 0);
4023 rtx p1 = XEXP (XEXP (mem, 0), 1);
4024 if (regno_aligned_for_load (REGNO (p0)))
4025 {
4026 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
4027 return 1;
4028 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4029 return 1;
4030 }
4031 }
4032 else if (GET_CODE (XEXP (mem, 0)) == REG)
4033 {
4034 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
4035 return 1;
4036 }
4037 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
4038 return 1;
4039 else if (GET_CODE (XEXP (mem, 0)) == CONST)
4040 {
4041 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
4042 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
4043 if (GET_CODE (p0) == SYMBOL_REF
4044 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
4045 return 1;
4046 }
4047 return 0;
4048}
4049
69ced2d6 4050/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4051 into its SYMBOL_REF_FLAGS. */
4052static void
4053spu_encode_section_info (tree decl, rtx rtl, int first)
4054{
4055 default_encode_section_info (decl, rtl, first);
4056
4057 /* If a variable has a forced alignment to < 16 bytes, mark it with
4058 SYMBOL_FLAG_ALIGN1. */
4059 if (TREE_CODE (decl) == VAR_DECL
4060 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4061 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4062}
4063
644459d0 4064/* Return TRUE if we are certain the mem refers to a complete object
4065 which is both 16-byte aligned and padded to a 16-byte boundary. This
4066 would make it safe to store with a single instruction.
4067 We guarantee the alignment and padding for static objects by aligning
 4068 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4069 FIXME: We currently cannot guarantee this for objects on the stack
4070 because assign_parm_setup_stack calls assign_stack_local with the
4071 alignment of the parameter mode and in that case the alignment never
4072 gets adjusted by LOCAL_ALIGNMENT. */
4073static int
4074store_with_one_insn_p (rtx mem)
4075{
4076 rtx addr = XEXP (mem, 0);
4077 if (GET_MODE (mem) == BLKmode)
4078 return 0;
4079 /* Only static objects. */
4080 if (GET_CODE (addr) == SYMBOL_REF)
4081 {
4082 /* We use the associated declaration to make sure the access is
fa7637bd 4083 referring to the whole object.
644459d0 4084 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4085 if it is necessary. Will there be cases where one exists, and
4086 the other does not? Will there be cases where both exist, but
4087 have different types? */
4088 tree decl = MEM_EXPR (mem);
4089 if (decl
4090 && TREE_CODE (decl) == VAR_DECL
4091 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4092 return 1;
4093 decl = SYMBOL_REF_DECL (addr);
4094 if (decl
4095 && TREE_CODE (decl) == VAR_DECL
4096 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4097 return 1;
4098 }
4099 return 0;
4100}
4101
4102int
4103spu_expand_mov (rtx * ops, enum machine_mode mode)
4104{
4105 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4106 abort ();
4107
4108 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4109 {
4110 rtx from = SUBREG_REG (ops[1]);
8d72495d 4111 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
644459d0 4112
4113 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4114 && GET_MODE_CLASS (imode) == MODE_INT
4115 && subreg_lowpart_p (ops[1]));
4116
4117 if (GET_MODE_SIZE (imode) < 4)
8d72495d 4118 imode = SImode;
4119 if (imode != GET_MODE (from))
4120 from = gen_rtx_SUBREG (imode, from, 0);
644459d0 4121
4122 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4123 {
99bdde56 4124 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
644459d0 4125 emit_insn (GEN_FCN (icode) (ops[0], from));
4126 }
4127 else
4128 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4129 return 1;
4130 }
4131
4132 /* At least one of the operands needs to be a register. */
4133 if ((reload_in_progress | reload_completed) == 0
4134 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4135 {
4136 rtx temp = force_reg (mode, ops[1]);
4137 emit_move_insn (ops[0], temp);
4138 return 1;
4139 }
4140 if (reload_in_progress || reload_completed)
4141 {
dea01258 4142 if (CONSTANT_P (ops[1]))
4143 return spu_split_immediate (ops);
644459d0 4144 return 0;
4145 }
4146 else
4147 {
4148 if (GET_CODE (ops[0]) == MEM)
4149 {
4150 if (!spu_valid_move (ops))
4151 {
4152 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
4153 gen_reg_rtx (TImode)));
4154 return 1;
4155 }
4156 }
4157 else if (GET_CODE (ops[1]) == MEM)
4158 {
4159 if (!spu_valid_move (ops))
4160 {
4161 emit_insn (gen_load
4162 (ops[0], ops[1], gen_reg_rtx (TImode),
4163 gen_reg_rtx (SImode)));
4164 return 1;
4165 }
4166 }
4167 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4168 extend them. */
4169 if (GET_CODE (ops[1]) == CONST_INT)
4170 {
4171 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4172 if (val != INTVAL (ops[1]))
4173 {
4174 emit_move_insn (ops[0], GEN_INT (val));
4175 return 1;
4176 }
4177 }
4178 }
4179 return 0;
4180}
4181
644459d0 4182void
4183spu_split_load (rtx * ops)
4184{
4185 enum machine_mode mode = GET_MODE (ops[0]);
4186 rtx addr, load, rot, mem, p0, p1;
4187 int rot_amt;
4188
4189 addr = XEXP (ops[1], 0);
4190
4191 rot = 0;
4192 rot_amt = 0;
4193 if (GET_CODE (addr) == PLUS)
4194 {
4195 /* 8 cases:
4196 aligned reg + aligned reg => lqx
4197 aligned reg + unaligned reg => lqx, rotqby
4198 aligned reg + aligned const => lqd
4199 aligned reg + unaligned const => lqd, rotqbyi
4200 unaligned reg + aligned reg => lqx, rotqby
4201 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4202 unaligned reg + aligned const => lqd, rotqby
 4203 unaligned reg + unaligned const => not allowed by legitimate address
4204 */
4205 p0 = XEXP (addr, 0);
4206 p1 = XEXP (addr, 1);
aa71ecd4 4207 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
644459d0 4208 {
aa71ecd4 4209 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4210 {
4211 emit_insn (gen_addsi3 (ops[3], p0, p1));
4212 rot = ops[3];
4213 }
4214 else
4215 rot = p0;
4216 }
4217 else
4218 {
4219 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4220 {
4221 rot_amt = INTVAL (p1) & 15;
4222 p1 = GEN_INT (INTVAL (p1) & -16);
4223 addr = gen_rtx_PLUS (SImode, p0, p1);
4224 }
aa71ecd4 4225 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
644459d0 4226 rot = p1;
4227 }
4228 }
4229 else if (GET_CODE (addr) == REG)
4230 {
aa71ecd4 4231 if (!regno_aligned_for_load (REGNO (addr)))
644459d0 4232 rot = addr;
4233 }
4234 else if (GET_CODE (addr) == CONST)
4235 {
4236 if (GET_CODE (XEXP (addr, 0)) == PLUS
4237 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4238 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4239 {
4240 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4241 if (rot_amt & -16)
4242 addr = gen_rtx_CONST (Pmode,
4243 gen_rtx_PLUS (Pmode,
4244 XEXP (XEXP (addr, 0), 0),
4245 GEN_INT (rot_amt & -16)));
4246 else
4247 addr = XEXP (XEXP (addr, 0), 0);
4248 }
4249 else
4250 rot = addr;
4251 }
4252 else if (GET_CODE (addr) == CONST_INT)
4253 {
4254 rot_amt = INTVAL (addr);
4255 addr = GEN_INT (rot_amt & -16);
4256 }
4257 else if (!ALIGNED_SYMBOL_REF_P (addr))
4258 rot = addr;
4259
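  /* Scalars narrower than a word live at the end of the preferred slot
     (ending at byte 3), so rotate a little further to land them there.  */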
4260 if (GET_MODE_SIZE (mode) < 4)
4261 rot_amt += GET_MODE_SIZE (mode) - 4;
4262
4263 rot_amt &= 15;
4264
4265 if (rot && rot_amt)
4266 {
4267 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
4268 rot = ops[3];
4269 rot_amt = 0;
4270 }
4271
4272 load = ops[2];
4273
4274 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4275 mem = change_address (ops[1], TImode, addr);
4276
e04cf423 4277 emit_insn (gen_movti (load, mem));
644459d0 4278
4279 if (rot)
4280 emit_insn (gen_rotqby_ti (load, load, rot));
4281 else if (rot_amt)
4282 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
4283
4284 if (reload_completed)
4285 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
4286 else
4287 emit_insn (gen_spu_convert (ops[0], load));
4288}
4289
4290void
4291spu_split_store (rtx * ops)
4292{
4293 enum machine_mode mode = GET_MODE (ops[0]);
4294 rtx pat = ops[2];
4295 rtx reg = ops[3];
4296 rtx addr, p0, p1, p1_lo, smem;
4297 int aform;
4298 int scalar;
4299
4300 addr = XEXP (ops[0], 0);
4301
4302 if (GET_CODE (addr) == PLUS)
4303 {
4304 /* 8 cases:
4305 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4306 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4307 aligned reg + aligned const => lqd, c?d, shuf, stqx
4308 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4309 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4310 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4311 unaligned reg + aligned const => lqd, c?d, shuf, stqx
 4312 unaligned reg + unaligned const => not allowed by legitimate address
4313 */
4314 aform = 0;
4315 p0 = XEXP (addr, 0);
4316 p1 = p1_lo = XEXP (addr, 1);
4317 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
4318 {
4319 p1_lo = GEN_INT (INTVAL (p1) & 15);
4320 p1 = GEN_INT (INTVAL (p1) & -16);
4321 addr = gen_rtx_PLUS (SImode, p0, p1);
4322 }
4323 }
4324 else if (GET_CODE (addr) == REG)
4325 {
4326 aform = 0;
4327 p0 = addr;
4328 p1 = p1_lo = const0_rtx;
4329 }
4330 else
4331 {
4332 aform = 1;
4333 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4334 p1 = 0; /* aform doesn't use p1 */
4335 p1_lo = addr;
4336 if (ALIGNED_SYMBOL_REF_P (addr))
4337 p1_lo = const0_rtx;
4338 else if (GET_CODE (addr) == CONST)
4339 {
4340 if (GET_CODE (XEXP (addr, 0)) == PLUS
4341 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4342 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4343 {
4344 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4345 if ((v & -16) != 0)
4346 addr = gen_rtx_CONST (Pmode,
4347 gen_rtx_PLUS (Pmode,
4348 XEXP (XEXP (addr, 0), 0),
4349 GEN_INT (v & -16)));
4350 else
4351 addr = XEXP (XEXP (addr, 0), 0);
4352 p1_lo = GEN_INT (v & 15);
4353 }
4354 }
4355 else if (GET_CODE (addr) == CONST_INT)
4356 {
4357 p1_lo = GEN_INT (INTVAL (addr) & 15);
4358 addr = GEN_INT (INTVAL (addr) & -16);
4359 }
4360 }
4361
e04cf423 4362 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4363
644459d0 4364 scalar = store_with_one_insn_p (ops[0]);
4365 if (!scalar)
4366 {
 4367 /* We could copy the flags from the ops[0] MEM to lmem here.
 4368 We don't because we want this load to be optimized away if
 4369 possible, and copying the flags will prevent that in certain
 4370 cases, e.g. consider the volatile flag. */
4371
e04cf423 4372 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4373 set_mem_alias_set (lmem, 0);
4374 emit_insn (gen_movti (reg, lmem));
644459d0 4375
aa71ecd4 4376 if (!p0 || regno_aligned_for_load (REGNO (p0)))
644459d0 4377 p0 = stack_pointer_rtx;
4378 if (!p1_lo)
4379 p1_lo = const0_rtx;
4380
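      /* cpat builds the insertion-control mask (as the cwd/chd/cbd/cdd
	 instructions do) and shufb merges the new value into the quadword
	 just loaded from memory.  */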
4381 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4382 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4383 }
4384 else if (reload_completed)
4385 {
4386 if (GET_CODE (ops[1]) == REG)
4387 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
4388 else if (GET_CODE (ops[1]) == SUBREG)
4389 emit_move_insn (reg,
4390 gen_rtx_REG (GET_MODE (reg),
4391 REGNO (SUBREG_REG (ops[1]))));
4392 else
4393 abort ();
4394 }
4395 else
4396 {
4397 if (GET_CODE (ops[1]) == REG)
4398 emit_insn (gen_spu_convert (reg, ops[1]));
4399 else if (GET_CODE (ops[1]) == SUBREG)
4400 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4401 else
4402 abort ();
4403 }
4404
4405 if (GET_MODE_SIZE (mode) < 4 && scalar)
4406 emit_insn (gen_shlqby_ti
4407 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
4408
644459d0 4409 smem = change_address (ops[0], TImode, addr);
4410 /* We can't use the previous alias set because the memory has changed
4411 size and can potentially overlap objects of other types. */
4412 set_mem_alias_set (smem, 0);
4413
e04cf423 4414 emit_insn (gen_movti (smem, reg));
644459d0 4415}
4416
 4417/* Return TRUE if X is a MEM which is a struct member reference
4418 and the member can safely be loaded and stored with a single
4419 instruction because it is padded. */
4420static int
4421mem_is_padded_component_ref (rtx x)
4422{
4423 tree t = MEM_EXPR (x);
4424 tree r;
4425 if (!t || TREE_CODE (t) != COMPONENT_REF)
4426 return 0;
4427 t = TREE_OPERAND (t, 1);
4428 if (!t || TREE_CODE (t) != FIELD_DECL
4429 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4430 return 0;
4431 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4432 r = DECL_FIELD_CONTEXT (t);
4433 if (!r || TREE_CODE (r) != RECORD_TYPE)
4434 return 0;
4435 /* Make sure they are the same mode */
4436 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4437 return 0;
 4438 /* If there are no following fields, then the field alignment ensures
fa7637bd 4439 the structure is padded to that alignment, which means this field is
 4440 padded too. */
644459d0 4441 if (TREE_CHAIN (t) == 0)
4442 return 1;
4443 /* If the following field is also aligned then this field will be
4444 padded. */
4445 t = TREE_CHAIN (t);
4446 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4447 return 1;
4448 return 0;
4449}
4450
c7b91b14 4451/* Parse the -mfixed-range= option string. */
4452static void
4453fix_range (const char *const_str)
4454{
4455 int i, first, last;
4456 char *str, *dash, *comma;
4457
 4458 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4459 REG2 are either register names or register numbers. The effect
4460 of this option is to mark the registers in the range from REG1 to
4461 REG2 as ``fixed'' so they won't be used by the compiler. */
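  /* For example, -mfixed-range=80-85,90-93 (illustrative values) would
     mark registers 80-85 and 90-93 as fixed.  */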
4462
4463 i = strlen (const_str);
4464 str = (char *) alloca (i + 1);
4465 memcpy (str, const_str, i + 1);
4466
4467 while (1)
4468 {
4469 dash = strchr (str, '-');
4470 if (!dash)
4471 {
4472 warning (0, "value of -mfixed-range must have form REG1-REG2");
4473 return;
4474 }
4475 *dash = '\0';
4476 comma = strchr (dash + 1, ',');
4477 if (comma)
4478 *comma = '\0';
4479
4480 first = decode_reg_name (str);
4481 if (first < 0)
4482 {
4483 warning (0, "unknown register name: %s", str);
4484 return;
4485 }
4486
4487 last = decode_reg_name (dash + 1);
4488 if (last < 0)
4489 {
4490 warning (0, "unknown register name: %s", dash + 1);
4491 return;
4492 }
4493
4494 *dash = '-';
4495
4496 if (first > last)
4497 {
4498 warning (0, "%s-%s is an empty range", str, dash + 1);
4499 return;
4500 }
4501
4502 for (i = first; i <= last; ++i)
4503 fixed_regs[i] = call_used_regs[i] = 1;
4504
4505 if (!comma)
4506 break;
4507
4508 *comma = ',';
4509 str = comma + 1;
4510 }
4511}
4512
644459d0 4513int
4514spu_valid_move (rtx * ops)
4515{
4516 enum machine_mode mode = GET_MODE (ops[0]);
4517 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4518 return 0;
4519
4520 /* init_expr_once tries to recog against load and store insns to set
4521 the direct_load[] and direct_store[] arrays. We always want to
4522 consider those loads and stores valid. init_expr_once is called in
4523 the context of a dummy function which does not have a decl. */
4524 if (cfun->decl == 0)
4525 return 1;
4526
 4527 /* Don't allow loads/stores which would require more than 1 insn.
4528 During and after reload we assume loads and stores only take 1
4529 insn. */
4530 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
4531 {
4532 if (GET_CODE (ops[0]) == MEM
4533 && (GET_MODE_SIZE (mode) < 4
4534 || !(store_with_one_insn_p (ops[0])
4535 || mem_is_padded_component_ref (ops[0]))))
4536 return 0;
4537 if (GET_CODE (ops[1]) == MEM
4538 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
4539 return 0;
4540 }
4541 return 1;
4542}
4543
4544/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4545 can be generated using the fsmbi instruction. */
4546int
4547fsmbi_const_p (rtx x)
4548{
dea01258 4549 if (CONSTANT_P (x))
4550 {
5df189be 4551 /* We can always choose TImode for CONST_INT because the high bits
dea01258 4552 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5df189be 4553 enum immediate_class c = classify_immediate (x, TImode);
3072d30e 4554 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
dea01258 4555 }
4556 return 0;
4557}
4558
4559/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4560 can be generated using the cbd, chd, cwd or cdd instruction. */
4561int
4562cpat_const_p (rtx x, enum machine_mode mode)
4563{
4564 if (CONSTANT_P (x))
4565 {
4566 enum immediate_class c = classify_immediate (x, mode);
4567 return c == IC_CPAT;
4568 }
4569 return 0;
4570}
644459d0 4571
dea01258 4572rtx
4573gen_cpat_const (rtx * ops)
4574{
4575 unsigned char dst[16];
4576 int i, offset, shift, isize;
4577 if (GET_CODE (ops[3]) != CONST_INT
4578 || GET_CODE (ops[2]) != CONST_INT
4579 || (GET_CODE (ops[1]) != CONST_INT
4580 && GET_CODE (ops[1]) != REG))
4581 return 0;
4582 if (GET_CODE (ops[1]) == REG
4583 && (!REG_POINTER (ops[1])
4584 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4585 return 0;
644459d0 4586
4587 for (i = 0; i < 16; i++)
dea01258 4588 dst[i] = i + 16;
4589 isize = INTVAL (ops[3]);
4590 if (isize == 1)
4591 shift = 3;
4592 else if (isize == 2)
4593 shift = 2;
4594 else
4595 shift = 0;
4596 offset = (INTVAL (ops[2]) +
4597 (GET_CODE (ops[1]) ==
4598 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4599 for (i = 0; i < isize; i++)
4600 dst[offset + i] = i + shift;
4601 return array_to_constant (TImode, dst);
644459d0 4602}
4603
4604/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4605 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4606 than 16 bytes, the value is repeated across the rest of the array. */
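/* For example, an SImode constant 0x01020304 produces
   { 01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04 }.  */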
4607void
4608constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4609{
4610 HOST_WIDE_INT val;
4611 int i, j, first;
4612
4613 memset (arr, 0, 16);
4614 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4615 if (GET_CODE (x) == CONST_INT
4616 || (GET_CODE (x) == CONST_DOUBLE
4617 && (mode == SFmode || mode == DFmode)))
4618 {
4619 gcc_assert (mode != VOIDmode && mode != BLKmode);
4620
4621 if (GET_CODE (x) == CONST_DOUBLE)
4622 val = const_double_to_hwint (x);
4623 else
4624 val = INTVAL (x);
4625 first = GET_MODE_SIZE (mode) - 1;
4626 for (i = first; i >= 0; i--)
4627 {
4628 arr[i] = val & 0xff;
4629 val >>= 8;
4630 }
4631 /* Splat the constant across the whole array. */
4632 for (j = 0, i = first + 1; i < 16; i++)
4633 {
4634 arr[i] = arr[j];
4635 j = (j == first) ? 0 : j + 1;
4636 }
4637 }
4638 else if (GET_CODE (x) == CONST_DOUBLE)
4639 {
4640 val = CONST_DOUBLE_LOW (x);
4641 for (i = 15; i >= 8; i--)
4642 {
4643 arr[i] = val & 0xff;
4644 val >>= 8;
4645 }
4646 val = CONST_DOUBLE_HIGH (x);
4647 for (i = 7; i >= 0; i--)
4648 {
4649 arr[i] = val & 0xff;
4650 val >>= 8;
4651 }
4652 }
4653 else if (GET_CODE (x) == CONST_VECTOR)
4654 {
4655 int units;
4656 rtx elt;
4657 mode = GET_MODE_INNER (mode);
4658 units = CONST_VECTOR_NUNITS (x);
4659 for (i = 0; i < units; i++)
4660 {
4661 elt = CONST_VECTOR_ELT (x, i);
4662 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4663 {
4664 if (GET_CODE (elt) == CONST_DOUBLE)
4665 val = const_double_to_hwint (elt);
4666 else
4667 val = INTVAL (elt);
4668 first = GET_MODE_SIZE (mode) - 1;
4669 if (first + i * GET_MODE_SIZE (mode) > 16)
4670 abort ();
4671 for (j = first; j >= 0; j--)
4672 {
4673 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4674 val >>= 8;
4675 }
4676 }
4677 }
4678 }
4679 else
4680 gcc_unreachable();
4681}
4682
4683/* Convert a 16 byte array to a constant of mode MODE. When MODE is
4684 smaller than 16 bytes, use the bytes that would represent that value
4685 in a register, e.g., for QImode return the value of arr[3]. */
4686rtx
4687array_to_constant (enum machine_mode mode, unsigned char arr[16])
4688{
4689 enum machine_mode inner_mode;
4690 rtvec v;
4691 int units, size, i, j, k;
4692 HOST_WIDE_INT val;
4693
4694 if (GET_MODE_CLASS (mode) == MODE_INT
4695 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4696 {
4697 j = GET_MODE_SIZE (mode);
4698 i = j < 4 ? 4 - j : 0;
4699 for (val = 0; i < j; i++)
4700 val = (val << 8) | arr[i];
4701 val = trunc_int_for_mode (val, mode);
4702 return GEN_INT (val);
4703 }
4704
4705 if (mode == TImode)
4706 {
4707 HOST_WIDE_INT high;
4708 for (i = high = 0; i < 8; i++)
4709 high = (high << 8) | arr[i];
4710 for (i = 8, val = 0; i < 16; i++)
4711 val = (val << 8) | arr[i];
4712 return immed_double_const (val, high, TImode);
4713 }
4714 if (mode == SFmode)
4715 {
4716 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4717 val = trunc_int_for_mode (val, SImode);
171b6d22 4718 return hwint_to_const_double (SFmode, val);
644459d0 4719 }
4720 if (mode == DFmode)
4721 {
1f915911 4722 for (i = 0, val = 0; i < 8; i++)
4723 val = (val << 8) | arr[i];
171b6d22 4724 return hwint_to_const_double (DFmode, val);
644459d0 4725 }
4726
4727 if (!VECTOR_MODE_P (mode))
4728 abort ();
4729
4730 units = GET_MODE_NUNITS (mode);
4731 size = GET_MODE_UNIT_SIZE (mode);
4732 inner_mode = GET_MODE_INNER (mode);
4733 v = rtvec_alloc (units);
4734
4735 for (k = i = 0; i < units; ++i)
4736 {
4737 val = 0;
4738 for (j = 0; j < size; j++, k++)
4739 val = (val << 8) | arr[k];
4740
4741 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4742 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4743 else
4744 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4745 }
4746 if (k > 16)
4747 abort ();
4748
4749 return gen_rtx_CONST_VECTOR (mode, v);
4750}
4751
4752static void
4753reloc_diagnostic (rtx x)
4754{
4755 tree loc_decl, decl = 0;
4756 const char *msg;
4757 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4758 return;
4759
4760 if (GET_CODE (x) == SYMBOL_REF)
4761 decl = SYMBOL_REF_DECL (x);
4762 else if (GET_CODE (x) == CONST
4763 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4764 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4765
4766 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4767 if (decl && !DECL_P (decl))
4768 decl = 0;
4769
4770 /* We use last_assemble_variable_decl to get line information. It's
4771 not always going to be right and might not even be close, but will
4772 be right for the more common cases. */
5df189be 4773 if (!last_assemble_variable_decl || in_section == ctors_section)
644459d0 4774 loc_decl = decl;
4775 else
4776 loc_decl = last_assemble_variable_decl;
4777
4778 /* The decl could be a string constant. */
4779 if (decl && DECL_P (decl))
4780 msg = "%Jcreating run-time relocation for %qD";
4781 else
4782 msg = "creating run-time relocation";
4783
99369027 4784 if (TARGET_WARN_RELOC)
644459d0 4785 warning (0, msg, loc_decl, decl);
99369027 4786 else
4787 error (msg, loc_decl, decl);
644459d0 4788}
4789
4790/* Hook into assemble_integer so we can generate an error for run-time
4791 relocations. The SPU ABI disallows them. */
4792static bool
4793spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4794{
 4795 /* By default run-time relocations aren't supported, but we allow them
 4796 in case users support them in their own run-time loader, and we provide
 4797 a warning for those users that don't. */
4798 if ((GET_CODE (x) == SYMBOL_REF)
4799 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4800 reloc_diagnostic (x);
4801
4802 return default_assemble_integer (x, size, aligned_p);
4803}
4804
4805static void
4806spu_asm_globalize_label (FILE * file, const char *name)
4807{
4808 fputs ("\t.global\t", file);
4809 assemble_name (file, name);
4810 fputs ("\n", file);
4811}
4812
4813static bool
f529eb25 4814spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
4815 bool speed ATTRIBUTE_UNUSED)
644459d0 4816{
4817 enum machine_mode mode = GET_MODE (x);
4818 int cost = COSTS_N_INSNS (2);
4819
4820 /* Folding to a CONST_VECTOR will use extra space but there might
4821 be only a small savings in cycles. We'd like to use a CONST_VECTOR
9505a73b 4822 only if it allows us to fold away multiple insns. Changing the cost
644459d0 4823 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4824 because this cost will only be compared against a single insn.
4825 if (code == CONST_VECTOR)
4826 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4827 */
4828
4829 /* Use defaults for float operations. Not accurate but good enough. */
4830 if (mode == DFmode)
4831 {
4832 *total = COSTS_N_INSNS (13);
4833 return true;
4834 }
4835 if (mode == SFmode)
4836 {
4837 *total = COSTS_N_INSNS (6);
4838 return true;
4839 }
4840 switch (code)
4841 {
4842 case CONST_INT:
4843 if (satisfies_constraint_K (x))
4844 *total = 0;
4845 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4846 *total = COSTS_N_INSNS (1);
4847 else
4848 *total = COSTS_N_INSNS (3);
4849 return true;
4850
4851 case CONST:
4852 *total = COSTS_N_INSNS (3);
4853 return true;
4854
4855 case LABEL_REF:
4856 case SYMBOL_REF:
4857 *total = COSTS_N_INSNS (0);
4858 return true;
4859
4860 case CONST_DOUBLE:
4861 *total = COSTS_N_INSNS (5);
4862 return true;
4863
4864 case FLOAT_EXTEND:
4865 case FLOAT_TRUNCATE:
4866 case FLOAT:
4867 case UNSIGNED_FLOAT:
4868 case FIX:
4869 case UNSIGNED_FIX:
4870 *total = COSTS_N_INSNS (7);
4871 return true;
4872
4873 case PLUS:
4874 if (mode == TImode)
4875 {
4876 *total = COSTS_N_INSNS (9);
4877 return true;
4878 }
4879 break;
4880
4881 case MULT:
4882 cost =
4883 GET_CODE (XEXP (x, 0)) ==
4884 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4885 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4886 {
4887 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4888 {
4889 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4890 cost = COSTS_N_INSNS (14);
4891 if ((val & 0xffff) == 0)
4892 cost = COSTS_N_INSNS (9);
4893 else if (val > 0 && val < 0x10000)
4894 cost = COSTS_N_INSNS (11);
4895 }
4896 }
4897 *total = cost;
4898 return true;
4899 case DIV:
4900 case UDIV:
4901 case MOD:
4902 case UMOD:
4903 *total = COSTS_N_INSNS (20);
4904 return true;
4905 case ROTATE:
4906 case ROTATERT:
4907 case ASHIFT:
4908 case ASHIFTRT:
4909 case LSHIFTRT:
4910 *total = COSTS_N_INSNS (4);
4911 return true;
4912 case UNSPEC:
4913 if (XINT (x, 1) == UNSPEC_CONVERT)
4914 *total = COSTS_N_INSNS (0);
4915 else
4916 *total = COSTS_N_INSNS (4);
4917 return true;
4918 }
 4919 /* Scale cost by mode size, except when initializing (cfun->decl == 0). */
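  /* For example, a DImode operation is costed at 4x and a TImode operation
     at 16x the equivalent SImode cost.  */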
4920 if (GET_MODE_CLASS (mode) == MODE_INT
4921 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4922 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4923 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4924 *total = cost;
4925 return true;
4926}
4927
1bd43494 4928static enum machine_mode
4929spu_unwind_word_mode (void)
644459d0 4930{
1bd43494 4931 return SImode;
644459d0 4932}
4933
4934/* Decide whether we can make a sibling call to a function. DECL is the
4935 declaration of the function being targeted by the call and EXP is the
4936 CALL_EXPR representing the call. */
4937static bool
4938spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4939{
4940 return decl && !TARGET_LARGE_MEM;
4941}
4942
4943/* We need to correctly update the back chain pointer and the Available
 4944 Stack Size (which is in the second slot of the sp register). */
4945void
4946spu_allocate_stack (rtx op0, rtx op1)
4947{
4948 HOST_WIDE_INT v;
4949 rtx chain = gen_reg_rtx (V4SImode);
4950 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4951 rtx sp = gen_reg_rtx (V4SImode);
4952 rtx splatted = gen_reg_rtx (V4SImode);
4953 rtx pat = gen_reg_rtx (TImode);
4954
4955 /* copy the back chain so we can save it back again. */
4956 emit_move_insn (chain, stack_bot);
4957
4958 op1 = force_reg (SImode, op1);
4959
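  /* Build a shuffle pattern of { 00 01 02 03 } repeated four times, which
     splats the SImode size in op1 (its preferred slot) into every word of
     SPLATTED so it can be subtracted from both the sp and the Available
     Stack Size.  */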
4960 v = 0x1020300010203ll;
4961 emit_move_insn (pat, immed_double_const (v, v, TImode));
4962 emit_insn (gen_shufb (splatted, op1, op1, pat));
4963
4964 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4965 emit_insn (gen_subv4si3 (sp, sp, splatted));
4966
4967 if (flag_stack_check)
4968 {
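      /* Extract the updated Available Stack Size (word 1) and halt, via
	 heq, if it has gone negative, i.e. the allocation overflowed.  */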
4969 rtx avail = gen_reg_rtx(SImode);
4970 rtx result = gen_reg_rtx(SImode);
4971 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4972 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4973 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4974 }
4975
4976 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4977
4978 emit_move_insn (stack_bot, chain);
4979
4980 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4981}
4982
4983void
4984spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4985{
4986 static unsigned char arr[16] =
4987 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4988 rtx temp = gen_reg_rtx (SImode);
4989 rtx temp2 = gen_reg_rtx (SImode);
4990 rtx temp3 = gen_reg_rtx (V4SImode);
4991 rtx temp4 = gen_reg_rtx (V4SImode);
4992 rtx pat = gen_reg_rtx (TImode);
4993 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4994
4995 /* Restore the backchain from the first word, sp from the second. */
4996 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4997 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4998
4999 emit_move_insn (pat, array_to_constant (TImode, arr));
5000
5001 /* Compute Available Stack Size for sp */
5002 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5003 emit_insn (gen_shufb (temp3, temp, temp, pat));
5004
5005 /* Compute Available Stack Size for back chain */
5006 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5007 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5008 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5009
5010 emit_insn (gen_addv4si3 (sp, sp, temp3));
5011 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5012}
5013
5014static void
5015spu_init_libfuncs (void)
5016{
5017 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5018 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5019 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5020 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5021 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5022 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5023 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5024 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5025 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5026 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5027 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5028
5029 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5030 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
19a53068 5031
5032 set_optab_libfunc (smul_optab, TImode, "__multi3");
5033 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5034 set_optab_libfunc (smod_optab, TImode, "__modti3");
5035 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5036 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5037 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
644459d0 5038}
5039
5040/* Make a subreg, stripping any existing subreg. We could possibly just
5041 call simplify_subreg, but in this case we know what we want. */
5042rtx
5043spu_gen_subreg (enum machine_mode mode, rtx x)
5044{
5045 if (GET_CODE (x) == SUBREG)
5046 x = SUBREG_REG (x);
5047 if (GET_MODE (x) == mode)
5048 return x;
5049 return gen_rtx_SUBREG (mode, x, 0);
5050}
5051
5052static bool
fb80456a 5053spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
644459d0 5054{
5055 return (TYPE_MODE (type) == BLKmode
5056 && ((type) == 0
5057 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5058 || int_size_in_bytes (type) >
5059 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5060}
5061\f
5062/* Create the built-in types and functions */
5063
5064struct spu_builtin_description spu_builtins[] = {
5065#define DEF_BUILTIN(fcode, icode, name, type, params) \
5066 {fcode, icode, name, type, params, NULL_TREE},
5067#include "spu-builtins.def"
5068#undef DEF_BUILTIN
5069};
5070
5071static void
5072spu_init_builtins (void)
5073{
5074 struct spu_builtin_description *d;
5075 unsigned int i;
5076
5077 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5078 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5079 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5080 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5081 V4SF_type_node = build_vector_type (float_type_node, 4);
5082 V2DF_type_node = build_vector_type (double_type_node, 2);
5083
5084 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5085 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5086 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5087 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5088
c4ecce0c 5089 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
644459d0 5090
5091 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5092 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5093 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5094 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5095 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5096 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5097 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5098 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5099 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5100 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5101 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5102 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5103
5104 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5105 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5106 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5107 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5108 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5109 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5110 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5111 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5112
5113 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5114 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5115
5116 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5117
5118 spu_builtin_types[SPU_BTI_PTR] =
5119 build_pointer_type (build_qualified_type
5120 (void_type_node,
5121 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5122
5123 /* For each builtin we build a new prototype. The tree code will make
5124 sure nodes are shared. */
5125 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5126 {
5127 tree p;
5128 char name[64]; /* build_function will make a copy. */
5129 int parm;
5130
5131 if (d->name == 0)
5132 continue;
5133
5dfbd18f 5134 /* Find last parm. */
644459d0 5135 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5dfbd18f 5136 ;
644459d0 5137
5138 p = void_list_node;
5139 while (parm > 1)
5140 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5141
5142 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5143
5144 sprintf (name, "__builtin_%s", d->name);
5145 d->fndecl =
5146 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5147 NULL, NULL_TREE);
a76866d3 5148 if (d->fcode == SPU_MASK_FOR_LOAD)
5149 TREE_READONLY (d->fndecl) = 1;
5dfbd18f 5150
5151 /* These builtins don't throw. */
5152 TREE_NOTHROW (d->fndecl) = 1;
644459d0 5153 }
5154}
5155
cf31d486 5156void
5157spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5158{
5159 static unsigned char arr[16] =
5160 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5161
5162 rtx temp = gen_reg_rtx (Pmode);
5163 rtx temp2 = gen_reg_rtx (V4SImode);
5164 rtx temp3 = gen_reg_rtx (V4SImode);
5165 rtx pat = gen_reg_rtx (TImode);
5166 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5167
5168 emit_move_insn (pat, array_to_constant (TImode, arr));
5169
5170 /* Restore the sp. */
5171 emit_move_insn (temp, op1);
5172 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5173
5174 /* Compute available stack size for sp. */
5175 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5176 emit_insn (gen_shufb (temp3, temp, temp, pat));
5177
5178 emit_insn (gen_addv4si3 (sp, sp, temp3));
5179 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5180}
5181
644459d0 5182int
5183spu_safe_dma (HOST_WIDE_INT channel)
5184{
006e4b96 5185 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
644459d0 5186}
5187
5188void
5189spu_builtin_splats (rtx ops[])
5190{
5191 enum machine_mode mode = GET_MODE (ops[0]);
5192 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5193 {
5194 unsigned char arr[16];
5195 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5196 emit_move_insn (ops[0], array_to_constant (mode, arr));
5197 }
644459d0 5198 else
5199 {
5200 rtx reg = gen_reg_rtx (TImode);
5201 rtx shuf;
5202 if (GET_CODE (ops[1]) != REG
5203 && GET_CODE (ops[1]) != SUBREG)
5204 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
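      /* Each shuffle constant below replicates the scalar's preferred slot
	 (byte 3 for QImode, bytes 2-3 for HImode, bytes 0-3 for SI/SF,
	 bytes 0-7 for DI/DF) across the whole vector.  */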
5205 switch (mode)
5206 {
5207 case V2DImode:
5208 case V2DFmode:
5209 shuf =
5210 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5211 TImode);
5212 break;
5213 case V4SImode:
5214 case V4SFmode:
5215 shuf =
5216 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5217 TImode);
5218 break;
5219 case V8HImode:
5220 shuf =
5221 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5222 TImode);
5223 break;
5224 case V16QImode:
5225 shuf =
5226 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5227 TImode);
5228 break;
5229 default:
5230 abort ();
5231 }
5232 emit_move_insn (reg, shuf);
5233 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5234 }
5235}
5236
5237void
5238spu_builtin_extract (rtx ops[])
5239{
5240 enum machine_mode mode;
5241 rtx rot, from, tmp;
5242
5243 mode = GET_MODE (ops[1]);
5244
5245 if (GET_CODE (ops[2]) == CONST_INT)
5246 {
5247 switch (mode)
5248 {
5249 case V16QImode:
5250 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5251 break;
5252 case V8HImode:
5253 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5254 break;
5255 case V4SFmode:
5256 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5257 break;
5258 case V4SImode:
5259 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5260 break;
5261 case V2DImode:
5262 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5263 break;
5264 case V2DFmode:
5265 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5266 break;
5267 default:
5268 abort ();
5269 }
5270 return;
5271 }
5272
5273 from = spu_gen_subreg (TImode, ops[1]);
5274 rot = gen_reg_rtx (TImode);
5275 tmp = gen_reg_rtx (SImode);
5276
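  /* Turn the element index in ops[2] into a left-rotate byte count that
     lands the selected element in the preferred slot, e.g. index - 3 for
     V16QImode or 4 * index for V4SImode.  */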
5277 switch (mode)
5278 {
5279 case V16QImode:
5280 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5281 break;
5282 case V8HImode:
5283 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5284 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5285 break;
5286 case V4SFmode:
5287 case V4SImode:
5288 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5289 break;
5290 case V2DImode:
5291 case V2DFmode:
5292 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5293 break;
5294 default:
5295 abort ();
5296 }
5297 emit_insn (gen_rotqby_ti (rot, from, tmp));
5298
5299 emit_insn (gen_spu_convert (ops[0], rot));
5300}
5301
5302void
5303spu_builtin_insert (rtx ops[])
5304{
5305 enum machine_mode mode = GET_MODE (ops[0]);
5306 enum machine_mode imode = GET_MODE_INNER (mode);
5307 rtx mask = gen_reg_rtx (TImode);
5308 rtx offset;
5309
5310 if (GET_CODE (ops[3]) == CONST_INT)
5311 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5312 else
5313 {
5314 offset = gen_reg_rtx (SImode);
5315 emit_insn (gen_mulsi3
5316 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5317 }
5318 emit_insn (gen_cpat
5319 (mask, stack_pointer_rtx, offset,
5320 GEN_INT (GET_MODE_SIZE (imode))));
5321 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5322}
5323
5324void
5325spu_builtin_promote (rtx ops[])
5326{
5327 enum machine_mode mode, imode;
5328 rtx rot, from, offset;
5329 HOST_WIDE_INT pos;
5330
5331 mode = GET_MODE (ops[0]);
5332 imode = GET_MODE_INNER (mode);
5333
5334 from = gen_reg_rtx (TImode);
5335 rot = spu_gen_subreg (TImode, ops[0]);
5336
5337 emit_insn (gen_spu_convert (from, ops[1]));
5338
5339 if (GET_CODE (ops[2]) == CONST_INT)
5340 {
5341 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5342 if (GET_MODE_SIZE (imode) < 4)
5343 pos += 4 - GET_MODE_SIZE (imode);
5344 offset = GEN_INT (pos & 15);
5345 }
5346 else
5347 {
5348 offset = gen_reg_rtx (SImode);
5349 switch (mode)
5350 {
5351 case V16QImode:
5352 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5353 break;
5354 case V8HImode:
5355 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5356 emit_insn (gen_addsi3 (offset, offset, offset));
5357 break;
5358 case V4SFmode:
5359 case V4SImode:
5360 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5361 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5362 break;
5363 case V2DImode:
5364 case V2DFmode:
5365 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5366 break;
5367 default:
5368 abort ();
5369 }
5370 }
5371 emit_insn (gen_rotqby_ti (rot, from, offset));
5372}
5373
5374void
5375spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5376{
5377 rtx shuf = gen_reg_rtx (V4SImode);
5378 rtx insn = gen_reg_rtx (V4SImode);
5379 rtx shufc;
5380 rtx insnc;
5381 rtx mem;
5382
5383 fnaddr = force_reg (SImode, fnaddr);
5384 cxt = force_reg (SImode, cxt);
5385
5386 if (TARGET_LARGE_MEM)
5387 {
5388 rtx rotl = gen_reg_rtx (V4SImode);
5389 rtx mask = gen_reg_rtx (V4SImode);
5390 rtx bi = gen_reg_rtx (SImode);
5391 unsigned char shufa[16] = {
5392 2, 3, 0, 1, 18, 19, 16, 17,
5393 0, 1, 2, 3, 16, 17, 18, 19
5394 };
5395 unsigned char insna[16] = {
5396 0x41, 0, 0, 79,
5397 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5398 0x60, 0x80, 0, 79,
5399 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5400 };
5401
5402 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5403 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5404
5405 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4d54df85 5406 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
644459d0 5407 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5408 emit_insn (gen_selb (insn, insnc, rotl, mask));
5409
5410 mem = memory_address (Pmode, tramp);
5411 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5412
5413 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5414 mem = memory_address (Pmode, plus_constant (tramp, 16));
5415 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5416 }
5417 else
5418 {
5419 rtx scxt = gen_reg_rtx (SImode);
5420 rtx sfnaddr = gen_reg_rtx (SImode);
5421 unsigned char insna[16] = {
5422 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5423 0x30, 0, 0, 0,
5424 0, 0, 0, 0,
5425 0, 0, 0, 0
5426 };
5427
5428 shufc = gen_reg_rtx (TImode);
5429 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5430
 5431 /* By or'ing all of cxt with the ila opcode we are assuming cxt
 5432 fits in 18 bits and the last 4 are zeros. This will be true if
 5433 the stack pointer is initialized to 0x3fff0 at program start;
 5434 otherwise the ila instruction will be garbage. */
5435
5436 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5437 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5438 emit_insn (gen_cpat
5439 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5440 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5441 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5442
5443 mem = memory_address (Pmode, tramp);
5444 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5445
5446 }
5447 emit_insn (gen_sync ());
5448}
5449
5450void
5451spu_expand_sign_extend (rtx ops[])
5452{
5453 unsigned char arr[16];
5454 rtx pat = gen_reg_rtx (TImode);
5455 rtx sign, c;
5456 int i, last;
5457 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
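  /* Build a shufb pattern: selector bytes 0x10-0x1f take bytes from SIGN
     (all copies of the sign byte), while the final selectors pick up the
     original value so it ends up in the low-order bytes of the result.  */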
5458 if (GET_MODE (ops[1]) == QImode)
5459 {
5460 sign = gen_reg_rtx (HImode);
5461 emit_insn (gen_extendqihi2 (sign, ops[1]));
5462 for (i = 0; i < 16; i++)
5463 arr[i] = 0x12;
5464 arr[last] = 0x13;
5465 }
5466 else
5467 {
5468 for (i = 0; i < 16; i++)
5469 arr[i] = 0x10;
5470 switch (GET_MODE (ops[1]))
5471 {
5472 case HImode:
5473 sign = gen_reg_rtx (SImode);
5474 emit_insn (gen_extendhisi2 (sign, ops[1]));
5475 arr[last] = 0x03;
5476 arr[last - 1] = 0x02;
5477 break;
5478 case SImode:
5479 sign = gen_reg_rtx (SImode);
5480 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5481 for (i = 0; i < 4; i++)
5482 arr[last - i] = 3 - i;
5483 break;
5484 case DImode:
5485 sign = gen_reg_rtx (SImode);
5486 c = gen_reg_rtx (SImode);
5487 emit_insn (gen_spu_convert (c, ops[1]));
5488 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5489 for (i = 0; i < 8; i++)
5490 arr[last - i] = 7 - i;
5491 break;
5492 default:
5493 abort ();
5494 }
5495 }
5496 emit_move_insn (pat, array_to_constant (TImode, arr));
5497 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5498}
5499
 5500/* Expand vector initialization. If there are any constant parts,
 5501 load the constant parts first. Then load any non-constant parts. */
5502void
5503spu_expand_vector_init (rtx target, rtx vals)
5504{
5505 enum machine_mode mode = GET_MODE (target);
5506 int n_elts = GET_MODE_NUNITS (mode);
5507 int n_var = 0;
5508 bool all_same = true;
790c536c 5509 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
644459d0 5510 int i;
5511
5512 first = XVECEXP (vals, 0, 0);
5513 for (i = 0; i < n_elts; ++i)
5514 {
5515 x = XVECEXP (vals, 0, i);
e442af0b 5516 if (!(CONST_INT_P (x)
5517 || GET_CODE (x) == CONST_DOUBLE
5518 || GET_CODE (x) == CONST_FIXED))
644459d0 5519 ++n_var;
5520 else
5521 {
5522 if (first_constant == NULL_RTX)
5523 first_constant = x;
5524 }
5525 if (i > 0 && !rtx_equal_p (x, first))
5526 all_same = false;
5527 }
5528
5529 /* if all elements are the same, use splats to repeat elements */
5530 if (all_same)
5531 {
5532 if (!CONSTANT_P (first)
5533 && !register_operand (first, GET_MODE (x)))
5534 first = force_reg (GET_MODE (first), first);
5535 emit_insn (gen_spu_splats (target, first));
5536 return;
5537 }
5538
5539 /* load constant parts */
5540 if (n_var != n_elts)
5541 {
5542 if (n_var == 0)
5543 {
5544 emit_move_insn (target,
5545 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5546 }
5547 else
5548 {
5549 rtx constant_parts_rtx = copy_rtx (vals);
5550
5551 gcc_assert (first_constant != NULL_RTX);
 5552 /* Fill empty slots with the first constant; this increases
 5553 our chance of using splats in the recursive call below. */
5554 for (i = 0; i < n_elts; ++i)
e442af0b 5555 {
5556 x = XVECEXP (constant_parts_rtx, 0, i);
5557 if (!(CONST_INT_P (x)
5558 || GET_CODE (x) == CONST_DOUBLE
5559 || GET_CODE (x) == CONST_FIXED))
5560 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5561 }
644459d0 5562
5563 spu_expand_vector_init (target, constant_parts_rtx);
5564 }
5565 }
5566
 5567  /* Load the variable parts.  */
5568 if (n_var != 0)
5569 {
5570 rtx insert_operands[4];
5571
5572 insert_operands[0] = target;
5573 insert_operands[2] = target;
5574 for (i = 0; i < n_elts; ++i)
5575 {
5576 x = XVECEXP (vals, 0, i);
e442af0b 5577 if (!(CONST_INT_P (x)
5578 || GET_CODE (x) == CONST_DOUBLE
5579 || GET_CODE (x) == CONST_FIXED))
644459d0 5580 {
5581 if (!register_operand (x, GET_MODE (x)))
5582 x = force_reg (GET_MODE (x), x);
5583 insert_operands[1] = x;
5584 insert_operands[3] = GEN_INT (i);
5585 spu_builtin_insert (insert_operands);
5586 }
5587 }
5588 }
5589}
6352eedf 5590
5474166e 5591/* Return the insn index of the vector compare instruction for the given
 5592   CODE, DEST_MODE and OP_MODE.  Return -1 if no valid insn is available.  */
5593
5594static int
5595get_vec_cmp_insn (enum rtx_code code,
5596 enum machine_mode dest_mode,
5597 enum machine_mode op_mode)
5598
5599{
5600 switch (code)
5601 {
5602 case EQ:
5603 if (dest_mode == V16QImode && op_mode == V16QImode)
5604 return CODE_FOR_ceq_v16qi;
5605 if (dest_mode == V8HImode && op_mode == V8HImode)
5606 return CODE_FOR_ceq_v8hi;
5607 if (dest_mode == V4SImode && op_mode == V4SImode)
5608 return CODE_FOR_ceq_v4si;
5609 if (dest_mode == V4SImode && op_mode == V4SFmode)
5610 return CODE_FOR_ceq_v4sf;
5611 if (dest_mode == V2DImode && op_mode == V2DFmode)
5612 return CODE_FOR_ceq_v2df;
5613 break;
5614 case GT:
5615 if (dest_mode == V16QImode && op_mode == V16QImode)
5616 return CODE_FOR_cgt_v16qi;
5617 if (dest_mode == V8HImode && op_mode == V8HImode)
5618 return CODE_FOR_cgt_v8hi;
5619 if (dest_mode == V4SImode && op_mode == V4SImode)
5620 return CODE_FOR_cgt_v4si;
5621 if (dest_mode == V4SImode && op_mode == V4SFmode)
5622 return CODE_FOR_cgt_v4sf;
5623 if (dest_mode == V2DImode && op_mode == V2DFmode)
5624 return CODE_FOR_cgt_v2df;
5625 break;
5626 case GTU:
5627 if (dest_mode == V16QImode && op_mode == V16QImode)
5628 return CODE_FOR_clgt_v16qi;
5629 if (dest_mode == V8HImode && op_mode == V8HImode)
5630 return CODE_FOR_clgt_v8hi;
5631 if (dest_mode == V4SImode && op_mode == V4SImode)
5632 return CODE_FOR_clgt_v4si;
5633 break;
5634 default:
5635 break;
5636 }
5637 return -1;
5638}
5639
5640/* Emit a vector compare for operands OP0 and OP1 using code RCODE.
 5641   DMODE is the expected destination mode.  This is a recursive function.  */
5642
5643static rtx
5644spu_emit_vector_compare (enum rtx_code rcode,
5645 rtx op0, rtx op1,
5646 enum machine_mode dmode)
5647{
5648 int vec_cmp_insn;
5649 rtx mask;
5650 enum machine_mode dest_mode;
5651 enum machine_mode op_mode = GET_MODE (op1);
5652
5653 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5654
 5655  /* Single-precision floating-point vector compare instructions use a
 5656     V4SImode destination, and double-precision ones use a V2DImode
 5657     destination.  Move the result to the appropriate mode later.  */
5658 if (dmode == V4SFmode)
5659 dest_mode = V4SImode;
5660 else if (dmode == V2DFmode)
5661 dest_mode = V2DImode;
5662 else
5663 dest_mode = dmode;
5664
5665 mask = gen_reg_rtx (dest_mode);
5666 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5667
5668 if (vec_cmp_insn == -1)
5669 {
5670 bool swap_operands = false;
5671 bool try_again = false;
5672 switch (rcode)
5673 {
5674 case LT:
5675 rcode = GT;
5676 swap_operands = true;
5677 try_again = true;
5678 break;
5679 case LTU:
5680 rcode = GTU;
5681 swap_operands = true;
5682 try_again = true;
5683 break;
5684 case NE:
5685 /* Treat A != B as ~(A==B). */
5686 {
5687 enum insn_code nor_code;
5688 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
99bdde56 5689 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5474166e 5690 gcc_assert (nor_code != CODE_FOR_nothing);
5691 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5692 if (dmode != dest_mode)
5693 {
5694 rtx temp = gen_reg_rtx (dest_mode);
5695 convert_move (temp, mask, 0);
5696 return temp;
5697 }
5698 return mask;
5699 }
5700 break;
5701 case GE:
5702 case GEU:
5703 case LE:
5704 case LEU:
5705 /* Try GT/GTU/LT/LTU OR EQ */
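	  /* For example, A >= B is computed below as (A > B) | (A == B).  */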
5706 {
5707 rtx c_rtx, eq_rtx;
5708 enum insn_code ior_code;
5709 enum rtx_code new_code;
5710
5711 switch (rcode)
5712 {
5713 case GE: new_code = GT; break;
5714 case GEU: new_code = GTU; break;
5715 case LE: new_code = LT; break;
5716 case LEU: new_code = LTU; break;
5717 default:
5718 gcc_unreachable ();
5719 }
5720
5721 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5722 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5723
99bdde56 5724 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5474166e 5725 gcc_assert (ior_code != CODE_FOR_nothing);
5726 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5727 if (dmode != dest_mode)
5728 {
5729 rtx temp = gen_reg_rtx (dest_mode);
5730 convert_move (temp, mask, 0);
5731 return temp;
5732 }
5733 return mask;
5734 }
5735 break;
5736 default:
5737 gcc_unreachable ();
5738 }
5739
5740 /* You only get two chances. */
5741 if (try_again)
5742 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5743
5744 gcc_assert (vec_cmp_insn != -1);
5745
5746 if (swap_operands)
5747 {
5748 rtx tmp;
5749 tmp = op0;
5750 op0 = op1;
5751 op1 = tmp;
5752 }
5753 }
5754
5755 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5756 if (dmode != dest_mode)
5757 {
5758 rtx temp = gen_reg_rtx (dest_mode);
5759 convert_move (temp, mask, 0);
5760 return temp;
5761 }
5762 return mask;
5763}
5764
5765
5766/* Emit a vector conditional expression.
 5767   DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
 5768   CC_OP0 and CC_OP1 are the two operands of the relational operation COND.  */
5769
5770int
5771spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5772 rtx cond, rtx cc_op0, rtx cc_op1)
5773{
5774 enum machine_mode dest_mode = GET_MODE (dest);
5775 enum rtx_code rcode = GET_CODE (cond);
5776 rtx mask;
5777
5778 /* Get the vector mask for the given relational operations. */
5779 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5780
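  /* A clarifying note (assuming the usual SPU selb bit-select semantics):
     selb takes bits of OP2 where the mask bit is 0 and bits of OP1 where
     it is 1, so elements for which the comparison holds come from OP1.  */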
 5781  emit_insn (gen_selb (dest, op2, op1, mask));
5782
5783 return 1;
5784}
5785
6352eedf 5786static rtx
5787spu_force_reg (enum machine_mode mode, rtx op)
5788{
5789 rtx x, r;
5790 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5791 {
5792 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5793 || GET_MODE (op) == BLKmode)
5794 return force_reg (mode, convert_to_mode (mode, op, 0));
5795 abort ();
5796 }
5797
5798 r = force_reg (GET_MODE (op), op);
5799 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5800 {
5801 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5802 if (x)
5803 return x;
5804 }
5805
5806 x = gen_reg_rtx (mode);
5807 emit_insn (gen_spu_convert (x, r));
5808 return x;
5809}
5810
5811static void
5812spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5813{
5814 HOST_WIDE_INT v = 0;
5815 int lsbits;
5816 /* Check the range of immediate operands. */
5817 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5818 {
5819 int range = p - SPU_BTI_7;
5df189be 5820
5821 if (!CONSTANT_P (op))
6352eedf 5822 error ("%s expects an integer literal in the range [%d, %d].",
5823 d->name,
5824 spu_builtin_range[range].low, spu_builtin_range[range].high);
5825
5826 if (GET_CODE (op) == CONST
5827 && (GET_CODE (XEXP (op, 0)) == PLUS
5828 || GET_CODE (XEXP (op, 0)) == MINUS))
5829 {
5830 v = INTVAL (XEXP (XEXP (op, 0), 1));
5831 op = XEXP (XEXP (op, 0), 0);
5832 }
5833 else if (GET_CODE (op) == CONST_INT)
5834 v = INTVAL (op);
5df189be 5835 else if (GET_CODE (op) == CONST_VECTOR
5836 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5837 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5838
 5839      /* The default for v is 0, which is valid in every range.  */
5840 if (v < spu_builtin_range[range].low
5841 || v > spu_builtin_range[range].high)
5842 error ("%s expects an integer literal in the range [%d, %d]. ("
5843 HOST_WIDE_INT_PRINT_DEC ")",
5844 d->name,
5845 spu_builtin_range[range].low, spu_builtin_range[range].high,
5846 v);
6352eedf 5847
5848 switch (p)
5849 {
5850 case SPU_BTI_S10_4:
5851 lsbits = 4;
5852 break;
5853 case SPU_BTI_U16_2:
 5854	  /* This is only used in lqa and stqa.  Even though the insns
5855 encode 16 bits of the address (all but the 2 least
5856 significant), only 14 bits are used because it is masked to
5857 be 16 byte aligned. */
5858 lsbits = 4;
5859 break;
5860 case SPU_BTI_S16_2:
5861 /* This is used for lqr and stqr. */
5862 lsbits = 2;
5863 break;
5864 default:
5865 lsbits = 0;
5866 }
5867
5868 if (GET_CODE (op) == LABEL_REF
5869 || (GET_CODE (op) == SYMBOL_REF
5870 && SYMBOL_REF_FUNCTION_P (op))
5df189be 5871 || (v & ((1 << lsbits) - 1)) != 0)
6352eedf 5872 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5873 d->name);
5874 }
5875}
5876
5877
70ca06f8 5878static int
5df189be 5879expand_builtin_args (struct spu_builtin_description *d, tree exp,
6352eedf 5880 rtx target, rtx ops[])
5881{
5882 enum insn_code icode = d->icode;
5df189be 5883 int i = 0, a;
6352eedf 5884
 5885  /* Expand the arguments into RTL.  */
5886
5887 if (d->parm[0] != SPU_BTI_VOID)
5888 ops[i++] = target;
5889
70ca06f8 5890 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6352eedf 5891 {
5df189be 5892 tree arg = CALL_EXPR_ARG (exp, a);
6352eedf 5893 if (arg == 0)
5894 abort ();
5895 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
6352eedf 5896 }
70ca06f8 5897
5898 /* The insn pattern may have additional operands (SCRATCH).
5899 Return the number of actual non-SCRATCH operands. */
5900 gcc_assert (i <= insn_data[icode].n_operands);
5901 return i;
6352eedf 5902}
5903
5904static rtx
5905spu_expand_builtin_1 (struct spu_builtin_description *d,
5df189be 5906 tree exp, rtx target)
6352eedf 5907{
5908 rtx pat;
5909 rtx ops[8];
5910 enum insn_code icode = d->icode;
5911 enum machine_mode mode, tmode;
5912 int i, p;
70ca06f8 5913 int n_operands;
6352eedf 5914 tree return_type;
5915
5916 /* Set up ops[] with values from arglist. */
70ca06f8 5917 n_operands = expand_builtin_args (d, exp, target, ops);
6352eedf 5918
5919 /* Handle the target operand which must be operand 0. */
5920 i = 0;
5921 if (d->parm[0] != SPU_BTI_VOID)
5922 {
5923
 5924      /* We prefer the mode specified for the match_operand; otherwise
 5925         use the mode from the builtin function prototype.  */
5926 tmode = insn_data[d->icode].operand[0].mode;
5927 if (tmode == VOIDmode)
5928 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5929
 5930      /* Try to use TARGET, because not using it can lead to extra copies,
 5931         and when all of the registers are in use, extra copies lead
 5932         to extra spills.  */
5933 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5934 ops[0] = target;
5935 else
5936 target = ops[0] = gen_reg_rtx (tmode);
5937
5938 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5939 abort ();
5940
5941 i++;
5942 }
5943
a76866d3 5944 if (d->fcode == SPU_MASK_FOR_LOAD)
5945 {
5946 enum machine_mode mode = insn_data[icode].operand[1].mode;
5947 tree arg;
5948 rtx addr, op, pat;
5949
 5950      /* Get the address.  */
5df189be 5951 arg = CALL_EXPR_ARG (exp, 0);
a76866d3 5952 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5953 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5954 addr = memory_address (mode, op);
5955
 5956      /* Negate the address.  */
5957 op = gen_reg_rtx (GET_MODE (addr));
5958 emit_insn (gen_rtx_SET (VOIDmode, op,
5959 gen_rtx_NEG (GET_MODE (addr), addr)));
5960 op = gen_rtx_MEM (mode, op);
5961
5962 pat = GEN_FCN (icode) (target, op);
5963 if (!pat)
5964 return 0;
5965 emit_insn (pat);
5966 return target;
5967 }
5968
6352eedf 5969  /* Ignore align_hint, but still expand its args in case they have
 5970     side effects.  */
5971 if (icode == CODE_FOR_spu_align_hint)
5972 return 0;
5973
5974 /* Handle the rest of the operands. */
70ca06f8 5975 for (p = 1; i < n_operands; i++, p++)
6352eedf 5976 {
5977 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5978 mode = insn_data[d->icode].operand[i].mode;
5979 else
5980 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5981
 5982      /* MODE can be VOIDmode here for labels.  */
5983
5984 /* For specific intrinsics with an immediate operand, e.g.,
5985 si_ai(), we sometimes need to convert the scalar argument to a
5986 vector argument by splatting the scalar. */
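      /* An illustrative sketch (assuming the insn's operand mode is
	 V4SImode): a call such as si_ai (v, 3) arrives here with the
	 CONST_INT 3, which is splatted into the constant {3, 3, 3, 3}
	 below before the insn predicate is checked.  */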
5987 if (VECTOR_MODE_P (mode)
5988 && (GET_CODE (ops[i]) == CONST_INT
5989 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
3b442530 5990 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6352eedf 5991 {
5992 if (GET_CODE (ops[i]) == CONST_INT)
5993 ops[i] = spu_const (mode, INTVAL (ops[i]));
5994 else
5995 {
5996 rtx reg = gen_reg_rtx (mode);
5997 enum machine_mode imode = GET_MODE_INNER (mode);
5998 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5999 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6000 if (imode != GET_MODE (ops[i]))
6001 ops[i] = convert_to_mode (imode, ops[i],
6002 TYPE_UNSIGNED (spu_builtin_types
6003 [d->parm[i]]));
6004 emit_insn (gen_spu_splats (reg, ops[i]));
6005 ops[i] = reg;
6006 }
6007 }
6008
5df189be 6009 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6010
6352eedf 6011 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6012 ops[i] = spu_force_reg (mode, ops[i]);
6352eedf 6013 }
6014
70ca06f8 6015 switch (n_operands)
6352eedf 6016 {
6017 case 0:
6018 pat = GEN_FCN (icode) (0);
6019 break;
6020 case 1:
6021 pat = GEN_FCN (icode) (ops[0]);
6022 break;
6023 case 2:
6024 pat = GEN_FCN (icode) (ops[0], ops[1]);
6025 break;
6026 case 3:
6027 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6028 break;
6029 case 4:
6030 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6031 break;
6032 case 5:
6033 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6034 break;
6035 case 6:
6036 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6037 break;
6038 default:
6039 abort ();
6040 }
6041
6042 if (!pat)
6043 abort ();
6044
6045 if (d->type == B_CALL || d->type == B_BISLED)
6046 emit_call_insn (pat);
6047 else if (d->type == B_JUMP)
6048 {
6049 emit_jump_insn (pat);
6050 emit_barrier ();
6051 }
6052 else
6053 emit_insn (pat);
6054
6055 return_type = spu_builtin_types[d->parm[0]];
6056 if (d->parm[0] != SPU_BTI_VOID
6057 && GET_MODE (target) != TYPE_MODE (return_type))
6058 {
 6059      /* TARGET is the return value.  It should always have the mode of
 6060         the builtin function prototype.  */
6061 target = spu_force_reg (TYPE_MODE (return_type), target);
6062 }
6063
6064 return target;
6065}
6066
6067rtx
6068spu_expand_builtin (tree exp,
6069 rtx target,
6070 rtx subtarget ATTRIBUTE_UNUSED,
6071 enum machine_mode mode ATTRIBUTE_UNUSED,
6072 int ignore ATTRIBUTE_UNUSED)
6073{
5df189be 6074 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6352eedf 6075 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6352eedf 6076 struct spu_builtin_description *d;
6077
6078 if (fcode < NUM_SPU_BUILTINS)
6079 {
6080 d = &spu_builtins[fcode];
6081
5df189be 6082 return spu_expand_builtin_1 (d, exp, target);
6352eedf 6083 }
6084 abort ();
6085}
6086
e99f512d 6087/* Implement targetm.vectorize.builtin_mul_widen_even. */
6088static tree
6089spu_builtin_mul_widen_even (tree type)
6090{
e99f512d 6091 switch (TYPE_MODE (type))
6092 {
6093 case V8HImode:
6094 if (TYPE_UNSIGNED (type))
6095 return spu_builtins[SPU_MULE_0].fndecl;
6096 else
6097 return spu_builtins[SPU_MULE_1].fndecl;
6098 break;
6099 default:
6100 return NULL_TREE;
6101 }
6102}
6103
6104/* Implement targetm.vectorize.builtin_mul_widen_odd. */
6105static tree
6106spu_builtin_mul_widen_odd (tree type)
6107{
6108 switch (TYPE_MODE (type))
6109 {
6110 case V8HImode:
6111 if (TYPE_UNSIGNED (type))
6112 return spu_builtins[SPU_MULO_1].fndecl;
6113 else
6114 return spu_builtins[SPU_MULO_0].fndecl;
6115 break;
6116 default:
6117 return NULL_TREE;
6118 }
6119}
6120
a76866d3 6121/* Implement targetm.vectorize.builtin_mask_for_load. */
6122static tree
6123spu_builtin_mask_for_load (void)
6124{
6125 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6126 gcc_assert (d);
6127 return d->fndecl;
6128}
5df189be 6129
a28df51d 6130/* Implement targetm.vectorize.builtin_vectorization_cost. */
6131static int
6132spu_builtin_vectorization_cost (bool runtime_test)
6133{
 6134  /* If the branch of the runtime test is taken, i.e. the vectorized
 6135     version is skipped, this incurs a misprediction cost (because the
 6136     vectorized version is expected to be the fall-through).  So we subtract
becfaa62 6137     the latency of a mispredicted branch from the costs that are incurred
a28df51d 6138     when the vectorized version is executed.  */
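  /* Added note (an estimate, not from the original comments): the magic
     number 19 below roughly matches the SPU branch-miss penalty in cycles.  */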
6139 if (runtime_test)
6140 return -19;
6141 else
6142 return 0;
6143}
6144
0e87db76 6145/* Return true iff a data reference of TYPE can reach vector alignment (16)
 6146   after applying N iterations.  This routine does not determine how many
 6147   iterations are required to reach the desired alignment.  */
6148
6149static bool
a9f1838b 6150spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
0e87db76 6151{
6152 if (is_packed)
6153 return false;
6154
6155 /* All other types are naturally aligned. */
6156 return true;
6157}
6158
a0515226 6159/* Implement targetm.vectorize.builtin_vec_perm. */
6160tree
6161spu_builtin_vec_perm (tree type, tree *mask_element_type)
6162{
6163 struct spu_builtin_description *d;
6164
6165 *mask_element_type = unsigned_char_type_node;
6166
6167 switch (TYPE_MODE (type))
6168 {
6169 case V16QImode:
6170 if (TYPE_UNSIGNED (type))
6171 d = &spu_builtins[SPU_SHUFFLE_0];
6172 else
6173 d = &spu_builtins[SPU_SHUFFLE_1];
6174 break;
6175
6176 case V8HImode:
6177 if (TYPE_UNSIGNED (type))
6178 d = &spu_builtins[SPU_SHUFFLE_2];
6179 else
6180 d = &spu_builtins[SPU_SHUFFLE_3];
6181 break;
6182
6183 case V4SImode:
6184 if (TYPE_UNSIGNED (type))
6185 d = &spu_builtins[SPU_SHUFFLE_4];
6186 else
6187 d = &spu_builtins[SPU_SHUFFLE_5];
6188 break;
6189
6190 case V2DImode:
6191 if (TYPE_UNSIGNED (type))
6192 d = &spu_builtins[SPU_SHUFFLE_6];
6193 else
6194 d = &spu_builtins[SPU_SHUFFLE_7];
6195 break;
6196
6197 case V4SFmode:
6198 d = &spu_builtins[SPU_SHUFFLE_8];
6199 break;
6200
6201 case V2DFmode:
6202 d = &spu_builtins[SPU_SHUFFLE_9];
6203 break;
6204
6205 default:
6206 return NULL_TREE;
6207 }
6208
6209 gcc_assert (d);
6210 return d->fndecl;
6211}
6212
d52fd16a 6213/* Count the total number of instructions in each pipe and return the
 6214   maximum, which is used as the Minimum Iteration Interval (MII)
 6215   in the modulo scheduler.  get_pipe () returns -2, -1, 0, or 1;
 6216   -2 means the instruction can go in either pipe0 or pipe1.  */
6217static int
6218spu_sms_res_mii (struct ddg *g)
6219{
6220 int i;
6221 unsigned t[4] = {0, 0, 0, 0};
6222
6223 for (i = 0; i < g->num_nodes; i++)
6224 {
6225 rtx insn = g->nodes[i].insn;
6226 int p = get_pipe (insn) + 2;
6227
6228 assert (p >= 0);
6229 assert (p < 4);
6230
6231 t[p]++;
6232 if (dump_file && INSN_P (insn))
6233 fprintf (dump_file, "i%d %s %d %d\n",
6234 INSN_UID (insn),
6235 insn_data[INSN_CODE(insn)].name,
6236 p, t[p]);
6237 }
6238 if (dump_file)
6239 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6240
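  /* Added note: instructions restricted to a single pipe give a lower bound
     on the MII, while instructions that can go in either pipe are assumed
     to split evenly between the two, hence the rounded-up half of the
     combined count.  */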
6241 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6242}
6243
6244
5df189be 6245void
6246spu_init_expanders (void)
6247{
 6248  /* The hard frame pointer register is only 128-bit aligned when
 6249     frame_pointer_needed is true.  We don't know that until we're
 6250     expanding the prologue.  */
6251 if (cfun)
6252 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
ea32e033 6253}
6254
6255static enum machine_mode
6256spu_libgcc_cmp_return_mode (void)
6257{
6258
6259/* For SPU, word mode is TImode, so it is better to use SImode
 6260   for compare returns.  */
6261 return SImode;
6262}
6263
6264static enum machine_mode
6265spu_libgcc_shift_count_mode (void)
6266{
6267/* For SPU, word mode is TImode, so it is better to use SImode
 6268   for shift counts.  */
6269 return SImode;
6270}
5a976006 6271
6272/* An early place to adjust some flags after GCC has finished processing
6273 * them. */
6274static void
6275asm_file_start (void)
6276{
6277 /* Variable tracking should be run after all optimizations which
6278 change order of insns. It also needs a valid CFG. */
6279 spu_flag_var_tracking = flag_var_tracking;
6280 flag_var_tracking = 0;
6281
6282 default_file_start ();
6283}
6284
a08dfd55 6285/* Implement targetm.section_type_flags. */
6286static unsigned int
6287spu_section_type_flags (tree decl, const char *name, int reloc)
6288{
6289 /* .toe needs to have type @nobits. */
6290 if (strcmp (name, ".toe") == 0)
6291 return SECTION_BSS;
6292 return default_section_type_flags (decl, name, reloc);
6293}
6294